1   package eu.fbk.dh.tint.readability;
2   
3   import edu.stanford.nlp.ling.CoreAnnotation;
4   import edu.stanford.nlp.ling.CoreAnnotations;
5   import edu.stanford.nlp.ling.CoreLabel;
6   import edu.stanford.nlp.pipeline.Annotation;
7   import edu.stanford.nlp.pipeline.Annotator;
8   import edu.stanford.nlp.util.ArraySet;
9   import edu.stanford.nlp.util.CoreMap;
10  import eu.fbk.dh.tint.readability.en.EnglishStandardReadability;
11  import eu.fbk.dh.tint.readability.es.SpanishStandardReadability;
12  import eu.fbk.dh.tint.readability.gl.GalicianStandardReadability;
13  import eu.fbk.dh.tint.readability.it.ItalianStandardReadability;
14  import eu.fbk.utils.core.PropertiesUtils;
15  import org.slf4j.Logger;
16  import org.slf4j.LoggerFactory;
17  
18  import java.lang.reflect.Constructor;
19  import java.util.*;
20  
21  /**
22   * Created by alessio on 21/09/16.
23   */
24  
25  public class ReadabilityAnnotator implements Annotator {
26  
27      private static final Logger LOGGER = LoggerFactory.getLogger(ReadabilityAnnotator.class);
28      public static Integer DEFAULT_MAX_SENTENCE_LENGTH = 25;
29  
30      private String language;
31      private String className;
32      private int maxSentenceLength;
33  
34      private Properties globalProperties;
35      private Properties localProperties;
36  
37      public ReadabilityAnnotator(String annotatorName, Properties props) {
38          globalProperties = props;
39          localProperties = PropertiesUtils.dotConvertedProperties(props, annotatorName);
40  
41          language = globalProperties.getProperty(annotatorName + ".language");
42          className = globalProperties.getProperty(annotatorName + ".className");
43          maxSentenceLength = PropertiesUtils
44                  .getInteger(localProperties.getProperty("maxSentenceLength"), DEFAULT_MAX_SENTENCE_LENGTH);
45      }
46  
47      /**
48       * Given an Annotation, perform a task on this Annotation.
49       *
50       * @param annotation
51       */
52      @Override public void annotate(Annotation annotation) {
53  
54          Readability readability = null;
55  
56          if (className != null) {
57              try {
58                  Class<? extends Readability> obj = (Class<? extends Readability>) Class.forName(className);
59                  Constructor<? extends Readability> constructor = obj.getConstructor(Properties.class, Properties.class, Annotation.class);
60                  readability = constructor.newInstance(globalProperties, localProperties, annotation);
61              } catch (Exception e) {
62                  LOGGER.error(e.getMessage());
63              }
64          }
65  
66          if (readability == null) {
67              if (language == null) {
68                  LOGGER.warn("Language variable is not defined, readability will be empty");
69                  return;
70              }
71  
72              switch (language) {
73              case "it":
74                  readability = new ItalianStandardReadability(globalProperties, localProperties, annotation);
75                  break;
76              case "es":
77                  readability = new SpanishStandardReadability(globalProperties, localProperties, annotation);
78                  break;
79              case "en":
80                  readability = new EnglishStandardReadability(globalProperties, localProperties, annotation);
81                  break;
82              case "gl":
83                  readability = new GalicianStandardReadability(globalProperties, localProperties, annotation);
84                  break;
85  //        default:
86  //            readability = new EnglishReadability();
87              }
88          }
89  
90          if (readability == null) {
91              return;
92          }
93  
94          List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
95          int tokenCount = 0;
96          readability.setSentenceCount(sentences.size());
97          for (CoreMap sentence : sentences) {
98              int sentenceID = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
99              int wordsNow = readability.getWordCount();
100             for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
101                 readability.addWord(token);
102                 tokenCount++;
103             }
104             int words = readability.getWordCount() - wordsNow;
105             if (words > maxSentenceLength) {
106                 readability.addTooLongSentence(sentenceID);
107             }
108         }
109         readability.setTokenCount(tokenCount);
110 
111         readability.finalizeReadability();
112 
113         annotation.set(ReadabilityAnnotations.ReadabilityAnnotation.class, readability);
114     }
115 
116     /**
117      * Returns a set of requirements for which tasks this annotator can
118      * provide.  For example, the POS annotator will return "pos".
119      */
120     @Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
121         return Collections.singleton(ReadabilityAnnotations.ReadabilityAnnotation.class);
122     }
123 
124     /**
125      * Returns the set of tasks which this annotator requires in order
126      * to perform.  For example, the POS annotator will return
127      * "tokenize", "ssplit".
128      */
129     @Override public Set<Class<? extends CoreAnnotation>> requires() {
130         return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
131                 CoreAnnotations.PartOfSpeechAnnotation.class,
132                 CoreAnnotations.TokensAnnotation.class,
133                 CoreAnnotations.LemmaAnnotation.class,
134                 CoreAnnotations.SentencesAnnotation.class
135         )));
136     }
137 }