1   package eu.fbk.dh.tint.digimorph.annotator;
2   
3   import edu.stanford.nlp.ling.CoreAnnotation;
4   import edu.stanford.nlp.ling.CoreAnnotations;
5   import edu.stanford.nlp.ling.CoreLabel;
6   import edu.stanford.nlp.pipeline.Annotation;
7   import edu.stanford.nlp.pipeline.Annotator;
8   import edu.stanford.nlp.util.ArraySet;
9   import edu.stanford.nlp.util.CoreMap;
10  
11  import java.util.*;
12  
13  /**
14   * Created by giovannimoretti on 15/02/17.
15   */
16  public class DigiCompMorphAnnotator implements Annotator {
17  
18      @Override
19      public void annotate(Annotation annotation) {
20          if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
21              for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
22                  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
23                  for (CoreLabel c : tokens) {
24                      String[] morph_fatures = c.get(DigiMorphAnnotations.MorphoAnnotation.class).split(" ");
25                      String lemma = c.get(CoreAnnotations.LemmaAnnotation.class);
26                      if (morph_fatures.length > 1) {
27                          List<String> comps = new ArrayList<>();
28                          for (String m : morph_fatures) {
29                              if (m.startsWith(lemma + "+") || m.startsWith(lemma + "~")) {
30                                  comps.add(m);
31                              }
32                          }
33                          c.set(DigiMorphAnnotations.MorphoCompAnnotation.class, comps);
34                      } else {
35  
36                          if (morph_fatures[0].startsWith(lemma + "+") || morph_fatures[0].startsWith(lemma + "~")) {
37                              c.set(DigiMorphAnnotations.MorphoCompAnnotation.class,
38                                      new ArrayList<String>(Arrays.asList(morph_fatures[0])));
39                          }
40                      }
41                  }
42              }
43          }
44      }
45  
46      /**
47       * Returns a set of requirements for which tasks this annotator can
48       * provide.  For example, the POS annotator will return "pos".
49       */
50      @Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
51          return Collections.singleton(DigiMorphAnnotations.MorphoCompAnnotation.class);
52      }
53  
54      /**
55       * Returns the set of tasks which this annotator requires in order
56       * to perform.  For example, the POS annotator will return
57       * "tokenize", "ssplit".
58       */
59      @Override public Set<Class<? extends CoreAnnotation>> requires() {
60          return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
61                  CoreAnnotations.LemmaAnnotation.class,
62                  DigiMorphAnnotations.MorphoAnnotation.class
63          )));
64      }
65  }