1   package eu.fbk.dh.tint.verb;
2   
3   import edu.stanford.nlp.ling.CoreAnnotation;
4   import edu.stanford.nlp.ling.CoreAnnotations;
5   import edu.stanford.nlp.ling.CoreLabel;
6   import edu.stanford.nlp.pipeline.Annotation;
7   import edu.stanford.nlp.pipeline.Annotator;
8   import edu.stanford.nlp.util.ArraySet;
9   import edu.stanford.nlp.util.CoreMap;
10  import eu.fbk.utils.core.PropertiesUtils;
11  
12  import java.util.*;
13  
14  /**
15   * Created by alessio on 24/08/16.
16   */
17  
18  public class VerbAnnotator implements Annotator {
19  
20      // todo: try to deal with verb phrases ("cercare di fare", "provare a fare", etc.)
21  
22  //    static Set<String> noWords = new HashSet<>();
23  //    private boolean preceededByNot;
24  
25      private static final boolean DEFAULT_USE_PREFIX = true;
26      private static final boolean DEFAULT_MODAL_IS_PREFIX = true;
27      private static final boolean DEFAULT_AUX_IS_PREFIX = true;
28      private static final String DEFAULT_SKIP_TAGS = "B";
29      private static final String DEFAULT_VERB_TAGS = "V";
30      private static final String DEFAULT_AUX_TAGS = "VA";
31      private static final String DEFAULT_MODAL_TAGS = "VM";
32  
33      private boolean usePrefix, modalUsePrefix, auxUsePrefix;
34      private List<String> skipTags, verbTags, modalTags, auxTags;
35      private VerbModel model;
36  
37      public VerbAnnotator(String annotatorName, Properties prop) {
38          usePrefix = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".use_prefix"), DEFAULT_USE_PREFIX);
39          auxUsePrefix = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".aux_is_prefix"), DEFAULT_AUX_IS_PREFIX);
40          modalUsePrefix = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".modal_is_prefix"), DEFAULT_MODAL_IS_PREFIX);
41          String skipTagsText = prop.getProperty(annotatorName + ".skip_tags", DEFAULT_SKIP_TAGS);
42          String verbTagsText = prop.getProperty(annotatorName + ".verb_tags", DEFAULT_VERB_TAGS);
43          String auxTagsText = prop.getProperty(annotatorName + ".aux_tags", DEFAULT_AUX_TAGS);
44          String modalTagsText = prop.getProperty(annotatorName + ".modal_tags", DEFAULT_MODAL_TAGS);
45  
46          // todo: add custom filename
47          model = VerbModel.getInstance();
48  
49          skipTags = new ArrayList<>();
50          verbTags = new ArrayList<>();
51          modalTags = new ArrayList<>();
52          auxTags = new ArrayList<>();
53  
54          splitParts(skipTagsText, skipTags);
55          splitParts(verbTagsText, verbTags);
56          splitParts(modalTagsText, modalTags);
57          splitParts(auxTagsText, auxTags);
58      }
59  
60      static private void splitParts(String text, List<String> tags) {
61          String[] sParts = text.split("\\s*,\\s*");
62          for (String sPart : sParts) {
63              tags.add(sPart);
64          }
65      }
66  
67      @Override public void annotate(Annotation annotation) {
68          if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
69              for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
70  
71                  List<CoreLabel> lastVerb = new ArrayList<>();
72  
73                  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
74                  boolean followedByExMark = tokens.get(tokens.size() - 1).word().equals("!");
75  //                boolean preceededByNot = false;
76  
77                  List<VerbMultiToken> verbs = new ArrayList<>();
78  
79                  for (int i = 0; i < tokens.size(); i++) {
80                      CoreLabel token = tokens.get(i);
81  
82                      String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
83  //                    System.out.println(token);
84  //                    System.out.println(pos);
85  //                    System.out.println();
86  //                    String form = token.word().toLowerCase();
87  //                    if (noWords.contains(form)) {
88  //                        preceededByNot = true;
89  //                    }
90  
91                      if (isSatisfied(pos, verbTags, usePrefix) || isSatisfied(pos, modalTags, modalUsePrefix)) {
92                          lastVerb.add(token);
93                      }
94                      if (isSatisfied(pos, skipTags, usePrefix)) {
95                          continue;
96                      }
97                      if (isSatisfied(pos, auxTags, auxUsePrefix)) {
98                          continue;
99                      }
100 
101                     if (lastVerb.size() > 0) {
102                         addVerbs(lastVerb, verbs, followedByExMark);
103                         lastVerb = new ArrayList<>();
104                     }
105                 }
106 
107                 if (lastVerb.size() > 0) {
108                     addVerbs(lastVerb, verbs, followedByExMark);
109                 }
110 
111                 sentence.set(VerbAnnotations.VerbsAnnotation.class, verbs);
112             }
113         }
114     }
115 
116     private void addVerbs(List<CoreLabel> lastVerb, List<VerbMultiToken> verbs, boolean followedByExMark) {
117         VerbMultiToken multiToken = new VerbMultiToken();
118         for (int i = 0; i < lastVerb.size(); i++) {
119             CoreLabel verb = lastVerb.get(i);
120             boolean last = (i == lastVerb.size() - 1);
121             multiToken.addToken(model, verb, last);
122         }
123         verbs.add(multiToken);
124     }
125 
126     private static boolean isSatisfied(String pos, List<String> tags, boolean usePrefix) {
127         boolean ret = false;
128         pos = pos.toLowerCase();
129 
130         if (usePrefix) {
131             for (String tag : tags) {
132                 tag = tag.toLowerCase();
133                 if (pos.startsWith(tag)) {
134                     ret = true;
135                 }
136             }
137         } else {
138             for (String tag : tags) {
139                 tag = tag.toLowerCase();
140                 if (pos.equals(tag)) {
141                     ret = true;
142                 }
143             }
144         }
145 
146         return ret;
147     }
148 
149     /**
150      * Returns a set of requirements for which tasks this annotator can
151      * provide.  For example, the POS annotator will return "pos".
152      */
153     @Override public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
154         return Collections.singleton(VerbAnnotations.VerbsAnnotation.class);
155     }
156 
157     /**
158      * Returns the set of tasks which this annotator requires in order
159      * to perform.  For example, the POS annotator will return
160      * "tokenize", "ssplit".
161      */
162     @Override public Set<Class<? extends CoreAnnotation>> requires() {
163         return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
164                 CoreAnnotations.PartOfSpeechAnnotation.class,
165                 CoreAnnotations.LemmaAnnotation.class,
166                 CoreAnnotations.TokensAnnotation.class,
167                 CoreAnnotations.SentencesAnnotation.class
168         )));
169     }
170 }