1   package eu.fbk.dh.tint.readability.en;
2   
3   import edu.stanford.nlp.pipeline.Annotation;
4   import org.slf4j.Logger;
5   import org.slf4j.LoggerFactory;
6   
7   import java.util.HashSet;
8   import java.util.Properties;
9   import java.util.Set;
10  import java.util.regex.Matcher;
11  import java.util.regex.Pattern;
12  
13  /**
14   * Created by alessio on 21/09/16.
15   */
16  
17  public class EnglishStandardReadability extends EnglishReadability {
18  
19      private static final Logger LOGGER = LoggerFactory.getLogger(EnglishStandardReadability.class);
20      private static final Pattern startsWithLetter = Pattern.compile("^[a-zA-Z].*");
21      private static final Set<String> immutablePos = new HashSet<>();
22  
23      static {
24          immutablePos.add("POS");
25          immutablePos.add("CC");
26          immutablePos.add("CD");
27          immutablePos.add("PDT");
28          immutablePos.add("TO");
29          immutablePos.add("IN");
30      }
31  
32      @Override protected String getGenericPos(String pos) {
33          if (immutablePos.contains(pos)) {
34              return pos;
35          }
36          if (pos.equals("SYM")) {
37              return "X";
38          }
39          if (pos.equals("MD")) {
40              return "V";
41          }
42  
43          Matcher matcher = startsWithLetter.matcher(pos);
44          if (matcher.find()) {
45              return super.getGenericPos(pos);
46          }
47  
48          return "X";
49      }
50  
51      public EnglishStandardReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
52          super(globalProperties, localProperties, annotation);
53  
54          contentPosList.add("N");
55          contentPosList.add("J");
56          contentPosList.add("V");
57          contentPosList.add("R");
58  
59          simplePosList.add("N");
60          simplePosList.add("V");
61  
62          nonWordPosList.add("X");
63  
64          genericPosDescription.put("J", "Adjective");
65          genericPosDescription.put("CC", "Conjunction");
66          genericPosDescription.put("CD", "Number");
67          genericPosDescription.put("D", "Determiner");
68          genericPosDescription.put("X", "Punctuation");
69          genericPosDescription.put("F", "Foreign word");
70          genericPosDescription.put("IN", "Subordinating (prep. or conj.)");
71          genericPosDescription.put("L", "List item marker");
72          genericPosDescription.put("PDT", "Pre-determiner");
73          genericPosDescription.put("POS", "Possessive");
74          genericPosDescription.put("P", "Pronoun");
75          genericPosDescription.put("R", "Adverb");
76          genericPosDescription.put("N", "Noun");
77          genericPosDescription.put("TO", "To");
78          genericPosDescription.put("U", "Interjection");
79          genericPosDescription.put("V", "Verb");
80          genericPosDescription.put("W", "Wh-stuff");
81  
82      }
83  
84  }