1   package eu.fbk.dh.tint.readability.es;
2   
3   import com.itextpdf.layout.hyphenation.Hyphenator;
4   import edu.stanford.nlp.ling.CoreLabel;
5   import edu.stanford.nlp.pipeline.Annotation;
6   import edu.stanford.nlp.util.CoreMap;
7   import eu.fbk.dh.tint.readability.Readability;
8   import eu.fbk.dh.tint.readability.ReadabilityAnnotations;
9   import eu.fbk.utils.gson.JSONExclude;
10  
11  import java.util.Properties;
12  
13  /**
14   * Created by alessio on 21/09/16.
15   */
16  
17  abstract class SpanishReadability extends Readability {
18  
19      @JSONExclude SpanishReadabilityModel model;
20      @JSONExclude int level1WordSize = 0, level2WordSize = 0, level3WordSize = 0;
21  //
22  //    @JSONExclude StringBuilder buffer = new StringBuilder();
23  //    @JSONExclude int lemmaIndex = 0;
24  //    @JSONExclude HashMap<Integer, Integer> lemmaIndexes = new HashMap<>();
25  //    @JSONExclude HashMap<Integer, Integer> tokenIndexes = new HashMap<>();
26  //    TreeMap<Integer, DescriptionForm> forms = new TreeMap<>();
27  
28      @Override public void finalizeReadability() {
29          super.finalizeReadability();
30  
31  
32          double fleschSzigriszt =
33                  206.835 - (62.3 * getHyphenCount() / getHyphenWordCount()) - (1.0 * getWordCount()
34                          / getSentenceCount());
35          double fernandezHuerta =
36                  206.84 - 0.6 * (100.0 * getHyphenCount() / getHyphenWordCount()) - 1.02 * (100.0 * getSentenceCount()
37                          / getWordCount());
38          labels.put("main", "Flesch-Szigriszt");
39          measures.put("main", fleschSzigriszt);
40          measures.put("fernandez-huerta", fernandezHuerta);
41          measures.put("level1", 100.0 * level1WordSize / getContentWordSize());
42          measures.put("level2", 100.0 * level2WordSize / getContentWordSize());
43          measures.put("level3", 100.0 * level3WordSize / getContentWordSize());
44  //
45  //        String lemmaText = buffer.toString().trim();
46  //        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
47  //
48  //        HashMap<String, GlossarioEntry> glossario = model.getGlossario();
49  //
50  //        List<String> glossarioKeys = new ArrayList<>(glossario.keySet());
51  //        Collections.sort(glossarioKeys, new StringLenComparator());
52  //
53  //        for (String form : glossarioKeys) {
54  //
55  //            int numberOfTokens = form.split("\\s+").length;
56  //            List<Integer> allOccurrences = findAllOccurrences(text, form);
57  //            List<Integer> allLemmaOccurrences = findAllOccurrences(lemmaText, form);
58  //
59  //            for (Integer occurrence : allOccurrences) {
60  //                addDescriptionForm(form, tokenIndexes, occurrence, numberOfTokens, forms, annotation, glossario);
61  //            }
62  //            for (Integer occurrence : allLemmaOccurrences) {
63  //                addDescriptionForm(form, lemmaIndexes, occurrence, numberOfTokens, forms, annotation, glossario);
64  //            }
65  //        }
66  
67      }
68  
69      public SpanishReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
70          super("es", annotation, localProperties);
71          hyphenator = new Hyphenator("es", "es", 1, 1);
72          model = SpanishReadabilityModel.getInstance(globalProperties, localProperties);
73      }
74  
75      @Override public void addingContentWord(CoreLabel token) {
76          super.addingContentWord(token);
77  
78          token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);
79          String lemma = token.lemma();
80          if (model.getLevel3Lemmas().contains(lemma)) {
81              level3WordSize++;
82              token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
83          }
84          if (model.getLevel2Lemmas().contains(lemma)) {
85              level2WordSize++;
86              token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
87          }
88          if (model.getLevel1Lemmas().contains(lemma)) {
89              level1WordSize++;
90              token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
91          }
92  //        System.out.println("Adding content word (lemma): " + lemma);
93  //        System.out.println(model.getLevel1Lemmas().contains(lemma));
94  //        System.out.println(model.getLevel2Lemmas().contains(lemma));
95  //        System.out.println(model.getLevel3Lemmas().contains(lemma));
96  //        System.out.println();
97  
98  //        HashMap<Integer, HashMultimap<String, String>> easyWords = model.getEasyWords();
99  //        String simplePos = getGenericPos(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
100 //        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
101 //
102 //        if (easyWords.get(1).get(simplePos).contains(lemma)) {
103 //            level1WordSize++;
104 //        }
105 //        if (easyWords.get(2).get(simplePos).contains(lemma)) {
106 //            level2WordSize++;
107 //        }
108 //        if (easyWords.get(3).get(simplePos).contains(lemma)) {
109 //            level3WordSize++;
110 //        }
111     }
112 
113     @Override public void addingEasyWord(CoreLabel token) {
114 
115     }
116 
117     @Override public void addingWord(CoreLabel token) {
118         super.addingWord(token);
119     }
120 
121     @Override public void addingToken(CoreLabel token) {
122 //        lemmaIndexes.put(buffer.length(), lemmaIndex);
123 //        tokenIndexes.put(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), lemmaIndex);
124 //        lemmaIndex++;
125 //        buffer.append(token.get(CoreAnnotations.LemmaAnnotation.class)).append(" ");
126     }
127 
128     @Override public void addingSentence(CoreMap sentence) {
129 
130     }
131 }