1   package eu.fbk.dh.tint.readability.es;
2   
3   import org.slf4j.Logger;
4   import org.slf4j.LoggerFactory;
5   
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
12  
13  import static eu.fbk.dh.tint.readability.Readability.getStream;
14  
15  /**
16   * Created by alessio on 26/09/16.
17   */
18  public class SpanishReadabilityModel {
19  
20      private static final Logger LOGGER = LoggerFactory.getLogger(SpanishReadabilityModel.class);
21      private static SpanishReadabilityModel ourInstance = null;
22  
23      private static final int LIMIT_EASY = 500;
24      private static final int LIMIT_MEDIUM = 2500;
25      private static final int LIMIT_HARD = 5000;
26  
27      private Set<String> level1Lemmas = new HashSet<>();
28      private Set<String> level2Lemmas = new HashSet<>();
29      private Set<String> level3Lemmas = new HashSet<>();
30  
31  //    private HashMap<String, GlossarioEntry> glossario = new HashMap<>();
32  //    private HashMap<Integer, HashMultimap<String, String>> easyWords = new HashMap<>();
33  
34      public static SpanishReadabilityModel getInstance(Properties globalProperties, Properties localProperties) {
35          if (ourInstance == null) {
36              String freqLemmaFile = localProperties.getProperty("lemmasFile");
37  
38              LOGGER.info("Loading lemmas");
39              Set<String> level1Lemmas = new HashSet<>();
40              Set<String> level2Lemmas = new HashSet<>();
41              Set<String> level3Lemmas = new HashSet<>();
42  
43              try {
44                  InputStream stream = getStream(freqLemmaFile, "/models/content_words_freq_manualcheck.txt");
45                  BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
46                  String line;
47                  int i = 0;
48                  while ((line = reader.readLine()) != null) {
49                      String[] parts = line.split("\t");
50                      if (parts.length < 2) {
51                          continue;
52                      }
53  
54                      String lemma = parts[0];
55  
56                      boolean done = false;
57                      if (i < LIMIT_EASY) {
58                          level1Lemmas.add(lemma);
59                          done = true;
60                      }
61                      if (i < LIMIT_MEDIUM) {
62                          level2Lemmas.add(lemma);
63                          done = true;
64                      }
65                      if (i < LIMIT_HARD) {
66                          level3Lemmas.add(lemma);
67                          done = true;
68                      }
69  
70                      if (!done) {
71                          break;
72                      }
73  
74                      i++;
75                  }
76                  reader.close();
77              } catch (Exception e) {
78                  LOGGER.warn("Unable to load easyWords file: {}", e.getMessage());
79              }
80  
81              ourInstance = new SpanishReadabilityModel(level1Lemmas, level2Lemmas, level3Lemmas);
82          } else {
83              LOGGER.info("Readability model already loaded");
84          }
85          return ourInstance;
86      }
87  
88      private SpanishReadabilityModel(Set<String> level1Lemmas, Set<String> level2Lemmas,
89              Set<String> level3Lemmas) {
90          this.level1Lemmas = level1Lemmas;
91          this.level2Lemmas = level2Lemmas;
92          this.level3Lemmas = level3Lemmas;
93      }
94  
95      public Set<String> getLevel1Lemmas() {
96          return level1Lemmas;
97      }
98  
99      public Set<String> getLevel2Lemmas() {
100         return level2Lemmas;
101     }
102 
103     public Set<String> getLevel3Lemmas() {
104         return level3Lemmas;
105     }
106 }