1   package eu.fbk.dh.tint.digimorph;
2   
3   import com.google.common.base.Joiner;
4   import org.mapdb.SortedTableMap;
5   
6   import java.util.ArrayList;
7   import java.util.LinkedList;
8   import java.util.List;
9   import java.util.concurrent.Callable;
10  
11  /**
12   * @author Giovanni Moretti at Digital Humanities group at FBK.
13   * @version 0.2a
14   */
15  public class DigiMorph_Analizer implements Callable<List<String>> {
16  
17      // Volume volume = null;
18      private SortedTableMap<String, String> map = null;
19  
20      List<String> tokens;
21  
22      private ArrayList<String> prefix = new ArrayList() {{
23          add("anti");
24          add("ante");
25          add("arci");
26          add("inter");
27          add("super");
28          add("trans");
29          add("ultra");
30          add("tri");
31          add("bi");
32          add("xeno");
33          add("zoo");
34          add("circon");
35          add("circom");
36          add("in");
37      }};
38  
39      private ArrayList<String> suffix = new ArrayList() {{
40          add("li");
41          add("lo");
42          add("la");
43          add("le");
44          add("ci");
45          add("vi");
46          add("ti");
47          add("mi");
48          add("si");
49          add("ne");
50          add("gli");
51          add("glielo");
52          add("gliela");
53          add("gliele");
54          add("gliene");
55      }};
56  
57      //middle suffix
58      private ArrayList<String> middle_suffix = new ArrayList() {{
59          add("ci");
60          add("vi");
61          add("ce");
62          add("me");
63          add("te");
64          add("ve");
65          add("se");
66      }};
67  
68      public DigiMorph_Analizer(List<String> tokens, SortedTableMap<String, String> map) {
69          this.tokens = tokens;
70          this.map = map;
71      }
72  
73      public List<String> call() {
74          List<String> results = new LinkedList<String>();
75          for (String s : this.tokens) {
76              results.add(getMorphology(s));
77          }
78          return results;
79      }
80  
81      public String getMorphology(String token) {
82          String original_token = token;
83          String output = "";
84          String no_prefix_phase = process_token(token);
85          if (no_prefix_phase.length() == 0) {
86              for (String p : prefix) {
87                  if (token.startsWith(p)) {
88                      token = token.substring(p.length(), token.length());
89                      String prefix_phase = process_token(token);
90                      if (prefix_phase.length() > 0) {
91                          String prefisso = process_token(p);
92                          output = p + "/" + prefix_phase.replace(" ", " " + p);
93                      }
94                  }
95              }
96          } else {
97              output = no_prefix_phase;
98          }
99  
100         if (output.length() == 0) {
101             return original_token;
102         } else {
103             return output;
104         }
105         //this.volume.close();
106 
107     }
108 
109     private String process_token(String token) {
110         String orginal_token = token;
111         token = token.toLowerCase();
112         StringBuffer out_buffer = new StringBuffer();
113         String basic_result = this.map.get(token);
114         out_buffer.append(basic_result != null ? basic_result : "");
115 
116         /////////// fermati qui per risolvere formario completo ////////
117 
118         //return token;
119 
120         //forme composte
121         // suffix
122 
123         String suffix_substring = "";
124 
125         if (token.endsWith("gli")) {
126             suffix_substring = "gli";
127         } else {
128             for (String suf : this.suffix) {
129                 if (token.endsWith(suf)) {
130                     suffix_substring = suf;
131                 }
132             }
133         }
134 
135         if (suffix_substring.length() > 0) {
136             String head = token.substring(0, token.length() - suffix_substring.length());
137             String middle_suffix_substring = "";
138             for (String suf : this.middle_suffix) {
139                 if (head.endsWith(suf)) {
140                     middle_suffix_substring = suf;
141                 }
142             }
143 
144             head = head.substring(0, head.length() - middle_suffix_substring.length());
145 
146             String possible_middle_suffix =
147                     middle_suffix_substring.length() > 0 ? map.get(middle_suffix_substring) : "";
148             String possible_suffix = suffix_substring.length() > 0 ? map.get(suffix_substring) : "";
149 
150             //refine head
151             String possible_verb;
152             boolean ends_with_double = false;
153             char head_ending = '\0';
154             if (head.length() > 0) {
155                 head_ending = head.charAt(head.length() - 1);
156             }
157             char close_suffix_head = '\0';
158 
159             if (possible_middle_suffix.length() > 0) {
160                 close_suffix_head = middle_suffix_substring.charAt(0);
161             } else if (possible_suffix.length() > 0) {
162                 close_suffix_head = suffix_substring.charAt(0);
163             }
164 
165             if (head_ending == close_suffix_head) {
166                 head = head.substring(0, head.length() - 1);
167                 possible_verb = map.get((head));
168             } else {
169                 possible_verb = map.get((head + "e"));
170             }
171 
172             if (possible_verb == null && head.endsWith("r")) {
173 
174                 possible_verb = map.get((head + "re"));
175             } else if (possible_verb == null) {
176                 possible_verb = map.get((head));
177             }
178 
179             if (possible_verb == null && possible_middle_suffix.length() > 0) {  //try to re add middle suffix
180                 head = head + middle_suffix_substring;
181                 middle_suffix_substring = "";
182                 possible_middle_suffix = "";
183                 possible_verb = map.get((head));
184             }
185 
186             if (possible_verb != null) {
187                 String inf = "";
188                 String suf = "";
189                 String mid_suf = "";
190                 String[] verb_items = possible_verb.split(" ");
191                 String[] suffix_items = possible_suffix.split(" ");
192                 String[] mid_suffix_items = possible_middle_suffix.split(" ");
193 
194                 List<String> infiniti = new ArrayList<String>();
195                 List<String> mid_suff = new ArrayList<String>();
196                 List<String> suff = new ArrayList<String>();
197 
198                 // System.out.println(possible_verb);
199 
200                 for (String v : verb_items) {
201                     if (v.contains("+infinito") || v.contains("impr") || v.contains("part") || v.contains("gerundio")) {
202                         infiniti.add(" " + v);
203                         inf += " " + v;
204                     }
205                 }
206 
207                 for (String f : mid_suffix_items) {
208                     if (f.contains("+pron")) {
209                         mid_suff.add("/" + f);
210                     }
211                 }
212                 for (String f : suffix_items) {
213                     if (f.contains("+pron")) {
214                         suff.add("/" + f);
215                     }
216                 }
217                 List<List<String>> lists = new ArrayList<List<String>>();
218                 lists.add(infiniti);
219                 lists.add(mid_suff);
220                 lists.add(suff);
221 
222                 List<String> results = new ArrayList<String>();
223 
224                 if (inf.length() > 0) {
225                     if (mid_suff.size() > 0) {
226                         for (String verb_hypernym : infiniti) {
227                             for (String object_hypernym : mid_suff) {
228                                 for (String subject_hypernym : suff) {
229                                     results.add(verb_hypernym + object_hypernym + subject_hypernym);
230                                 }
231                             }
232                         }
233                     } else {
234                         for (String verb_hypernym : infiniti) {
235                             for (String subject_hypernym : suff) {
236                                 results.add(verb_hypernym + subject_hypernym);
237                             }
238                         }
239                     }
240                     out_buffer.append(Joiner.on(" ").join(results));
241 
242                 }
243             }
244         }
245         if (out_buffer.length() == 0) {
246             return "";
247         } else {
248             return (orginal_token + out_buffer.toString());
249         }
250     }
251 
252 }