1   package eu.fbk.dh.tint.eval.morpho;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.io.Files;
5   import eu.fbk.utils.core.CommandLine;
6   import org.slf4j.Logger;
7   import org.slf4j.LoggerFactory;
8   
9   import java.io.File;
10  import java.util.List;
11  
12  /**
13   * Created by alessio on 20/07/16.
14   */
15  
16  public class TextProEvaluation {
17  
18      private static final Logger LOGGER = LoggerFactory.getLogger(TextProEvaluation.class);
19  
20      private enum SimplePOS {VERB, NOUN, ADJECTIVE, ADVERB, OTHER}
21  
22      public static void main(String[] args) {
23          try {
24              final CommandLine cmd = CommandLine
25                      .parser()
26                      .withName("./evaluate-lemma")
27                      .withHeader("Calculate lemma evaluation for TextPro")
28                      .withOption("t", "guessed", "Input file", "FILE",
29                              CommandLine.Type.FILE_EXISTING, true, false, true)
30                      .withOption("g", "gold-standard", "Input gold standard file", "FILE",
31                              CommandLine.Type.FILE, true, false, true)
32                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
33  
34              File guessed = cmd.getOptionValue("guessed", File.class);
35              File gold = cmd.getOptionValue("gold-standard", File.class);
36  
37              List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
38              List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);
39  
40              int total = 0;
41              int correct = 0;
42  
43              for (int i = 0; i < trueLabels.size(); i++) {
44                  String goldLabel = trueLabels.get(i);
45  
46                  if (goldLabel.length() == 0) {
47                      continue;
48                  }
49  
50                  String guess = guesses.get(i + 4); // TextPro output file has 4 starting lines
51                  String[] parts;
52  
53                  parts = goldLabel.split("\t");
54                  goldLabel = parts[1];
55                  String pos = parts[2];
56  
57                  boolean doIt = false;
58                  if (pos.startsWith("V")) {
59                      doIt = true;
60                  } else if (pos.startsWith("S")) {
61                      doIt = true;
62                  } else if (pos.startsWith("A")) {
63                      doIt = true;
64                  } else if (pos.startsWith("B")) {
65                      doIt = true;
66                  }
67  
68                  if (goldLabel.equals("_")) {
69                      doIt = false;
70                  }
71  
72                  if (!doIt) {
73                      continue;
74                  }
75                  total++;
76  
77                  parts = guess.split("\t");
78                  guess = parts[2];
79                  guess = guess.replaceAll("\\s+.*", "");
80  
81                  if (guess.equalsIgnoreCase(goldLabel)) {
82                      correct++;
83                  } else {
84                      System.out.printf("%s -> %s\n", guess, goldLabel);
85                  }
86  
87              }
88  
89              System.out.println(correct);
90              System.out.println(total);
91              System.out.println(correct * 1.0 / total);
92          } catch (Exception e) {
93              CommandLine.fail(e);
94          }
95      }
96  }