1   package eu.fbk.dh.tint.eval.pos;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.io.Files;
5   import edu.stanford.nlp.stats.MultiClassChunkEvalStats;
6   import eu.fbk.utils.core.CommandLine;
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.File;
11  import java.util.List;
12  
13  /**
14   * Created by alessio on 20/07/16.
15   */
16  
17  public class TreeTaggerEvaluation {
18  
19      private static final Logger LOGGER = LoggerFactory.getLogger(TreeTaggerEvaluation.class);
20  
21      private enum SimplePOS {VERB, NOUN, ADJECTIVE, ADVERB, OTHER}
22  
23      public static void main(String[] args) {
24          try {
25              final CommandLine cmd = CommandLine
26                      .parser()
27                      .withName("./evaluate-pos")
28                      .withHeader("Calculate POS evaluation for TreeTagger")
29                      .withOption("t", "guessed", "Input file", "FILE",
30                              CommandLine.Type.FILE_EXISTING, true, false, true)
31                      .withOption("g", "gold-standard", "Input gold standard file", "FILE",
32                              CommandLine.Type.FILE, true, false, true)
33                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
34  
35              File guessed = cmd.getOptionValue("guessed", File.class);
36              File gold = cmd.getOptionValue("gold-standard", File.class);
37  
38              List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
39              List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);
40  
41              for (int i = 0; i < trueLabels.size(); i++) {
42                  String label = trueLabels.get(i);
43                  if (label.trim().length() == 0) {
44                      trueLabels.remove(i);
45                  }
46              }
47  
48              int total = 0;
49              int correct = 0;
50  
51              for (int i = 0; i < guesses.size(); i++) {
52                  String guess = guesses.get(i);
53                  String goldLabel = trueLabels.get(i);
54  
55                  if (goldLabel.equals("<eos>")) {
56                      continue;
57                  }
58  
59                  total++;
60  
61                  String[] parts = guess.split("\t");
62                  guess = parts[1];
63  
64                  SimplePOS goldPos = SimplePOS.OTHER;
65                  SimplePOS guessedPos = SimplePOS.OTHER;
66  
67                  if (goldLabel.startsWith("V")) {
68                      goldPos = SimplePOS.VERB;
69                  } else if (goldLabel.startsWith("S")) {
70                      goldPos = SimplePOS.NOUN;
71                  } else if (goldLabel.startsWith("A")) {
72                      goldPos = SimplePOS.ADJECTIVE;
73                  } else if (goldLabel.startsWith("B")) {
74                      goldPos = SimplePOS.ADVERB;
75                  }
76  
77                  if (guess.startsWith("ADV")) {
78                      guessedPos = SimplePOS.ADVERB;
79                  } else if (guess.startsWith("VER")) {
80                      guessedPos = SimplePOS.VERB;
81                  } else if (guess.startsWith("NOM")) {
82                      guessedPos = SimplePOS.NOUN;
83                  } else if (guess.startsWith("ADJ")) {
84                      guessedPos = SimplePOS.ADJECTIVE;
85                  }
86  
87                  if (goldPos.equals(guessedPos)) {
88                      correct++;
89                  }
90              }
91  
92              System.out.println(correct);
93              System.out.println(total);
94              System.out.println(correct * 1.0 / total);
95              System.exit(1);
96  
97              MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
98              stats.score(guesses, trueLabels);
99              System.out.println(stats.getConllEvalString());
100         } catch (Exception e) {
101             CommandLine.fail(e);
102         }
103     }
104 }