1   package eu.fbk.dh.tint.eval.pos;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.io.Files;
5   import edu.stanford.nlp.stats.MultiClassChunkEvalStats;
6   import eu.fbk.utils.core.CommandLine;
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.File;
11  import java.util.List;
12  
13  /**
14   * Created by alessio on 20/07/16.
15   */
16  
17  public class StanfordEvaluation {
18  
19      private static final Logger LOGGER = LoggerFactory.getLogger(StanfordEvaluation.class);
20  
21      private enum SimplePOS {VERB, NOUN, ADJECTIVE, ADVERB, OTHER}
22  
23      public static void main(String[] args) {
24          try {
25              final CommandLine cmd = CommandLine
26                      .parser()
27                      .withName("./evaluate-pos")
28                      .withHeader("Calculate POS evaluation for Stanford")
29                      .withOption("t", "guessed", "Input file", "FILE",
30                              CommandLine.Type.FILE_EXISTING, true, false, true)
31                      .withOption("g", "gold-standard", "Input gold standard file", "FILE",
32                              CommandLine.Type.FILE, true, false, true)
33                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
34  
35              File guessed = cmd.getOptionValue("guessed", File.class);
36              File gold = cmd.getOptionValue("gold-standard", File.class);
37  
38              List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
39              List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);
40  
41              int total = 0;
42              int correct = 0;
43  
44              for (int i = 0; i < guesses.size(); i++) {
45                  String guess = guesses.get(i);
46                  String goldLabel = trueLabels.get(i);
47  
48                  if (goldLabel.equals("<eos>") || goldLabel.length() == 0) {
49                      continue;
50                  }
51  
52                  total++;
53  
54                  String[] parts = guess.split("\t");
55                  guess = parts[1];
56  
57                  SimplePOS goldPos = SimplePOS.OTHER;
58                  SimplePOS guessedPos = SimplePOS.OTHER;
59  
60                  if (goldLabel.startsWith("V")) {
61                      goldPos = SimplePOS.VERB;
62                  } else if (goldLabel.startsWith("S")) {
63                      goldPos = SimplePOS.NOUN;
64                  } else if (goldLabel.startsWith("A")) {
65                      goldPos = SimplePOS.ADJECTIVE;
66                  } else if (goldLabel.startsWith("B")) {
67                      goldPos = SimplePOS.ADVERB;
68                  }
69  
70                  if (guess.startsWith("B")) {
71                      guessedPos = SimplePOS.ADVERB;
72                  } else if (guess.startsWith("V")) {
73                      guessedPos = SimplePOS.VERB;
74                  } else if (guess.startsWith("S")) {
75                      guessedPos = SimplePOS.NOUN;
76                  } else if (guess.startsWith("A")) {
77                      guessedPos = SimplePOS.ADJECTIVE;
78                  }
79  
80                  if (goldPos.equals(guessedPos)) {
81                      correct++;
82                  }
83              }
84  
85              System.out.println(correct);
86              System.out.println(total);
87              System.out.println(correct * 1.0 / total);
88              System.exit(1);
89  
90              MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
91              stats.score(guesses, trueLabels);
92              System.out.println(stats.getConllEvalString());
93          } catch (Exception e) {
94              CommandLine.fail(e);
95          }
96      }
97  }