1   package eu.fbk.dh.tint.eval.pos;
2   
3   import com.google.common.base.Charsets;
4   import com.google.common.io.Files;
5   import edu.stanford.nlp.stats.MultiClassChunkEvalStats;
6   import eu.fbk.utils.core.CommandLine;
7   import org.slf4j.Logger;
8   import org.slf4j.LoggerFactory;
9   
10  import java.io.File;
11  import java.util.ArrayList;
12  import java.util.List;
13  
14  /**
15   * Created by alessio on 20/07/16.
16   */
17  
18  public class TextProEvaluation {
19  
20      private static final Logger LOGGER = LoggerFactory.getLogger(TextProEvaluation.class);
21  
22      private enum SimplePOS {VERB, NOUN, ADJECTIVE, ADVERB, OTHER}
23  
24      public static void main(String[] args) {
25          try {
26              final CommandLine cmd = CommandLine
27                      .parser()
28                      .withName("./evaluate-pos")
29                      .withHeader("Calculate POS evaluation for TextPro")
30                      .withOption("t", "guessed", "Input file", "FILE",
31                              CommandLine.Type.FILE_EXISTING, true, false, true)
32                      .withOption("g", "gold-standard", "Input gold standard file", "FILE",
33                              CommandLine.Type.FILE, true, false, true)
34                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
35  
36              File guessed = cmd.getOptionValue("guessed", File.class);
37              File gold = cmd.getOptionValue("gold-standard", File.class);
38  
39              List<String> guesses = new ArrayList<>();
40              List<String> guessesTmp = Files.readLines(guessed, Charsets.UTF_8);
41              List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);
42  
43              for (int i = 0; i < guessesTmp.size(); i++) {
44                  String guess = guessesTmp.get(i);
45                  if (!guess.startsWith("#")) {
46                      guesses.add(guess);
47                  }
48              }
49  
50              int total = 0;
51              int correct = 0;
52  
53              for (int i = 0; i < guesses.size(); i++) {
54                  String guess = guesses.get(i);
55                  String goldLabel = trueLabels.get(i);
56  
57                  if (goldLabel.equals("<eos>") || goldLabel.length() == 0) {
58                      continue;
59                  }
60  
61                  total++;
62  
63                  String[] parts = guess.split("\t");
64                  guess = parts[1];
65  
66                  SimplePOS goldPos = SimplePOS.OTHER;
67                  SimplePOS guessedPos = SimplePOS.OTHER;
68  
69                  if (goldLabel.startsWith("V")) {
70                      goldPos = SimplePOS.VERB;
71                  } else if (goldLabel.startsWith("S")) {
72                      goldPos = SimplePOS.NOUN;
73                  } else if (goldLabel.startsWith("A")) {
74                      goldPos = SimplePOS.ADJECTIVE;
75                  } else if (goldLabel.startsWith("B")) {
76                      goldPos = SimplePOS.ADVERB;
77                  }
78  
79                  if (guess.startsWith("B")) {
80                      guessedPos = SimplePOS.ADVERB;
81                  } else if (guess.startsWith("V")) {
82                      guessedPos = SimplePOS.VERB;
83                  } else if (guess.startsWith("S")) {
84                      guessedPos = SimplePOS.NOUN;
85                  } else if (guess.startsWith("A")) {
86                      guessedPos = SimplePOS.ADJECTIVE;
87                  }
88  
89                  if (goldPos.equals(guessedPos)) {
90                      correct++;
91                  }
92              }
93  
94              System.out.println(correct);
95              System.out.println(total);
96              System.out.println(correct * 1.0 / total);
97              System.exit(1);
98  
99              MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
100             stats.score(guesses, trueLabels);
101             System.out.println(stats.getConllEvalString());
102         } catch (Exception e) {
103             CommandLine.fail(e);
104         }
105     }
106 }