1   package eu.fbk.dh.tint.eval.ner;
2   
3   import edu.stanford.nlp.stats.MultiClassChunkEvalStats;
4   import eu.fbk.utils.core.CommandLine;
5   import org.slf4j.Logger;
6   import org.slf4j.LoggerFactory;
7   
8   import java.io.BufferedReader;
9   import java.io.File;
10  import java.io.FileReader;
11  import java.util.ArrayList;
12  import java.util.List;
13  
14  /**
15   * Created by alessio on 20/07/16.
16   */
17  
18  public class PairEvaluation {
19  
20      private static final Logger LOGGER = LoggerFactory.getLogger(PairEvaluation.class);
21  
22      public static void main(String[] args) {
23          try {
24              final CommandLine cmd = CommandLine
25                      .parser()
26                      .withName("./evaluate-ner")
27                      .withHeader("Calculate NER evaluation")
28                      .withOption("t", "guessed", "Input file", "FILE",
29                              CommandLine.Type.FILE_EXISTING, true, false, true)
30                      .withOption("g", "gold-standard", "Input gold standard file", "FILE",
31                              CommandLine.Type.FILE, true, false, true)
32                      .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
33  
34              File guessed = cmd.getOptionValue("guessed", File.class);
35              File gold = cmd.getOptionValue("gold-standard", File.class);
36  
37              List<String> guesses = new ArrayList<>();
38              List<String> trueLabels = new ArrayList<>();
39  
40              BufferedReader tReader = new BufferedReader(new FileReader(guessed));
41              BufferedReader gReader = new BufferedReader(new FileReader(gold));
42  
43              String line;
44  
45              while ((line = tReader.readLine()) != null) {
46                  line = line.trim();
47                  if (line.startsWith("#")) {
48                      continue;
49                  }
50                  String[] parts = line.split("\t");
51                  if (parts.length < 2) {
52                      continue;
53                  }
54  
55                  String ner = parts[parts.length - 1];
56                  ner = ner.replaceAll("^[A-Za-z]-", "");
57                  if (ner.equals("GPE")) {
58                      ner = "LOC";
59                  }
60  
61                  guesses.add(ner);
62              }
63  
64              while ((line = gReader.readLine()) != null) {
65                  line = line.trim();
66                  String[] parts = line.split("\t");
67                  if (parts.length < 2) {
68                      continue;
69                  }
70  
71                  String ner = parts[parts.length - 1];
72                  trueLabels.add(ner);
73              }
74  
75              if (guesses.size() != trueLabels.size()) {
76                  LOGGER.error("Sizes are not identical");
77              }
78              else {
79                  MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
80                  stats.score(guesses, trueLabels);
81                  System.out.println(stats.getConllEvalString());
82              }
83              tReader.close();
84              gReader.close();
85          } catch (Exception e) {
86              CommandLine.fail(e);
87          }
88      }
89  }