import com.google.common.io.CharStreams;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.Properties;

/**
 * Simple test of the Tint Italian tokenizer/sentence splitter (ita_toksent),
 * registered as a custom StanfordCoreNLP annotator.
 *
 * Created by alessio on 04/08/16.
 */
public class TestTokenizer {

    private static final Logger LOGGER = LoggerFactory.getLogger(TestTokenizer.class);

    public static void main(String[] args) {
        try {
            // Read the whole input file into a string (try-with-resources so the reader gets closed)
            String text;
            try (BufferedReader reader = new BufferedReader(new FileReader("/Users/alessio/Desktop/GIA.txt"))) {
                text = CharStreams.toString(reader);
            }

            // Short Italian sample that can be used in place of the file:
//            text = "Sei un cavolo di cazzabubbo.lo stronzo!";

            // Configure a pipeline that runs only the custom Italian tokenizer/sentence splitter
            Properties props = new Properties();
            props.setProperty("annotators", "ita_toksent");
            props.setProperty("ita_toksent.model", "/Users/alessio/Desktop/token-settings.xml");
            props.setProperty("customAnnotatorClass.ita_toksent", "eu.fbk.dh.tint.tokenizer.annotators.ItalianTokenizerAnnotator");

            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            // Annotate the text and report how long the annotators took
            Annotation annotation = new Annotation(text);
            pipeline.annotate(annotation);
            System.out.println(pipeline.timingInformation());

            // Print the text of each sentence found by the tokenizer/sentence splitter
            for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
                System.out.println(sentence.get(CoreAnnotations.TextAnnotation.class));
            }

        } catch (Exception e) {
            LOGGER.error("Error annotating text", e);
        }
    }
}