1   package eu.fbk.dh.tint.runner;
2   
3   import com.google.gson.GsonBuilder;
4   import edu.stanford.nlp.ling.CoreAnnotations;
5   import edu.stanford.nlp.pipeline.*;
6   import eu.fbk.utils.corenlp.outputters.JSONOutputter;
7   import eu.fbk.utils.corenlp.outputters.TextProOutputter;
8   import org.slf4j.Logger;
9   import org.slf4j.LoggerFactory;
10  
11  import javax.annotation.Nullable;
12  import java.io.*;
13  import java.time.Instant;
14  import java.util.Properties;
15  
16  //import eu.fbk.utils.corenlp.outputters.SerializerCollector;
17  
18  /**
19   * Created by alessio on 15/08/16.
20   */
21  
22  public class TintPipeline {
23  
24      private static final Logger LOGGER = LoggerFactory.getLogger(TintPipeline.class);
25      private String documentDate = null;
26      private Properties props = new Properties();
27  
28  //    private boolean DEFAULT_LOAD_SERIALIZER = false;
29  //    SerializerCollector serializerCollector = null;
30  
31  //    public void loadSerializers() {
32  //        serializerCollector = new SerializerCollector();
33  //    }
34  
35      public TintPipeline(Properties props) {
36          this.props = props;
37      }
38  
39      public TintPipeline() {
40          this(true);
41      }
42  
43      public TintPipeline(boolean loadDefaultProperties) {
44          if (loadDefaultProperties) {
45              try {
46                  loadDefaultProperties();
47              } catch (IOException e) {
48                  LOGGER.error("Unable to load default configuration");
49              }
50          }
51      }
52  
53      public void load() {
54  //        if (pipeline == null) {
55          StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
56  //        if (DEFAULT_LOAD_SERIALIZER) {
57  //            loadSerializers();
58  //        }
59  //        }
60      }
61  
62      public void loadDefaultProperties() throws IOException {
63          InputStream configStream = TintRunner.class.getResourceAsStream("/default-config.properties");
64          if (configStream != null) {
65              props.load(configStream);
66          }
67      }
68  
69      public void loadPropertiesFromStream(InputStream stream) throws IOException {
70          props.load(stream);
71      }
72  
73      public void loadPropertiesFromFile(File propsFile) throws IOException {
74          if (propsFile != null) {
75              InputStream configStream = new FileInputStream(propsFile);
76              loadPropertiesFromStream(configStream);
77          }
78      }
79  
80      public void addProperties(Properties properties) {
81          if (properties != null) {
82              props.putAll(properties);
83          }
84      }
85  
86      public void setProperty(String key, String value) {
87          props.setProperty(key, value);
88      }
89  
90      public String getProperty(String key) {
91          return props.getProperty(key);
92      }
93  
94      public String getDocumentDate() {
95          return documentDate;
96      }
97  
98      public void setDocumentDate(String documentDate) {
99          this.documentDate = documentDate;
100     }
101 
102     public Annotation runRaw(String text) {
103         return runRaw(text, null);
104     }
105 
106     public Annotation runRaw(String text, @Nullable StanfordCoreNLP pipeline) {
107         load();
108 
109         Annotation annotation = new Annotation(text);
110         LOGGER.debug("Text: {}", text);
111         if (documentDate == null) {
112             documentDate = Instant.now().toString().substring(0, 10);
113         }
114 
115         annotation.set(CoreAnnotations.DocDateAnnotation.class, documentDate);
116         if (pipeline == null) {
117             pipeline = new StanfordCoreNLP(props);
118         }
119         pipeline.annotate(annotation);
120         annotation.set(TimingAnnotations.TimingAnnotation.class, pipeline.timingInformation());
121 
122         return annotation;
123     }
124 
125     public Annotation run(String text, OutputStream outputStream, TintRunner.OutputFormat format)
126             throws IOException {
127         StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
128         Annotation annotation = runRaw(text, pipeline);
129 
130         switch (format) {
131         case CONLL:
132             CoNLLUOutputter.conllUPrint(annotation, outputStream, pipeline);
133             break;
134         case READABLE:
135             TextOutputter.prettyPrint(annotation, outputStream, pipeline);
136             break;
137         case XML:
138             XMLOutputter.xmlPrint(annotation, outputStream, pipeline);
139             break;
140         case JSON:
141             GsonBuilder gsonBuilder;
142             gsonBuilder = new GsonBuilder();
143 //            if (serializerCollector != null) {
144 //                gsonBuilder = serializerCollector.getGsonBuilder();
145 //            } else {
146 //                gsonBuilder = new GsonBuilder();
147 //            }
148             JSONOutputter.jsonPrint(gsonBuilder, annotation, outputStream, pipeline);
149             break;
150         case TEXTPRO:
151             TextProOutputter.tpPrint(annotation, outputStream, pipeline);
152             break;
153 //        case NAF:
154 //            KAFDocument doc = AbstractHandler.text2naf(text, new HashMap<>());
155 //            AnnotationPipeline pikesPipeline = new AnnotationPipeline(null, null);
156 //            pikesPipeline.addToNerMap("PER", "PERSON");
157 //            pikesPipeline.addToNerMap("ORG", "ORGANIZATION");
158 //            pikesPipeline.addToNerMap("LOC", "LOCATION");
159 //            pikesPipeline.annotateStanford(new Properties(), annotation, doc);
160 //            outputStream.write(doc.toString().getBytes());
161 //            outputStream.flush();
162         }
163 
164         return annotation;
165     }
166 
167     public Annotation run(InputStream inputStream, OutputStream outputStream, TintRunner.OutputFormat format)
168             throws IOException {
169 
170         Reader reader = new InputStreamReader(inputStream);
171         StringBuilder inputText = new StringBuilder();
172         int i;
173         while ((i = reader.read()) != -1) {
174             inputText.append((char) i);
175         }
176         reader.close();
177         String text = inputText.toString();
178 
179         return run(text, outputStream, format);
180 
181     }
182 }