1   package eu.fbk.dh.tint.resources.ner;
2   
3   import eu.fbk.twm.utils.ExtractorParameters;
4   import eu.fbk.twm.wiki.xmldump.AbstractWikipediaExtractor;
5   
6   import java.util.Locale;
7   
8   /**
9    * Created by alessio on 26/01/17.
10   */
11  
12  public class WikiNewsReader extends AbstractWikipediaExtractor {
13  
14      public WikiNewsReader(int numThreads, int numPages, Locale locale, String configurationFolder) {
15          super(numThreads, numPages, locale, configurationFolder);
16      }
17  
18      @Override public void start(ExtractorParameters extractorParameters) {
19  
20      }
21  
22      @Override public void disambiguationPage(String text, String title, int wikiID) {
23  
24      }
25  
26      @Override public void categoryPage(String text, String title, int wikiID) {
27  
28      }
29  
30      @Override public void templatePage(String text, String title, int wikiID) {
31  
32      }
33  
34      @Override public void redirectPage(String text, String title, int wikiID) {
35  
36      }
37  
38      @Override public void contentPage(String text, String title, int wikiID) {
39          if (title.startsWith("Wikinotizie:")) {
40              return;
41          }
42          
43          System.out.println(title);
44          System.out.println(text);
45          System.out.println("--------");
46      }
47  
48      @Override public void portalPage(String text, String title, int wikiID) {
49  
50      }
51  
52      @Override public void projectPage(String text, String title, int wikiID) {
53  
54      }
55  
56      @Override public void filePage(String text, String title, int wikiID) {
57  
58      }
59  
60      public static void main(String[] args) {
61          WikiNewsReader wikiNewsReader = new WikiNewsReader(8, Integer.MAX_VALUE, new Locale("it"), "/Users/alessio/Documents/scripts/twm-lib/configuration");
62          wikiNewsReader.startProcess("/Users/alessio/Desktop/itwikinews-20170120-pages-articles.xml");
63      }
64  }