The following document contains the results of PMD's CPD 5.3.2.

Duplications

File Project Line
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 41
eu/fbk/dh/tint/eval/pos/TextProEvaluation.java tint-eval 50
            int total = 0;
            int correct = 0;

            for (int i = 0; i < guesses.size(); i++) {
                String guess = guesses.get(i);
                String goldLabel = trueLabels.get(i);

                if (goldLabel.equals("<eos>") || goldLabel.length() == 0) {
                    continue;
                }

                total++;

                String[] parts = guess.split("\t");
                guess = parts[1];

                SimplePOS goldPos = SimplePOS.OTHER;
                SimplePOS guessedPos = SimplePOS.OTHER;

                if (goldLabel.startsWith("V")) {
                    goldPos = SimplePOS.VERB;
                } else if (goldLabel.startsWith("S")) {
                    goldPos = SimplePOS.NOUN;
                } else if (goldLabel.startsWith("A")) {
                    goldPos = SimplePOS.ADJECTIVE;
                } else if (goldLabel.startsWith("B")) {
                    goldPos = SimplePOS.ADVERB;
                }

                if (guess.startsWith("B")) {
                    guessedPos = SimplePOS.ADVERB;
                } else if (guess.startsWith("V")) {
                    guessedPos = SimplePOS.VERB;
                } else if (guess.startsWith("S")) {
                    guessedPos = SimplePOS.NOUN;
                } else if (guess.startsWith("A")) {
                    guessedPos = SimplePOS.ADJECTIVE;
                }

                if (goldPos.equals(guessedPos)) {
                    correct++;
                }
            }

            System.out.println(correct);
            System.out.println(total);
            System.out.println(correct * 1.0 / total);
            System.exit(1);

            MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
            stats.score(guesses, trueLabels);
            System.out.println(stats.getConllEvalString());
        } catch (Exception e) {
            CommandLine.fail(e);
        }
    }
}
File Project Line
eu/fbk/dh/tint/readability/es/SpanishReadability.java tint-readability 72
eu/fbk/dh/tint/readability/gl/GalicianReadability.java tint-readability 50
        model = SpanishReadabilityModel.getInstance(globalProperties, localProperties);
    }

    /**
     * Assigns a difficulty level to a content word and updates the per-level counters.
     *
     * <p>The token is first marked with level 4. Its lemma is then looked up in the
     * model's level-3, level-2 and level-1 lemma collections, in that order. Each
     * collection that contains the lemma increments its own counter and overwrites
     * the level stored on the token, so the last matching check decides the final
     * level (1 takes precedence over 2, which takes precedence over 3).
     *
     * @param token the content word being added; the difficulty annotation is written to it
     */
    @Override public void addingContentWord(CoreLabel token) {
        super.addingContentWord(token);

        // Default level; overwritten below when the lemma appears in a model list.
        token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 4);
        final String contentLemma = token.lemma();

        // The checks are deliberately independent (no else-if): a lemma present
        // in several lists is counted once per list.
        if (model.getLevel3Lemmas().contains(contentLemma)) {
            token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 3);
            level3WordSize++;
        }
        if (model.getLevel2Lemmas().contains(contentLemma)) {
            token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 2);
            level2WordSize++;
        }
        if (model.getLevel1Lemmas().contains(contentLemma)) {
            token.set(ReadabilityAnnotations.DifficultyLevelAnnotation.class, 1);
            level1WordSize++;
        }
    }

    /**
     * Callback override with an empty body: this implementation does nothing
     * with the token and does not call the superclass method.
     *
     * @param token unused here
     */
    @Override public void addingEasyWord(CoreLabel token) {

    }

    /**
     * Delegates to the superclass implementation without adding any behaviour.
     *
     * @param token the token forwarded unchanged to {@code super.addingWord}
     */
    @Override public void addingWord(CoreLabel token) {
        super.addingWord(token);
    }

    /**
     * Callback override that currently performs no work: every statement in the
     * body is commented out.
     *
     * @param token unused while the body stays disabled
     */
    @Override public void addingToken(CoreLabel token) {
        // Disabled code: it recorded lemma/token offsets and appended each lemma to a buffer.
//        lemmaIndexes.put(buffer.length(), lemmaIndex);
//        tokenIndexes.put(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), lemmaIndex);
//        lemmaIndex++;
//        buffer.append(token.get(CoreAnnotations.LemmaAnnotation.class)).append(" ");
    }

    /**
     * Callback override with an empty body: this implementation does nothing
     * with the sentence and does not call the superclass method.
     *
     * @param sentence unused here
     */
    @Override public void addingSentence(CoreMap sentence) {

    }
}
File Project Line
eu/fbk/dh/tint/eval/morpho/TextProEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/morpho/TreeTaggerEvaluation.java tint-eval 27
                    .withHeader("Calculate lemma evaluation for TextPro")
                    .withOption("t", "guessed", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withOption("g", "gold-standard", "Input gold standard file", "FILE",
                            CommandLine.Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File guessed = cmd.getOptionValue("guessed", File.class);
            File gold = cmd.getOptionValue("gold-standard", File.class);

            List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
            List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);

            int total = 0;
            int correct = 0;

            for (int i = 0; i < trueLabels.size(); i++) {
                String goldLabel = trueLabels.get(i);

                if (goldLabel.length() == 0) {
                    continue;
                }

                String guess = guesses.get(i + 4); // TextPro output file has 4 starting lines
File Project Line
eu/fbk/dh/tint/eval/morpho/TintEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 28
                    .withHeader("Calculate lemma evaluation for Tint")
                    .withOption("t", "guessed", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withOption("g", "gold-standard", "Input gold standard file", "FILE",
                            CommandLine.Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File guessed = cmd.getOptionValue("guessed", File.class);
            File gold = cmd.getOptionValue("gold-standard", File.class);

            List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
            List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);

            int total = 0;
            int correct = 0;

            for (int i = 0; i < guesses.size(); i++) {
                String guess = guesses.get(i);
                String goldLabel = trueLabels.get(i);

                if (goldLabel.length() == 0) {
File Project Line
eu/fbk/dh/tint/readability/es/SpanishStandardReadability.java tint-readability 17
eu/fbk/dh/tint/readability/gl/GalicianStandardReadability.java tint-readability 17
    public SpanishStandardReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
        super(globalProperties, localProperties, annotation);

        contentPosList.add("N");
        contentPosList.add("A");
        contentPosList.add("V");
        contentPosList.add("R");

        simplePosList.add("N");
        simplePosList.add("V");

        nonWordPosList.add("F");

        genericPosDescription.put("A", "Adjective");
        genericPosDescription.put("C", "Conjunction");
        genericPosDescription.put("D", "Determiner");
        genericPosDescription.put("F", "Punctuation");
        genericPosDescription.put("I", "Interjection");
        genericPosDescription.put("R", "Adverb");
        genericPosDescription.put("N", "Noun");
        genericPosDescription.put("S", "Preposition");
        genericPosDescription.put("P", "Pronoun");
        genericPosDescription.put("V", "Verb");
        genericPosDescription.put("X", "Other");
        genericPosDescription.put("Z", "Number");
        genericPosDescription.put("W", "Date");

    }

}
File Project Line
eu/fbk/dh/tint/readability/es/SpanishReadability.java tint-readability 28
eu/fbk/dh/tint/readability/gl/GalicianReadability.java tint-readability 29
    /**
     * Computes the final readability scores and stores them in {@code measures}
     * (with the label of the main score in {@code labels}).
     *
     * <p>After delegating to the superclass, it stores:
     * <ul>
     *   <li>{@code "main"}: 206.835 - 62.3 * hyphenCount / hyphenWordCount - wordCount / sentenceCount,
     *       labelled "Flesch-Szigriszt";</li>
     *   <li>{@code "fernandez-huerta"}: 206.84 - 0.6 * (100 * hyphenCount / hyphenWordCount)
     *       - 1.02 * (100 * sentenceCount / wordCount);</li>
     *   <li>{@code "level1"}, {@code "level2"}, {@code "level3"}: each level counter as a
     *       percentage of {@code getContentWordSize()}.</li>
     * </ul>
     *
     * <p>NOTE(review): presumably {@code getHyphenCount()} is the syllable count and
     * {@code getHyphenWordCount()} the number of hyphenated (syllabified) words — confirm
     * against the superclass.
     * NOTE(review): none of the divisors is checked for zero here — confirm what callers
     * expect for empty input.
     */
    @Override public void finalizeReadability() {
        super.finalizeReadability();


        // The leading floating-point literal (62.3, 1.0, 100.0) makes each division floating-point.
        double fleschSzigriszt =
                206.835 - (62.3 * getHyphenCount() / getHyphenWordCount()) - (1.0 * getWordCount()
                        / getSentenceCount());
        double fernandezHuerta =
                206.84 - 0.6 * (100.0 * getHyphenCount() / getHyphenWordCount()) - 1.02 * (100.0 * getSentenceCount()
                        / getWordCount());
        // "main" is the only measure that also gets a label.
        labels.put("main", "Flesch-Szigriszt");
        measures.put("main", fleschSzigriszt);
        measures.put("fernandez-huerta", fernandezHuerta);
        // Share of content words counted at each difficulty level, as a percentage.
        measures.put("level1", 100.0 * level1WordSize / getContentWordSize());
        measures.put("level2", 100.0 * level2WordSize / getContentWordSize());
        measures.put("level3", 100.0 * level3WordSize / getContentWordSize());
        // Disabled code below: glossary lookup over the text and lemma text; not executed.
//
//        String lemmaText = buffer.toString().trim();
//        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
//
//        HashMap<String, GlossarioEntry> glossario = model.getGlossario();
//
//        List<String> glossarioKeys = new ArrayList<>(glossario.keySet());
//        Collections.sort(glossarioKeys, new StringLenComparator());
//
//        for (String form : glossarioKeys) {
//
//            int numberOfTokens = form.split("\\s+").length;
//            List<Integer> allOccurrences = findAllOccurrences(text, form);
//            List<Integer> allLemmaOccurrences = findAllOccurrences(lemmaText, form);
//
//            for (Integer occurrence : allOccurrences) {
//                addDescriptionForm(form, tokenIndexes, occurrence, numberOfTokens, forms, annotation, glossario);
//            }
//            for (Integer occurrence : allLemmaOccurrences) {
//                addDescriptionForm(form, lemmaIndexes, occurrence, numberOfTokens, forms, annotation, glossario);
//            }
//        }

    }

    public SpanishReadability(Properties globalProperties, Properties localProperties, Annotation annotation) {
File Project Line
eu/fbk/dh/tint/eval/morpho/TextProEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/morpho/TintEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/morpho/TreeTaggerEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 28
                    .withHeader("Calculate lemma evaluation for TextPro")
                    .withOption("t", "guessed", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withOption("g", "gold-standard", "Input gold standard file", "FILE",
                            CommandLine.Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File guessed = cmd.getOptionValue("guessed", File.class);
            File gold = cmd.getOptionValue("gold-standard", File.class);

            List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
            List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);

            int total = 0;
            int correct = 0;

            for (int i = 0; i < trueLabels.size(); i++) {
File Project Line
eu/fbk/dh/tint/digimorph/annotator/GuessModel.java tint-digimorph-annotator 256
eu/fbk/dh/tint/digimorph/annotator/ModelHelper.java tint-digimorph-annotator 114
        DigiMorph digiMorph = new DigiMorph();
        SortedTableMap<String, String> map = digiMorph.getMap();
        Iterator<String> gmIterator = map.keyIterator();
        while (gmIterator.hasNext()) {
            String key = gmIterator.next();
            String value = map.get(key).trim();
            String[] words = value.split("[\\s/]+");
            for (String word : words) {
                String[] parts = word.split("\\+");

                if (parts.length < 2) {
                    continue;
                }
                String ePos = parts[1];
                if (ePos.length() == 0) {
                    continue;
                }
                if (!uMap.keySet().contains(ePos)) {
                    continue;
                }

                // Feats
                String feats = getMorphoFeatsForContentWords(word);
File Project Line
eu/fbk/dh/tint/eval/morpho/TextProEvaluation.java tint-eval 51
eu/fbk/dh/tint/eval/morpho/TintEvaluation.java tint-eval 51
                String[] parts;

                parts = goldLabel.split("\t");
                goldLabel = parts[1];
                String pos = parts[2];

                boolean doIt = false;
                if (pos.startsWith("V")) {
                    doIt = true;
                } else if (pos.startsWith("S")) {
                    doIt = true;
                } else if (pos.startsWith("A")) {
                    doIt = true;
                } else if (pos.startsWith("B")) {
                    doIt = true;
                }

                if (goldLabel.equals("_")) {
                    doIt = false;
                }

                if (!doIt) {
                    continue;
                }
                total++;

                parts = guess.split("\t");
                guess = parts[2];
File Project Line
eu/fbk/dh/tint/eval/morpho/TextProEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/morpho/TintEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/morpho/TreeTaggerEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 28
eu/fbk/dh/tint/eval/pos/TreeTaggerEvaluation.java tint-eval 28
                    .withHeader("Calculate lemma evaluation for TextPro")
                    .withOption("t", "guessed", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withOption("g", "gold-standard", "Input gold standard file", "FILE",
                            CommandLine.Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File guessed = cmd.getOptionValue("guessed", File.class);
            File gold = cmd.getOptionValue("gold-standard", File.class);

            List<String> guesses = Files.readLines(guessed, Charsets.UTF_8);
            List<String> trueLabels = Files.readLines(gold, Charsets.UTF_8);
File Project Line
eu/fbk/dh/tint/resources/morpho/MorphItConverter.java tint-resources 50
eu/fbk/dh/tint/resources/morpho/MorphItEaglesConverter.java tint-resources 171
            List<String> lines = Files.readLines(inputPath, Charsets.UTF_8);
            for (String line : lines) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }

                String[] parts = line.split("\\s+");
                if (parts.length != 3) {
                    LOGGER.error("Invalid line: {}", line);
                    continue;
                }

                String form = parts[0];
                String lemma = parts[1];
                String morpho = parts[2];

                Matcher matcher = morphoType.matcher(morpho);
                if (!matcher.find()) {
                    LOGGER.warn("Invalid pattern: {}", morpho);
                    continue;
                }

                String type = matcher.group(1);
File Project Line
eu/fbk/dh/tint/eval/morpho/TextProEvaluation.java tint-eval 50
eu/fbk/dh/tint/eval/morpho/TreeTaggerEvaluation.java tint-eval 50
                String guess = guesses.get(i + 4); // TextPro output file has 4 starting lines
                String[] parts;

                parts = goldLabel.split("\t");
                goldLabel = parts[1];
                String pos = parts[2];

                boolean doIt = false;
                if (pos.startsWith("V")) {
                    doIt = true;
                } else if (pos.startsWith("S")) {
                    doIt = true;
                } else if (pos.startsWith("A")) {
                    doIt = true;
                } else if (pos.startsWith("B")) {
                    doIt = true;
                }

                if (goldLabel.equals("_")) {
                    doIt = false;
                }

                if (!doIt) {
                    continue;
                }
File Project Line
eu/fbk/dh/tint/eval/morpho/TintEvaluation.java tint-eval 51
eu/fbk/dh/tint/eval/morpho/TreeTaggerEvaluation.java tint-eval 51
                String[] parts;

                parts = goldLabel.split("\t");
                goldLabel = parts[1];
                String pos = parts[2];

                boolean doIt = false;
                if (pos.startsWith("V")) {
                    doIt = true;
                } else if (pos.startsWith("S")) {
                    doIt = true;
                } else if (pos.startsWith("A")) {
                    doIt = true;
                } else if (pos.startsWith("B")) {
                    doIt = true;
                }

                if (goldLabel.equals("_")) {
                    doIt = false;
                }

                if (!doIt) {
                    continue;
                }
File Project Line
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 48
eu/fbk/dh/tint/eval/pos/TextProEvaluation.java tint-eval 57
eu/fbk/dh/tint/eval/pos/TreeTaggerEvaluation.java tint-eval 55
                if (goldLabel.equals("<eos>") || goldLabel.length() == 0) {
                    continue;
                }

                total++;

                String[] parts = guess.split("\t");
                guess = parts[1];

                SimplePOS goldPos = SimplePOS.OTHER;
                SimplePOS guessedPos = SimplePOS.OTHER;

                if (goldLabel.startsWith("V")) {
                    goldPos = SimplePOS.VERB;
                } else if (goldLabel.startsWith("S")) {
                    goldPos = SimplePOS.NOUN;
                } else if (goldLabel.startsWith("A")) {
                    goldPos = SimplePOS.ADJECTIVE;
                } else if (goldLabel.startsWith("B")) {
                    goldPos = SimplePOS.ADVERB;
                }

                if (guess.startsWith("B")) {
File Project Line
eu/fbk/dh/tint/eval/ner/PairEvaluation.java tint-eval 27
eu/fbk/dh/tint/eval/pos/TextProEvaluation.java tint-eval 29
                    .withHeader("Calculate NER evaluation")
                    .withOption("t", "guessed", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withOption("g", "gold-standard", "Input gold standard file", "FILE",
                            CommandLine.Type.FILE, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File guessed = cmd.getOptionValue("guessed", File.class);
            File gold = cmd.getOptionValue("gold-standard", File.class);

            List<String> guesses = new ArrayList<>();
            List<String> trueLabels = new ArrayList<>();
File Project Line
eu/fbk/dh/tint/eval/morpho/AnnotateLemma.java tint-eval 36
eu/fbk/dh/tint/tokenizer/util/SplitSentences.java tint-tokenizer 35
                    .withOption("o", "output", "Input file", "FILE",
                            CommandLine.Type.FILE_EXISTING, true, false, true)
                    .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);

            File input = cmd.getOptionValue("input", File.class);
            File output = cmd.getOptionValue("output", File.class);

            String text = new String(Files.readAllBytes(input.toPath()), Charsets.UTF_8);
            BufferedWriter writer = new BufferedWriter(new FileWriter(output));

            Properties props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, ita_morpho, ita_lemma");
File Project Line
eu/fbk/dh/tint/eval/pos/StanfordEvaluation.java tint-eval 76
eu/fbk/dh/tint/eval/pos/TextProEvaluation.java tint-eval 85
eu/fbk/dh/tint/eval/pos/TreeTaggerEvaluation.java tint-eval 83
                } else if (guess.startsWith("A")) {
                    guessedPos = SimplePOS.ADJECTIVE;
                }

                if (goldPos.equals(guessedPos)) {
                    correct++;
                }
            }

            System.out.println(correct);
            System.out.println(total);
            System.out.println(correct * 1.0 / total);
            System.exit(1);

            MultiClassChunkEvalStats stats = new MultiClassChunkEvalStats("O");
            stats.score(guesses, trueLabels);
            System.out.println(stats.getConllEvalString());
        } catch (Exception e) {
            CommandLine.fail(e);
        }
    }
}

Back to top

Last Published: 2018/01/04.

Reflow Maven skin by Andrius Velykis.