/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.sequences;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;
import edu.stanford.nlp.objectbank.LineIterator;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A {@link DocumentReaderAndWriter} for true-casing data in which tokens are
 * separated by single spaces.
 *
 * <p>On input, every token is labelled with its observed casing class
 * ({@code LOWER}, {@code UPPER}, {@code INIT_UPPER} or {@code O}) in both the
 * answer and the gold-answer annotation, and its word is stored lower-cased.
 * On output, {@link #printAnswers(List, PrintWriter)} re-applies the casing
 * named by each token's answer annotation.
 *
 * <p>When the system property {@value #THREE_CLASSES_PROPERTY} is {@code true}
 * the {@code UPPER} class is neither produced on input nor honoured on output.
 */
public class TrueCasingForNISTDocumentReaderAndWriter
implements DocumentReaderAndWriter<CoreLabel> {
    private static final Redwood.RedwoodChannels log = Redwood.channels(TrueCasingForNISTDocumentReaderAndWriter.class);
    /** Name of the system property that switches off the {@code UPPER} class. */
    public static final String THREE_CLASSES_PROPERTY = "3class";
    /** Value of the {@value #THREE_CLASSES_PROPERTY} system property, read once at class initialisation. */
    public static final boolean THREE_CLASSES = Boolean.parseBoolean(System.getProperty(THREE_CLASSES_PROPERTY, "false"));
    private static final long serialVersionUID = -3000389291781534479L;

    // Casing-class labels shared by the parser and by printAnswers.
    private static final String LABEL_LOWER = "LOWER";
    private static final String LABEL_UPPER = "UPPER";
    private static final String LABEL_INIT_UPPER = "INIT_UPPER";
    private static final String LABEL_OTHER = "O";

    private IteratorFromReaderFactory<List<CoreLabel>> factory;
    // Kept as a boxed Boolean so the field type (and thus the serialized form) is unchanged.
    private Boolean verboseForTrueCasing = false;
    private static final Pattern alphabet = Pattern.compile("[A-Za-z]+");
    /**
     * Optional set of lower-cased known words consulted by {@link #known(String)}.
     * It is {@code null} until some caller assigns it.
     */
    public static Set knownWords = null;

    /**
     * Reads the file named by {@code args[0]}, parses it with this reader and
     * prints every resulting token to standard output, followed by a separator
     * line after each token list.
     *
     * @param args command-line arguments; {@code args[0]} is the input file path
     * @throws IOException if the file cannot be opened or closed
     * @throws IllegalArgumentException if no input file is given
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("Usage: TrueCasingForNISTDocumentReaderAndWriter <input-file>");
        }
        TrueCasingForNISTDocumentReaderAndWriter raw = new TrueCasingForNISTDocumentReaderAndWriter();
        // No flags are available on the command line; init falls back to defaults.
        raw.init(null);
        // try-with-resources so the file handle is released even if parsing fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(args[0]))) {
            Iterator<List<CoreLabel>> it = raw.getIterator(reader);
            while (it.hasNext()) {
                List<CoreLabel> l = it.next();
                for (CoreLabel cl : l) {
                    System.out.println(cl);
                }
                System.out.println("========================================");
            }
        }
    }

    /**
     * Configures this reader/writer and builds the line-based iterator factory.
     *
     * @param flags classifier flags supplying {@code verboseForTrueCasing};
     *              may be {@code null}, in which case verbose output is disabled
     */
    @Override
    public void init(SeqClassifierFlags flags) {
        if (flags != null) {
            this.verboseForTrueCasing = flags.verboseForTrueCasing;
        } else {
            // main() passes null; previously this dereference threw a NullPointerException.
            this.verboseForTrueCasing = false;
        }
        this.factory = LineIterator.getFactory(new LineToTrueCasesParser());
    }

    /**
     * Tells whether the lower-cased form of {@code s} is in {@link #knownWords}.
     *
     * @param s the word to look up
     * @return {@code true} if {@link #knownWords} has been set and contains
     *         {@code s.toLowerCase()}; {@code false} otherwise
     */
    public static boolean known(String s) {
        Set words = knownWords;
        return words != null && words.contains(s.toLowerCase());
    }

    /**
     * Returns an iterator over the token lists parsed from {@code r}.
     * {@link #init(SeqClassifierFlags)} must have been called first, since it
     * creates the underlying factory.
     *
     * @param r the character source to read from
     * @return an iterator of parsed token lists
     */
    @Override
    public Iterator<List<CoreLabel>> getIterator(Reader r) {
        return this.factory.getIterator(r);
    }

    /**
     * Writes {@code doc} to {@code out2} as one space-separated line, with each
     * word re-cased according to its answer annotation, and reports the number
     * of tokens whose answer differs from the gold answer on standard error.
     *
     * <p>A token whose answer is none of the recognised labels (or is
     * {@code UPPER} while {@link #THREE_CLASSES} is set) contributes an empty
     * string, exactly as before. Tokens labelled {@code O} that consist only
     * of ASCII letters get a {@code /MIX} suffix.
     *
     * @param doc  the labelled tokens to print
     * @param out2 the destination writer
     */
    @Override
    public void printAnswers(List<CoreLabel> doc, PrintWriter out2) {
        List<String> sentence = new ArrayList<String>(doc.size());
        int wrong = 0;
        for (CoreLabel wi : doc) {
            // Read each annotation once instead of once per comparison.
            String answer = (String)wi.get(CoreAnnotations.AnswerAnnotation.class);
            String gold = (String)wi.get(CoreAnnotations.GoldAnswerAnnotation.class);
            boolean matchesGold = answer == null ? gold == null : answer.equals(gold);
            if (!matchesGold) {
                ++wrong;
            }
            StringBuilder sb = new StringBuilder();
            // Constant-first equals keeps every comparison safe for a missing answer.
            if (!THREE_CLASSES && LABEL_UPPER.equals(answer)) {
                sb.append(wi.word().toUpperCase());
            } else if (LABEL_LOWER.equals(answer)) {
                sb.append(wi.word().toLowerCase());
            } else if (LABEL_INIT_UPPER.equals(answer)) {
                String word = wi.word();
                // Splitting on single spaces can yield empty tokens; substring(0, 1) would throw on them.
                if (!word.isEmpty()) {
                    sb.append(word.substring(0, 1).toUpperCase()).append(word.substring(1));
                }
            } else if (LABEL_OTHER.equals(answer)) {
                String word = wi.word();
                sb.append(word);
                if (alphabet.matcher(word).matches()) {
                    sb.append("/MIX");
                }
            }
            if (Boolean.TRUE.equals(this.verboseForTrueCasing)) {
                sb.append("/GOLD-").append(gold).append("/GUESS-").append(answer);
            }
            sentence.add(sb.toString());
        }
        out2.print(StringUtils.join(sentence, " "));
        System.err.printf("> wrong = %d ; total = %d%n", wrong, doc.size());
        out2.println();
    }

    /**
     * Turns one line of text into a list of {@link CoreLabel}s, one per
     * space-separated token, each labelled with its observed casing class.
     */
    public static class LineToTrueCasesParser
    implements Function<String, List<CoreLabel>> {
        private static final Pattern allLower = Pattern.compile("[^A-Z]*?[a-z]+[^A-Z]*?");
        private static final Pattern allUpper = Pattern.compile("[^a-z]*?[A-Z]+[^a-z]*?");
        private static final Pattern startUpper = Pattern.compile("[A-Z].*");

        /**
         * Splits {@code line} on single spaces and labels every resulting token.
         * Each label stores the lower-cased token as its word, the casing class
         * as both answer and gold answer, and its zero-based index (as a string)
         * as its position. Consecutive spaces produce empty tokens, which are
         * kept and labelled {@code O}.
         *
         * @param line the text to tokenize
         * @return the labelled tokens in input order
         */
        @Override
        public List<CoreLabel> apply(String line) {
            String[] toks = line.split(" ");
            List<CoreLabel> doc = new ArrayList<CoreLabel>(toks.length);
            int pos = 0;
            for (String word : toks) {
                String label = LineToTrueCasesParser.casingClassOf(word);
                CoreLabel wi = new CoreLabel();
                wi.set(CoreAnnotations.AnswerAnnotation.class, label);
                wi.set(CoreAnnotations.GoldAnswerAnnotation.class, label);
                wi.setWord(word.toLowerCase());
                wi.set(CoreAnnotations.PositionAnnotation.class, String.valueOf(pos));
                doc.add(wi);
                ++pos;
            }
            return doc;
        }

        /**
         * Determines the casing class of {@code word}; the checks run in the
         * order LOWER, UPPER (skipped when {@link #THREE_CLASSES} is set),
         * INIT_UPPER, and finally O.
         */
        private static String casingClassOf(String word) {
            if (allLower.matcher(word).matches()) {
                return LABEL_LOWER;
            }
            if (!THREE_CLASSES && allUpper.matcher(word).matches()) {
                return LABEL_UPPER;
            }
            // INIT_UPPER needs at least two characters, with everything after the first unchanged by lower-casing.
            boolean restIsLower = false;
            if (word.length() > 1) {
                String rest = word.substring(1);
                restIsLower = rest.equals(rest.toLowerCase());
            }
            if (restIsLower && startUpper.matcher(word).matches()) {
                return LABEL_INIT_UPPER;
            }
            return LABEL_OTHER;
        }
    }
}

