/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.formats.ad;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import org.cogroo.formats.ad.ADChunk2SampleStream;
import org.cogroo.tools.featurizer.WordTag;
import org.cogroo.tools.shallowparser.ShallowParserSequenceValidator;

public class ADChunkBasedShallowParserSampleStream
extends ADChunk2SampleStream {
    /** Function tags accepted as labels by the getChunkTag overrides; assigned once by the constructors as an unmodifiable set. */
    private final Set<String> functTagSet;
    /** Tags used by the constructors when the caller passes a null or blank tag list. */
    private String[] defaultFunctTags = new String[]{"SUBJ", "ACC", "DAT", "PIV", "ADVS", "ADVO", "SC", "OC", "P", "NPHR", "SA", "ADVL", "APP"};
    /** Pass selector toggled by read(): while true, the getChunkTag and isIncludePunctuations overrides defer to the superclass. */
    private boolean readChunk;
    /** Used by read() to check every outcome of a sample; a rejected outcome is only printed. */
    private ShallowParserSequenceValidator sv = new ShallowParserSequenceValidator();
    /** Labels collected by the first processRoot pass of the current read() call; consulted by getChunkTag(Node, String, int) during the second pass. */
    private ArrayList<String> chunks;
    /** Not referenced by any other code visible in this class. */
    private SubjectTypes subjectTypes = new SubjectTypes();

    /**
     * Creates a sample stream on top of an already opened line stream.
     *
     * @param lineStream the corpus lines, handed unchanged to the superclass
     * @param commaSeparatedFunctTags comma separated function tags to accept; when {@code null}
     *     or blank the default tag list is used
     * @param isIncludePOSTags not read by this constructor
     * @param useCGTag not read by this constructor
     * @param expandME not read by this constructor
     */
    public ADChunkBasedShallowParserSampleStream(ObjectStream<String> lineStream, String commaSeparatedFunctTags, boolean isIncludePOSTags, boolean useCGTag, boolean expandME) {
        super(lineStream);
        this.functTagSet = ADChunkBasedShallowParserSampleStream.parseFunctTags(commaSeparatedFunctTags, this.defaultFunctTags);
    }

    /**
     * Creates a sample stream that opens its input through the given factory.
     *
     * @param in factory for the corpus input, handed unchanged to the superclass
     * @param charsetName name of the corpus encoding, handed unchanged to the superclass
     * @param commaSeparatedFunctTags comma separated function tags to accept; when {@code null}
     *     or blank the default tag list is used
     * @param isIncludePOSTags not read by this constructor
     * @param useCGTag not read by this constructor
     * @param expandME not read by this constructor
     * @throws IOException if the superclass constructor fails
     */
    public ADChunkBasedShallowParserSampleStream(InputStreamFactory in, String charsetName, String commaSeparatedFunctTags, boolean isIncludePOSTags, boolean useCGTag, boolean expandME) throws IOException {
        super(in, charsetName);
        this.functTagSet = ADChunkBasedShallowParserSampleStream.parseFunctTags(commaSeparatedFunctTags, this.defaultFunctTags);
    }

    /**
     * Builds the unmodifiable set of accepted function tags shared by both constructors.
     *
     * <p>Each entry is trimmed so that a list written as {@code "SUBJ, ACC"} matches the tag
     * {@code ACC} rather than the never-occurring {@code " ACC"}; entries that are empty after
     * trimming are dropped.
     *
     * @param commaSeparatedFunctTags the caller supplied list, may be {@code null}
     * @param defaults tags to use when the list is {@code null} or blank
     * @return an unmodifiable set holding the selected tags
     */
    private static Set<String> parseFunctTags(String commaSeparatedFunctTags, String[] defaults) {
        HashSet<String> selected = new HashSet<String>();
        if (commaSeparatedFunctTags == null || commaSeparatedFunctTags.trim().isEmpty()) {
            selected.addAll(Arrays.asList(defaults));
        } else {
            for (String rawTag : commaSeparatedFunctTags.split(",")) {
                String cleaned = rawTag.trim();
                if (!cleaned.isEmpty()) {
                    selected.add(cleaned);
                }
            }
        }
        return Collections.unmodifiableSet(selected);
    }

    /**
     * Reads sentences from the underlying stream until one produces at least one token and
     * returns it as a sample.
     *
     * <p>Every sentence tree is walked twice. The first walk runs with {@code readChunk} set, so
     * the overrides in this class defer to the superclass, and its outcome column is stored in
     * {@code chunks}. The second walk runs with {@code readChunk} cleared and yields the outcome
     * column of the returned sample. Each tag of the sample is the tag from the second walk
     * followed by {@code "|"} and the first-walk label at the same position.
     *
     * <p>Every outcome is checked with the sequence validator; a rejected outcome is printed to
     * standard output and the sample is returned regardless.
     *
     * @return the next non-empty sample, or {@code null} once the sentence stream is exhausted
     * @throws IOException if reading the underlying sentence stream fails
     */
    @Override
    public ChunkSample read() throws IOException {
        ADSentenceStream.Sentence paragraph;
        while ((paragraph = (ADSentenceStream.Sentence)this.adSentenceStream.read()) != null) {
            // First walk: the superclass labelling fills this.chunks.
            this.readChunk = true;
            ADSentenceStream.SentenceParser.Node tree = paragraph.getRoot();
            ArrayList<String> tokens = new ArrayList<String>();
            ArrayList<String> posTags = new ArrayList<String>();
            this.chunks = new ArrayList<String>();
            this.processRoot(tree, tokens, posTags, this.chunks);

            // Second walk: this class's own labelling, on emptied token and tag lists.
            this.readChunk = false;
            tokens.clear();
            posTags.clear();
            ArrayList<String> outcomes = new ArrayList<String>();
            this.processRoot(tree, tokens, posTags, outcomes);

            // Append the first-walk label to every tag.
            int position = 0;
            while (position < posTags.size()) {
                posTags.set(position, posTags.get(position) + "|" + this.chunks.get(position));
                ++position;
            }

            if (tokens.size() > 0) {
                ChunkSample sample = new ChunkSample(tokens, posTags, outcomes);
                for (int at = 0; at < tokens.size(); ++at) {
                    String[] previous;
                    if (at > 0) {
                        previous = outcomes.subList(0, at).toArray(new String[at]);
                    } else {
                        previous = new String[]{};
                    }
                    boolean accepted = this.sv.validSequence(at, WordTag.create(sample), previous, outcomes.get(at));
                    if (!accepted) {
                        System.out.println("failed, invalid outcome: " + outcomes.get(at));
                    }
                }
                return sample;
            }
        }
        return null;
    }

    /**
     * Chooses the label for a leaf.
     *
     * <p>While {@code readChunk} is set the decision is left to the superclass. Otherwise the
     * leaf's syntactic tag is returned when it belongs to the accepted function tags.
     *
     * @param leaf the leaf being labelled
     * @return the superclass result, the leaf's syntactic tag, or {@code null} when the tag is
     *     not an accepted function tag
     */
    @Override
    protected String getChunkTag(ADSentenceStream.SentenceParser.Leaf leaf) {
        if (!this.readChunk) {
            String functionTag = leaf.getSyntacticTag();
            return this.functTagSet.contains(functionTag) ? functionTag : null;
        }
        return super.getChunkTag(leaf);
    }

    /**
     * Chooses the label for an inner node.
     *
     * <p>While {@code readChunk} is set the decision is left to the superclass. Otherwise the
     * text before the last {@code ':'} of the node's syntactic tag is taken as its function tag.
     * That tag is returned only when all of the following hold:
     * <ul>
     *   <li>it is one of the accepted function tags and differs from {@code parent};</li>
     *   <li>the first-pass label at {@code index} is {@code "O"} or starts with {@code "B-"};</li>
     *   <li>the first-pass label just after the node's last leaf is {@code "O"}, starts with
     *       {@code "B-"}, or does not exist because the node ends the sentence.</li>
     * </ul>
     * In every other case {@code "O"} is returned.
     *
     * <p>A syntactic tag that is {@code null} or has no {@code ':'} is treated as having no
     * function tag and yields {@code "O"}; it previously raised an exception.
     *
     * @param node the node being labelled
     * @param parent the label of the enclosing node, compared against the function tag
     * @param index position of the node's first leaf in the first-pass label list
     * @return the superclass result, the function tag, or {@code "O"}
     */
    @Override
    protected String getChunkTag(ADSentenceStream.SentenceParser.Node node, String parent, int index) {
        if (this.readChunk) {
            return super.getChunkTag(node, parent, index);
        }
        String tag = node.getSyntacticTag();
        // lastIndexOf yields -1 when there is no ':'; substring(0, -1) would throw.
        int separator = tag == null ? -1 : tag.lastIndexOf(':');
        String funcTag = separator < 0 ? "O" : tag.substring(0, separator);
        if (!this.functTagSet.contains(funcTag)) {
            funcTag = "O";
        }
        if (funcTag.equals("O") || funcTag.equals(parent)) {
            return "O";
        }
        // The node must start where a first-pass chunk starts (or outside any chunk).
        String first = this.chunks.get(index);
        if (!(first.equals("O") || first.startsWith("B-"))) {
            return "O";
        }
        int end = index + this.countLeafs(node);
        if (end == this.chunks.size()) {
            // The node reaches the end of the sentence, so no chunk is cut at its right edge.
            return funcTag;
        }
        // The node must also end before a new chunk starts (or before a non-chunk token).
        String following = this.chunks.get(end);
        if (following.equals("O") || following.startsWith("B-")) {
            return funcTag;
        }
        return "O";
    }

    /**
     * Counts the leaves found below a node, descending through nested nodes.
     *
     * @param node the node whose subtree is counted
     * @return the number of leaf elements in the subtree
     */
    private int countLeafs(ADSentenceStream.SentenceParser.Node node) {
        int total = 0;
        for (ADSentenceStream.SentenceParser.TreeElement child : node.getElements()) {
            total += child.isLeaf() ? 1 : this.countLeafs((ADSentenceStream.SentenceParser.Node)child);
        }
        return total;
    }

    /**
     * Always answers {@code "O"}; the argument is not inspected.
     *
     * @param functionalTag ignored
     * @return the constant {@code "O"}
     */
    @Override
    protected String getPhraseTagFromPosTag(String functionalTag) {
        final String outside = "O";
        return outside;
    }

    /**
     * Tells whether punctuation is included.
     *
     * @return the superclass answer while {@code readChunk} is set, otherwise {@code true}
     */
    @Override
    protected boolean isIncludePunctuations() {
        // Short-circuit: the superclass is consulted only while readChunk is set.
        return !this.readChunk || super.isIncludePunctuations();
    }

    /**
     * Counts the chunk patterns that appear inside {@code SUBJ} phrases and keeps one example
     * token sequence for each pattern.
     */
    static class SubjectTypes {
        /** Number of occurrences seen for each chunk pattern. */
        private final Map<String, AtomicInteger> subjects = new HashMap<String, AtomicInteger>();
        /** Tokens of the first subject seen with each chunk pattern, as formatted by Arrays.toString. */
        private final Map<String, String> examples = new HashMap<String, String>();

        SubjectTypes() {
        }

        /**
         * Records every {@code SUBJ} phrase of the sample.
         *
         * <p>For each such phrase, the text after the first {@code '|'} of every covered tag is
         * taken as a chunk label. The labels are grouped into spans, and the span types joined by
         * single spaces form the pattern that is counted.
         *
         * @param sample the sample whose phrases are inspected
         */
        public void add(ChunkSample sample) {
            for (Span subj : sample.getPhrasesAsSpanList()) {
                if (!"SUBJ".equals(subj.getType())) {
                    continue;
                }
                String[] chunks = this.extractChunk(Arrays.copyOfRange(sample.getTags(), subj.getStart(), subj.getEnd()));
                Span[] chunkSpans = ChunkSample.phrasesAsSpanList(chunks, chunks, chunks);
                StringBuilder pattern = new StringBuilder();
                for (Span span : chunkSpans) {
                    pattern.append(span.getType()).append(" ");
                }
                String value = pattern.toString().trim();
                AtomicInteger count = this.subjects.get(value);
                if (count == null) {
                    // First sighting: start the counter and remember the tokens as the example.
                    this.subjects.put(value, new AtomicInteger(1));
                    this.examples.put(value, Arrays.toString(Arrays.copyOfRange(sample.getSentence(), subj.getStart(), subj.getEnd())));
                } else {
                    count.incrementAndGet();
                }
            }
        }

        /**
         * Prints every recorded pattern with its count and example to standard output, ordered by
         * ascending count and then by pattern text.
         *
         * <p>The tie-break on the pattern text is required: a {@link TreeSet} treats elements
         * that compare as 0 as duplicates, so ordering by count alone would drop all but one of
         * the patterns sharing a count.
         */
        public void print() {
            TreeSet<String> ordered = new TreeSet<String>(new Comparator<String>(){

                @Override
                public int compare(String left, String right) {
                    int byCount = Integer.compare(subjects.get(left).intValue(), subjects.get(right).intValue());
                    return byCount != 0 ? byCount : left.compareTo(right);
                }
            });
            ordered.addAll(this.subjects.keySet());
            for (String pattern : ordered) {
                System.out.println(pattern + " -> " + this.subjects.get(pattern) + "->" + this.examples.get(pattern));
            }
        }

        /**
         * Applies {@link #extractChunk(String)} to every element.
         *
         * @param postags the combined tags
         * @return a new array of the same length holding the extracted parts
         */
        private String[] extractChunk(String[] postags) {
            String[] out = new String[postags.length];
            for (int i = 0; i < postags.length; ++i) {
                out[i] = this.extractChunk(postags[i]);
            }
            return out;
        }

        /**
         * Returns the text after the first {@code '|'} of the tag, or the whole tag when it
         * contains no {@code '|'}.
         *
         * @param postag the combined tag
         * @return the part after the first {@code '|'}
         */
        private String extractChunk(String postag) {
            // indexOf yields -1 when '|' is absent, so substring(0) returns the whole tag.
            return postag.substring(postag.indexOf('|') + 1);
        }
    }
}

