/*
 * Decompiled with CFR 0.152.
 */
package org.cogroo.analyzer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.Span;
import org.apache.log4j.Logger;
import org.cogroo.analyzer.Analyzer;
import org.cogroo.config.Analyzers;
import org.cogroo.text.Document;
import org.cogroo.text.Sentence;
import org.cogroo.text.Token;
import org.cogroo.tools.postag.GenderUtil;
import org.cogroo.util.EntityUtils;
import org.cogroo.util.TextUtils;

/**
 * {@link Analyzer} that assigns part-of-speech tags to every token of every
 * sentence in a {@link Document}, using a wrapped OpenNLP {@link POSTaggerME}.
 *
 * <p>After tagging, tokens covered by {@code B-}/{@code I-} prefixed tags are
 * handed to {@link EntityUtils#groupTokens} as spans, and tokens joined by a
 * hyphen (with no characters in between) are merged into a single token.
 */
public class POSTagger implements Analyzer {
    private static final Logger LOGGER = Logger.getLogger(POSTagger.class);

    /** Underlying tagger; every use in this class happens while holding its monitor. */
    private final POSTaggerME tagger;

    /**
     * Creates an analyzer backed by the given tagger.
     *
     * @param tagger the OpenNLP tagger used to tag each sentence
     */
    public POSTagger(POSTaggerME tagger) {
        this.tagger = tagger;
    }

    /**
     * Tags every sentence of the document in place.
     *
     * <p>For each sentence this method:
     * <ol>
     *   <li>tags the tokens, passing the additional context built by
     *       {@code TextUtils.additionalContext};</li>
     *   <li>stores the mean of the log probabilities on the sentence;</li>
     *   <li>passes the tags through {@code GenderUtil.removeGender} and stores
     *       tag and probability on each token;</li>
     *   <li>groups tokens according to the spans from
     *       {@link #createSpanList(String[], String[])};</li>
     *   <li>merges hyphen-joined tokens.</li>
     * </ol>
     *
     * @param document the document whose sentences are tagged
     */
    @Override
    public void analyze(Document document) {
        List<Sentence> sentences = document.getSentences();
        for (Sentence sentence : sentences) {
            List<Token> tokens = sentence.getTokens();
            String[][] ac = TextUtils.additionalContext(
                    tokens, Arrays.asList(Analyzers.CONTRACTION_FINDER, Analyzers.NAME_FINDER));
            String[] toks = TextUtils.tokensToString(sentence.getTokens());

            String[] tags;
            double[] probs;
            // tag() and probs() are issued as one unit under the tagger's monitor.
            // NOTE(review): presumably probs() reports the most recent tag() call,
            // so the pair must not interleave with another thread - confirm.
            synchronized (this.tagger) {
                tags = this.tagger.tag(toks, (Object[]) ac);
                probs = this.tagger.probs();
            }

            double finalProb = computeFinalProb(probs);
            sentence.setTokensProb(finalProb);

            if (LOGGER.isDebugEnabled()) {
                StringBuilder sb = new StringBuilder("Probabilidades do tagger:\n");
                for (int i = 0; i < toks.length; ++i) {
                    sb.append("[").append(toks[i]).append("_").append(tags[i])
                            .append(" ").append(probs[i]).append("] ");
                }
                LOGGER.debug(sb.toString());
                LOGGER.debug("Soma dos logs das probabilidades: " + finalProb);
            }

            tags = GenderUtil.removeGender(tags);
            for (int i = 0; i < tags.length; ++i) {
                Token token = tokens.get(i);
                token.setPOSTag(tags[i]);
                token.setPOSTagProb(probs[i]);
            }

            // The spans are computed from the tags just stored on the tokens.
            EntityUtils.groupTokens(
                    sentence.getText(),
                    tokens,
                    createSpanList(toTokensArray(tokens), toTagsArray(tokens)));
            mergeHyphenedWords(sentence);
        }
    }

    /**
     * Computes the arithmetic mean of the natural logarithms of the given
     * probabilities.
     *
     * @param probs per-token probabilities
     * @return the mean log probability, or {@code 0.0} for an empty array
     */
    private static double computeFinalProb(double[] probs) {
        double logSum = 0.0;
        for (double prob : probs) {
            logSum += Math.log(prob);
        }
        return probs.length > 0 ? logSum / probs.length : logSum;
    }

    /** Returns the lexemes of the given tokens, in order. */
    private static String[] toTokensArray(List<Token> tokens) {
        String[] arr = new String[tokens.size()];
        for (int i = 0; i < arr.length; ++i) {
            arr[i] = tokens.get(i).getLexeme();
        }
        return arr;
    }

    /** Returns the POS tags of the given tokens, in order. */
    private static String[] toTagsArray(List<Token> tokens) {
        String[] arr = new String[tokens.size()];
        for (int i = 0; i < arr.length; ++i) {
            arr[i] = tokens.get(i).getPOSTag();
        }
        return arr;
    }

    /**
     * Builds the list of spans described by {@code B-}/{@code I-} prefixed tags.
     *
     * <p>Any tag that starts with neither {@code "B-"} nor {@code "I-"} is
     * treated as outside ({@code "O"}). A {@code B-X} tag always opens a new
     * span of type {@code X}; an {@code I-X} tag continues the open span when
     * its type matches, and otherwise opens a new span itself. Span indexes are
     * positions in {@code tags}, start inclusive and end exclusive.
     *
     * @param toks the tokens; only the length is used, to size the result list
     * @param tags the tag of each token
     * @return the spans found, in order of appearance
     */
    public static List<Span> createSpanList(String[] toks, String[] tags) {
        List<Span> phrases = new ArrayList<Span>(toks.length);
        String startTag = "";
        int startIndex = 0;
        boolean foundPhrase = false;

        for (int ci = 0; ci < tags.length; ++ci) {
            String pred = tags[ci];
            if (!pred.startsWith("B-") && !pred.startsWith("I-")) {
                pred = "O";
            }
            boolean continuesPhrase = pred.equals("I-" + startTag);

            if (pred.startsWith("B-") || (!continuesPhrase && !pred.equals("O"))) {
                // Start of a new span: close the one that was open, if any.
                if (foundPhrase) {
                    phrases.add(new Span(startIndex, ci, startTag));
                }
                startIndex = ci;
                startTag = pred.substring(2);
                foundPhrase = true;
            } else if (!continuesPhrase && foundPhrase) {
                // An "O" tag ends the open span.
                phrases.add(new Span(startIndex, ci, startTag));
                foundPhrase = false;
                startTag = "";
            }
            // Otherwise: middle of the open span, or outside any span.
        }

        if (foundPhrase) {
            // Span still open at the end of the sentence.
            phrases.add(new Span(startIndex, tags.length, startTag));
        }
        return phrases;
    }

    /**
     * Merges tokens joined by a {@code "-"} token that has no characters
     * between it and its neighbours.
     *
     * <p>If the token after the hyphen has a tag starting with {@code "pron-"},
     * only the hyphen is absorbed into that token. Otherwise the two neighbours
     * are merged into one token when {@link #merge(String, String)} yields a
     * tag. The sentence's token list is modified in place.
     *
     * @param sentence the sentence whose tokens are merged
     */
    private void mergeHyphenedWords(Sentence sentence) {
        List<Token> tokens = sentence.getTokens();
        boolean restart = true;
        int start = 1;
        // Each modification shrinks the list, so the scan is restarted from the
        // position right after the token that absorbed the hyphen.
        while (restart) {
            restart = false;
            for (int i = start; i < tokens.size() - 1 && !restart; ++i) {
                if (!"-".equals(tokens.get(i).getLexeme())) {
                    continue;
                }
                Token a = tokens.get(i - 1);
                Token hyphen = tokens.get(i);
                Token b = tokens.get(i + 1);
                if (hasCharacterBetween(a, hyphen) || hasCharacterBetween(hyphen, b)) {
                    continue;
                }

                if (b.getPOSTag().startsWith("pron-")) {
                    // Keep the left token; fold the hyphen into the right token.
                    b.setBoundaries(b.getStart() - 1, b.getEnd());
                    b.setLexeme("-" + b.getLexeme());
                    tokens.remove(i);
                    restart = true;
                    start = i + 1;
                } else {
                    String res = merge(a.getPOSTag(), b.getPOSTag());
                    if (res != null) {
                        String lexeme = a.getLexeme() + "-" + b.getLexeme();
                        b.setLexeme(lexeme);
                        b.setPOSTag(res);
                        b.setBoundaries(a.getStart(), b.getEnd());
                        // Remove the hyphen first so index i - 1 still refers to a.
                        tokens.remove(i);
                        tokens.remove(i - 1);
                        start = i;
                        restart = true;
                    }
                }
            }
        }
    }

    /**
     * Decides the tag of the token that results from merging two
     * hyphen-joined tokens.
     *
     * <p>Rules, evaluated in order:
     * <ul>
     *   <li>noun + noun, noun + adjective, verb + noun,
     *       adjective + adjective: {@code "n"};</li>
     *   <li>anything + {@code "prep"} or {@code "art"}: the left tag;</li>
     *   <li>verb + {@code "adv"}: {@code "n"};</li>
     *   <li>anything + noun: {@code "n"};</li>
     *   <li>two equal tags: that tag.</li>
     * </ul>
     *
     * @param a tag of the token before the hyphen
     * @param b tag of the token after the hyphen
     * @return the tag of the merged token, or {@code null} if the tokens
     *         should not be merged
     */
    private String merge(String a, String b) {
        // Both sides must be nouns here. Using "or" would merge a noun with
        // any tag at all and make the noun + adjective rule below unreachable.
        if (isNoun(a) && isNoun(b)) {
            return "n";
        }
        if (isNoun(a) && isAdjective(b)) {
            return "n";
        }
        if (isVerb(a) && isNoun(b)) {
            return "n";
        }
        if (isAdjective(a) && isAdjective(b)) {
            return "n";
        }
        if ("prep".equals(b) || "art".equals(b)) {
            return a;
        }
        if (isVerb(a) && "adv".equals(b)) {
            return "n";
        }
        if (isNoun(b)) {
            return "n";
        }
        if (a.equals(b)) {
            return a;
        }
        return null;
    }

    /** Returns whether the tag starts with {@code "v-"}. */
    private boolean isVerb(String a) {
        return a.startsWith("v-");
    }

    /** Returns whether the tag is {@code "n"} or {@code "n-adj"}. */
    private boolean isNoun(String b) {
        return "n".equals(b) || "n-adj".equals(b);
    }

    /** Returns whether the tag is {@code "adj"} or {@code "n-adj"}. */
    private boolean isAdjective(String b) {
        return "adj".equals(b) || "n-adj".equals(b);
    }

    /**
     * Returns whether the end offset of {@code a} differs from the start
     * offset of {@code b}, i.e. the two tokens are not directly adjacent.
     */
    private boolean hasCharacterBetween(Token a, Token b) {
        return a.getEnd() != b.getStart();
    }
}

