/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.quoteattribution;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.pipeline.QuoteAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.quoteattribution.ExtractQuotesUtil;
import edu.stanford.nlp.quoteattribution.Person;
import edu.stanford.nlp.quoteattribution.QuoteAttributionUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.XMLUtils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

public class XMLToAnnotation {
    /**
     * Flattens the direct children of {@code text} into a single plain string.
     *
     * <p>For each child node, its text content is taken, every newline that is not
     * immediately followed by another newline is turned into a space, and all
     * underscore characters are dropped. A single space is appended after each
     * child's cleaned text (including the last one).
     *
     * @param text the node whose children supply the text
     * @return the concatenated, cleaned text of all child nodes; empty if there are none
     */
    public static String getJustText(Node text) {
        NodeList children = text.getChildNodes();
        StringBuilder flattened = new StringBuilder();
        int idx = 0;
        while (idx < children.getLength()) {
            String cleaned = children.item(idx).getTextContent()
                    .replaceAll("\n(?!\n)", " ")
                    .replaceAll("_", "");
            flattened.append(cleaned).append(" ");
            idx++;
        }
        return flattened.toString();
    }

    /**
     * Builds the CoreNLP configuration used when pre-processing a document.
     *
     * @return a fresh {@link Properties} holding the annotator list, NER and
     *         sentence-splitting switches, serialized output settings and thread count
     */
    public static Properties getProcessedCoreNLPProperties() {
        String[][] settings = {
            {"annotators", "tokenize, ssplit, pos, lemma, ner, depparse, quote"},
            {"ner.useSUTime", "false"},
            {"ner.applyNumericClassifiers", "false"},
            {"ssplit.newlineIsSentenceBreak", "always"},
            {"outputFormat", "serialized"},
            {"serializer", "edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer"},
            {"threads", "1"},
        };
        Properties config = new Properties();
        for (String[] keyValue : settings) {
            config.setProperty(keyValue[0], keyValue[1]);
        }
        return config;
    }

    /**
     * Runs a CoreNLP pipeline over {@code text} and serializes the result to
     * {@code processedFile}, unless that file already exists.
     *
     * <p>An {@link IOException} raised while writing is reported with
     * {@code printStackTrace()} and otherwise ignored (best-effort caching).
     *
     * @param processedFile destination for the serialized annotation; nothing is done if it exists
     * @param coreNLPProps  configuration passed to the {@code StanfordCoreNLP} constructor
     * @param text          raw text to annotate
     */
    public static void processCoreNLPIfDoesNotExist(File processedFile, Properties coreNLPProps, String text) {
        if (processedFile.exists()) {
            return;
        }
        try {
            StanfordCoreNLP coreNLP = new StanfordCoreNLP(coreNLPProps);
            Annotation processedAnnotation = coreNLP.process(text);
            ProtobufAnnotationSerializer pas = new ProtobufAnnotationSerializer(true);
            // try-with-resources guarantees the stream is flushed and closed even when
            // serialization fails; previously the file handle was never released.
            try (BufferedOutputStream fos = new BufferedOutputStream(new FileOutputStream(processedFile.getAbsolutePath()))) {
                pas.write(processedAnnotation, fos);
            }
        }
        catch (IOException e) {
            // Deliberately best-effort: report the failure and let the caller continue.
            e.printStackTrace();
        }
    }

    /**
     * Returns the annotated form of {@code text}, using {@code baseFileName + ".ser.gz"}
     * as an on-disk cache of the pipeline output.
     *
     * <p>The cache file is created first if it does not exist, then read back, and
     * finally a {@code QuoteAnnotator} built from empty properties is applied to the
     * loaded annotation.
     *
     * @param text         raw document text to annotate when no cache file exists
     * @param baseFileName path prefix of the cache file
     * @param props        pipeline configuration used when the cache has to be built
     * @return the loaded annotation after quote annotation
     * @throws IOException declared for failures while reading the serialized file
     */
    public static Annotation getAnnotatedFile(String text, String baseFileName, Properties props) throws IOException {
        String cachePath = baseFileName + ".ser.gz";
        File cacheFile = new File(cachePath);
        XMLToAnnotation.processCoreNLPIfDoesNotExist(cacheFile, props, text);
        Annotation annotated = ExtractQuotesUtil.readSerializedProtobufFile(cacheFile);
        QuoteAnnotator quoteAnnotator = new QuoteAnnotator(new Properties());
        quoteAnnotator.annotate(annotated);
        return annotated;
    }

    /**
     * Parses a comma-separated list of prefixed ids (for example {@code "m1,m22"})
     * into their numeric parts.
     *
     * <p>Each entry is trimmed, its first character (the prefix) is dropped, and the
     * remainder is parsed as a decimal integer. Blank entries are skipped, so
     * {@code ""}, whitespace-only input and {@code "m1,,m2"} are all tolerated.
     *
     * @param connection the raw attribute value; {@code null} is treated as empty
     * @return a new mutable list of the parsed ids, in input order (possibly empty)
     * @throws NumberFormatException if an entry has no digits after its prefix character
     */
    public static List<Integer> readConnection(String connection) {
        List<Integer> connectionList = new ArrayList<Integer>();
        if (connection == null) {
            return connectionList;
        }
        for (String rawEntry : connection.split(",")) {
            // Tolerate "m1, m2" style spacing and empty slots between commas.
            String entry = rawEntry.trim();
            if (entry.isEmpty()) {
                continue;
            }
            connectionList.add(Integer.parseInt(entry.substring(1)));
        }
        return connectionList;
    }

    /**
     * Finds the index of the token that ends the span of {@code text} which begins
     * at token {@code startIndex}.
     *
     * <p>The span is measured in characters from the begin offset of the start token
     * to the end offset of the candidate token and compared with the length of the
     * trimmed {@code text}:
     * <ul>
     *   <li>equal length: the candidate index is returned;</li>
     *   <li>span already longer than the text: the previous index is returned
     *       (this can be {@code startIndex - 1} when the first token alone is too long);</li>
     *   <li>tokens exhausted: the last token index is returned.</li>
     * </ul>
     *
     * <p>Only the character-offset annotations are consulted; a token without an
     * {@code OriginalTextAnnotation} is therefore acceptable.
     *
     * @param startIndex index of the first token of the span; must be a valid index into {@code tokens}
     * @param tokens     document tokens carrying character begin/end offsets
     * @param text       text of the span; surrounding whitespace is ignored
     * @return the index of the span's final token as described above
     * @throws IndexOutOfBoundsException if {@code startIndex} is not a valid index
     */
    public static int getEndIndex(int startIndex, List<CoreLabel> tokens, String text) {
        int targetLength = text.trim().length();
        // The span always starts at the first token, so its begin offset is fixed.
        int spanBeginChar = tokens.get(startIndex).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        for (int currIndex = startIndex; currIndex < tokens.size(); ++currIndex) {
            int tokenEndChar = tokens.get(currIndex).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            int spanLength = tokenEndChar - spanBeginChar;
            if (spanLength == targetLength) {
                return currIndex;
            }
            if (spanLength > targetLength) {
                // Overshot: the span ended with the previous token.
                return currIndex - 1;
            }
        }
        // Ran out of tokens before covering the text: clamp to the last token.
        return tokens.size() - 1;
    }

    /**
     * Reads the character list from the first {@code <characters>} element of the document.
     *
     * <p>Every child element named {@code character} becomes one {@link Person}, built from
     * its {@code name} attribute (passed through exactly as written in the XML), its
     * optional {@code gender} attribute (empty string when absent) and its {@code aliases}
     * attribute split on {@code ';'}. Other child nodes are ignored.
     *
     * @param doc parsed XML document containing a {@code characters} element
     * @return the persons in document order
     */
    public static List<Person> readXMLCharacterList(Document doc) {
        ArrayList<Person> personList = new ArrayList<Person>();
        NodeList characters = doc.getDocumentElement().getElementsByTagName("characters").item(0).getChildNodes();
        for (int i = 0; i < characters.getLength(); ++i) {
            Node child = characters.item(i);
            if (!child.getNodeName().equals("character")) {
                continue;
            }
            // The name is used verbatim; no case normalisation is applied to it.
            String name = child.getAttributes().getNamedItem("name").getNodeValue();
            String aliasValue = child.getAttributes().getNamedItem("aliases").getNodeValue();
            List<String> aliases = Arrays.asList(aliasValue.split(";"));
            Node genderAttr = child.getAttributes().getNamedItem("gender");
            String gender = genderAttr == null ? "" : genderAttr.getNodeValue();
            personList.add(new Person(name, gender, aliases));
        }
        return personList;
    }

    /**
     * Writes the character list to {@code fileName}, one person per line.
     *
     * <p>Each line has the form {@code name;gender;alias1;alias2;...} followed by a
     * newline, where gender is {@code M}, {@code F}, or empty for any other value.
     *
     * @param fileName   path handed to {@code IOUtils.getPrintWriter}
     * @param personList persons to write, in output order
     * @throws IOException declared for failures while opening the writer
     */
    public static void writeCharacterList(String fileName, List<Person> personList) throws IOException {
        StringBuilder lines = new StringBuilder();
        for (Person person : personList) {
            String genderCode;
            switch (person.gender) {
                case MALE:
                    genderCode = "M";
                    break;
                case FEMALE:
                    genderCode = "F";
                    break;
                default:
                    genderCode = "";
                    break;
            }
            lines.append(person.name + ";" + genderCode);
            for (String alias : person.aliases) {
                lines.append(";").append(alias);
            }
            lines.append("\n");
        }
        // The full content is assembled first, so the file is only opened once it is ready.
        PrintWriter writer = IOUtils.getPrintWriter(fileName);
        writer.print(lines);
        writer.close();
    }

    /**
     * Loads a quote-annotated XML file and pairs its gold annotations with the
     * CoreNLP annotation of the same text.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Parse the XML, flatten the first {@code <text>} element with
     *       {@link #getJustText(Node)} and obtain the annotated document through
     *       {@code getAnnotatedFile} (which caches next to {@code fileName}).</li>
     *   <li>Walk every {@code <chapter>} child of {@code <text>}, keeping a running token
     *       cursor: {@code <mention>} elements are recorded by numeric id with their token
     *       span, {@code <quote>} elements contribute a (mention id, speaker) pair, and any
     *       other node only advances the cursor.</li>
     *   <li>Turn each (mention id, speaker) pair into a {@link GoldQuoteInfo}; pairs whose
     *       mention id is unknown get {@code -1} indices and a {@code null} mention text.</li>
     * </ol>
     *
     * @param fileName path of the XML file; also the base name of the cache file
     * @return the gold quote list, the character list and the annotated document
     * @throws RuntimeException if the number of gold entries differs from the number of
     *         quotes in the annotation
     * @throws Exception declared for failures in XML reading or annotation loading
     */
    public static Data readXMLFormat(String fileName) throws Exception {
        Document doc = XMLUtils.readDocumentFromFile(fileName);
        Node text = doc.getDocumentElement().getElementsByTagName("text").item(0);
        String docText = XMLToAnnotation.getJustText(text);
        Annotation document = XMLToAnnotation.getAnnotatedFile(docText, fileName, XMLToAnnotation.getProcessedCoreNLPProperties());
        // Raw lists: elements are cast at each use site below.
        List quotes = (List)document.get(CoreAnnotations.QuotationsAnnotation.class);
        List tokens = (List)document.get(CoreAnnotations.TokensAnnotation.class);
        ArrayList<GoldQuoteInfo> goldList = new ArrayList<GoldQuoteInfo>();
        HashMap<Integer, Mention> idToMention = new HashMap<Integer, Mention>();
        List<Person> personList = XMLToAnnotation.readXMLCharacterList(doc);
        // NOTE(review): personMap is not read again in this method.
        Map<String, List<Person>> personMap = QuoteAttributionUtils.readPersonMap(personList);
        // One entry per annotated quote, filled in document order.
        ArrayList<Pair<Integer, String>> mentionIdToSpeakerList = new ArrayList<Pair<Integer, String>>();
        // Cursor into the annotated quotes list.
        int quoteIndex = 0;
        NodeList textElems = text.getChildNodes();
        // Cursor into the token list; advanced past every XML node that is consumed.
        int tokenIndex = 0;
        for (int i = 0; i < textElems.getLength(); ++i) {
            Node node = textElems.item(i);
            // Only <chapter> children of <text> are walked.
            if (!node.getNodeName().equals("chapter")) continue;
            NodeList chapElems = node.getChildNodes();
            for (int j = 0; j < chapElems.getLength(); ++j) {
                Node child = chapElems.item(j);
                if (child.getNodeName().equals("quote")) {
                    int id;
                    NodeList quoteChildren = child.getChildNodes();
                    // First pass over the quote's children: record nested mentions and
                    // move the token cursor across the quote's content.
                    for (int k = 0; k < quoteChildren.getLength(); ++k) {
                        Node quoteChild = quoteChildren.item(k);
                        if (quoteChild.getNodeName().equals("mention")) {
                            String mentionText = quoteChild.getTextContent();
                            // The id attribute's first character is dropped; the rest is the numeric id.
                            id = Integer.parseInt(quoteChild.getAttributes().getNamedItem("id").getTextContent().substring(1));
                            // Parsed but not read afterwards in this branch.
                            List<Integer> connections = XMLToAnnotation.readConnection(quoteChild.getAttributes().getNamedItem("connection").getNodeValue());
                            int endIndex = XMLToAnnotation.getEndIndex(tokenIndex, tokens, mentionText);
                            idToMention.put(id, new Mention(mentionText, tokenIndex, endIndex));
                            tokenIndex = endIndex + 1;
                            continue;
                        }
                        // Non-mention child: apply the same cleanup as getJustText, then skip its tokens.
                        String quoteText = quoteChild.getTextContent();
                        quoteText = quoteText.replaceAll("\n(?!\n)", " ");
                        quoteText = quoteText.replaceAll("_", "");
                        tokenIndex = XMLToAnnotation.getEndIndex(tokenIndex, tokens, quoteText) + 1;
                    }
                    // Text of the whole quote element, cleaned the same way.
                    String quoteText = child.getTextContent();
                    quoteText = quoteText.replaceAll("\n(?!\n)", " ");
                    quoteText = quoteText.replaceAll("_", "");
                    // NOTE(review): quotationOffset is assigned here but not read afterwards in this method.
                    int quotationOffset = 1;
                    if (quoteText.startsWith("``")) {
                        quotationOffset = 2;
                    }
                    List<Integer> connections = XMLToAnnotation.readConnection(child.getAttributes().getNamedItem("connection").getTextContent());
                    id = Integer.parseInt(child.getAttributes().getNamedItem("id").getTextContent().substring(1));
                    // The first connection, if any, is taken as the quote's mention id.
                    Integer mention_id = null;
                    if (connections.size() > 0) {
                        mention_id = connections.get(0);
                    } else {
                        System.out.println("quote w/ no mention. ID: " + id);
                    }
                    mentionIdToSpeakerList.add(new Pair<Integer, String>(mention_id, child.getAttributes().getNamedItem("speaker").getTextContent()));
                    // Align with the annotated quotes: advance until the gold quote text ends with
                    // the annotated quote's text, adding one (mention id, speaker) pair per annotated
                    // quote passed. quotes.get(...) is not bounds-checked here.
                    String annotatedQuoteText = (String)((CoreMap)quotes.get(quoteIndex)).get(CoreAnnotations.TextAnnotation.class);
                    while (!quoteText.endsWith(annotatedQuoteText)) {
                        annotatedQuoteText = (String)((CoreMap)quotes.get(++quoteIndex)).get(CoreAnnotations.TextAnnotation.class);
                        mentionIdToSpeakerList.add(new Pair<Integer, String>(mention_id, child.getAttributes().getNamedItem("speaker").getTextContent()));
                    }
                    ++quoteIndex;
                    continue;
                }
                if (child.getNodeName().equals("mention")) {
                    // Mention outside a quote: record its token span under its numeric id.
                    String mentionText = child.getTextContent();
                    int id = Integer.parseInt(child.getAttributes().getNamedItem("id").getTextContent().substring(1));
                    // Parsed but not read afterwards in this branch.
                    List<Integer> connections = XMLToAnnotation.readConnection(child.getAttributes().getNamedItem("connection").getNodeValue());
                    int endIndex = XMLToAnnotation.getEndIndex(tokenIndex, tokens, mentionText);
                    idToMention.put(id, new Mention(mentionText, tokenIndex, endIndex));
                    tokenIndex = endIndex + 1;
                    continue;
                }
                // Any other node: clean its text and skip its tokens.
                String nodeText = child.getTextContent();
                nodeText = nodeText.replaceAll("\n(?!\n)", " ");
                nodeText = nodeText.replaceAll("_", "");
                // Nothing left to consume once the cursor is past the last token.
                if (tokenIndex >= tokens.size()) continue;
                tokenIndex = XMLToAnnotation.getEndIndex(tokenIndex, tokens, nodeText) + 1;
            }
        }
        // Resolve each recorded mention id to its span; unknown (or null) ids yield a placeholder entry.
        for (Pair pair : mentionIdToSpeakerList) {
            Mention mention = (Mention)idToMention.get(pair.first);
            if (mention == null) {
                goldList.add(new GoldQuoteInfo(-1, -1, (String)pair.second, null));
                continue;
            }
            goldList.add(new GoldQuoteInfo(mention.begin, mention.end, (String)pair.second, mention.text));
        }
        // The gold list must have exactly one entry per annotated quote.
        if (((List)document.get(CoreAnnotations.QuotationsAnnotation.class)).size() != goldList.size()) {
            throw new RuntimeException("Quotes size and gold size don't match!");
        }
        return new Data(goldList, personList, document);
    }

    /**
     * A mention found in the XML, with the inclusive token span it was aligned to.
     */
    protected static class Mention {
        /** Text content of the mention element. */
        String text;
        /** Index of the first token of the mention. */
        int begin;
        /** Index of the last token of the mention. */
        int end;

        /**
         * @param text  text content of the mention
         * @param begin index of the first token
         * @param end   index of the last token
         */
        public Mention(String text, int begin, int end) {
            this.begin = begin;
            this.end = end;
            this.text = text;
        }
    }

    /**
     * Bundle returned by {@code readXMLFormat}: gold quote annotations, the
     * character list, and the annotated document they refer to.
     */
    public static class Data {
        /** Gold quote entries. */
        public List<GoldQuoteInfo> goldList;
        /** Characters read from the XML. */
        public List<Person> personList;
        /** The annotated document. */
        public Annotation doc;

        /**
         * @param goldList   gold quote entries
         * @param personList characters read from the XML
         * @param doc        the annotated document
         */
        public Data(List<GoldQuoteInfo> goldList, List<Person> personList, Annotation doc) {
            this.doc = doc;
            this.personList = personList;
            this.goldList = goldList;
        }
    }

    /**
     * Gold annotation for one quote: the speaker and the token span and text of
     * the mention the quote is linked to.
     */
    public static class GoldQuoteInfo {
        /** Index of the mention's first token. */
        public int mentionStartTokenIndex;
        /** Index of the mention's last token. */
        public int mentionEndTokenIndex;
        /** Speaker name attached to the quote. */
        public String speaker;
        /** Text of the linked mention. */
        public String mention;

        /**
         * @param mentionStartTokenIndex index of the mention's first token
         * @param mentionEndTokenIndex   index of the mention's last token
         * @param speaker                speaker name attached to the quote
         * @param mention                text of the linked mention
         */
        public GoldQuoteInfo(int mentionStartTokenIndex, int mentionEndTokenIndex, String speaker, String mention) {
            this.speaker = speaker;
            this.mention = mention;
            this.mentionStartTokenIndex = mentionStartTokenIndex;
            this.mentionEndTokenIndex = mentionEndTokenIndex;
        }
    }
}

