/*
 * Decompiled with CFR 0.152.
 */
package kr.co.shineware.nlp.komoran.corpus.builder;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import kr.co.shineware.nlp.komoran.corpus.model.Dictionary;
import kr.co.shineware.nlp.komoran.corpus.model.Grammar;
import kr.co.shineware.nlp.komoran.corpus.parser.CorpusParser;
import kr.co.shineware.nlp.komoran.corpus.parser.IrregularParser;
import kr.co.shineware.nlp.komoran.corpus.parser.model.ProblemAnswerPair;
import kr.co.shineware.nlp.komoran.exception.FileFormatException;
import kr.co.shineware.nlp.komoran.interfaces.UnitParser;
import kr.co.shineware.nlp.komoran.parser.KoreanUnitParser;
import kr.co.shineware.util.common.file.FileUtil;
import kr.co.shineware.util.common.model.Pair;
import kr.co.shineware.util.common.string.StringUtil;

/**
 * Builds the word dictionary, the irregular dictionary and the POS transition
 * grammar from tagged corpus files, and saves/loads them to/from a directory.
 *
 * <p>Instances hold mutable dictionaries and are not synchronized.
 */
public class CorpusBuilder {
    /** File name of the word dictionary inside the model directory. */
    private static final String WORD_DIC_FILENAME = "dic.word";
    /** File name of the irregular dictionary inside the model directory. */
    private static final String IRREGULAR_DIC_FILENAME = "dic.irregular";
    /** File name of the grammar inside the model directory. */
    private static final String GRAMMAR_FILENAME = "grammar.in";
    /** Lines whose 1-based number is below this value are echoed to stdout while building. */
    private static final int ECHO_LINE_LIMIT = 10;

    private final UnitParser unitParser = new KoreanUnitParser();
    private final CorpusParser corpusParser = new CorpusParser();
    private final IrregularParser irrParser = new IrregularParser();
    private final Dictionary wordDic = new Dictionary();
    private final Dictionary irrDic = new Dictionary();
    private final Grammar grammar = new Grammar();
    /** Keys of the form {@code problem + TAB + answer-without-last-tag} that must not become irregular rules. */
    private Set<String> irrExclusiveSet = new HashSet<>();

    /**
     * Saves the word dictionary, irregular dictionary and grammar into the given directory,
     * creating the directory (and missing parents) if necessary.
     *
     * <p>If {@code savePathName} exists but is not a directory, an error is printed to
     * stderr and the JVM is terminated with exit status 1.
     *
     * @param savePathName directory to write the model files into
     */
    public void save(String savePathName) {
        File savePath = new File(savePathName);
        if (savePath.exists() && !savePath.isDirectory()) {
            System.err.println("CorpusBuilder.save error!");
            System.err.println("savePathName is exists, but it's not a directory.");
            System.err.println("please check path name to save");
            System.exit(1);
        }
        savePath.mkdirs();
        this.wordDic.save(savePathName + File.separator + WORD_DIC_FILENAME);
        this.irrDic.save(savePathName + File.separator + IRREGULAR_DIC_FILENAME);
        this.grammar.save(savePathName + File.separator + GRAMMAR_FILENAME);
    }

    /**
     * Loads the word dictionary, irregular dictionary and grammar from the given directory.
     *
     * @param loadPath directory containing the files written by {@link #save(String)}
     */
    @Deprecated
    public void load(String loadPath) {
        this.wordDic.load(loadPath + File.separator + WORD_DIC_FILENAME);
        this.irrDic.load(loadPath + File.separator + IRREGULAR_DIC_FILENAME);
        this.grammar.load(loadPath + File.separator + GRAMMAR_FILENAME);
    }

    /**
     * Builds from every file returned by {@code FileUtil.getFileNames(corporaPath)}.
     *
     * @param corporaPath path handed to {@code FileUtil.getFileNames}
     */
    public void buildPath(String corporaPath) {
        this.buildPath(corporaPath, null);
    }

    /**
     * Builds from the files returned by {@code FileUtil.getFileNames(corporaPath)},
     * optionally restricted to names ending with {@code suffix}. Each processed file
     * name is printed to stdout.
     *
     * @param corporaPath path handed to {@code FileUtil.getFileNames}
     * @param suffix      required file-name ending, or {@code null} to accept every file
     */
    public void buildPath(String corporaPath, String suffix) {
        List<String> filenames = FileUtil.getFileNames(corporaPath);
        for (String filename : filenames) {
            if (suffix == null || filename.endsWith(suffix)) {
                System.out.println(filename);
                this.build(filename);
            }
        }
    }

    /**
     * Reads a UTF-8 corpus file line by line and feeds every non-empty line into the
     * word dictionary, the irregular dictionary and the grammar.
     *
     * <p>The first few normalized lines are echoed to stdout. A line that the corpus
     * parser rejects with {@link FileFormatException} is reported on stderr and
     * terminates the JVM with exit status 1. Any other exception is printed and
     * processing of this file stops; the reader is closed in all cases.
     *
     * @param filename path of the corpus file
     */
    public void build(String filename) {
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            String line;
            int lineCount = 0;
            while ((line = br.readLine()) != null) {
                line = this.refineFormat(line);
                if (++lineCount < ECHO_LINE_LIMIT) {
                    System.out.println(line);
                }
                if (line.length() == 0) {
                    continue;
                }
                ProblemAnswerPair paPair;
                try {
                    paPair = this.corpusParser.parse(line);
                }
                catch (FileFormatException e) {
                    System.err.println(lineCount + " : " + line);
                    e.printStackTrace();
                    System.exit(1);
                    // Only reached if the exit is prevented; never continue with an unparsed line.
                    return;
                }
                this.appendWordDictionary(paPair.getAnswerList());
                this.appendIrregularDictionary(paPair);
                this.appendGrammar(paPair.getAnswerList());
            }
        }
        catch (Exception e) {
            // Best-effort: report and give up on this file, as callers expect no exception.
            e.printStackTrace();
        }
    }

    /**
     * Tells whether the surface form differs from the concatenated answer morphemes
     * once both are decomposed by the unit parser and filtered through
     * {@code StringUtil.getKorean}.
     *
     * @param problem    surface form
     * @param answerList (morpheme, tag) pairs of the analysis
     * @return {@code true} if the two filtered unit strings are not equal
     */
    private boolean isIrregular(String problem, List<Pair<String, String>> answerList) {
        StringBuilder answer = new StringBuilder();
        for (Pair<String, String> pair : answerList) {
            answer.append(pair.getFirst());
        }
        String problemUnits = this.unitParser.parse(problem);
        String answerUnits = this.unitParser.parse(answer.toString());
        return !StringUtil.getKorean(problemUnits).equals(StringUtil.getKorean(answerUnits));
    }

    /**
     * Extracts irregular rules from an irregular problem/answer pair and appends the
     * ones that pass the filters to the irregular dictionary.
     *
     * <p>A rule is skipped when its answer side is blank, when it is listed in the
     * exclusive set, when its recombined problem side still contains a Hangul
     * compatibility jamo, or when it matches the hard-coded problem/answer endings below.
     *
     * @param paPair parsed corpus line
     */
    private void appendIrregularDictionary(ProblemAnswerPair paPair) {
        if (!this.isIrregular(paPair.getProblem(), paPair.getAnswerList())) {
            return;
        }
        List<Pair<String, String>> irrRuleList = this.irrParser.parse(
                this.convertJaso(paPair.getProblem()), this.convertJaso(paPair.getAnswerList()));
        for (Pair<String, String> pair : irrRuleList) {
            String ruleProblem = pair.getFirst();
            String ruleAnswer = pair.getSecond();
            if (ruleAnswer.trim().length() == 0) {
                continue;
            }
            // NOTE(review): substring throws if ruleAnswer contains no '/'; presumably the
            // irregular parser always emits "form/TAG" - confirm.
            String exclusiveKey = ruleProblem + "\t" + ruleAnswer.substring(0, ruleAnswer.lastIndexOf("/"));
            if (this.irrExclusiveSet.contains(exclusiveKey)) {
                continue;
            }
            String combinedProblem = this.unitParser.combine(ruleProblem);
            if (this.containsCompatibilityJamo(combinedProblem)) {
                continue;
            }
            if (ruleProblem.endsWith("\u3147\u3161") && ruleAnswer.endsWith("\u3147\u3161\u3145\u3163/EP")) {
                continue;
            }
            this.irrDic.append(combinedProblem, this.unitParser.combine(ruleAnswer));
        }
    }

    /** Returns {@code true} if any character of {@code text} is in the Hangul compatibility jamo block. */
    private boolean containsCompatibilityJamo(String text) {
        for (int i = 0; i < text.length(); ++i) {
            if (StringUtil.getUnicodeBlock(text.charAt(i)) == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a new list in which the first element of every pair has been run
     * through the unit parser; the second element is kept as is.
     */
    private List<Pair<String, String>> convertJaso(List<Pair<String, String>> answerList) {
        List<Pair<String, String>> resultList = new ArrayList<>();
        for (Pair<String, String> pair : answerList) {
            resultList.add(new Pair<String, String>(this.unitParser.parse(pair.getFirst()), pair.getSecond()));
        }
        return resultList;
    }

    /** Runs the given string through the unit parser. */
    private String convertJaso(String problem) {
        return this.unitParser.parse(problem);
    }

    /**
     * Appends the tag transitions of one analysis to the grammar, bracketed by the
     * {@code "BOE"} start marker and the {@code "EOE"} end marker.
     */
    private void appendGrammar(List<Pair<String, String>> answerList) {
        String prevPos = "BOE";
        for (Pair<String, String> wordPosPair : answerList) {
            String pos = wordPosPair.getSecond();
            this.grammar.append(prevPos, pos);
            prevPos = pos;
        }
        this.grammar.append(prevPos, "EOE");
    }

    /**
     * Appends the (morpheme, tag) pairs of one analysis to the word dictionary.
     *
     * <p>Skipped are pairs whose trimmed morpheme is a single Hangul compatibility jamo
     * with a tag containing {@code "NN"}, and pairs tagged exactly {@code "SH"},
     * {@code "SN"} or {@code "SL"}.
     */
    private void appendWordDictionary(List<Pair<String, String>> answerList) {
        for (Pair<String, String> pair : answerList) {
            String word = pair.getFirst();
            String pos = pair.getSecond();
            String trimmed = word.trim();
            boolean singleJamoNoun = trimmed.length() == 1
                    && StringUtil.getUnicodeBlock(trimmed.charAt(0)) == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                    && pos.contains("NN");
            if (singleJamoNoun || pos.equals("SH") || pos.equals("SN") || pos.equals("SL")) {
                continue;
            }
            this.wordDic.append(word, pos);
        }
    }

    /** Collapses runs of spaces into a single space and trims the line. */
    private String refineFormat(String line) {
        return line.replaceAll("[ ]+", " ").trim();
    }

    /**
     * Replaces the set of excluded irregular rules with the contents of a UTF-8 file.
     *
     * <p>The file is read as two-line records (blank lines between records are ignored):
     * the first line, trimmed, with its first 6 characters dropped is the key; the
     * following line with its first 9 characters dropped is the value. Each record is
     * stored as {@code key + TAB + value}.
     *
     * <p>Errors are printed and loading stops; entries read before the error are kept.
     * The reader is closed in all cases.
     *
     * @param filename path of the exclusion file
     */
    public void setExclusiveIrrRule(String filename) {
        this.irrExclusiveSet = new HashSet<>();
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                String key = line.substring(6);
                String removeLine = br.readLine();
                if (removeLine == null) {
                    // Truncated record: report it explicitly instead of failing on a null dereference.
                    System.err.println("CorpusBuilder.setExclusiveIrrRule error!");
                    System.err.println("unexpected end of file after line: " + line);
                    break;
                }
                String remove = removeLine.substring(9);
                this.irrExclusiveSet.add(key + "\t" + remove);
            }
        }
        catch (Exception e) {
            // Best-effort: report and keep whatever was loaded so far.
            e.printStackTrace();
        }
    }

    /**
     * Adds the entries of a UTF-8 user dictionary file to the word dictionary with
     * tag {@code "NNP"}.
     *
     * <p>Each line is trimmed; empty lines, lines starting with {@code '#'} and
     * entries for which the word dictionary already returns a tag list are skipped.
     * Errors are printed and loading stops; the reader is closed in all cases.
     *
     * @param filename path of the user dictionary file
     */
    public void appendUserDic(String filename) {
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0 || line.charAt(0) == '#' || this.wordDic.getPosList(line) != null) {
                    continue;
                }
                this.wordDic.append(line, "NNP");
            }
        }
        catch (Exception e) {
            // Best-effort: report and keep whatever was appended so far.
            e.printStackTrace();
        }
    }

    /**
     * Appends every user dictionary under {@code path} whose name ends with
     * {@code "." + suffix}. Each processed file name is printed to stdout.
     *
     * @param path   path handed to {@code FileUtil.getFileNames}
     * @param suffix file extension without the leading dot
     */
    public void appendUserDicPath(String path, String suffix) {
        List<String> filenames = FileUtil.getFileNames(path);
        String extension = "." + suffix;
        for (String filename : filenames) {
            if (!filename.endsWith(extension)) {
                continue;
            }
            System.out.println(filename);
            this.appendUserDic(filename);
        }
    }
}

