/*
 * Decompiled with CFR 0.152.
 */
package pdfextract;

import com.google.common.collect.Lists;
import com.itextpdf.text.pdf.PdfEncryptor;
import com.itextpdf.text.pdf.PdfReader;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import pdfextract.Common;
import pdfextract.Config;
import pdfextract.DetectLanguage;
import pdfextract.HTMLObject;
import pdfextract.PDFToHtml;
import pdfextract.SentenceJoin;

public class PDFExtract {
    // Constant flag; not read anywhere in the visible part of this class.
    private final boolean runnable = true;
    // Path of the log file; remains empty when file logging is disabled (see initial()).
    private String logPath = "";
    // When true, progress and error messages are also written to logPath.
    private boolean writeLogFile = true;
    // Line-level markers used by getHtmlObject() to classify each line of the intermediate markup.
    private Pattern patternPageOpen = Pattern.compile("<page ");
    private Pattern patternStyleOpen = Pattern.compile("<fontspec ");
    private Pattern patternText = Pattern.compile("<text ");
    // Attribute extractors: each captures one attribute value in group 1 and is
    // applied with replaceAll("$1") so the whole line collapses to that value.
    private Pattern patternPageNo = Pattern.compile(".*number=\"([0-9\\.]+)\".*");
    private Pattern patternWidth = Pattern.compile(".*width=\"([0-9\\.]+)\".*");
    private Pattern patternHeight = Pattern.compile(".*height=\"([0-9\\.]+)\".*");
    private Pattern patternLeft = Pattern.compile(".*left=\"([0-9\\.]+)\".*");
    private Pattern patternTop = Pattern.compile(".*top=\"([0-9\\.]+)\".*");
    private Pattern patternFont = Pattern.compile(".*font=\"([0-9\\.]+)\".*");
    private Pattern patternId = Pattern.compile(".*id=\"([0-9]+)\".*");
    private Pattern patternSize = Pattern.compile(".*size=\"([0-9]+)\".*");
    private Pattern patternFamily = Pattern.compile(".*family=\"([^\"]+)\".*");
    private Pattern patternColor = Pattern.compile(".*color=\"(#[a-z0-9]+)\".*");
    // Inline-markup extractors (bold, anchor and whole <text> element content).
    // NOTE(review): presumably consumed by getTextObject(), which is outside this view.
    private Pattern patternBold = Pattern.compile("<b>([^<]*)<\\/b>");
    private Pattern patternLink = Pattern.compile("<a ?[^>]*>([^<]*)<\\/a>");
    private Pattern patternWord = Pattern.compile("<text [^>]*>(.*?)<\\/text>");
    // Project helper used for printing, logging, file access and value conversion.
    private Common common = new Common();
    // Thread pool created per batch run by Extract(batchFile, ...).
    private ExecutorService executor;
    // PDF-to-markup converter; created in initial() with the configured timeout.
    private PDFToHtml pdf = null;
    // Text of the synthetic TextObject that separates paragraphs inside page.texts.
    private String paraMarker = "LSMARKERLS:PARA";
    // NOTE(review): not referenced in the visible part of the class - confirm usage.
    private int maxWordsJoin = 5;
    // NOTE(review): looks like a lock object; no synchronized block is visible here - confirm.
    private Object _objectWorker = new Object();
    // NOTE(review): presumably a cache of SentenceJoin instances keyed by language - confirm.
    private HashMap<String, SentenceJoin> _hashSentenceJoin = new HashMap();
    // Extraction rules loaded in initial(); read through config.get(<language or "common">).
    private Config config = null;

    /**
     * Shared constructor logic: configures verbosity, optional file logging, the
     * rule configuration and the PDF converter.
     *
     * @param logFilePath log file to write to; when empty, file logging is turned
     *                    off. A path without an extension gets ".log" appended.
     * @param verbose     verbosity level forwarded to {@code Common.setVerbose}
     * @param configFile  configuration file; when empty or not existing, the path
     *                    returned by {@code Common.getConfigPath()} is used instead
     * @param timeout     value forwarded unchanged to the {@code PDFToHtml} constructor
     * @throws Exception if the log file fails validation or the configuration
     *                   cannot be loaded (the original failure is kept as the cause)
     */
    private void initial(String logFilePath, int verbose, String configFile, long timeout) throws Exception {
        this.common.setVerbose(verbose);
        if (this.common.IsEmpty(logFilePath)) {
            this.writeLogFile = false;
        } else {
            if (this.common.IsEmpty(this.common.getExtension(logFilePath))) {
                logFilePath = logFilePath + ".log";
            }
            if (!this.common.validateFile(logFilePath)) {
                throw new Exception("Invalid log file path or permission denied.");
            }
            this.writeLogFile = true;
            this.logPath = logFilePath;
            this.common.print("Log File: " + this.logPath);
        }
        try {
            if (this.common.IsEmpty(configFile) || !this.common.IsExist(configFile)) {
                configFile = this.common.getConfigPath();
            }
            this.config = new Config(configFile);
        }
        catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new Exception("initial failed. " + e.getMessage(), e);
        }
        this.pdf = new PDFToHtml(timeout);
    }

    /**
     * Creates an extractor with no log file, verbosity 0, the fallback
     * configuration file and a timeout value of 0.
     *
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract() throws Exception {
        this.initial("", 0, "", 0L);
    }

    /**
     * Creates an extractor with no log file, verbosity 0 and a timeout value of 0.
     *
     * @param configFile configuration file to load
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract(String configFile) throws Exception {
        this.initial("", 0, configFile, 0L);
    }

    /**
     * Creates an extractor with no log file, verbosity 0 and the fallback
     * configuration file.
     *
     * @param timeout value passed on to the PDF converter
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract(long timeout) throws Exception {
        this.initial("", 0, "", timeout);
    }

    /**
     * Creates an extractor with no log file and verbosity 0.
     *
     * @param configFile configuration file to load
     * @param timeout    value passed on to the PDF converter
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract(String configFile, long timeout) throws Exception {
        this.initial("", 0, configFile, timeout);
    }

    /**
     * Creates an extractor with verbosity 0.
     *
     * @param logFilePath log file path; empty disables file logging
     * @param configFile  configuration file to load
     * @param timeout     value passed on to the PDF converter
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract(String logFilePath, String configFile, long timeout) throws Exception {
        this.initial(logFilePath, 0, configFile, timeout);
    }

    /**
     * Creates an extractor with every option given explicitly.
     *
     * @param logFilePath log file path; empty disables file logging
     * @param verbose     verbosity level for console output
     * @param configFile  configuration file to load
     * @param timeout     value passed on to the PDF converter
     * @throws Exception if initialisation fails (see {@code initial})
     */
    public PDFExtract(String logFilePath, int verbose, String configFile, long timeout) throws Exception {
        this.initial(logFilePath, verbose, configFile, timeout);
    }

    /**
     * Extracts one PDF file and writes the generated output to {@code outputFile}.
     *
     * <p>Pipeline: optional permission probe, PDF conversion, object parsing,
     * repair/adjustment, language identification, sentence joining, final repair
     * and output generation. On failure, an error document is written to
     * {@code outputFile} (best effort) and the original exception is rethrown.
     *
     * @param inputFile     path of the PDF to read; must exist and have a "pdf" extension
     * @param outputFile    path of the file to write; its parent directory is created if missing
     * @param keepBrTags    forwarded unchanged to {@code generateOutput}
     * @param getPermission when 1, the PDF access permissions are read before conversion
     * @throws Exception on invalid arguments or any failure in the pipeline
     */
    public void Extract(String inputFile, String outputFile, int keepBrTags, int getPermission) throws Exception {
        try {
            this.common.print(inputFile, "Start extract");
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, inputFile, "Start extract", false);
            }
            if (!this.common.IsExist(inputFile)) {
                throw new Exception("Input file does not exist.");
            }
            if (this.common.IsEmpty(outputFile)) {
                throw new Exception("Output file cannot be empty.");
            }
            String outputPath = this.common.getParentPath(outputFile);
            if (!this.common.IsEmpty(outputPath) && !this.common.IsExist(outputPath)) {
                this.common.createDir(outputPath);
            }
            this.common.checkPermissions(inputFile);
            // Null-safe, case-insensitive comparison of the file extension.
            if (!"pdf".equalsIgnoreCase(this.common.getExtension(inputFile))) {
                throw new Exception("Input file extension is not pdf.");
            }
            AtomicReference<HTMLObject.DocumentObject> refDoc = new AtomicReference<HTMLObject.DocumentObject>(new HTMLObject.DocumentObject());
            if (getPermission == 1) {
                this.getAccessPermissions(inputFile, refDoc);
            }
            StringBuffer htmlBuffer = this.convertPdfToHtml(inputFile);
            this.getHtmlObject(htmlBuffer, refDoc);
            this.repairAndAdjustment(refDoc);
            this.languageId(refDoc);
            this.sentenceJoin(refDoc);
            this.finalRepair(refDoc);
            htmlBuffer = this.generateOutput(refDoc, keepBrTags, getPermission);
            this.common.WriteFile(outputFile, htmlBuffer.toString());
            String successMessage = "Extract success. -> " + outputFile;
            this.common.print(inputFile, successMessage);
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, inputFile, successMessage, false);
            }
        }
        catch (Exception e) {
            if (!this.common.IsEmpty(outputFile)) {
                try {
                    this.common.WriteFile(outputFile, this.common.getOutputError(e));
                }
                catch (Exception writeFailure) {
                    // Writing the error document is best effort: it must not hide the
                    // real failure or prevent the error from being logged below.
                    e.addSuppressed(writeFailure);
                }
            }
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, inputFile, "Error: " + e.getMessage(), true);
            }
            throw e;
        }
    }

    /**
     * Extracts a PDF supplied as an in-memory stream and returns the generated
     * output encoded as UTF-8.
     *
     * <p>NOTE(review): unlike the file-based overload, this pipeline does not call
     * {@code finalRepair}; confirm whether that difference is intended.
     *
     * @param inputStream   PDF content; must be non-null and have bytes available
     * @param keepBrTags    forwarded unchanged to {@code generateOutput}
     * @param getPermission when 1, the PDF access permissions are read before conversion
     * @return a stream holding the generated output
     * @throws Exception if the stream is missing/empty or any pipeline step fails
     */
    public ByteArrayOutputStream Extract(ByteArrayInputStream inputStream, int keepBrTags, int getPermission) throws Exception {
        try {
            this.common.print("Input Stream", "Start extract");
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, "Input Stream", "Start extract", false);
            }
            if (inputStream == null || inputStream.available() <= 0) {
                throw new Exception("Input Stream does not exist.");
            }
            AtomicReference<HTMLObject.DocumentObject> refDoc = new AtomicReference<HTMLObject.DocumentObject>(new HTMLObject.DocumentObject());
            if (getPermission == 1) {
                // The permission probe hands the stream to PdfReader; remember the current
                // position and rewind afterwards so the converter sees the whole document.
                inputStream.mark(Integer.MAX_VALUE);
                try {
                    this.getAccessPermissions(inputStream, refDoc);
                }
                finally {
                    inputStream.reset();
                }
            }
            StringBuffer htmlBuffer = this.convertPdfToHtml(inputStream);
            this.getHtmlObject(htmlBuffer, refDoc);
            this.repairAndAdjustment(refDoc);
            this.languageId(refDoc);
            this.sentenceJoin(refDoc);
            htmlBuffer = this.generateOutput(refDoc, keepBrTags, getPermission);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            IOUtils.write(htmlBuffer.toString(), (OutputStream)outputStream, "UTF-8");
            this.common.print("Input Stream", "Extract success.");
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, "Input Stream", "Extract success.", false);
            }
            return outputStream;
        }
        catch (Exception e) {
            // The exception is always rethrown, so no error stream is built here:
            // it could never reach the caller.
            String message = e.getMessage();
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, "Input Stream", "Error: " + message, true);
            } else {
                this.common.print("Input Stream", "Error: " + message);
            }
            throw e;
        }
    }

    /**
     * Processes a batch file in which every non-empty line describes one
     * extraction job, running the jobs on a fixed-size thread pool and waiting
     * until all of them have finished.
     *
     * @param batchFile     path of the batch file; must exist
     * @param threadCount   number of worker threads; values below 1 are treated as 1
     * @param keepBrTags    forwarded to each single-file extraction
     * @param getPermission forwarded to each single-file extraction
     * @throws Exception if the batch file is missing or scheduling/waiting fails;
     *                   failures of individual jobs are handled by the workers
     */
    public void Extract(String batchFile, int threadCount, int keepBrTags, int getPermission) throws Exception {
        try {
            this.common.print("Start extract batch file: " + batchFile);
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, "Start extract batch file: " + batchFile);
            }
            if (!this.common.IsExist(batchFile)) {
                throw new Exception("Input batch file does not exist.");
            }
            // newFixedThreadPool rejects zero and negative sizes, so clamp to one worker.
            int maxThreadCount = Math.max(1, threadCount);
            List<String> lines = this.common.readLines(batchFile);
            this.executor = Executors.newFixedThreadPool(maxThreadCount);
            int len = lines.size();
            for (int ind = 0; ind < len; ++ind) {
                String line = lines.get(ind);
                if (this.common.IsEmpty(line)) {
                    continue;
                }
                this.AddThreadExtract(ind, line, keepBrTags, getPermission);
            }
            this.executor.shutdown();
            this.executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
        }
        catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Restore the interrupt flag so callers further up can still observe it.
                Thread.currentThread().interrupt();
            }
            String message = e.getMessage();
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, message, true);
            } else {
                this.common.print("Error: " + e.getMessage());
            }
            throw e;
        }
        finally {
            this.common.print("Finish extract batch file: " + batchFile);
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, "Finish extract batch file: " + batchFile);
            }
            this.shutdownProcess();
        }
    }

    /**
     * Schedules one batch line on the executor. The line must hold at least two
     * tab-separated columns: the input file and the output file.
     *
     * <p>Failures inside the job never propagate out of the worker thread; they
     * are printed, and additionally written to the log file when file logging is on.
     *
     * @param index         position of the line in the batch file (currently unused)
     * @param line          raw batch line
     * @param keepBrTags    forwarded to the single-file extraction
     * @param getPermission forwarded to the single-file extraction
     */
    private void AddThreadExtract(int index, final String line, final int keepBrTags, final int getPermission) {
        try {
            this.executor.execute(new Runnable(){

                @Override
                public void run() {
                    String inputFile = "";
                    try {
                        String[] cols = line.split("\t");
                        if (cols.length < 2) {
                            throw new Exception("Invalid batch line: " + line);
                        }
                        inputFile = cols[0];
                        PDFExtract.this.Extract(inputFile, cols[1], keepBrTags, getPermission);
                    }
                    catch (Exception e) {
                        String message = e.getMessage();
                        // Always report the failure; without a log file the console
                        // is the only place a broken batch line can show up.
                        PDFExtract.this.common.print(inputFile, "Error: " + message);
                        if (PDFExtract.this.writeLogFile) {
                            PDFExtract.this.common.writeLog(PDFExtract.this.logPath, inputFile, "Error: " + message, true);
                        }
                    }
                }
            });
        }
        catch (Exception ex) {
            // Reached when the task cannot be submitted (e.g. rejected by the executor).
            String report = "Batch line: " + line + ", Error: " + ex.toString();
            this.common.print(report);
            if (this.writeLogFile) {
                this.common.writeLog(this.logPath, report, true);
            }
        }
    }

    /**
     * Opens the PDF at {@code inputFile}, copies its permission flags into the
     * document held by {@code refDoc} and, when copying is not allowed, asks the
     * converter to decrypt the file. The reader is always closed.
     *
     * @param inputFile path of the PDF to inspect
     * @param refDoc    holder of the document whose {@code permission} is filled in
     * @throws Exception if the PDF cannot be opened or decrypted
     */
    private void getAccessPermissions(String inputFile, AtomicReference<HTMLObject.DocumentObject> refDoc) throws Exception {
        HTMLObject.DocumentObject document = refDoc.get();
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(inputFile);
            this.setAccessPermissions(pdfReader, refDoc);
            boolean copyAllowed = document.permission.canCopy;
            if (!copyAllowed) {
                this.pdf.decrypt(pdfReader, inputFile);
            }
        }
        finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }

    /**
     * Reads the PDF from {@code inputStream} and copies its permission flags into
     * the document held by {@code refDoc}. The reader is always closed.
     *
     * @param inputStream PDF content to inspect
     * @param refDoc      holder of the document whose {@code permission} is filled in
     * @throws Exception if the PDF cannot be read
     */
    private void getAccessPermissions(InputStream inputStream, AtomicReference<HTMLObject.DocumentObject> refDoc) throws Exception {
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(inputStream);
            this.setAccessPermissions(pdfReader, refDoc);
        }
        finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }

    /**
     * Copies the encryption state and every individual permission flag of
     * {@code reader} into {@code permission} of the document held by {@code refDoc}.
     *
     * <p>Side effect: sets the static switch {@code PdfReader.unethicalreading} to true.
     *
     * @param reader open reader of the PDF being inspected
     * @param refDoc holder of the document to update
     */
    private void setAccessPermissions(PdfReader reader, AtomicReference<HTMLObject.DocumentObject> refDoc) {
        HTMLObject.DocumentObject target = refDoc.get();
        PdfReader.unethicalreading = true;
        // Permission bit mask, narrowed to int for the PdfEncryptor helpers.
        final int flags = (int)reader.getPermissions();
        target.permission.isEncrytped = reader.isEncrypted();
        target.permission.canAssembly = PdfEncryptor.isAssemblyAllowed(flags);
        target.permission.canCopy = PdfEncryptor.isCopyAllowed(flags);
        target.permission.canPrint = PdfEncryptor.isPrintingAllowed(flags);
        target.permission.canPrintDegraded = PdfEncryptor.isDegradedPrintingAllowed(flags);
        target.permission.canModified = PdfEncryptor.isModifyContentsAllowed(flags);
        target.permission.canModifyAnnotations = PdfEncryptor.isModifyAnnotationsAllowed(flags);
        target.permission.canFillInForm = PdfEncryptor.isFillInAllowed(flags);
        target.permission.canScreenReader = PdfEncryptor.isScreenReadersAllowed(flags);
        target.permission.verbose = PdfEncryptor.getPermissionsVerbose(flags);
    }

    /**
     * Converts the PDF file at {@code inputFile} by delegating to {@code PDFToHtml.extract}.
     *
     * @param inputFile path of the PDF file
     * @return the converter output
     * @throws Exception if the converter fails
     */
    private StringBuffer convertPdfToHtml(String inputFile) throws Exception {
        return this.pdf.extract(inputFile);
    }

    /**
     * Converts PDF content held in memory by delegating to {@code PDFToHtml.extract}.
     *
     * @param inputStream PDF content
     * @return the converter output
     * @throws Exception if the converter fails
     */
    private StringBuffer convertPdfToHtml(ByteArrayInputStream inputStream) throws Exception {
        return this.pdf.extract(inputStream);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Parses the converter output line by line into pages, font styles and text
     * objects and stores them in the document held by {@code refDoc}.
     *
     * <p>Each line is classified by the first marker it matches: {@code <text },
     * then {@code <page }, then {@code <fontspec }. The document width and height
     * are taken from {@code getMaxCount} over the per-page width/height counters.
     *
     * @param htmlBuffer converter output to parse
     * @param refDoc     holder of the document to fill; it is set again on exit,
     *                   even when parsing fails
     * @throws Exception if reading fails or the thread is interrupted while parsing
     */
    private void getHtmlObject(StringBuffer htmlBuffer, AtomicReference<HTMLObject.DocumentObject> refDoc) throws Exception {
        BufferedReader b_in = null;
        HTMLObject.DocumentObject doc = refDoc.get();
        try {
            // Encode with the same charset the reader decodes with; the platform
            // default charset would corrupt non-ASCII text on non-UTF-8 systems.
            byte[] utf8Bytes = htmlBuffer.toString().getBytes(StandardCharsets.UTF_8);
            InputStreamReader i_in = new InputStreamReader((InputStream)new ByteArrayInputStream(utf8Bytes), StandardCharsets.UTF_8);
            b_in = new BufferedReader(i_in);
            String line = "";
            int currentPage = 0;
            HTMLObject.PageObject page = new HTMLObject.PageObject();
            HashMap<Float, Integer> mapPageWidth = new HashMap<Float, Integer>();
            HashMap<Float, Integer> mapPageHeight = new HashMap<Float, Integer>();
            HashMap<String, HTMLObject.StyleObject> mapStyle = new HashMap<String, HTMLObject.StyleObject>();
            while ((line = b_in.readLine()) != null) {
                if (this.patternText.matcher(line).find()) {
                    HTMLObject.TextObject text = this.getTextObject(line, mapStyle);
                    if (this.checkLineAdd(page.width, page.height, text)) {
                        page.texts.add(text);
                    }
                    continue;
                }
                if (this.patternPageOpen.matcher(line).find()) {
                    if (currentPage > 0) {
                        // A new page starts: flush the page collected so far.
                        doc.pages.add(page);
                        // NOTE(review): fixed 50 ms pause per page kept from the original; purpose unclear.
                        Thread.sleep(50L);
                    }
                    ++currentPage;
                    page = new HTMLObject.PageObject();
                    page.pageno = this.common.getInt(this.patternPageNo.matcher(line).replaceAll("$1"));
                    page.height = this.common.getFloat(this.patternHeight.matcher(line).replaceAll("$1"));
                    page.width = this.common.getFloat(this.patternWidth.matcher(line).replaceAll("$1"));
                    // Count how often each page width/height occurs in the document.
                    mapPageWidth.put(Float.valueOf(page.width), this.common.getInt(mapPageWidth.get(Float.valueOf(page.width))) + 1);
                    mapPageHeight.put(Float.valueOf(page.height), this.common.getInt(mapPageHeight.get(Float.valueOf(page.height))) + 1);
                    continue;
                }
                if (!this.patternStyleOpen.matcher(line).find()) {
                    continue;
                }
                HTMLObject.StyleObject style = new HTMLObject.StyleObject();
                style.id = this.common.getStr(this.patternId.matcher(line).replaceAll("$1"));
                style.size = this.common.getInt(this.patternSize.matcher(line).replaceAll("$1"));
                style.family = this.common.getStr(this.patternFamily.matcher(line).replaceAll("$1"));
                style.color = this.common.getStr(this.patternColor.matcher(line).replaceAll("$1"));
                mapStyle.put(style.id, style);
            }
            // The last page has no following <page> line to flush it.
            doc.pages.add(page);
            doc.width = this.getMaxCount(mapPageWidth).floatValue();
            doc.height = this.getMaxCount(mapPageHeight).floatValue();
        }
        finally {
            if (b_in != null) {
                b_in.close();
            }
            refDoc.set(doc);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Cleans up the parsed text objects of every page in four passes:
     * <ol>
     *   <li>mark empty texts as deleted, apply the "common" normalize rules and
     *       record the height reported most often per page by {@code getMaxCount};</li>
     *   <li>merge following texts into the current one while {@code isMergeTop}
     *       accepts them, growing the bounding box of the current text;</li>
     *   <li>rebuild each page's text list without deleted/empty entries and insert
     *       paragraph markers between texts;</li>
     *   <li>apply the "common" end-of-sentence and join-word rules to neighbouring
     *       texts inside the same paragraph and language.</li>
     * </ol>
     *
     * @param refDoc holder of the document to repair; it is set again on exit
     * @throws Exception if any helper fails
     */
    private void repairAndAdjustment(AtomicReference<HTMLObject.DocumentObject> refDoc) throws Exception {
        HTMLObject.DocumentObject doc = refDoc.get();
        try {
            HTMLObject.TextObject text;
            int i;
            Config.LangInfo commonInfo = null;
            if (this.config != null) {
                commonInfo = this.config.get("common");
            }
            // Pass 1: drop empty texts, normalize, collect the height histogram per page.
            for (HTMLObject.PageObject page : doc.pages) {
                HashMap<Float, Integer> mapHeight = new HashMap<Float, Integer>();
                int len = page.texts.size();
                for (i = 0; i < len; ++i) {
                    text = page.texts.get(i);
                    if (this.common.IsEmpty(text.text)) {
                        text.deleted = true;
                        continue;
                    }
                    if (text.height > 0.0f) {
                        int count = 0;
                        if (mapHeight.containsKey(Float.valueOf(text.height))) {
                            count = mapHeight.get(Float.valueOf(text.height));
                        }
                        mapHeight.put(Float.valueOf(text.height), count + 1);
                    }
                    if (commonInfo == null) continue;
                    text.text = this.common.replaceText(commonInfo.normalize, text.text).trim();
                }
                if (mapHeight.size() <= 0) continue;
                page.mostHeight = this.getMaxCount(mapHeight).floatValue();
            }
            // Pass 2: merge texts that isMergeTop() accepts into their predecessor.
            for (HTMLObject.PageObject page : doc.pages) {
                int len = page.texts.size();
                for (int i2 = 0; i2 < len; ++i2) {
                    HTMLObject.TextObject text2 = page.texts.get(i2);
                    if (text2.deleted) continue;
                    if (this.common.IsEmpty(text2.text)) {
                        text2.deleted = true;
                        continue;
                    }
                    StringBuffer sb = new StringBuffer(text2.text.trim());
                    boolean hasUpdated = false;
                    if (i2 + 1 >= len) continue;
                    for (int j = i2 + 1; j < len; ++j) {
                        HTMLObject.TextObject nextText = page.texts.get(j);
                        // Skip candidates that were already removed. The original tested
                        // text2.deleted here, which can never be true at this point.
                        if (nextText.deleted) continue;
                        if (this.common.IsEmpty(nextText.text)) {
                            nextText.deleted = true;
                            continue;
                        }
                        if (!this.isMergeTop(text2, nextText)) {
                            // Resume the outer loop at the first text that was not merged.
                            i2 = j - 1;
                            break;
                        }
                        sb.append(" " + nextText.text.trim());
                        if (!nextText.islink) {
                            text2.color = nextText.color;
                        }
                        if (nextText.top < text2.top) {
                            text2.top = nextText.top;
                        }
                        if (nextText.bottom > text2.bottom) {
                            text2.bottom = nextText.bottom;
                        }
                        text2.right = nextText.right;
                        text2.width = text2.right - text2.left;
                        text2.height = text2.bottom - text2.top;
                        nextText.deleted = true;
                        hasUpdated = true;
                    }
                    if (!hasUpdated) continue;
                    text2.text = sb.toString();
                }
            }
            // Pass 3: compact the list and insert paragraph markers.
            for (HTMLObject.PageObject page : doc.pages) {
                ArrayList<HTMLObject.TextObject> newTexts = new ArrayList<HTMLObject.TextObject>();
                int len = page.texts.size();
                block9: for (i = 0; i < len; ++i) {
                    text = page.texts.get(i);
                    if (text.deleted || this.common.IsEmpty(text.text)) continue;
                    newTexts.add(text);
                    if (i + 1 < len) {
                        // Only the first live successor decides whether a marker follows.
                        for (int j = i + 1; j < len; ++j) {
                            HTMLObject.TextObject nextText = page.texts.get(j);
                            if (nextText.deleted || this.common.IsEmpty(nextText.text)) continue;
                            if (text.text.trim().endsWith(",") && this.isClassChanged(text, nextText)) {
                                newTexts.add(this.getNewText(this.paraMarker));
                                continue block9;
                            }
                            if (!this.isTooFar(text, nextText) && !this.isFontChanged(text, nextText)) continue block9;
                            newTexts.add(this.getNewText(this.paraMarker));
                            continue block9;
                        }
                        continue;
                    }
                    // Last entry of the page: always close it with a marker.
                    newTexts.add(this.getNewText(this.paraMarker));
                }
                page.texts.clear();
                page.texts.addAll(newTexts);
            }
            List<Object> joinList = new ArrayList();
            List<Object> eofList = new ArrayList();
            if (this.config != null && this.config.get("common") != null) {
                joinList = this.config.get((String)"common").joinWords;
                eofList = this.config.get((String)"common").absoluteEOF;
            }
            // Pass 4: join neighbouring texts according to the configured rules.
            for (HTMLObject.PageObject page : doc.pages) {
                int len = page.texts.size();
                block12: for (int i3 = 0; i3 < len; ++i3) {
                    HTMLObject.TextObject text3 = page.texts.get(i3);
                    if (text3.deleted || text3.text.equals(this.paraMarker)) continue;
                    if (i3 + 1 >= len) continue;
                    for (int j = i3 + 1; j < len; ++j) {
                        HTMLObject.TextObject nextText = page.texts.get(j);
                        if (nextText.deleted) continue;
                        if (nextText.text.equals(this.paraMarker) || !text3.lang.equals(nextText.lang)) continue block12;
                        boolean isEOF = false;
                        // Decided afresh for every candidate. The original kept one flag for
                        // the whole inner loop, so after a first successful join every later
                        // candidate was joined with the first rule's joinText, match or not.
                        boolean isJoin = false;
                        for (Object rule : eofList) {
                            if (!this.common.IsEmpty(((Config.EOFInfo)rule).front) && !this.common.IsEmpty(((Config.EOFInfo)rule).back)) {
                                if (text3.text.matches(((Config.EOFInfo)rule).front) && nextText.text.matches(((Config.EOFInfo)rule).back)) {
                                    isEOF = true;
                                }
                            } else if (!this.common.IsEmpty(((Config.EOFInfo)rule).front)) {
                                if (text3.text.matches(((Config.EOFInfo)rule).front)) {
                                    isEOF = true;
                                }
                            } else if (!this.common.IsEmpty(((Config.EOFInfo)rule).back) && nextText.text.matches(((Config.EOFInfo)rule).back)) {
                                isEOF = true;
                            }
                            if (!isEOF) continue;
                            break;
                        }
                        if (!isEOF) {
                            for (Object rule : joinList) {
                                if (!this.common.IsEmpty(((Config.JoinWordInfo)rule).front) && !this.common.IsEmpty(((Config.JoinWordInfo)rule).back)) {
                                    if (text3.text.matches(((Config.JoinWordInfo)rule).front) && nextText.text.matches(((Config.JoinWordInfo)rule).back)) {
                                        isJoin = true;
                                    }
                                } else if (!this.common.IsEmpty(((Config.JoinWordInfo)rule).front)) {
                                    if (text3.text.matches(((Config.JoinWordInfo)rule).front)) {
                                        isJoin = true;
                                    }
                                } else if (!this.common.IsEmpty(((Config.JoinWordInfo)rule).back) && nextText.text.matches(((Config.JoinWordInfo)rule).back)) {
                                    isJoin = true;
                                }
                                if (!isJoin) continue;
                                text3.text = text3.text + ((Config.JoinWordInfo)rule).joinText + nextText.text;
                                nextText.deleted = true;
                                break;
                            }
                        }
                        // A successful join keeps extending text3 with the next candidate.
                        if (isJoin) continue;
                        i3 = j - 1;
                        continue block12;
                    }
                }
            }
        }
        finally {
            refDoc.set(doc);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Detects the language of every eligible text, applies that language's
     * normalize rules and records the language statistics on the document
     * ({@code doc.language} and {@code doc.langList}).
     *
     * <p>This step is best effort: a detector that cannot be loaded, or a failure
     * while detecting, is recorded in {@code doc.warningList} and never thrown.
     *
     * @param refDoc holder of the document to annotate; it is set again on exit
     */
    private void languageId(AtomicReference<HTMLObject.DocumentObject> refDoc) {
        HTMLObject.DocumentObject doc = refDoc.get();
        try {
            DetectLanguage detectLang = null;
            try {
                detectLang = new DetectLanguage();
            }
            catch (UnsatisfiedLinkError e) {
                doc.warningList.add(new HTMLObject.WarnObject("languageId", this.common.getStackTrace(e)));
            }
            catch (Exception e) {
                doc.warningList.add(new HTMLObject.WarnObject("languageId", this.common.getStackTrace(e)));
            }
            if (detectLang == null) {
                return;
            }
            // Number of texts per reliably detected language.
            HashMap<String, Integer> mapLang = new HashMap<String, Integer>();
            for (HTMLObject.PageObject page : doc.pages) {
                int len = page.texts.size();
                for (int i = 0; i < len; ++i) {
                    HTMLObject.TextObject text = page.texts.get(i);
                    if (text.deleted || text.text.equals(this.paraMarker) || !this.canDetectLang(text)) continue;
                    List<DetectLanguage.LanguageResult> results = detectLang.find(text.text);
                    if (results == null || results.size() <= 0) continue;
                    // Only the first result is considered, and only when flagged reliable.
                    DetectLanguage.LanguageResult lang = results.get(0);
                    if (!lang.reliable) continue;
                    text.lang = lang.language;
                    if (this.config != null) {
                        Config.LangInfo langInfo = this.config.get(text.lang);
                        if (langInfo != null) {
                            text.text = this.common.replaceText(langInfo.normalize, text.text);
                        }
                    }
                    int count = 0;
                    if (mapLang.containsKey(lang.language)) {
                        count = mapLang.get(lang.language);
                    }
                    mapLang.put(lang.language, count + 1);
                }
            }
            doc.language = this.getMaxLangCount(mapLang);
            doc.langList = this.getLangList(mapLang);
        }
        catch (Exception e) {
            // Previously swallowed silently; surface it the same way as detector-load failures.
            doc.warningList.add(new HTMLObject.WarnObject("languageId", this.common.getStackTrace(e)));
        }
        finally {
            refDoc.set(doc);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Rebuilds the text list of every page by grouping consecutive texts of the
     * same language into runs and passing each run through
     * {@code sentenceJoin(texts, language)}. A run ends at a paragraph marker or
     * when the language changes. Does nothing when the document has no language.
     *
     * @param refDoc holder of the document to process; it is set again on exit
     */
    private void sentenceJoin(AtomicReference<HTMLObject.DocumentObject> refDoc) {
        HTMLObject.DocumentObject document = refDoc.get();
        if (this.common.IsEmpty(document.language)) {
            return;
        }
        try {
            for (HTMLObject.PageObject page : document.pages) {
                // Language of the run currently being collected; empty means "no run open".
                String runLang = "";
                ArrayList<HTMLObject.TextObject> pending = new ArrayList<HTMLObject.TextObject>();
                ArrayList<HTMLObject.TextObject> rebuilt = new ArrayList<HTMLObject.TextObject>();
                int total = page.texts.size();
                for (int idx = 0; idx < total; ++idx) {
                    HTMLObject.TextObject current = page.texts.get(idx);
                    if (current.deleted) {
                        continue;
                    }
                    // Texts without a detected language inherit the document language.
                    if (this.common.IsEmpty(current.lang)) {
                        current.lang = document.language;
                    }
                    if (idx == 0 || this.common.IsEmpty(runLang)) {
                        runLang = this.common.IsEmpty(current.lang) ? document.language : current.lang;
                        pending.add(current);
                        continue;
                    }
                    boolean isMarker = current.text.equals(this.paraMarker);
                    boolean endsRun = isMarker || !this.common.IsEmpty(runLang) && !this.common.IsEmpty(current.lang) && !current.lang.equals(runLang);
                    if (!endsRun) {
                        pending.add(current);
                        continue;
                    }
                    // Emit the separators first, then the joined run collected so far.
                    if (isMarker) {
                        rebuilt.add(current);
                    }
                    if (!current.lang.equals(runLang)) {
                        rebuilt.add(this.getNewText(this.paraMarker));
                    }
                    rebuilt.addAll(this.sentenceJoin(pending, runLang));
                    pending.clear();
                    if (isMarker) {
                        // The next live text opens a fresh run.
                        runLang = "";
                    } else {
                        pending.add(current);
                        runLang = this.common.IsEmpty(current.lang) ? document.language : current.lang;
                    }
                }
                // Flush whatever is left at the end of the page.
                if (pending.size() > 0) {
                    rebuilt.add(this.getNewText(this.paraMarker));
                    rebuilt.addAll(this.sentenceJoin(pending, runLang));
                    pending.clear();
                }
                page.texts.clear();
                page.texts.addAll(rebuilt);
            }
        }
        finally {
            refDoc.set(document);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Applies the configured "repair" replacement rules to the texts of the document.
     *
     * <p>Every text that is neither flagged as deleted nor equal to the paragraph marker is first
     * passed through the rules of the {@code "common"} configuration entry (an empty rule list is
     * used when that entry is missing). If the text has a language and the configuration holds an
     * entry for it, that entry's rules are applied afterwards.
     *
     * @param refDoc holder of the document to repair; the same document instance is stored back
     *               into it in a {@code finally} block
     */
    private void finalRepair(AtomicReference<HTMLObject.DocumentObject> refDoc) {
        HTMLObject.DocumentObject doc = refDoc.get();
        try {
            // Look the shared entry up once; fall back to "no rules" when it is absent.
            List<Config.NormalizeInfo> repairList = new ArrayList<>();
            Config.LangInfo commonInfo = this.config.get("common");
            if (commonInfo != null) {
                repairList = commonInfo.repair;
            }
            for (HTMLObject.PageObject page : doc.pages) {
                for (HTMLObject.TextObject text : page.texts) {
                    if (text.deleted || text.text.equals(this.paraMarker)) {
                        continue;
                    }
                    text.text = this.common.replaceText(repairList, text.text);
                    if (this.common.IsEmpty(text.lang)) {
                        continue;
                    }
                    // Language-specific rules run after the common ones.
                    Config.LangInfo langInfo = this.config.get(text.lang);
                    if (langInfo == null) {
                        continue;
                    }
                    text.text = this.common.replaceText(langInfo.repair, text.text);
                }
            }
        }
        finally {
            refDoc.set(doc);
        }
    }

    /**
     * Serialises the processed document into the HTML-like output format.
     *
     * <p>The {@code <head>} section carries the default language, the language list, any
     * warnings and (optionally) the permission flags. The {@code <body>} holds one
     * {@code <div class="page">} per page, whose texts are grouped into {@code <p>} elements;
     * a paragraph marker closes the open paragraph and, when it is followed by a non-marker,
     * non-empty text, opens the next one using that text's language and font.
     *
     * <p>Side effect: when a listed language has no entry in {@code _hashSentenceJoin}, a
     * "sentenceJoin" warning naming those languages is appended to {@code doc.warningList}
     * before the warnings are written.
     *
     * <p>Values are written as-is, without any escaping.
     *
     * @param refDoc        holder of the document to serialise
     * @param keepBrTags    {@code 1} to append {@code <br />} after every text line
     * @param getPermission {@code 1} to emit the {@code <permission>} element
     * @return the generated markup
     */
    private StringBuffer generateOutput(AtomicReference<HTMLObject.DocumentObject> refDoc, int keepBrTags, int getPermission) {
        HTMLObject.DocumentObject doc = refDoc.get();
        StringBuffer sbOut = new StringBuffer();
        sbOut.append("<html>\n");
        sbOut.append("<head>\n");
        sbOut.append("<defaultLang abbr=\"" + doc.language + "\" />\n");

        // Language list; remember the languages that have no sentence-join worker.
        sbOut.append("<languages>\n");
        List<String> noModel = new ArrayList<String>();
        for (HTMLObject.LangObject lang : doc.langList) {
            sbOut.append("<language abbr=\"" + lang.name + "\" percent=\"" + lang.percent + "\" />\n");
            if (this._hashSentenceJoin.get(lang.name) == null) {
                noModel.add(lang.name);
            }
        }
        if (!noModel.isEmpty()) {
            doc.warningList.add(new HTMLObject.WarnObject("sentenceJoin", "No model for language: " + String.join(", ", noModel)));
        }
        sbOut.append("</languages>\n");

        if (doc.warningList.size() > 0) {
            sbOut.append("<warnings>\n");
            for (HTMLObject.WarnObject warnObj : doc.warningList) {
                sbOut.append("<warning>\n");
                sbOut.append("<method>" + warnObj.method + "</method>\n");
                sbOut.append("<detail>");
                sbOut.append("<![CDATA[");
                sbOut.append(warnObj.detail + "");
                sbOut.append("]]>");
                sbOut.append("</detail>\n");
                sbOut.append("</warning>\n");
            }
            sbOut.append("</warnings>\n");
        }

        if (getPermission == 1) {
            sbOut.append("<permission isencrypted=\"" + doc.permission.isEncrytped + "\">\n");
            sbOut.append("<canassemply>" + doc.permission.canAssembly + "</canassemply>\n");
            sbOut.append("<cancopy>" + doc.permission.canCopy + "</cancopy>\n");
            sbOut.append("<canmodified>" + doc.permission.canModified + "</canmodified>\n");
            sbOut.append("<canmodifyannotations>" + doc.permission.canModifyAnnotations + "</canmodifyannotations>\n");
            sbOut.append("<canprint>" + doc.permission.canPrint + "</canprint>\n");
            sbOut.append("<canprintdegraded>" + doc.permission.canPrintDegraded + "</canprintdegraded>\n");
            sbOut.append("<canfillinform>" + doc.permission.canFillInForm + "</canfillinform>\n");
            sbOut.append("<canscreenreader>" + doc.permission.canScreenReader + "</canscreenreader>\n");
            sbOut.append("</permission>\n");
        }
        sbOut.append("</head>\n");

        sbOut.append("<body>\n");
        // The suffix written after each text line does not change during the run.
        String lineEnd = (keepBrTags == 1 ? "<br />" : "") + "\n";
        for (HTMLObject.PageObject page : doc.pages) {
            sbOut.append("<div id=\"page" + page.pageno + "\" class=\"page\">\n");
            int ipara = 1;          // 1-based number of the next paragraph on this page
            boolean bpara = false;  // true while a <p> element is open
            int len = page.texts.size();
            for (int i = 0; i < len; ++i) {
                HTMLObject.TextObject text = page.texts.get(i);
                if (text.deleted || this.common.IsEmpty(text.text)) {
                    continue;
                }
                if (text.text.equals(this.paraMarker)) {
                    if (bpara) {
                        sbOut.append("</p>\n");
                        bpara = false;
                    }
                    if (i + 1 >= len) {
                        continue;
                    }
                    // Only open a paragraph when real content follows the marker.
                    HTMLObject.TextObject next = page.texts.get(i + 1);
                    if (next.text.equals(this.paraMarker) || this.common.IsEmpty(next.text)) {
                        continue;
                    }
                    sbOut.append("<p id=\"page" + page.pageno + "p" + ipara++ + "\" lang=\"" + next.lang + "\" fontname=\"" + next.fontfamily + "\">\n");
                    bpara = true;
                    continue;
                }
                if (!bpara) {
                    sbOut.append("<p id=\"page" + page.pageno + "p" + ipara++ + "\" lang=\"" + text.lang + "\" fontname=\"" + text.fontfamily + "\">\n");
                    bpara = true;
                }
                sbOut.append(text.text.trim());
                sbOut.append(lineEnd);
            }
            // Close the paragraph only if one is still open. Testing the paragraph counter
            // instead would emit a stray </p> after a paragraph already closed by a marker.
            if (bpara) {
                sbOut.append("</p>\n");
            }
            sbOut.append("</div>\n");
        }
        sbOut.append("</body>\n");
        sbOut.append("</html>\n");
        return sbOut;
    }

    /**
     * Creates a text object from one line of markup.
     *
     * <p>Position and size are extracted with the {@code top}/{@code left}/{@code width}/
     * {@code height} patterns, the content with {@code patternWord} and the style key with
     * {@code patternFont}. When {@code mapStyle} contains that key, font size, family and colour
     * are copied from the style. The stored text has all tags except {@code <br/>} removed,
     * {@code &#160;} turned into a space and whitespace runs collapsed to a single space.
     *
     * @param line     the markup line to parse
     * @param mapStyle style lookup keyed by the font attribute; may be {@code null}
     * @return the populated text object
     */
    private HTMLObject.TextObject getTextObject(String line, HashMap<String, HTMLObject.StyleObject> mapStyle) {
        HTMLObject.TextObject result = new HTMLObject.TextObject();

        // Geometry.
        result.top = this.common.getFloat(this.patternTop.matcher(line).replaceAll("$1"));
        result.left = this.common.getFloat(this.patternLeft.matcher(line).replaceAll("$1"));
        result.width = this.common.getFloat(this.patternWidth.matcher(line).replaceAll("$1"));
        result.height = this.common.getFloat(this.patternHeight.matcher(line).replaceAll("$1"));
        result.bottom = result.top + result.height;
        result.right = result.left + result.width;

        // Raw content and style key.
        result.text = this.common.getStr(this.patternWord.matcher(line).replaceAll("$1"));
        result.class_ = this.common.getStr(this.patternFont.matcher(line).replaceAll("$1"));

        if (mapStyle != null) {
            if (mapStyle.containsKey(result.class_)) {
                HTMLObject.StyleObject style = mapStyle.get(result.class_);
                result.fontsize = style.size;
                result.fontfamily = style.family;
                result.color = style.color;
            }
        }

        // Bold and link detection work on the content before tags are stripped.
        String raw = result.text;
        if (this.patternBold.matcher(raw).matches()) {
            result.fontweight = "bold";
        }
        if (this.patternLink.matcher(raw).matches()) {
            result.islink = true;
        }

        // Protect <br/> with a placeholder, strip every other tag, normalise spaces, restore <br/>.
        String cleaned = raw.replaceAll("<br\\/>", "_LSBRLS_")
                .replaceAll("<[^>]*>", "")
                .replaceAll("&#160;", " ")
                .replaceAll("\\s{2,100}", " ")
                .replaceAll("_LSBRLS_", "<br/>");
        result.text = cleaned;
        return result;
    }

    /**
     * Tells whether a text's top-left corner lies inside the page.
     *
     * @param pageWidth  page width
     * @param pageHeight page height
     * @param text       text whose {@code left}/{@code top} are checked
     * @return {@code false} when {@code left} or {@code top} is negative, {@code left} exceeds
     *         the page width or {@code top} exceeds the page height; {@code true} otherwise
     */
    private boolean checkLineAdd(float pageWidth, float pageHeight, HTMLObject.TextObject text) {
        if (text.left < 0.0f || text.top < 0.0f) {
            return false;
        }
        if (text.left > pageWidth) {
            return false;
        }
        if (text.top > pageHeight) {
            return false;
        }
        return true;
    }

    /**
     * Tells whether two texts differ in font according to two alternative criteria.
     *
     * <p>The result is {@code true} when the font sizes differ and the heights are not within
     * the {@link #isEquals(Float, Float)} tolerance, or when the colours differ and the tops are
     * not within that tolerance.
     *
     * @param text     first text
     * @param nextText text compared against {@code text}
     * @return {@code true} if either criterion holds
     */
    private boolean isFontChanged(HTMLObject.TextObject text, HTMLObject.TextObject nextText) {
        // NOTE(review): fontsize is compared with != — correct only for a primitive field; confirm.
        if (text.fontsize != nextText.fontsize
                && !this.isEquals(Float.valueOf(text.height), Float.valueOf(nextText.height))) {
            return true;
        }
        if (text.color.equals(nextText.color)) {
            return false;
        }
        return !this.isEquals(Float.valueOf(text.top), Float.valueOf(nextText.top));
    }

    /**
     * Tells whether two texts carry different {@code class_} values.
     *
     * @param text     first text
     * @param nextText text compared against {@code text}
     * @return {@code true} when the {@code class_} strings are not equal
     */
    private boolean isClassChanged(HTMLObject.TextObject text, HTMLObject.TextObject nextText) {
        boolean sameClass = text.class_.equals(nextText.class_);
        return !sameClass;
    }

    /**
     * Returns the key that has the highest count in the given map.
     *
     * <p>The map is scanned once; when several keys share the highest count, the one met first
     * in the map's iteration order wins.
     *
     * @param map counts per key; may be {@code null} or empty
     * @return the key with the largest value, or {@code 0.0f} for a {@code null} or empty map
     */
    private Float getMaxCount(HashMap<Float, Integer> map) {
        if (map == null || map.isEmpty()) {
            return Float.valueOf(0.0f);
        }
        Map.Entry<Float, Integer> best = null;
        for (Map.Entry<Float, Integer> entry : map.entrySet()) {
            // Strictly greater, so that the first of several equal maxima is kept.
            if (best == null || entry.getValue() > best.getValue()) {
                best = entry;
            }
        }
        return best.getKey();
    }

    /**
     * Returns the language that has the highest count in the given map.
     *
     * <p>The map is scanned once; when several languages share the highest count, the one met
     * first in the map's iteration order wins.
     *
     * @param map counts per language; may be {@code null} or empty
     * @return the language with the largest value, or an empty string for a {@code null} or
     *         empty map
     */
    private String getMaxLangCount(HashMap<String, Integer> map) {
        if (map == null || map.isEmpty()) {
            return "";
        }
        Map.Entry<String, Integer> best = null;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            // Strictly greater, so that the first of several equal maxima is kept.
            if (best == null || entry.getValue() > best.getValue()) {
                best = entry;
            }
        }
        return best.getKey();
    }

    /**
     * Creates a fresh text object whose {@code text} field is set to the given value.
     *
     * @param val content to store
     * @return the new text object; no other field is assigned here
     */
    private HTMLObject.TextObject getNewText(String val) {
        HTMLObject.TextObject created = new HTMLObject.TextObject();
        created.text = val;
        return created;
    }

    /**
     * Compares two values with a fixed tolerance.
     *
     * @param f1 first value
     * @param f2 second value
     * @return {@code true} when the absolute difference is at most {@code 8.0f}
     */
    private boolean isEquals(Float f1, Float f2) {
        float difference = f1.floatValue() - f2.floatValue();
        float distance = Math.abs(difference);
        return distance <= 8.0f;
    }

    /**
     * Tells whether two texts are vertically too far apart.
     *
     * @param text     first text
     * @param nextText text compared against {@code text}
     * @return {@code true} when any of the three distance checks below holds
     */
    private boolean isTooFar(HTMLObject.TextObject text, HTMLObject.TextObject nextText) {
        // NOTE(review): this compares a text's top with its own bottom — possibly nextText was
        // meant on one side; confirm before relying on it.
        if (text.top - text.bottom > text.height) {
            return true;
        }
        // Gap between the first text's bottom and the next text's top exceeds the next height.
        if (nextText.top - text.bottom > nextText.height) {
            return true;
        }
        // Tops are more than five average heights apart.
        float averageHeight = (text.height + nextText.height) / 2.0f;
        return Math.abs(text.top - nextText.top) > averageHeight * 5.0f;
    }

    /**
     * Tells whether two texts satisfy the three merge conditions checked here.
     *
     * @param text     left-hand text
     * @param nextText candidate text to its right
     * @return {@code true} when the tops are within the {@link #isEquals(Float, Float)}
     *         tolerance, {@link #isFontChanged} reports no change and the horizontal gap between
     *         {@code text.right} and {@code nextText.left} is below {@code 200}
     */
    private boolean isMergeTop(HTMLObject.TextObject text, HTMLObject.TextObject nextText) {
        if (!this.isEquals(Float.valueOf(text.top), Float.valueOf(nextText.top))) {
            return false;
        }
        if (this.isFontChanged(text, nextText)) {
            return false;
        }
        float gap = nextText.left - text.right;
        return gap < 200.0f;
    }

    /**
     * Tells whether a text holds enough content once digits, punctuation and a fixed set of
     * Greek letters and symbols have been removed.
     *
     * @param text text whose {@code text} field is examined
     * @return {@code true} when the remaining content has more than 10 space-separated tokens
     *         or more than 30 non-space characters
     */
    private boolean canDetectLang(HTMLObject.TextObject text) {
        // In the first expression "{2,100}" sits inside the character class, so those characters
        // are matched literally rather than acting as a quantifier.
        String stripped = text.text
                .replaceAll("[\\?\\!\\,0-9\\-\\_\\#\\*\\&\\(\\)\\+\\=\\@\\%\\<\\>\\{\\}\\[\\]\\^\\\\\\/\\;\\.{2,100}]*", "")
                .replaceAll("\\s\\.{2,100}", "")
                .replaceAll("\\:", " ")
                .replaceAll("[\u03c3\u03a6\u03d5\u2208\u2020\u03c4\u03c8\u03b7\u2207\u03c0\u2211\u2202\u03b1\u03b2\u2192\u220f\u03b5]*", "")
                .replaceAll("\\s{2,100}", " ")
                .trim();
        int tokenCount = stripped.split(" ", -1).length;
        if (tokenCount > 10) {
            return true;
        }
        int letterCount = stripped.replaceAll(" ", "").length();
        return letterCount > 30;
    }

    /**
     * Converts per-language counts into a list of languages with their percentage share,
     * ordered from the largest share to the smallest.
     *
     * @param map counts per language
     * @return one entry per map key, sorted by descending {@code percent}
     */
    private List<HTMLObject.LangObject> getLangList(HashMap<String, Integer> map) {
        // Sum of all counts; the denominator of every percentage.
        float total = 0.0f;
        for (Integer hits : map.values()) {
            total = (float)((double)total + this.common.getDouble(hits));
        }

        List<HTMLObject.LangObject> shares = new ArrayList<HTMLObject.LangObject>();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            HTMLObject.LangObject share = new HTMLObject.LangObject();
            share.name = entry.getKey();
            share.percent = this.common.getFloat(entry.getValue()) * 100.0f / total;
            shares.add(share);
        }

        // Highest percentage first.
        shares.sort((first, second) -> Float.compare(second.percent, first.percent));
        return shares;
    }

    /**
     * Returns the leading space-separated tokens of a string, joined by single spaces.
     *
     * <p>Tokens at indexes {@code 0..maxWordsJoin} (inclusive) are taken, fewer if the string
     * has fewer tokens.
     *
     * @param str source string
     * @return the selected tokens joined with a space and trimmed
     */
    private String getFirstWords(String str) {
        String[] tokens = str.split(" ");
        StringBuilder joined = new StringBuilder();
        int index = 0;
        while (index <= this.maxWordsJoin && index < tokens.length) {
            joined.append(" ").append(tokens[index]);
            index++;
        }
        return joined.toString().trim();
    }

    /**
     * Returns the trailing space-separated tokens of a trimmed string, joined by single spaces
     * in their original order.
     *
     * <p>At most {@code maxWordsJoin + 1} tokens are taken, counted from the end.
     *
     * @param str source string
     * @return the selected tokens joined with a space and trimmed
     */
    private String getLastWords(String str) {
        String[] tokens = str.trim().split(" ");

        // Walk backwards to find the index of the first token to keep.
        int first = tokens.length;
        int taken = 0;
        while (first > 0 && taken <= this.maxWordsJoin) {
            first--;
            taken++;
        }

        StringBuilder joined = new StringBuilder();
        for (int k = first; k < tokens.length; k++) {
            joined.append(tokens[k]).append(" ");
        }
        return joined.toString().trim();
    }

    /**
     * Looks up the sentence-join model configured for a language.
     *
     * @param lang language key used for the configuration lookup
     * @return the {@code sentenceJoinModel} of the language's configuration entry, or an empty
     *         string when there is no configuration or no entry for the language
     */
    private String getSentenceJoinModel(String lang) {
        if (this.config == null || this.config.get(lang) == null) {
            return "";
        }
        return this.config.get(lang).sentenceJoinModel;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Merges adjacent texts of one run when the sentence-join worker of the given language says
     * the second continues the first.
     *
     * <p>Lists with fewer than two elements are returned as they are. The worker is looked up in
     * {@code _hashSentenceJoin}, or created and cached there (a {@code null} is cached when the
     * script does not exist or no model is configured), while holding the {@code _objectWorker}
     * monitor. Joining only happens when the worker reports {@code RUNNING}; in every other case,
     * and when an exception is caught, the input list itself is returned.
     *
     * <p>A merge appends the later text to the earlier one in place and flags the later one as
     * deleted, so an exception raised midway can leave the input partially merged.
     *
     * @param texts the run to process, in reading order
     * @param lang  language used to select the worker
     * @return the surviving texts in original order, or {@code texts} itself when nothing was
     *         attempted
     */
    private List<HTMLObject.TextObject> sentenceJoin(List<HTMLObject.TextObject> texts, String lang) {
        ArrayList<HTMLObject.TextObject> newTexts = new ArrayList<HTMLObject.TextObject>();
        try {
            SentenceJoin sj;
            // Nothing to join with fewer than two texts.
            if (texts.size() < 2) {
                return texts;
            }
            Object object = this._objectWorker;
            synchronized (object) {
                if (this._hashSentenceJoin.containsKey(lang)) {
                    sj = this._hashSentenceJoin.get(lang);
                } else {
                    String scriptPath = this.config.getSentenceJoinScript();
                    String modelPath = this.getSentenceJoinModel(lang);
                    if (this.common.IsExist(scriptPath) && !this.common.IsEmpty(modelPath)) {
                        sj = new SentenceJoin(lang, scriptPath, modelPath);
                        this._hashSentenceJoin.put(lang, sj);
                    } else {
                        // Cache the absence as well, so the lookup is not repeated for this language.
                        sj = null;
                        this._hashSentenceJoin.put(lang, sj);
                    }
                }
                // Start the worker unless it is already running or loading.
                if (sj != null && sj.status() != SentenceJoin.WorkerStatus.RUNNING && sj.status() != SentenceJoin.WorkerStatus.LOADING) {
                    sj.start();
                }
            }
            if (sj != null && sj.status() == SentenceJoin.WorkerStatus.RUNNING) {
                int start = 0;
                // Walk from the last text to the first so a text can be folded into its predecessor.
                for (int i = texts.size() - 1; i >= start; --i) {
                    HTMLObject.TextObject text = texts.get(i);
                    if (i - 1 >= 0) {
                        HTMLObject.TextObject prevText = texts.get(i - 1);
                        // Only the tail of the previous text and the head of this one are sent to the worker.
                        String text1 = this.getLastWords(prevText.text).trim();
                        String text2 = this.getFirstWords(text.text).trim();
                        boolean isJoin = false;
                        // Skip the worker when either side is empty, the previous text ends with a
                        // period, this text starts with a bullet, or either side is a lone U+00A0.
                        if (!(this.common.IsEmpty(text1) || this.common.IsEmpty(text2) || text1.trim().endsWith(".") || text2.trim().startsWith("\u2022") || text1.trim().equals("\u00a0") || text2.trim().equals("\u00a0"))) {
                            isJoin = sj.execute(text1, text2);
                        }
                        if (isJoin) {
                            // Fold this text into the previous one and drop it from the result.
                            prevText.text = prevText.text.trim() + " " + text.text.trim();
                            text.deleted = true;
                            continue;
                        }
                        newTexts.add(text);
                        continue;
                    }
                    // The first text has no predecessor and is always kept.
                    newTexts.add(text);
                }
                // Texts were collected back to front; restore reading order.
                return Lists.reverse(newTexts);
            }
        }
        catch (Exception e) {
            this.common.print(e.getMessage());
        }
        return texts;
    }

    /**
     * Stops every sentence-join worker held in {@code _hashSentenceJoin}.
     *
     * <p>Languages cached with a {@code null} worker are skipped. Nothing happens when the map
     * is {@code null} or empty.
     *
     * @throws Exception if stopping a worker fails; workers after the failing one are not stopped
     */
    public void shutdownProcess() throws Exception {
        if (this._hashSentenceJoin == null || this._hashSentenceJoin.isEmpty()) {
            return;
        }
        for (SentenceJoin worker : this._hashSentenceJoin.values()) {
            if (worker != null) {
                worker.stop();
            }
        }
    }
}

