/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.ox.krr.logmap2.lexicon;

import java.io.InputStream;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import uk.ac.ox.krr.logmap2.io.LogOutput;
import uk.ac.ox.krr.logmap2.io.ReadFile;
import uk.ac.ox.krr.logmap2.lexicon.NormalizeNumbers;
import uk.ac.ox.krr.logmap2.lexicon.stemming.StemmerManager;

/**
 * Holds the lexical lookup structures of this package: two stop-word sets, a
 * label-to-synonym map, a word-to-stem cache, and the spelling-variant and
 * normalization maps filled from the bundled {@code Lex_*.gz} resources.
 *
 * <p>All structures are plain {@link HashMap}/{@link HashSet} instances and no
 * method is synchronized. The getters return the live internal collections,
 * not copies.
 */
public class LexicalUtilities {
    /** Classpath resources, resolved relative to this class's package. */
    private static final String STOPWORDS_FILE = "stopwords.txt";
    private static final String LEX_SPL_FILE = "Lex_spelling_LRSPL.gz";
    private static final String LEX_NORM_FILE = "Lex_norm_LRNOM.gz";
    private static final String LEX_PLUR_FILE = "Lex_plurals_LRAGR.gz";

    /** Field separator of the pipe-delimited input files, as a regex. */
    private static final String FIELD_SEPARATOR_REGEX = "\\|";

    /** A synonym is kept only if it consists solely of {@code a-z} and {@code _}. */
    private static final Pattern SYNONYM_PATTERN = Pattern.compile("[a-z_]+");

    private final Map<String, Set<String>> spelling_variants_map = new HashMap<String, Set<String>>();
    private final Map<String, Set<String>> normalization_map = new HashMap<String, Set<String>>();
    private final Set<String> stopwordsSet = new HashSet<String>();
    private final Set<String> stopwordsSetExtended = new HashSet<String>();
    private final Map<String, Set<String>> label2wordnetsyn = new HashMap<String, Set<String>>();
    private final Map<String, String> word2stemming = new HashMap<String, String>();
    private boolean useStemming = false;

    /**
     * Empties every structure except the basic stop-word set, which has its
     * own {@link #clearStopWordsSet()}.
     */
    public void clearStructures() {
        this.stopwordsSetExtended.clear();
        this.label2wordnetsyn.clear();
        this.word2stemming.clear();
        this.spelling_variants_map.clear();
        this.normalization_map.clear();
    }

    /** Empties the basic stop-word set. */
    public void clearStopWordsSet() {
        this.stopwordsSet.clear();
    }

    /** @return the live basic stop-word set (filled by {@link #loadStopWords()}) */
    public Set<String> getStopwordsSet() {
        return this.stopwordsSet;
    }

    /** @return the live extended stop-word set (filled by {@link #loadStopWordsExtended()}) */
    public Set<String> getStopwordsSetExtended() {
        return this.stopwordsSetExtended;
    }

    /** @return the live label-to-synonyms map (filled by {@link #loadWordNetSynonyms(String)}) */
    public Map<String, Set<String>> getLabel2wordnetsyn() {
        return this.label2wordnetsyn;
    }

    /**
     * Reads a synonym file whose lines have the form
     * {@code label|syn1:syn2:...} and stores the accepted synonyms per label.
     *
     * <p>A synonym is accepted when its lower-cased form differs from the label
     * and matches {@code [a-z_]+}. Lines with fewer than two {@code |}-separated
     * fields, or with no accepted synonym, are ignored. A label that occurs on
     * several lines keeps only the synonyms of its last accepted line.
     *
     * @param wordnet_syn_file location of the synonym file, handed to {@link ReadFile}
     * @throws Exception if the file cannot be opened, read or closed
     */
    public void loadWordNetSynonyms(String wordnet_syn_file) throws Exception {
        ReadFile synReader = new ReadFile(wordnet_syn_file);
        try {
            String row;
            while ((row = synReader.readLine()) != null) {
                // A row without '|' splits into a single field and is skipped here too.
                String[] fields = row.split(FIELD_SEPARATOR_REGEX);
                if (fields.length <= 1) {
                    continue;
                }
                String label = fields[0];
                Set<String> synonyms = new HashSet<String>();
                // split(":") yields the whole field when it holds no ':'.
                for (String candidate : fields[1].split(":")) {
                    // Locale.ROOT keeps the result inside a-z regardless of the JVM locale.
                    String synonym = candidate.toLowerCase(Locale.ROOT);
                    if (!label.equals(synonym) && SYNONYM_PATTERN.matcher(synonym).matches()) {
                        synonyms.add(synonym);
                    }
                }
                if (!synonyms.isEmpty()) {
                    this.label2wordnetsyn.put(label, synonyms);
                }
            }
        } finally {
            synReader.closeBuffer();
        }
        LogOutput.print("Size syn: " + this.label2wordnetsyn.size());
    }

    /** @param use_stemming value later reported by {@link #isStemmingUp()} */
    public void setStemming(boolean use_stemming) {
        this.useStemming = use_stemming;
    }

    /** @return the flag last set through {@link #setStemming(boolean)}; {@code false} initially */
    public boolean isStemmingUp() {
        return this.useStemming;
    }

    /** Selects stemmer type {@code 4}; the meaning of the id is defined by {@link StemmerManager}. */
    public void setStemmer() {
        StemmerManager.setStemmerType(4);
    }

    /**
     * Returns the stem of a word, computing it with the current
     * {@link StemmerManager} stemmer on first use and caching it afterwards.
     *
     * @param str word to stem
     * @return the cached or freshly computed stem
     */
    public String getStemming4Word(String str) {
        String cached = this.word2stemming.get(str);
        if (cached != null) {
            return cached;
        }
        String stemmed_word = StemmerManager.getStemmer().stem(str);
        this.word2stemming.put(str, stemmed_word);
        return stemmed_word;
    }

    /**
     * Adds every line of {@code stopwords.txt} that does not start with
     * {@code #} to the basic stop-word set.
     *
     * @throws Exception if the resource is missing or cannot be read or closed
     */
    public void loadStopWords() throws Exception {
        ReadFile stopReader = openResourceReader(STOPWORDS_FILE, false);
        try {
            String row;
            while ((row = stopReader.readLine()) != null) {
                if (!row.startsWith("#")) {
                    this.stopwordsSet.add(row);
                }
            }
        } finally {
            stopReader.closeBuffer();
        }
    }

    /**
     * Adds every line of {@code stopwords.txt} to the extended stop-word set;
     * lines starting with {@code #} are added with that first character removed.
     *
     * @throws Exception if the resource is missing or cannot be read or closed
     */
    public void loadStopWordsExtended() throws Exception {
        ReadFile stopReader = openResourceReader(STOPWORDS_FILE, false);
        try {
            String row;
            while ((row = stopReader.readLine()) != null) {
                this.stopwordsSetExtended.add(row.startsWith("#") ? row.substring(1) : row);
            }
        } finally {
            stopReader.closeBuffer();
        }
    }

    /** @return {@code true} if spelling variants are stored for {@code str} */
    public boolean hasSpellingVariants(String str) {
        return this.spelling_variants_map.containsKey(str);
    }

    /** @return {@code true} if normalizations are stored for {@code str} */
    public boolean hasNormalization(String str) {
        return this.normalization_map.containsKey(str);
    }

    /** @return the stored spelling variants of {@code str}, or {@code null} if there are none */
    public Set<String> getSpellingVariants(String str) {
        return this.spelling_variants_map.get(str);
    }

    /** @return the stored normalizations of {@code str}, or {@code null} if there are none */
    public Set<String> getNormalization(String str) {
        return this.normalization_map.get(str);
    }

    /** Delegates to {@link NormalizeNumbers#getRomanNormalization(String)}. */
    public String getRomanNormalization4Number(String word) {
        return NormalizeNumbers.getRomanNormalization(word);
    }

    /**
     * Loads the spelling-variant, plural and normalization resources, in that
     * order, and logs the elapsed time in seconds.
     *
     * <p>This is best-effort: a failure is reported on {@code System.err} and
     * not rethrown, and the resources after the failing one are not loaded.
     */
    public void loadUMLSLexiconResources() {
        long init = System.currentTimeMillis();
        try {
            this.load_UMLS_SpecialistLex_SpellingVariants();
            this.load_UMLS_SpecialistLex_Plurals();
            this.load_UMLS_SpecialistLex_Normalization();
        }
        catch (Exception e) {
            System.err.println("Error loading UMLS lexicon sources: " + e.getMessage());
            e.printStackTrace();
        }
        long fin = System.currentTimeMillis();
        LogOutput.print("Time loading UMLS lexicon sources (s): " + (fin - init) / 1000.0);
    }

    /** Maps field 1 to field 2 of the spelling resource, skipping pairs that are equal ignoring case. */
    private void load_UMLS_SpecialistLex_SpellingVariants() throws Exception {
        this.loadLexiconPairs(LEX_SPL_FILE, 2, true, this.spelling_variants_map);
    }

    /** Maps field 1 to field 5 of the plurals resource into the normalization map. */
    private void load_UMLS_SpecialistLex_Plurals() throws Exception {
        this.loadLexiconPairs(LEX_PLUR_FILE, 5, false, this.normalization_map);
    }

    /** Maps field 1 to field 4 of the normalization resource into the normalization map. */
    private void load_UMLS_SpecialistLex_Normalization() throws Exception {
        this.loadLexiconPairs(LEX_NORM_FILE, 4, false, this.normalization_map);
    }

    /**
     * Reads a gzip-compressed, pipe-delimited resource and records, for each
     * row with enough fields, the lower-cased field at {@code valueIndex}
     * under the lower-cased field 1.
     *
     * @param resourceName  classpath resource to read
     * @param valueIndex    zero-based index of the value field; must be at least 2
     * @param skipIdentical when {@code true}, rows whose key and value are equal are dropped
     * @param target        map that receives the key/value pairs
     * @throws Exception if the resource is missing or cannot be read or closed
     */
    private void loadLexiconPairs(String resourceName, int valueIndex, boolean skipIdentical,
            Map<String, Set<String>> target) throws Exception {
        ReadFile lexReader = openResourceReader(resourceName, true);
        try {
            String row;
            while ((row = lexReader.readLine()) != null) {
                // Rows without '|' split into one field and fail the length check.
                String[] fields = row.split(FIELD_SEPARATOR_REGEX);
                if (fields.length <= valueIndex) {
                    continue;
                }
                String key = fields[1].toLowerCase(Locale.ROOT);
                String value = fields[valueIndex].toLowerCase(Locale.ROOT);
                if (skipIdentical && key.equals(value)) {
                    continue;
                }
                Set<String> values = target.get(key);
                if (values == null) {
                    values = new HashSet<String>();
                    target.put(key, values);
                }
                values.add(value);
            }
        } finally {
            lexReader.closeBuffer();
        }
    }

    /**
     * Opens a classpath resource located next to this class and wraps it in a
     * {@link ReadFile}, optionally decompressing it as gzip.
     *
     * @param resourceName resource name relative to this class's package
     * @param gzipped      whether the resource must be read through a {@link GZIPInputStream}
     * @return a reader the caller must release with {@code closeBuffer()}
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws Exception if the stream cannot be wrapped
     */
    private static ReadFile openResourceReader(String resourceName, boolean gzipped) throws Exception {
        InputStream raw = LexicalUtilities.class.getResourceAsStream(resourceName);
        if (raw == null) {
            throw new IllegalStateException("Classpath resource not found: " + resourceName);
        }
        try {
            return new ReadFile(gzipped ? new GZIPInputStream(raw) : raw);
        } catch (Exception e) {
            try {
                raw.close();
            } catch (Exception ignored) {
                // The wrapping failure is the error worth reporting, not the close failure.
            }
            throw e;
        }
    }
}

