/*
 * Decompiled with CFR 0.152.
 */
package ca.pfv.spmf.algorithms.classifiers.naive_bayes_text_classifier;

import ca.pfv.spmf.algorithms.classifiers.naive_bayes_text_classifier.MemoryFile;
import ca.pfv.spmf.algorithms.classifiers.naive_bayes_text_classifier.OccurrenceProbabilties;
import ca.pfv.spmf.algorithms.classifiers.naive_bayes_text_classifier.TestRecord;
import ca.pfv.spmf.tools.MemoryLogger;
import ca.pfv.spmf.tools.textprocessing.PorterStemmer;
import ca.pfv.spmf.tools.textprocessing.StopWordAnalyzer;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
 * Naive Bayes text classifier working on directories of plain-text documents.
 *
 * <p>The training directory is expected to contain one sub-directory per class,
 * each holding the training documents of that class. Every regular file in the
 * test directory is classified, and one {@code <fileName>\t<className>} line per
 * test file is written to {@code output.tsv} in the output directory (and echoed
 * to standard output).
 *
 * <p>Lines are stripped of non-letter characters, lower-cased, filtered through
 * {@link StopWordAnalyzer} and stemmed with {@link PorterStemmer}; stems of
 * length one or less are ignored.
 *
 * <p>Instances keep per-run state in fields and are not designed for concurrent use.
 */
public class AlgoNaiveBayesClassifier {
    /** Weight given to each occurrence of a word when training data is held in memory. */
    private static final int IN_MEMORY_OCCURRENCE_WEIGHT = 50;
    /**
     * Weight given to each occurrence of a word when training files are re-read from disk.
     * NOTE(review): differs from {@link #IN_MEMORY_OCCURRENCE_WEIGHT}, so the two modes do not
     * produce the same probabilities. Kept as-is to preserve existing results - confirm intent.
     */
    private static final double ON_DISK_OCCURRENCE_WEIGHT = 20.0;
    /** Constant added to the weighted occurrence count (smoothing numerator). */
    private static final double SMOOTHING_NUMERATOR = 50.0;
    /** Constant added to the class token count (smoothing denominator). */
    private static final double SMOOTHING_DENOMINATOR = 100.0;
    /** Record id used when a test file name contains no usable number. */
    private static final int UNKNOWN_RECORD_ID = -1;

    private String mTestDataDirectory = "";
    private String mTrainingDataDirectory = "";
    private boolean mInMemoryFlag = false;
    /** Training documents of each class, keyed by class (sub-directory) name. */
    private HashMap<String, List<File>> mFileLists = new HashMap<String, List<File>>();
    private ArrayList<String> mClassNames;
    private StopWordAnalyzer mAnalyzer;
    private PorterStemmer mStemmer;
    private String mOutputDirectory = "";
    /** Stemmed training tokens of each class; only filled when the in-memory flag is set. */
    private ArrayList<MemoryFile> mMemFiles = new ArrayList<MemoryFile>();
    long mStartTimestamp = 0L;
    long mEndTimeStamp = 0L;
    /** Number of training documents per class name. */
    HashMap<String, Integer> classProb;

    /**
     * Trains on {@code trainingDirectory} and classifies every file of {@code testDirectory}.
     *
     * @param trainingDirectory directory containing one sub-directory of documents per class
     * @param testDirectory     directory containing the documents to classify
     * @param outputDirectory   directory in which {@code output.tsv} is written
     * @param memoryFlag        {@code true} to load all training tokens into memory once,
     *                          {@code false} to re-read the training files whenever a new
     *                          word probability is needed
     * @throws Exception if a directory cannot be listed, a file cannot be read or written,
     *                   or test files exist but no training class was found
     */
    public void runAlgorithm(String trainingDirectory, String testDirectory, String outputDirectory, boolean memoryFlag) throws Exception {
        this.mTrainingDataDirectory = trainingDirectory;
        this.mTestDataDirectory = testDirectory;
        this.mOutputDirectory = outputDirectory;
        this.mInMemoryFlag = memoryFlag;
        this.runAlgorithm();
    }

    /** Runs one complete train-and-classify pass using the settings stored in the fields. */
    private void runAlgorithm() throws Exception {
        this.mStartTimestamp = System.currentTimeMillis();
        this.mAnalyzer = new StopWordAnalyzer();
        this.mStemmer = new PorterStemmer();
        this.classProb = new HashMap<String, Integer>();
        this.mClassNames = new ArrayList<String>();
        // Reset state left over from a previous run on the same instance; otherwise the
        // in-memory token lists would be duplicated and skew the smoothed probabilities.
        this.mFileLists.clear();
        this.mMemFiles.clear();

        File[] trainingEntries = listDirectory(this.mTrainingDataDirectory, "training");
        File[] testEntries = listDirectory(this.mTestDataDirectory, "test");
        ArrayList<OccurrenceProbabilties> op = new ArrayList<OccurrenceProbabilties>();

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(this.mOutputDirectory + "/output.tsv")))) {
            int totalTrainingFiles = this.indexTrainingData(trainingEntries, op);
            if (this.mInMemoryFlag) {
                System.out.println("Loading Data in to memory.... May take a while depending upon the size of the data");
                this.loadIntoMemory();
            }
            for (File testFile : testEntries) {
                if (!testFile.isFile()) {
                    // Sub-directories cannot be read as documents.
                    continue;
                }
                System.out.println("---------------Computing for Test File:" + testFile.getName() + "-----------");
                String predictedClass = this.classify(testFile, op, totalTrainingFiles);
                if (predictedClass == null) {
                    throw new IllegalStateException("No training classes found in '" + this.mTrainingDataDirectory
                            + "'; cannot classify " + testFile.getName());
                }
                System.out.println(testFile.getName() + "\t" + predictedClass);
                writer.write(testFile.getName() + "\t" + predictedClass + "\n");
            }
        }
        this.mEndTimeStamp = System.currentTimeMillis();
    }

    /**
     * Lists the entries of a directory, failing with a descriptive error instead of a
     * {@code NullPointerException} when the path is not a readable directory.
     */
    private static File[] listDirectory(String path, String role) throws IOException {
        File[] entries = new File(path).listFiles();
        if (entries == null) {
            throw new IOException("Cannot list " + role + " directory: " + path);
        }
        return entries;
    }

    /**
     * Registers every class sub-directory: its name, its documents, its document count and an
     * empty probability cache appended to {@code op}.
     *
     * @return the total number of training documents over all classes
     */
    private int indexTrainingData(File[] trainingEntries, ArrayList<OccurrenceProbabilties> op) {
        int totalTrainingFiles = 0;
        for (File classDir : trainingEntries) {
            File[] entries = classDir.listFiles();
            if (entries == null) {
                // Not a directory (or unreadable): it cannot be a class folder.
                continue;
            }
            List<File> documents = new ArrayList<File>();
            for (File entry : entries) {
                if (entry.isFile()) {
                    documents.add(entry);
                }
            }
            String className = classDir.getName();
            this.mClassNames.add(className);
            OccurrenceProbabilties oc = new OccurrenceProbabilties();
            oc.setClassName(className);
            oc.setOccuranceMap(new HashMap<String, Double>());
            op.add(oc);
            this.mFileLists.put(className, documents);
            this.classProb.put(className, documents.size());
            // The prior's denominator is the number of documents, not the number of classes.
            totalTrainingFiles += documents.size();
        }
        return totalTrainingFiles;
    }

    /**
     * Scores one test file against every class and returns the most probable class name.
     * Ties are resolved in favour of the alphabetically first class name.
     *
     * @return the winning class name, or {@code null} when no class is known
     */
    private String classify(File testFile, ArrayList<OccurrenceProbabilties> op, int totalTrainingFiles) throws Exception {
        // The test document does not depend on the class, so it is read only once.
        TestRecord testRecord = this.readOneTestFile(testFile);
        TreeMap<String, BigDecimal> probabilities = new TreeMap<String, BigDecimal>();
        for (String currentClass : this.mClassNames) {
            Map<String, Double> knownProbabilities = findClassEntry(op, currentClass).getOccuranceMap();
            BigDecimal prob = BigDecimal.ONE;
            for (String word : testRecord.getWords()) {
                Double termProbInClass = knownProbabilities.get(word);
                if (termProbInClass == null || termProbInClass.doubleValue() == 0.0) {
                    double computed = this.mInMemoryFlag
                            ? this.calculateProbabilityInMemory(word, currentClass)
                            : this.calculateProbability(word, currentClass);
                    termProbInClass = Double.valueOf(computed);
                    knownProbabilities.put(word, termProbInClass);
                }
                prob = prob.multiply(BigDecimal.valueOf(termProbInClass.doubleValue()));
            }
            // Guard against 0/0 (NaN) when every class folder is empty.
            double prior = totalTrainingFiles == 0
                    ? 0.0
                    : (double) this.classProb.get(currentClass).intValue() / (double) totalTrainingFiles;
            prob = prob.multiply(BigDecimal.valueOf(prior));
            probabilities.put(currentClass, prob);
        }
        Map.Entry<String, BigDecimal> best = null;
        for (Map.Entry<String, BigDecimal> candidate : probabilities.entrySet()) {
            if (best == null || candidate.getValue().compareTo(best.getValue()) > 0) {
                best = candidate;
            }
        }
        return best == null ? null : best.getKey();
    }

    /** Returns the probability cache registered for exactly {@code className}. */
    private static OccurrenceProbabilties findClassEntry(List<OccurrenceProbabilties> op, String className) {
        for (OccurrenceProbabilties candidate : op) {
            if (candidate.getClassName().equals(className)) {
                return candidate;
            }
        }
        throw new IllegalStateException("No probability cache registered for class " + className);
    }

    /** Reads and stems all training documents of every class into {@link #mMemFiles}. */
    private void loadIntoMemory() throws IOException {
        for (String className : this.mClassNames) {
            MemoryFile memfile = new MemoryFile();
            ArrayList<String> words = new ArrayList<String>();
            memfile.setClassname(className);
            for (File document : this.mFileLists.get(className)) {
                this.appendTrainingStems(document, words);
            }
            memfile.setContent(words);
            this.mMemFiles.add(memfile);
        }
    }

    /**
     * Appends the stems (length &gt; 1) of one training document to {@code sink}.
     * NOTE(review): strips non-letters before lower-casing and does not trim, whereas
     * {@link #readOneTestFile(File)} lower-cases first and trims; kept as-is.
     */
    private void appendTrainingStems(File document, List<String> sink) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(document))) {
            String currentLine;
            while ((currentLine = reader.readLine()) != null) {
                currentLine = currentLine.replaceAll("\\P{L}", " ").toLowerCase().replaceAll("\n", " ");
                currentLine = currentLine.replaceAll("\\s+", " ");
                currentLine = this.mAnalyzer.removeStopWords(currentLine);
                this.addStems(currentLine, sink);
            }
        }
    }

    /** Splits a cleaned line on whitespace, stems each token and keeps stems longer than one character. */
    private void addStems(String cleanedLine, List<String> sink) {
        for (String token : cleanedLine.split("\\s+")) {
            String stem = this.mStemmer.stem(token);
            if (stem.length() > 1) {
                sink.add(stem);
            }
        }
    }

    /**
     * Smoothed probability of {@code word} in {@code currentClass}, computed from the tokens
     * previously loaded by {@link #loadIntoMemory()}.
     */
    private double calculateProbabilityInMemory(String word, String currentClass) {
        int tokenCount = 0;
        int weightedOccurrences = 0;
        for (MemoryFile memFile : this.mMemFiles) {
            if (!memFile.getClassname().equals(currentClass)) {
                continue;
            }
            weightedOccurrences += Collections.frequency(memFile.getContent(), word) * IN_MEMORY_OCCURRENCE_WEIGHT;
            tokenCount += memFile.getContent().size();
        }
        return ((double) weightedOccurrences + SMOOTHING_NUMERATOR) / ((double) tokenCount + SMOOTHING_DENOMINATOR);
    }

    /**
     * Smoothed probability of {@code word} in {@code currentClass}, computed by re-reading
     * every training document of that class from disk.
     */
    private double calculateProbability(String word, String currentClass) throws IOException {
        double weightedOccurrences = 0.0;
        int tokenCount = 0;
        ArrayList<String> stems = new ArrayList<String>();
        for (File document : this.mFileLists.get(currentClass)) {
            // Only the counts are needed, so the buffer is reused per document.
            stems.clear();
            this.appendTrainingStems(document, stems);
            tokenCount += stems.size();
            for (String stem : stems) {
                if (stem.equalsIgnoreCase(word)) {
                    weightedOccurrences += ON_DISK_OCCURRENCE_WEIGHT;
                }
            }
        }
        return (weightedOccurrences + SMOOTHING_NUMERATOR) / ((double) tokenCount + SMOOTHING_DENOMINATOR);
    }

    /**
     * Looks up an already computed probability.
     *
     * @param word         the stemmed word
     * @param probabilties per-class probability caches
     * @param className    class whose cache is consulted (exact, case-sensitive match)
     * @return the cached probability, or {@code 0.0} when none is stored
     */
    public double getFromExistingProbability(String word, ArrayList<OccurrenceProbabilties> probabilties, String className) {
        double value = 0.0;
        for (OccurrenceProbabilties op : probabilties) {
            if (!op.getClassName().equals(className)) {
                continue;
            }
            Double cached = op.getOccuranceMap().get(word);
            if (cached != null) {
                value = cached.doubleValue();
            }
        }
        return value;
    }

    /**
     * Reads one test document into a {@link TestRecord} holding its stems (length &gt; 1).
     *
     * <p>The record id is the number formed by the digits of the file name; when the name has
     * no digits, or they do not fit in an {@code int}, the id is {@code -1}.
     *
     * @param f the test document
     * @return the populated record
     * @throws Exception if the file cannot be read
     */
    public TestRecord readOneTestFile(File f) throws Exception {
        // Allow use before runAlgorithm has created the text-processing helpers.
        if (this.mAnalyzer == null) {
            this.mAnalyzer = new StopWordAnalyzer();
        }
        if (this.mStemmer == null) {
            this.mStemmer = new PorterStemmer();
        }
        TestRecord record = new TestRecord();
        ArrayList<String> words = new ArrayList<String>();
        try (BufferedReader br = new BufferedReader(new FileReader(f))) {
            String currentLine;
            while ((currentLine = br.readLine()) != null) {
                currentLine = currentLine.toLowerCase();
                currentLine = currentLine.replaceAll("\\P{L}", " ");
                currentLine = currentLine.replaceAll("\n", " ");
                currentLine = currentLine.replaceAll("\\s+", " ").trim();
                currentLine = this.mAnalyzer.removeStopWords(currentLine);
                this.addStems(currentLine, words);
            }
        }
        record.setRecordId(parseRecordId(f.getName()));
        record.setWords(words);
        return record;
    }

    /** Extracts the digits of a file name as an int, or {@link #UNKNOWN_RECORD_ID} if impossible. */
    private static int parseRecordId(String fileName) {
        String digits = fileName.replaceAll("\\D+", "");
        if (digits.isEmpty()) {
            return UNKNOWN_RECORD_ID;
        }
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException tooLarge) {
            // More digits than fit in an int.
            return UNKNOWN_RECORD_ID;
        }
    }

    /**
     * Prints the duration of the last run and the peak memory reported by {@link MemoryLogger}.
     * This class does not itself record memory samples during a run.
     */
    public void printStatistics() {
        System.out.println("========== Naive Bayes Classifier Stats ============");
        System.out.println(" Total time ~: " + (this.mEndTimeStamp - this.mStartTimestamp) + " ms");
        System.out.println(" Max memory:" + MemoryLogger.getInstance().getMaxMemory() + " mb ");
        System.out.println("=====================================");
    }
}

