/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.wlv.wkaclass;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import uk.ac.wlv.sentistrength.ClassificationOptions;
import uk.ac.wlv.sentistrength.ClassificationResources;
import uk.ac.wlv.sentistrength.Paragraph;
import uk.ac.wlv.sentistrength.TextParsingOptions;
import uk.ac.wlv.utilities.FileOps;
import uk.ac.wlv.utilities.Sort;
import uk.ac.wlv.utilities.StringIndex;
import uk.ac.wlv.wkaclass.PredictClass;

public class Arff {
    // Class-type codes. The methods in this class compare the class-type argument
    // against these numeric values (written as literals in the method bodies).

    /** No class attribute: each input line is treated purely as text. */
    public static final int igArffNone = 0;
    /** One class attribute, declared in the ARFF header as Binary {-1,1}. */
    public static final int igArffBinary = 1;
    /** One class attribute, declared in the ARFF header as Trinary {-1,0,1}. */
    public static final int igArffTrinary = 2;
    /** One class attribute, declared in the ARFF header as Scale {-4,...,4}. */
    public static final int igArffScale = 3;
    /** Two class attributes, declared in the ARFF header as Pos {1..5} and Neg {1..5}. */
    public static final int igArffPosNeg = 4;
    /**
     * When true, data rows are written in the brace-delimited "{index value,...}" form
     * that omits zero counts; when false every value is written comma-separated.
     */
    public static boolean bgSaveArffAsCondensed = true;

    /**
     * Runs the "arff" sub-menu: prints the help banner, parses the command-line
     * style arguments and then executes exactly one of the following, chosen in
     * this order:
     * <ol>
     * <li>{@code complete}: convert the unlabelled text to ARFF using the labelled
     * text's features, predict classes, convert the predictions back to text and
     * merge them with the unlabelled text (done twice for posneg, once otherwise);</li>
     * <li>labelledText + unlabelledText + textFileOut: merge the two text files;</li>
     * <li>labelledText + unlabelledText: convert the unlabelled text to ARFF;</li>
     * <li>arffFileIn + textFileOut: convert an ARFF file to text.</li>
     * </ol>
     * Keywords are matched case-insensitively. Any unrecognised argument, or a
     * non-numeric value for an integer option, is reported on standard output and
     * the method returns without running anything.
     * <p>
     * Note that this is an instance method (it is not declared {@code static}), so
     * it has to be invoked on an {@code Arff} object.
     *
     * @param args keyword and keyword/value arguments as listed by the help banner
     */
    public void main(String[] args) {
        // A freshly allocated boolean[] is already all false, so no reset pass is needed.
        boolean[] bArgumentRecognised = new boolean[args.length];
        String sUnlabelledTextFile = "";
        String sLabelledTextFile = "";
        String sArffFileIn = "";
        String sTextFileOut = "";
        String sClassifier = "smo";
        int iNGrams = 3;
        int iMaxFeatures = 0;
        int iClassType = 4;
        int iClassFor0 = 0;
        int iMinFeatureFrequency = 1;
        boolean bCompleteProcessing = false;
        Arff.overallHelp();
        try {
            for (int i = 0; i < args.length; ++i) {
                String sArg = args[i];
                if (sArg.equalsIgnoreCase("arff")) {
                    bArgumentRecognised[i] = true;
                } else if (sArg.equalsIgnoreCase("complete")) {
                    bCompleteProcessing = true;
                    bArgumentRecognised[i] = true;
                } else if (sArg.equalsIgnoreCase("scale")) {
                    iClassType = 3;
                    bArgumentRecognised[i] = true;
                } else if (sArg.equalsIgnoreCase("binary")) {
                    iClassType = 1;
                    bArgumentRecognised[i] = true;
                } else if (sArg.equalsIgnoreCase("trinary")) {
                    iClassType = 2;
                    bArgumentRecognised[i] = true;
                } else if (sArg.equalsIgnoreCase("posneg")) {
                    iClassType = 4;
                    bArgumentRecognised[i] = true;
                } else if (i < args.length - 1) {
                    // Options that consume the following argument as their value.
                    String sValue = args[i + 1];
                    boolean bMatched = true;
                    if (sArg.equalsIgnoreCase("unlabelledText")) {
                        sUnlabelledTextFile = sValue;
                    } else if (sArg.equalsIgnoreCase("labelledText")) {
                        sLabelledTextFile = sValue;
                    } else if (sArg.equalsIgnoreCase("arffFileIn")) {
                        sArffFileIn = sValue;
                    } else if (sArg.equalsIgnoreCase("textFileOut")) {
                        sTextFileOut = sValue;
                    } else if (sArg.equalsIgnoreCase("nGrams")) {
                        iNGrams = Integer.parseInt(sValue);
                    } else if (sArg.equalsIgnoreCase("maxFeatures")) {
                        iMaxFeatures = Integer.parseInt(sValue);
                    } else if (sArg.equalsIgnoreCase("classifier")) {
                        sClassifier = sValue;
                    } else if (sArg.equalsIgnoreCase("zeros")) {
                        iClassFor0 = Integer.parseInt(sValue);
                    } else if (sArg.equalsIgnoreCase("minFeatureFreq")) {
                        iMinFeatureFrequency = Integer.parseInt(sValue);
                    } else {
                        bMatched = false;
                    }
                    if (bMatched) {
                        bArgumentRecognised[i] = true;
                        bArgumentRecognised[i + 1] = true;
                    }
                }
            }
        }
        catch (NumberFormatException e) {
            // Report a bad numeric value the same way as any other bad argument
            // instead of letting the exception escape.
            System.out.println("Arff option value must be a whole number - " + e.getMessage());
            return;
        }
        for (int i = 0; i < args.length; ++i) {
            if (!bArgumentRecognised[i]) {
                System.out.println("Unrecognised Arff command - wrong spelling or case?: " + args[i]);
                return;
            }
        }
        if (bCompleteProcessing) {
            if (sUnlabelledTextFile.length() == 0) {
                System.out.println("An unlabelled text file must be specified [complete]");
                return;
            }
            if (sLabelledTextFile.length() == 0) {
                System.out.println("A labelled text file must be specified [complete]");
                return;
            }
            System.out.println("Complete processing starting...");
            System.out.println();
            System.out.println("Convert unlabelled texts " + sUnlabelledTextFile + " to Arff based on labelled text file " + sLabelledTextFile);
            System.out.println("Options: classtype " + iClassType + " Ngrams: 1-" + iNGrams + " max features: " + iMaxFeatures + " min freq for features: " + iMinFeatureFrequency);
            System.out.println(" Classtype: None=0, Binary=1, Trinary=2, Scale=3, PosNeg=4. max features = 0 => use all features (100 per 1k is optimal)");
            String[] sLabelledUnlabelled = Arff.convertUnlabelledTextFileToArffBasedOnLabelledTextFile(sLabelledTextFile, iClassType, iNGrams, iMinFeatureFrequency, iMaxFeatures, sUnlabelledTextFile);
            if (iClassType == 4) {
                // posneg yields {labelled A, labelled B, unlabelled A, unlabelled B}:
                // the first pair produces the "N" outputs, the second the "P" outputs.
                Arff.runPredictConvertMerge(sLabelledUnlabelled[0], sLabelledUnlabelled[2], sClassifier, iClassFor0, "N", sUnlabelledTextFile);
                Arff.runPredictConvertMerge(sLabelledUnlabelled[1], sLabelledUnlabelled[3], sClassifier, iClassFor0, "P", sUnlabelledTextFile);
            } else {
                Arff.runPredictConvertMerge(sLabelledUnlabelled[0], sLabelledUnlabelled[1], sClassifier, iClassFor0, "", sUnlabelledTextFile);
            }
        } else if (sUnlabelledTextFile.length() > 0 && sLabelledTextFile.length() > 0 && sTextFileOut.length() > 0) {
            System.out.println("mergeLabelledAndUnlabelledTextFiles " + sLabelledTextFile + ", " + sUnlabelledTextFile + ", " + sTextFileOut);
            Arff.mergeLabelledAndUnlabelledTextFiles(sLabelledTextFile, sUnlabelledTextFile, sTextFileOut);
        } else if (sLabelledTextFile.length() > 0 && sUnlabelledTextFile.length() > 0) {
            System.out.println("convertUnlabelledTextFileToArffBasedOnLabelledTextFile " + sLabelledTextFile + ", " + sUnlabelledTextFile);
            Arff.convertUnlabelledTextFileToArffBasedOnLabelledTextFile(sLabelledTextFile, iClassType, iNGrams, iMinFeatureFrequency, iMaxFeatures, sUnlabelledTextFile);
        } else if (sArffFileIn.length() > 0 && sTextFileOut.length() > 0) {
            System.out.println("convertArffToText " + sArffFileIn + ", " + sTextFileOut);
            Arff.convertArffToText(sArffFileIn, sTextFileOut);
        } else {
            System.out.println("Not enough parameters entered to run a process from the arff submenu. Must enter one of the following:");
            System.out.println(" complete - and parameters, to make classify unclassified text with ML");
            System.out.println(" labelledText, unlabelledText and textFileOut - merges labelled and unlabelled files");
            System.out.println(" labelledText, unlabelledText - converts unlabelled to ARFF based on labelled");
            System.out.println(" arffFileIn, textFileOut - converts ARFF to plain text");
        }
        System.out.println("[arff] finished");
    }

    /**
     * One pass of the "complete" pipeline: predict classes for the unlabelled ARFF
     * from the labelled ARFF, convert the classified ARFF to text, then merge that
     * text with the original unlabelled text file. Each step is announced on
     * standard output before it runs.
     *
     * @param sLabelledArff       ARFF file used for training
     * @param sUnlabelledArff     ARFF file to classify
     * @param sClassifier         classifier name passed to PredictClass
     * @param iClassFor0          class passed to PredictClass for a 0 prediction
     * @param sTag                inserted into the output names ("N", "P" or ""),
     *                            giving "_&lt;tag&gt;out.txt" and "_&lt;tag&gt;merged.txt"
     * @param sUnlabelledTextFile original unlabelled text file to merge with
     */
    private static void runPredictConvertMerge(String sLabelledArff, String sUnlabelledArff, String sClassifier, int iClassFor0, String sTag, String sUnlabelledTextFile) {
        System.out.println("predictArffClass " + sLabelledArff + " training for " + sUnlabelledArff);
        System.out.println();
        String sClassifiedUnlabelledArff = PredictClass.predictArffClass(sLabelledArff, sClassifier, sUnlabelledArff, iClassFor0);
        String sClassifiedUnlabelledTextFile = FileOps.s_ChopFileNameExtension(sClassifiedUnlabelledArff) + "_" + sTag + "out.txt";
        System.out.println("convertArffToText " + sClassifiedUnlabelledArff + " -> " + sClassifiedUnlabelledTextFile);
        System.out.println();
        Arff.convertArffToText(sClassifiedUnlabelledArff, sClassifiedUnlabelledTextFile);
        String sMergedTextFile = FileOps.s_ChopFileNameExtension(sClassifiedUnlabelledTextFile) + "_" + sTag + "merged.txt";
        System.out.println("mergeLabelledAndUnlabelledTextFiles " + sClassifiedUnlabelledTextFile + ", " + sUnlabelledTextFile + " -> " + sMergedTextFile);
        Arff.mergeLabelledAndUnlabelledTextFiles(sClassifiedUnlabelledTextFile, sUnlabelledTextFile, sMergedTextFile);
    }

    /**
     * Converts a labelled text file to one or more ARFF files, optionally reduces
     * each of them to its top {@code iMaxFeatures} attributes, and then converts the
     * unlabelled text file to ARFF using the last labelled ARFF file as the
     * feature reference.
     *
     * @param sLabelledTextFile    labelled input text file
     * @param iClassType           class-type code (4 = posneg, which needs two ARFF files)
     * @param iNGrams              n-gram size stored in the text parsing options
     * @param iMinFeatureFrequency minimum feature frequency for the labelled conversion
     * @param iMaxFeatures         if greater than 0, keep only this many attributes
     * @param sUnlabelledTextFile  unlabelled input text file
     * @return for posneg: {last labelled, second-last labelled, last unlabelled,
     *         second-last unlabelled}; otherwise {last labelled, last unlabelled}
     * @throws IllegalStateException if the labelled conversion produced fewer ARFF
     *         files than the class type requires
     */
    private static String[] convertUnlabelledTextFileToArffBasedOnLabelledTextFile(String sLabelledTextFile, int iClassType, int iNGrams, int iMinFeatureFrequency, int iMaxFeatures, String sUnlabelledTextFile) {
        // Upper limit on how many generated file names are scanned (kept from the original code).
        final int iMaxArffFiles = 99;
        TextParsingOptions textParsingOptions = new TextParsingOptions();
        ClassificationOptions classOptions = new ClassificationOptions();
        textParsingOptions.igNgramSize = iNGrams;
        ClassificationResources resources = new ClassificationResources();
        resources.sgSentiStrengthFolder = "c:/SentStrength_Data/";
        resources.initialise(classOptions);
        String[] sLabelledArffFiles = Arff.convertSentimentTextToArffMultiple(sLabelledTextFile, true, textParsingOptions, classOptions, resources, iClassType, iMinFeatureFrequency, "");
        // Count the leading non-empty entries, never reading past the end of the array.
        int iLabelledArffFileCount = 0;
        while (iLabelledArffFileCount < iMaxArffFiles && iLabelledArffFileCount < sLabelledArffFiles.length) {
            String sName = sLabelledArffFiles[iLabelledArffFileCount];
            if (sName == null || sName.equals("")) {
                break;
            }
            ++iLabelledArffFileCount;
        }
        // Fail early with a clear message rather than with a negative array index later on.
        int iFilesNeeded = iClassType == 4 ? 2 : 1;
        if (iLabelledArffFileCount < iFilesNeeded) {
            throw new IllegalStateException("Expected at least " + iFilesNeeded + " ARFF file(s) from " + sLabelledTextFile + " but found " + iLabelledArffFileCount);
        }
        if (iMaxFeatures > 0) {
            String[] sLabelledArffFilesReduced = new String[sLabelledArffFiles.length];
            for (int i = 0; i < iLabelledArffFileCount; ++i) {
                sLabelledArffFilesReduced[i] = FileOps.s_ChopFileNameExtension(sLabelledArffFiles[i]) + " " + iMaxFeatures + ".arff";
                Arff.makeArffWithTopNAttributes(sLabelledArffFiles[i], iMaxFeatures, sLabelledArffFilesReduced[i]);
            }
            sLabelledArffFiles = sLabelledArffFilesReduced;
        }
        int iLast = iLabelledArffFileCount - 1;
        String[] sUnlabelledArffFiles = Arff.convertSentimentTextToArffMultiple(sUnlabelledTextFile, true, textParsingOptions, classOptions, resources, iClassType, 1, sLabelledArffFiles[iLast]);
        if (iClassType == 4) {
            return new String[]{sLabelledArffFiles[iLast], sLabelledArffFiles[iLast - 1], sUnlabelledArffFiles[iLast], sUnlabelledArffFiles[iLast - 1]};
        }
        return new String[]{sLabelledArffFiles[iLast], sUnlabelledArffFiles[iLast]};
    }

    /**
     * Prints the help banner for the arff sub-menu to standard output, one line
     * per entry of the table below.
     */
    private static void overallHelp() {
        String[] sHelpLines = new String[]{
            "--------------------------------------------------------------------------",
            "- Text processing and ML prediction commands - arff to trigger this menu -",
            "--------------------------------------------------------------------------",
            "NB There is no command to convert labelled text to ARFF",
            "A) Convert unlabelled textfile to ARFF using features in labelled textfile",
            "unlabelledText [filename]",
            "labelledText [filename]",
            " nGrams [3] 3 means all 1-3grams",
            " maxFeatures [0] 0=no feature reduction",
            " minFeatureFreq [1] ignore less frequent features",
            " scale binary trinary posneg(default)",
            " zeros [class] - class if 0 predicted. Default 0",
            "B) Convert Arff to labelled text file",
            "arffFileIn [filename] convert to textfile",
            "textFileOut [filename] target textfile",
            "C) Merge Unlabelled and labelled text files",
            "unlabelledText [filename]",
            "labelledText [filename]",
            "textFileOut [filename]",
            "D) Do all above",
            "complete - input labelled, unlabelled, output classified text",
            " classifier [smo] classifier name for complete (slog, smoreg, ada, dec, libsvm, j48, mlp, jrip, bayes, liblin",
            "*run this via command line in parallel with wkaMachineLearning",
            "*ALL DATA must have header row, unless specified otherwise",
            "-----------------------------------------------------------------------------"
        };
        for (int iLine = 0; iLine < sHelpLines.length; ++iLine) {
            System.out.println(sHelpLines[iLine]);
        }
    }

    /**
     * Converts a sentiment text file to an ARFF file in two passes over the input:
     * first the n-gram index is built, then the ARFF header and data rows are
     * written from that index.
     * <p>
     * If {@code arffStringIndex} is supplied it is used (and both passes run in
     * "ARFF index" mode); otherwise a fresh index is created for this call.
     *
     * @param sSentiTextFileIn     input text file
     * @param sArffFileOut         ARFF file to create
     * @param bHeaderLine          true if the first input line is a header to skip
     * @param textParsingOptions   parsing options (including the n-gram size)
     * @param classOptions         classification options passed to each paragraph
     * @param resources            classification resources passed to each paragraph
     * @param iSentimentType       class-type code (0 none, 1 binary, 2 trinary, 3 scale, 4 posneg)
     * @param iMinFeatureFrequency minimum index count for a feature to become an attribute
     * @param arffStringIndex      existing index to reuse, or null to build a new one
     * @return true only if both the indexing pass and the writing pass succeeded;
     *         if indexing fails the output file is not written
     */
    public static boolean convertSentimentTextToArff(String sSentiTextFileIn, String sArffFileOut, boolean bHeaderLine, TextParsingOptions textParsingOptions, ClassificationOptions classOptions, ClassificationResources resources, int iSentimentType, int iMinFeatureFrequency, StringIndex arffStringIndex) {
        boolean bArffIndex = arffStringIndex != null;
        StringIndex stringIndex = arffStringIndex;
        if (!bArffIndex) {
            stringIndex = new StringIndex();
            stringIndex.initialise(0, true, false);
        }
        // Propagate failures instead of unconditionally reporting success.
        if (!Arff.buildIndexFromTextFile(sSentiTextFileIn, bHeaderLine, textParsingOptions, classOptions, resources, iSentimentType, stringIndex, bArffIndex)) {
            return false;
        }
        return Arff.writeArffFromIndex(sSentiTextFileIn, stringIndex, bHeaderLine, textParsingOptions, classOptions, resources, iSentimentType, iMinFeatureFrequency, sArffFileOut, bArffIndex);
    }

    /**
     * Writes an ARFF file (UTF8) for a text file: the header comes from
     * {@code writeArffHeadersFromIndex}, then each non-empty input line becomes one
     * data row holding the class value(s), the count of every attribute selected
     * by the header pass, and finally the n-gram count returned by
     * {@code Paragraph.addToStringIndex}.
     * <p>
     * Input lines are tab-separated. For posneg (type 4) the text is column 2 and
     * columns 0 and 1 are the classes (a leading "-" on column 1 is dropped); for
     * other class types the text is column 1 and column 0 is the class. A line
     * with a single column is treated as text only and gets class value(s) of 1.
     * For type 0 the whole line is the text and no class is written.
     * <p>
     * Both streams are always closed, including when an error occurs.
     *
     * @param sSentiTextFileIn     input text file (read as UTF8)
     * @param stringIndex          index whose counts are reset and refilled per line
     * @param bHeaderLine          true if the first input line is a header to skip
     * @param textParsingOptions   parsing options (including the n-gram size)
     * @param classOptions         classification options passed to each paragraph
     * @param resources            classification resources passed to each paragraph
     * @param iSentimentType       class-type code (0 none, 1 binary, 2 trinary, 3 scale, 4 posneg)
     * @param iMinFeatureFrequency minimum index count for a feature to become an attribute
     * @param sArffFileOut         ARFF file to create
     * @param bArffIndex           passed through to the header writer and to addToStringIndex
     * @return true on success; false if the header could not be written or any I/O error occurred
     */
    private static boolean writeArffFromIndex(String sSentiTextFileIn, StringIndex stringIndex, boolean bHeaderLine, TextParsingOptions textParsingOptions, ClassificationOptions classOptions, ClassificationResources resources, int iSentimentType, int iMinFeatureFrequency, String sArffFileOut, boolean bArffIndex) {
        String[] sData = null;
        boolean[] bIndexEntryUsed = new boolean[stringIndex.getLastWordID() + 2];
        BufferedWriter wWriter = null;
        BufferedReader rReader = null;
        try {
            wWriter = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(sArffFileOut), "UTF8"));
            if (!Arff.writeArffHeadersFromIndex(sSentiTextFileIn, stringIndex, iSentimentType, textParsingOptions.igNgramSize, iMinFeatureFrequency, bArffIndex, bIndexEntryUsed, wWriter)) {
                // The header writer has already reported the problem.
                return false;
            }
            rReader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(sSentiTextFileIn), "UTF8"));
            if (bHeaderLine) {
                rReader.readLine();
            }
            String sLine;
            while ((sLine = rReader.readLine()) != null) {
                if (sLine.length() == 0) {
                    continue;
                }
                stringIndex.setAllCountsToZero();
                Paragraph para = new Paragraph();
                if (iSentimentType == 0) {
                    para.setParagraph(sLine, null, null);
                } else {
                    // Text column: 2 for posneg (two class columns first), 1 otherwise.
                    int iTextColumn = iSentimentType == 4 ? 2 : 1;
                    sData = sLine.split("\t");
                    if (sData.length > iTextColumn) {
                        para.setParagraph(sData[iTextColumn], resources, classOptions);
                    } else if (sData.length == 1) {
                        para.setParagraph(sLine, resources, classOptions);
                    }
                }
                int iNgramCount = para.addToStringIndex(stringIndex, textParsingOptions, true, bArffIndex);

                // Class value(s); iClassOffset is the number of class attributes that
                // precede the feature attributes in the condensed numbering.
                int iClassOffset = 0;
                if (iSentimentType == 4) {
                    iClassOffset = 2;
                    if (sData.length > 2) {
                        if (sData[1].length() > 1 && sData[1].startsWith("-")) {
                            sData[1] = sData[1].substring(1);
                        }
                        String sFirstClass = sData[0].trim();
                        String sSecondClass = sData[1].trim();
                        if (bgSaveArffAsCondensed) {
                            wWriter.write("{0 " + sFirstClass + ",1 " + sSecondClass + ",");
                        } else {
                            wWriter.write(sFirstClass + "," + sSecondClass + ",");
                        }
                    } else if (bgSaveArffAsCondensed) {
                        wWriter.write("{0 1,1 1,");
                    } else {
                        wWriter.write("1,1,");
                    }
                } else if (iSentimentType != 0) {
                    iClassOffset = 1;
                    if (sData.length > 1) {
                        if (bgSaveArffAsCondensed) {
                            wWriter.write("{0 " + sData[0].trim() + ",");
                        } else {
                            wWriter.write(sData[0].trim() + ",");
                        }
                    } else if (bgSaveArffAsCondensed) {
                        wWriter.write("{0 1,");
                    } else {
                        wWriter.write("1,");
                    }
                } else if (bgSaveArffAsCondensed) {
                    wWriter.write("{");
                }

                // Feature counts, restricted to the entries the header pass selected.
                int iAttUsed = -1;
                for (int w = 0; w <= stringIndex.getLastWordID(); ++w) {
                    if (!bIndexEntryUsed[w]) {
                        continue;
                    }
                    ++iAttUsed;
                    if (bgSaveArffAsCondensed) {
                        // Condensed rows list non-zero values only.
                        if (stringIndex.getCount(w) != 0) {
                            wWriter.write((iAttUsed + iClassOffset) + " " + stringIndex.getCount(w) + ",");
                        }
                    } else {
                        wWriter.write(stringIndex.getCount(w) + ",");
                    }
                }

                // The trailing attribute is the n-gram count of the line.
                if (bgSaveArffAsCondensed) {
                    wWriter.write((iAttUsed + 1 + iClassOffset) + " " + iNgramCount + "}\n");
                } else {
                    wWriter.write(iNgramCount + "\n");
                }
            }
            rReader.close();
            // Closed here so that a failed final flush is still reported as an error.
            wWriter.close();
        }
        catch (IOException e) {
            System.out.println("Could not open file for writing or write to file: " + sArffFileOut);
            e.printStackTrace();
            return false;
        }
        finally {
            // Safety net for the error paths; closing an already closed stream has no effect.
            if (rReader != null) {
                try {
                    rReader.close();
                }
                catch (IOException ignored) {
                    // nothing useful can be done at this point
                }
            }
            if (wWriter != null) {
                try {
                    wWriter.close();
                }
                catch (IOException ignored) {
                    // the primary failure has already been reported
                }
            }
        }
        return true;
    }

    /**
     * Merges two text files line by line into a tab-separated output file,
     * stopping as soon as either input runs out of lines.
     * <p>
     * For each pair of lines the labelled line is split on tabs. With two or more
     * columns the output is "col0 TAB unlabelled-line TAB col1"; with exactly one
     * column it is "0 TAB unlabelled-line TAB col0". A labelled line that splits
     * into no columns is reported on standard output and produces no output row.
     * <p>
     * All three files use UTF8, so text read from the inputs is written back
     * unchanged regardless of the platform default charset. Errors are reported
     * on standard output and not rethrown; the streams are always closed.
     *
     * @param sLabelledTextFileIn   file supplying the label columns
     * @param sUnlabelledTextFileIn file supplying the text lines
     * @param sTextFileOut          file to create
     */
    private static void mergeLabelledAndUnlabelledTextFiles(String sLabelledTextFileIn, String sUnlabelledTextFileIn, String sTextFileOut) {
        BufferedReader rLabelled = null;
        BufferedReader rUnlabelled = null;
        BufferedWriter wWriter = null;
        try {
            rLabelled = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(sLabelledTextFileIn), "UTF8"));
            rUnlabelled = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(sUnlabelledTextFileIn), "UTF8"));
            // Same charset as the readers so non-ASCII text survives the round trip.
            wWriter = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(sTextFileOut), "UTF8"));
            while (true) {
                String sLineL = rLabelled.readLine();
                if (sLineL == null) {
                    break;
                }
                String sLineU = rUnlabelled.readLine();
                if (sLineU == null) {
                    break;
                }
                String[] sDataL = sLineL.split("\t");
                if (sDataL.length > 1) {
                    wWriter.write(sDataL[0] + "\t" + sLineU + "\t" + sDataL[1] + "\n");
                } else if (sDataL.length == 1) {
                    wWriter.write("0\t" + sLineU + "\t" + sDataL[0] + "\n");
                } else {
                    System.out.println("short labelled line [mergeLabelledAndUnlabelledTextFiles]\n" + sLineL);
                }
            }
            rLabelled.close();
            rUnlabelled.close();
            // Closed here so that a failed final flush is still reported below.
            wWriter.close();
        }
        catch (Exception e) {
            System.out.println("Error [mergeLabelledAndUnlabelledTextFiles]");
            e.printStackTrace();
        }
        finally {
            // Safety net for the error paths; closing an already closed stream has no effect.
            if (rLabelled != null) {
                try {
                    rLabelled.close();
                }
                catch (IOException ignored) {
                    // nothing useful can be done at this point
                }
            }
            if (rUnlabelled != null) {
                try {
                    rUnlabelled.close();
                }
                catch (IOException ignored) {
                    // nothing useful can be done at this point
                }
            }
            if (wWriter != null) {
                try {
                    wWriter.close();
                }
                catch (IOException ignored) {
                    // the primary failure has already been reported
                }
            }
        }
    }

    /**
     * Writes the ARFF header: three comment lines (origin, date, source file name),
     * the relation name, the class attribute(s) for the given class type, one
     * numeric attribute per selected index entry, the trailing n-gram count
     * attribute and the {@code @data} marker.
     * <p>
     * An index entry is selected when its string contains fewer than
     * {@code iNgram} separator characters ('+' in ARFF-index mode, ' ' otherwise)
     * and its count is at least {@code iMinFeatureFrequency}. The decision for
     * every entry is recorded in {@code bArffIndexEntryUsed}.
     *
     * @param sSourceFile          name written into the "%filename:" comment
     * @param stringIndex          index supplying strings, counts and comments
     * @param iSentimentType       class-type code (0 none, 1 binary, 2 trinary, 3 scale, 4 posneg)
     * @param iNgram               n-gram size limit, also used in the last attribute's name
     * @param iMinFeatureFrequency minimum count for an entry to become an attribute
     * @param bArffIndex           true if the index holds ARFF attribute names
     * @param bArffIndexEntryUsed  output: per index entry, whether it became an attribute
     * @param wWriter              destination writer (not closed here)
     * @return true on success, false if writing failed
     */
    private static boolean writeArffHeadersFromIndex(String sSourceFile, StringIndex stringIndex, int iSentimentType, int iNgram, int iMinFeatureFrequency, boolean bArffIndex, boolean[] bArffIndexEntryUsed, BufferedWriter wWriter) {
        try {
            wWriter.write("%Arff file from Arff.java\n");
            String sTimestamp = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss").format(new Date());
            wWriter.write("%Date: " + sTimestamp + "\n");
            wWriter.write("%filename: " + sSourceFile + "\n");
            wWriter.write("@relation AllTerms\n");
            switch (iSentimentType) {
                case 4:
                    wWriter.write("@attribute Pos {1,2,3,4,5}\n");
                    wWriter.write("@attribute Neg {1,2,3,4,5}\n");
                    break;
                case 1:
                    wWriter.write("@attribute Binary {-1,1}\n");
                    break;
                case 2:
                    wWriter.write("@attribute Trinary {-1,0,1}\n");
                    break;
                case 3:
                    wWriter.write("@attribute Scale {-4,-3,-2,-1,0,1,2,3,4}\n");
                    break;
                default:
                    // no class attribute for any other type
                    break;
            }
            // n-gram parts are joined by '+' in an ARFF index and by ' ' otherwise
            char cSeparator = bArffIndex ? '+' : ' ';
            for (int iEntry = 0; iEntry <= stringIndex.getLastWordID(); ++iEntry) {
                String sEntry = stringIndex.getString(iEntry);
                boolean bSelected = Arff.i_CharsInString(sEntry, cSeparator) < iNgram && stringIndex.getCount(iEntry) >= iMinFeatureFrequency;
                bArffIndexEntryUsed[iEntry] = bSelected;
                if (!bSelected) {
                    continue;
                }
                String sAttributeName;
                if (!bArffIndex) {
                    sAttributeName = Arff.arffSafeWordEncode(sEntry, true);
                } else if (sEntry.startsWith("Q_") || sEntry.startsWith("R_")) {
                    // entries with these prefixes are written using their stored comment
                    sAttributeName = stringIndex.getComment(iEntry);
                } else {
                    sAttributeName = sEntry;
                }
                wWriter.write("@attribute " + sAttributeName + " numeric\n");
            }
            wWriter.write("@attribute Ngram_" + iNgram + "count numeric\n");
            wWriter.write("@data\n");
            return true;
        }
        catch (IOException e) {
            System.out.println("Could not write ARFF headers to file [writeArffHeadersFromIndex]");
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Counts how many times a character occurs in a string.
     *
     * @param sText text to scan; must not be null
     * @param sChar character to count
     * @return number of positions in {@code sText} holding {@code sChar}
     */
    private static int i_CharsInString(String sText, char sChar) {
        int iCount = 0;
        // The index is always within [0, length), so charAt cannot throw here and
        // no exception handling is required.
        for (int i = 0; i < sText.length(); ++i) {
            if (sText.charAt(i) == sChar) {
                ++iCount;
            }
        }
        return iCount;
    }

    /**
     * Turns a word into a prefixed name suitable for use as an ARFF attribute.
     * <p>
     * The word is URL-encoded as UTF-8. If encoding leaves it unchanged the result
     * is {@code "U_" + word}. Otherwise the result is {@code "E_"} followed by the
     * encoded form with every "%" replaced by "_pc" and every "}" by "_brak".
     *
     * @param sWord word to encode
     * @param bCodeNumbersForQuestionMarksNotUsed not read by this method
     * @return the prefixed attribute name
     */
    public static String arffSafeWordEncode(String sWord, boolean bCodeNumbersForQuestionMarksNotUsed) {
        String sUrlForm;
        try {
            sUrlForm = URLEncoder.encode(sWord, "UTF-8");
        }
        catch (UnsupportedEncodingException e) {
            System.out.print("Fatal UnsupportedEncodingException UTF-8");
            e.printStackTrace();
            // carry on with an empty encoded form
            sUrlForm = "";
        }
        if (sUrlForm.equals(sWord)) {
            return "U_" + sWord;
        }
        // replace() leaves the text untouched when the target does not occur
        return "E_" + sUrlForm.replace("%", "_pc").replace("}", "_brak");
    }

    /**
     * Reads a text file (UTF8) and adds the content of every non-empty line to
     * the given index via {@code Paragraph.addToStringIndex}.
     * <p>
     * Lines are tab-separated. The text is column 2 for posneg (type 4) and
     * column 1 for the other class types; a line with a single column is used
     * whole. For type 0 the whole line is always used and the paragraph is set
     * without resources or options.
     * <p>
     * The reader is always closed, including when an error occurs.
     *
     * @param sSentiTextFileIn   input text file
     * @param bHeaderLine        true if the first line is a header to skip
     * @param textParsingOptions parsing options passed to addToStringIndex
     * @param classOptions       classification options passed to each paragraph
     * @param resources          classification resources passed to each paragraph
     * @param iSentimentType     class-type code (0 none, 1 binary, 2 trinary, 3 scale, 4 posneg)
     * @param stringIndex        index to add to
     * @param bArffIndex         passed through to addToStringIndex
     * @return true on success; false if the file is missing or cannot be read
     */
    private static boolean buildIndexFromTextFile(String sSentiTextFileIn, boolean bHeaderLine, TextParsingOptions textParsingOptions, ClassificationOptions classOptions, ClassificationResources resources, int iSentimentType, StringIndex stringIndex, boolean bArffIndex) {
        File f = new File(sSentiTextFileIn);
        if (!f.exists()) {
            System.out.println("Could not find the vocab file: " + sSentiTextFileIn);
            return false;
        }
        BufferedReader rReader = null;
        try {
            rReader = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(sSentiTextFileIn), "UTF8"));
            if (bHeaderLine) {
                rReader.readLine();
            }
            String sLine;
            while ((sLine = rReader.readLine()) != null) {
                if (sLine.length() == 0) {
                    continue;
                }
                Paragraph para = new Paragraph();
                if (iSentimentType == 0) {
                    para.setParagraph(sLine, null, null);
                } else {
                    // Text column: 2 for posneg (two class columns first), 1 otherwise.
                    int iTextColumn = iSentimentType == 4 ? 2 : 1;
                    String[] sData = sLine.split("\t");
                    if (sData.length > iTextColumn) {
                        para.setParagraph(sData[iTextColumn], resources, classOptions);
                    } else if (sData.length == 1) {
                        para.setParagraph(sLine, resources, classOptions);
                    }
                }
                para.addToStringIndex(stringIndex, textParsingOptions, true, bArffIndex);
            }
        }
        catch (IOException e) {
            System.out.println("Could not open file for reading or read from file: " + sSentiTextFileIn);
            e.printStackTrace();
            return false;
        }
        finally {
            if (rReader != null) {
                try {
                    rReader.close();
                }
                catch (IOException ignored) {
                    // a failure to close a reader does not affect the index already built
                }
            }
        }
        return true;
    }

    /**
     * Builds a StringIndex from the numeric attribute declarations in the header
     * of an ARFF file. Reading stops at the first line containing "@data".
     *
     * @param sArffFileIn ARFF file to read
     * @return the populated index, or null if the file does not exist or an I/O
     *         error occurs while reading it
     */
    private static StringIndex buildIndexFromArff(String sArffFileIn) {
        File f = new File(sArffFileIn);
        if (!f.exists()) {
            System.out.println("Could not find the ARFF file: " + sArffFileIn);
            return null;
        }
        StringIndex stringIndex = new StringIndex();
        stringIndex.initialise(0, true, true);
        int iPos = 0;
        int iStringLastOld = -1;
        // Counter used to generate unique "R_<n>" placeholder names (see the loop below).
        int iDummyNumber = 898989;
        try {
            // NOTE(review): no charset is given here, so the platform default is used,
            // whereas writeArffFromIndex writes ARFF files as UTF8 - confirm this is intended.
            // NOTE(review): the reader is not closed if an exception is thrown below.
            BufferedReader rReader = new BufferedReader(new InputStreamReader(new FileInputStream(sArffFileIn)));
            while (rReader.ready()) {
                String[] sData;
                String sLine = rReader.readLine();
                if (sLine.indexOf("@data") >= 0) break;
                // Only lines of exactly three space-separated tokens "@attribute <name> numeric"
                // are used; names of two characters or fewer and names containing "Ngram" are skipped.
                if (sLine.length() <= 0 || (sData = sLine.split(" ")).length != 3 || !sData[0].equals("@attribute") || !sData[2].equals("numeric") || sData[1].length() <= 2 || sData[1].indexOf("Ngram") >= 0) continue;
                // Remember the last ID so we can tell afterwards whether addString added an entry.
                iStringLastOld = stringIndex.getLastWordID();
                // NOTE(review): substring(1) is the name without its first character. Names
                // reaching this point are longer than two characters, so it cannot equal "Q"
                // and this branch looks unreachable - possibly substring(0, 1) was intended; confirm.
                if (sData[1].substring(1).equals("Q")) {
                    iPos = sData[1].indexOf("_");
                    if (iPos > 0) {
                        stringIndex.addString(sData[1].substring(iPos), false);
                        if (iStringLastOld != stringIndex.getLastWordID()) {
                            stringIndex.addComment(iStringLastOld + 1, sLine);
                        }
                    } else {
                        System.out.println("Invalid Q index entry: " + sLine + " in " + sArffFileIn);
                    }
                } else {
                    stringIndex.addString(sData[1], false);
                }
                // If the last ID did not change, nothing was added for this attribute. Keep
                // adding placeholder "R_<n>" names until one is accepted, storing the original
                // line as its comment. The message is printed on every iteration.
                // (Presumably addString leaves the index unchanged for duplicates - verify in StringIndex.)
                while (iStringLastOld == stringIndex.getLastWordID()) {
                    stringIndex.addString("R_" + iDummyNumber++, false);
                    if (iStringLastOld != stringIndex.getLastWordID()) {
                        stringIndex.addComment(iStringLastOld + 1, sLine);
                    }
                    System.out.println("Invalid or duplicate index entry: " + sLine + " in " + sArffFileIn);
                }
            }
            rReader.close();
        }
        catch (IOException e) {
            System.out.println("Couldn't open/read from: " + sArffFileIn);
            e.printStackTrace();
            return null;
        }
        return stringIndex;
    }

    /**
     * Merges two ARFF files row by row into a single ARFF file.
     *
     * <p>The lines preceding the {@code @relation} line of the first file are copied, followed by the
     * lines preceding and including the {@code @relation} line of the second file. All attributes of
     * the first file are then written, followed by those attributes of the second file that are not
     * exact duplicates of a first-file attribute line, and finally the merged data rows.
     *
     * @param sArffFile1 path of the first ARFF file
     * @param sArffFile2 path of the second ARFF file
     * @param bVerbose whether the duplicate attributes found are reported on standard output
     * @param sMergedArffFile path of the merged ARFF file to create
     * @return {@code false} if either input is missing or an I/O error is raised here; {@code true}
     *         otherwise (I/O errors handled inside the helper methods are not reported back)
     */
    public static boolean combineTwoARFFs(String sArffFile1, String sArffFile2, boolean bVerbose, String sMergedArffFile) {
        if (!new File(sArffFile1).exists()) {
            System.out.println("Couldn't find Arff file: " + sArffFile1);
            return false;
        }
        if (!new File(sArffFile2).exists()) {
            System.out.println("Couldn't find Arff file: " + sArffFile2);
            return false;
        }
        int[] iAttributeArray = new int[1];
        // try-with-resources closes all three streams even if a later open or any read/write fails.
        try (BufferedReader rReader1 = new BufferedReader(new InputStreamReader(new FileInputStream(sArffFile1)));
             BufferedReader rReader2 = new BufferedReader(new InputStreamReader(new FileInputStream(sArffFile2)));
             BufferedWriter wWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sMergedArffFile)))) {
            Arff.printArffHeader(rReader1, wWriter, false);
            Arff.printArffHeader(rReader2, wWriter, true);
            String[] sAttributes1 = Arff.loadArffAttributes(rReader1, iAttributeArray);
            int iAttributes1Count = iAttributeArray[0];
            String[] sAttributes2 = Arff.loadArffAttributes(rReader2, iAttributeArray);
            int iAttributes2Count = iAttributeArray[0];
            boolean[] bDuplicate2 = Arff.printNonDuplicateAttributes(sAttributes1, iAttributes1Count, sAttributes2, iAttributes2Count, bVerbose, wWriter);
            Arff.printDataWithoutDuplicates(rReader1, rReader2, bDuplicate2, iAttributes1Count, iAttributes2Count, wWriter);
        }
        catch (IOException e) {
            System.out.println("I/O error with input or output file, e.g.,: " + sArffFile1);
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Deletes one column from an ARFF file and then moves column 1 of the result to the end.
     *
     * <p>The intermediate result is written to {@code sArffFileIn + ".temp"}, which is removed before
     * and after the operation.
     *
     * @param sArffFileIn path of the source ARFF file
     * @param iColToDelete 1-based index of the attribute to delete
     * @param sArffFileOut path of the ARFF file to create
     * @return {@code true} only if the input exists and both steps report success; the move step is
     *         skipped when the delete step fails, so no output is produced from a broken intermediate file
     */
    public static boolean deleteColAndMoveRemainingFirstColToEnd(String sArffFileIn, int iColToDelete, String sArffFileOut) {
        if (!new File(sArffFileIn).exists()) {
            System.out.println("Could not find Arff file: " + sArffFileIn);
            return false;
        }
        String sArffTemp = sArffFileIn + ".temp";
        File fTemp = new File(sArffTemp);
        if (fTemp.exists()) {
            fTemp.delete();
        }
        // Propagate failures instead of unconditionally reporting success.
        boolean bOk = Arff.deleteColumnFromArff(sArffFileIn, iColToDelete, sArffTemp)
                && Arff.moveColumnToEndOfArff(sArffTemp, 1, sArffFileOut);
        if (fTemp.exists()) {
            fTemp.delete();
        }
        return bOk;
    }

    /**
     * Copies an ARFF file, moving one attribute (and its data column) to the last position.
     *
     * @param sArffFileIn path of the source ARFF file
     * @param iColToMove 1-based index of the attribute to move; must be between 1 and the number of
     *        attributes in the file
     * @param sArffFileOut path of the ARFF file to create
     * @return {@code true} if the file was rewritten successfully; {@code false} if the input is
     *         missing, the column index is out of range, or any read/write step fails
     */
    public static boolean moveColumnToEndOfArff(String sArffFileIn, int iColToMove, String sArffFileOut) {
        if (!new File(sArffFileIn).exists()) {
            System.out.println("Could not find Arff file: " + sArffFileIn);
            return false;
        }
        int[] iAttArr = new int[1];
        // try-with-resources closes both streams even when a helper or a write fails part-way.
        try (BufferedReader srArff = new BufferedReader(new InputStreamReader(new FileInputStream(sArffFileIn)));
             BufferedWriter swNew = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sArffFileOut)))) {
            Arff.printArffHeader(srArff, swNew, true);
            String[] sAttributes = Arff.loadArffAttributes(srArff, iAttArr);
            int iAttributesCount = iAttArr[0];
            // Reject indexes that would otherwise raise an unchecked exception or re-emit slot 0.
            if (iColToMove < 1 || iColToMove > iAttributesCount) {
                System.out.println("Column " + iColToMove + " is outside 1-" + iAttributesCount + " [moveColumnToEndOfArff] " + sArffFileIn);
                return false;
            }
            // New boolean arrays are all false; only the moved column is flagged.
            boolean[] bDelete = new boolean[iAttributesCount + 1];
            bDelete[iColToMove] = true;
            boolean bOk = Arff.printSelectedAttributes(sAttributes, iAttributesCount, bDelete, swNew, false);
            // The moved attribute is declared after all the others.
            swNew.write(sAttributes[iColToMove] + "\n");
            bOk = Arff.printSelectedData(srArff, bDelete, iAttributesCount, swNew, true, false) && bOk;
            return bOk;
        }
        catch (IOException e) {
            System.out.println("File i/o error [moveColumnToEndOfArff]" + sArffFileIn + " or " + sArffFileOut);
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Derives the name of the ARFF file holding features for a single n-gram size.
     *
     * @param sSentiTextFileIn name of the sentiment text file the ARFF is derived from
     * @param iNgram n-gram size
     * @return the input name with its extension chopped, followed by {@code _<iNgram>.arff}
     */
    public static String ngramFileName(String sSentiTextFileIn, int iNgram) {
        String baseName = FileOps.s_ChopFileNameExtension(sSentiTextFileIn);
        return baseName + "_" + iNgram + ".arff";
    }

    /**
     * Derives the name of the ARFF file holding the combined features for n-gram sizes 1 to n.
     *
     * @param sSentiTextFileIn name of the sentiment text file the ARFF is derived from
     * @param iNgram largest n-gram size included
     * @return the input name with its extension chopped, followed by {@code _1-<iNgram>.arff}
     */
    public static String oneToNgramFileName(String sSentiTextFileIn, int iNgram) {
        String baseName = FileOps.s_ChopFileNameExtension(sSentiTextFileIn);
        return baseName + "_1-" + iNgram + ".arff";
    }

    /**
     * Derives the name of the positive or negative ARFF file for a single n-gram size.
     *
     * @param sSentiTextFileIn name of the sentiment text file the ARFF is derived from
     * @param iNgram n-gram size
     * @param bPos {@code true} for the {@code pos} file name, {@code false} for the {@code neg} one
     * @return the input name with its extension chopped, followed by {@code _<iNgram>pos.arff} or
     *         {@code _<iNgram>neg.arff}
     */
    public static String ngramFileNamePosNeg(String sSentiTextFileIn, int iNgram, boolean bPos) {
        String suffix = bPos ? "pos.arff" : "neg.arff";
        return FileOps.s_ChopFileNameExtension(sSentiTextFileIn) + "_" + iNgram + suffix;
    }

    /**
     * Derives the name of the positive or negative ARFF file for the combined n-gram sizes 1 to n.
     *
     * @param sSentiTextFileIn name of the sentiment text file the ARFF is derived from
     * @param iNgram largest n-gram size included
     * @param bPos {@code true} for the {@code pos} file name, {@code false} for the {@code neg} one
     * @return the input name with its extension chopped, followed by {@code _1-<iNgram>pos.arff} or
     *         {@code _1-<iNgram>neg.arff}
     */
    public static String oneToNgramFileNamePosNeg(String sSentiTextFileIn, int iNgram, boolean bPos) {
        String suffix = bPos ? "pos.arff" : "neg.arff";
        return FileOps.s_ChopFileNameExtension(sSentiTextFileIn) + "_1-" + iNgram + suffix;
    }

    /**
     * Converts a sentiment text file into a set of ARFF files, one per n-gram size from 1 up to
     * {@code textParsingOptions.igNgramSize}, plus a combined "1 to n" file for every n above 1.
     *
     * <p>While the files are generated, {@code textParsingOptions.igNgramSize} is set to each n-gram
     * size in turn. Post-processing then depends on {@code iSentimentType}:
     * <ul>
     *   <li>4: every generated file is split into a {@code pos} file (column 2 deleted) and a
     *       {@code neg} file (column 1 deleted), each with its remaining first column moved to the
     *       end; the unsplit file is then deleted.</li>
     *   <li>1, 2 or 3: every generated file is rewritten in place with column 1 moved to the end.</li>
     *   <li>any other value: the generated files are left as they are and none is listed.</li>
     * </ul>
     *
     * @param sSentiTextFileIn path of the sentiment text file
     * @param bHeaderLine passed through to {@code convertSentimentTextToArff}
     * @param textParsingOptions parsing options; {@code igNgramSize} gives the largest n-gram size
     * @param classOptions passed through to {@code convertSentimentTextToArff}
     * @param resources passed through to {@code convertSentimentTextToArff}
     * @param iSentimentType selects the post-processing described above
     * @param iMinFeatureFrequency passed through to {@code convertSentimentTextToArff}
     * @param sArffFileForPermittedFeaturesList ARFF file from which an index is built and passed to
     *        the conversion, or {@code ""} to pass no index
     * @return a 100-slot array whose leading entries are the final file names (unused slots are
     *         {@code null}), or {@code null} if the input file does not exist
     */
    public static String[] convertSentimentTextToArffMultiple(String sSentiTextFileIn, boolean bHeaderLine, TextParsingOptions textParsingOptions, ClassificationOptions classOptions, ClassificationResources resources, int iSentimentType, int iMinFeatureFrequency, String sArffFileForPermittedFeaturesList) {
        if (!new File(sSentiTextFileIn).exists()) {
            System.out.println("Could not find sentiment file: " + sSentiTextFileIn);
            return null;
        }
        StringIndex permittedFeatures = null;
        if (!sArffFileForPermittedFeaturesList.equals("")) {
            permittedFeatures = Arff.buildIndexFromArff(sArffFileForPermittedFeaturesList);
        }
        final int maxNgram = textParsingOptions.igNgramSize;

        // Pass 1: build one ARFF per n-gram size and chain them into cumulative "1-n" files.
        String previousCombined = "";
        for (int n = 1; n <= maxNgram; ++n) {
            textParsingOptions.igNgramSize = n;
            String singleFile = Arff.ngramFileName(sSentiTextFileIn, n);
            File stale = new File(singleFile);
            if (stale.exists()) {
                stale.delete();
            }
            Arff.convertSentimentTextToArff(sSentiTextFileIn, singleFile, bHeaderLine, textParsingOptions, classOptions, resources, iSentimentType, iMinFeatureFrequency, permittedFeatures);
            if (n > 1) {
                String combinedFile = Arff.oneToNgramFileName(sSentiTextFileIn, n);
                stale = new File(combinedFile);
                if (stale.exists()) {
                    stale.delete();
                }
                Arff.combineTwoARFFs(previousCombined, singleFile, false, combinedFile);
                previousCombined = combinedFile;
            } else {
                previousCombined = singleFile;
            }
        }

        // Pass 2: post-process the generated files and collect the final names.
        String[] sFinalOutFile = new String[100];
        int lastUsed = -1;
        if (iSentimentType == 4) {
            for (int n = 1; n <= maxNgram; ++n) {
                String source = Arff.ngramFileName(sSentiTextFileIn, n);
                sFinalOutFile[++lastUsed] = Arff.ngramFileNamePosNeg(sSentiTextFileIn, n, true);
                Arff.deleteColAndMoveRemainingFirstColToEnd(source, 2, sFinalOutFile[lastUsed]);
                sFinalOutFile[++lastUsed] = Arff.ngramFileNamePosNeg(sSentiTextFileIn, n, false);
                Arff.deleteColAndMoveRemainingFirstColToEnd(source, 1, sFinalOutFile[lastUsed]);
                new File(source).delete();
                if (n > 1) {
                    source = Arff.oneToNgramFileName(sSentiTextFileIn, n);
                    sFinalOutFile[++lastUsed] = Arff.oneToNgramFileNamePosNeg(sSentiTextFileIn, n, true);
                    Arff.deleteColAndMoveRemainingFirstColToEnd(source, 2, sFinalOutFile[lastUsed]);
                    sFinalOutFile[++lastUsed] = Arff.oneToNgramFileNamePosNeg(sSentiTextFileIn, n, false);
                    Arff.deleteColAndMoveRemainingFirstColToEnd(source, 1, sFinalOutFile[lastUsed]);
                    new File(source).delete();
                }
            }
        } else if (iSentimentType == 1 || iSentimentType == 2 || iSentimentType == 3) {
            for (int n = 1; n <= maxNgram; ++n) {
                // Rewrite in place: park the file under a .temp name, regenerate it, drop the .temp copy.
                String target = Arff.ngramFileName(sSentiTextFileIn, n);
                sFinalOutFile[++lastUsed] = target;
                String parkedName = target + ".temp";
                File parked = new File(parkedName);
                new File(target).renameTo(parked);
                Arff.moveColumnToEndOfArff(parkedName, 1, target);
                parked.delete();
                if (n > 1) {
                    target = Arff.oneToNgramFileName(sSentiTextFileIn, n);
                    sFinalOutFile[++lastUsed] = target;
                    parkedName = target + ".temp";
                    parked = new File(parkedName);
                    new File(target).renameTo(parked);
                    Arff.moveColumnToEndOfArff(parkedName, 1, target);
                    parked.delete();
                }
            }
        }
        return sFinalOutFile;
    }

    /**
     * Copies an ARFF file, leaving out one attribute and its data column.
     *
     * @param sArffFile path of the source ARFF file
     * @param iColToRemove 1-based index of the attribute to remove, between 0 and the number of
     *        attributes; 0 does not correspond to any attribute, so nothing is removed
     * @param sNewArffFile path of the ARFF file to create
     * @return {@code true} if the file was rewritten successfully; {@code false} if the input is
     *         missing, the column index is out of range, or any read/write step fails
     */
    public static boolean deleteColumnFromArff(String sArffFile, int iColToRemove, String sNewArffFile) {
        if (!new File(sArffFile).exists()) {
            System.out.println("Could not find Arff file: " + sArffFile);
            return false;
        }
        int[] iAttArr = new int[1];
        // try-with-resources closes both streams even when a helper or a write fails part-way.
        try (BufferedReader rArff = new BufferedReader(new InputStreamReader(new FileInputStream(sArffFile)));
             BufferedWriter wNew = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sNewArffFile)))) {
            Arff.printArffHeader(rArff, wNew, true);
            String[] sAttributes = Arff.loadArffAttributes(rArff, iAttArr);
            int iAttributesCount = iAttArr[0];
            // Reject indexes that would otherwise raise an unchecked array-bounds exception.
            if (iColToRemove < 0 || iColToRemove > iAttributesCount) {
                System.out.println("Column " + iColToRemove + " is outside 0-" + iAttributesCount + " [deleteColumnFromArff] " + sArffFile);
                return false;
            }
            // New boolean arrays are all false; only the removed column is flagged.
            boolean[] bDelete = new boolean[iAttributesCount + 1];
            bDelete[iColToRemove] = true;
            boolean bOk = Arff.printSelectedAttributes(sAttributes, iAttributesCount, bDelete, wNew, false);
            bOk = Arff.printSelectedData(rArff, bDelete, iAttributesCount, wNew, false, true) && bOk;
            return bOk;
        }
        catch (IOException e) {
            System.out.println("I/O error with input or output file, e.g.,: " + sArffFile);
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes the attribute lines that are not flagged for deletion, one per line.
     *
     * <p>Slot 0 of {@code sAttributes} is written first whenever it is non-null, regardless of
     * {@code bDelete[0]}; slots 1 to {@code iAttributesCount} follow in order, skipping flagged ones.
     *
     * @param sAttributes attribute lines, with the attributes in slots 1 to {@code iAttributesCount}
     * @param iAttributesCount number of attributes
     * @param bDelete flags, indexed like {@code sAttributes}, marking the attributes to leave out
     * @param swNew writer receiving the retained lines
     * @param bVerbose whether to print the number and text of the skipped attributes
     * @return {@code false} if writing fails, {@code true} otherwise
     */
    private static boolean printSelectedAttributes(String[] sAttributes, int iAttributesCount, boolean[] bDelete, BufferedWriter swNew, boolean bVerbose) {
        int skipped = 0;
        StringBuilder skippedText = new StringBuilder();
        try {
            if (sAttributes[0] != null) {
                swNew.write(sAttributes[0] + "\n");
            }
            for (int col = 1; col <= iAttributesCount; ++col) {
                if (bDelete[col]) {
                    ++skipped;
                    if (bVerbose) {
                        skippedText.append(sAttributes[col]);
                    }
                    continue;
                }
                swNew.write(sAttributes[col] + "\n");
            }
            if (bVerbose) {
                System.out.println(skipped + " deleted out of " + iAttributesCount + "\n" + skippedText);
            }
        }
        catch (IOException e) {
            System.out.println("Error writing [printSelectedAttributes]");
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Copies the data rows of an ARFF file, dropping the flagged columns or moving them to the end.
     *
     * <p>Writes {@code @data} and then one output row per non-empty input line. When
     * {@code bgSaveArffAsCondensed} is set, each row is parsed as {@code {id value, id value, ...}}
     * with 0-based attribute IDs; the IDs are renumbered so that kept columns come first (in their
     * original order) followed by flagged columns, and the row is emitted through
     * {@code printCondensedData}. A value that cannot be parsed as an integer is written as 0.
     * Otherwise each row is treated as comma-separated values in attribute order.
     *
     * @param srArff reader supplying the data lines
     * @param bDeleteCol flags for columns 1 to {@code iAttributeCount}; {@code true} marks a column to
     *        drop or move
     * @param iAttributeCount number of attributes in the source data
     * @param swOutput writer receiving the rewritten rows
     * @param bPrintDeletedColsAtEnd {@code true} to append flagged columns after the kept ones,
     *        {@code false} to leave them out
     * @param bVerbose whether to print the number of lines read
     * @return {@code false} if an I/O error occurs, {@code true} otherwise
     */
    private static boolean printSelectedData(BufferedReader srArff, boolean[] bDeleteCol, int iAttributeCount, BufferedWriter swOutput, boolean bPrintDeletedColsAtEnd, boolean bVerbose) {
        int[] sparseIds = new int[iAttributeCount + 1];
        int[] sparseValues = new int[iAttributeCount + 1];
        int[] remappedId = new int[iAttributeCount + 1];
        int lastKeptColumn = 0;
        int nextId = 0;
        int linesRead = 0;
        // Kept columns receive the lowest new IDs ...
        for (int col = 1; col <= iAttributeCount; ++col) {
            if (bDeleteCol[col]) {
                continue;
            }
            lastKeptColumn = col;
            remappedId[col - 1] = nextId++;
        }
        // ... and flagged columns are numbered after them.
        for (int col = 1; col <= iAttributeCount; ++col) {
            if (bDeleteCol[col]) {
                remappedId[col - 1] = nextId++;
            }
        }
        try {
            swOutput.write("@data\n");
            while (srArff.ready()) {
                String row = srArff.readLine();
                ++linesRead;
                if (row.length() <= 0) {
                    continue;
                }
                if (bgSaveArffAsCondensed) {
                    int lastPair = -1;
                    // Strip the surrounding braces, then split into "id value" pairs.
                    String[] pairs = row.substring(1, row.length() - 1).split(",");
                    for (int p = 0; p < pairs.length; ++p) {
                        if (pairs[p].length() <= 2) {
                            continue;
                        }
                        String[] idAndValue = pairs[p].trim().split(" ");
                        int sourceId = Integer.parseInt(idAndValue[0]);
                        if (!bPrintDeletedColsAtEnd && bDeleteCol[sourceId + 1]) {
                            continue;
                        }
                        sparseIds[++lastPair] = remappedId[sourceId];
                        try {
                            sparseValues[lastPair] = Integer.parseInt(idAndValue[1]);
                        }
                        catch (Exception e) {
                            sparseValues[lastPair] = 0;
                        }
                    }
                    Arff.printCondensedData(swOutput, sparseIds, sparseValues, lastPair);
                } else {
                    String[] cells = row.split(",");
                    StringBuilder moved = new StringBuilder();
                    for (int col = 1; col < lastKeptColumn; ++col) {
                        if (bDeleteCol[col]) {
                            moved.append(cells[col - 1]).append(",");
                        } else {
                            swOutput.write(cells[col - 1] + ",");
                        }
                    }
                    if (bPrintDeletedColsAtEnd) {
                        for (int col = lastKeptColumn; col <= iAttributeCount; ++col) {
                            if (bDeleteCol[col]) {
                                moved.append(cells[col - 1]).append(",");
                            }
                        }
                    }
                    if (bPrintDeletedColsAtEnd && moved.length() > 0) {
                        // Drop the trailing comma from the collected flagged cells.
                        swOutput.write(cells[lastKeptColumn - 1] + "," + moved.substring(0, moved.length() - 1) + "\n");
                    } else {
                        swOutput.write(cells[lastKeptColumn - 1] + "\n");
                    }
                }
            }
        }
        catch (IOException e) {
            System.out.println("I/O error with input or output file [printSelectedData]");
            e.printStackTrace();
            return false;
        }
        if (bVerbose) {
            System.out.println(linesRead + " lines of data saved");
        }
        return true;
    }

    /**
     * Writes one data row as {@code {id value,id value,...}} after sorting the pairs with
     * {@code Sort.quickSortIntWithInt} over positions 0 to {@code iLastPair}.
     *
     * <p>An {@code iLastPair} of -1 produces an empty row, <code>{}</code>. I/O errors are reported
     * with a stack trace and otherwise ignored.
     *
     * @param swArff writer receiving the row
     * @param iAtt attribute IDs; reordered in place by the sort
     * @param iData values paired with {@code iAtt}; reordered together with it
     * @param iLastPair index of the last pair in use, or -1 for none
     */
    private static void printCondensedData(BufferedWriter swArff, int[] iAtt, int[] iData, int iLastPair) {
        Sort.quickSortIntWithInt(iAtt, iData, 0, iLastPair);
        try {
            swArff.write("{");
            String separator = "";
            for (int pair = 0; pair <= iLastPair; ++pair) {
                swArff.write(separator + iAtt[pair] + " " + iData[pair]);
                separator = ",";
            }
            swArff.write("}\n");
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the merged {@code @data} section of two ARFF files, row by row.
     *
     * <p>Each output row is the row from the first file followed by the values of those second-file
     * columns that are not flagged as duplicates. When {@code bgSaveArffAsCondensed} is set, rows are
     * treated as {@code {id value, ...}} lists with 0-based IDs, and the retained second-file
     * IDs are renumbered to follow the first file's attributes. Otherwise rows are treated as
     * comma-separated values in attribute order. Reading stops as soon as either reader has no more
     * input; a pair of lines is skipped if either line is empty.
     *
     * @param rArff1 reader positioned at the data rows of the first file
     * @param rArff2 reader positioned at the data rows of the second file
     * @param bDuplicate2 flags for second-file columns 1 to {@code iAttributes2Count}; {@code true}
     *        marks a column to leave out
     * @param iAttributes1Count number of attributes in the first file
     * @param iAttributes2Count number of attributes in the second file
     * @param wMerged writer receiving the merged rows
     */
    private static void printDataWithoutDuplicates(BufferedReader rArff1, BufferedReader rArff2, boolean[] bDuplicate2, int iAttributes1Count, int iAttributes2Count, BufferedWriter wMerged) {
        // Retained second-file columns get consecutive IDs directly after the first file's attributes.
        int iAttUsed = 0;
        int[] iAttribute2ID = new int[iAttributes2Count + 1];
        for (int iCol = 1; iCol <= iAttributes2Count; ++iCol) {
            if (!bDuplicate2[iCol]) {
                iAttribute2ID[iCol] = iAttributes1Count + iAttUsed++;
            }
        }
        try {
            wMerged.write("@data\n");
            while (rArff1.ready() && rArff2.ready()) {
                String sLine1 = rArff1.readLine();
                String sLine2 = rArff2.readLine();
                if (sLine1 == null || sLine2 == null) {
                    break;
                }
                // An empty line in either file cannot be merged; skip the pair rather than fail on it.
                if (sLine1.equals("") || sLine2.equals("")) {
                    continue;
                }
                if (bgSaveArffAsCondensed) {
                    // Re-open the first row by dropping its closing brace.
                    String sPrefix = sLine1.substring(0, sLine1.length() - 1);
                    wMerged.write(sPrefix);
                    // A first row of "{}" has no entries yet, so the first appended pair needs no separator.
                    boolean bNeedSeparator = sPrefix.trim().length() > 1;
                    String sBody2 = sLine2.length() >= 2 ? sLine2.substring(1, sLine2.length() - 1) : "";
                    String[] sData2 = sBody2.split(",");
                    for (int iPair = 0; iPair < sData2.length; ++iPair) {
                        String sPair = sData2[iPair].trim();
                        // "{}" in the second file splits into a single empty token: nothing to append.
                        if (sPair.length() == 0) {
                            continue;
                        }
                        try {
                            String[] sIDValue = sPair.split(" ");
                            int iCol = Integer.parseInt(sIDValue[0]) + 1;
                            if (!bDuplicate2[iCol]) {
                                String sEntry = iAttribute2ID[iCol] + " " + sIDValue[1];
                                wMerged.write(bNeedSeparator ? ", " + sEntry : sEntry);
                                bNeedSeparator = true;
                            }
                        }
                        // Only malformed pairs are reported here; write failures reach the outer handler.
                        catch (RuntimeException e) {
                            System.out.println("Error processing ID value pair " + sData2[iPair] + " [printDataWithoutDuplicates]");
                            e.printStackTrace();
                        }
                    }
                    wMerged.write("}\n");
                    continue;
                }
                wMerged.write(sLine1);
                String[] sData2 = sLine2.split(",");
                for (int iCol = 1; iCol <= iAttributes2Count; ++iCol) {
                    if (!bDuplicate2[iCol]) {
                        wMerged.write("," + sData2[iCol - 1]);
                    }
                }
                wMerged.write("\n");
            }
        }
        catch (IOException e) {
            System.out.println("Error writing to file [printDataWithoutDuplicates]");
            e.printStackTrace();
        }
    }

    /**
     * Writes all attribute lines of the first file, then those of the second file that are not
     * exact textual duplicates of a first-file attribute line.
     *
     * @param sAttributes1 attribute lines of the first file, in slots 1 to {@code iAttributes1Count}
     * @param iAttributes1Count number of attributes in the first file
     * @param sAttributes2 attribute lines of the second file, in slots 1 to {@code iAttributes2Count}
     * @param iAttributes2Count number of attributes in the second file
     * @param bVerbose whether to print the number and text of the duplicates found
     * @param wMerged writer receiving the attribute lines
     * @return flags indexed like {@code sAttributes2}; {@code true} marks a second-file attribute
     *         that duplicates a first-file attribute and was therefore not written
     */
    private static boolean[] printNonDuplicateAttributes(String[] sAttributes1, int iAttributes1Count, String[] sAttributes2, int iAttributes2Count, boolean bVerbose, BufferedWriter wMerged) {
        int iDupCount = 0;
        StringBuilder sDuplicateList = new StringBuilder();
        boolean[] bDuplicate2 = new boolean[iAttributes2Count + 1];
        // Hash lookup replaces a scan of every first-file attribute for each second-file attribute.
        java.util.Set<String> firstFileAttributes = new java.util.HashSet<String>();
        try {
            for (int i = 1; i <= iAttributes1Count; ++i) {
                wMerged.write(sAttributes1[i] + "\n");
                firstFileAttributes.add(sAttributes1[i]);
            }
            for (int j = 1; j <= iAttributes2Count; ++j) {
                if (firstFileAttributes.contains(sAttributes2[j])) {
                    if (bVerbose) {
                        sDuplicateList.append(sAttributes2[j]).append(" | ");
                    }
                    bDuplicate2[j] = true;
                    ++iDupCount;
                } else {
                    wMerged.write(sAttributes2[j] + "\n");
                }
            }
        }
        catch (IOException e) {
            System.out.println("Error writing to file file [printNonDuplicateAttributes]");
            e.printStackTrace();
        }
        if (bVerbose) {
            System.out.println(iDupCount + " duplicates found out of " + iAttributes1Count + "\n" + sDuplicateList);
        }
        return bDuplicate2;
    }

    /**
     * Copies the lines that precede the {@code @relation} line of an ARFF file and optionally the
     * {@code @relation} line itself.
     *
     * <p>Reading stops at the first line starting with {@code "@relation "} or when the reader reports
     * that no more input is ready. If {@code bPrintRelation} is set, the line at which reading stopped
     * is written as well. I/O errors are reported with a stack trace and otherwise ignored.
     *
     * @param rArffIn reader positioned at the start of the ARFF file
     * @param wArffOut writer receiving the copied lines
     * @param bPrintRelation whether to also write the line at which copying stopped
     * @return number of lines copied before the stopping line
     */
    private static int printArffHeader(BufferedReader rArffIn, BufferedWriter wArffOut, boolean bPrintRelation) {
        int copied = 0;
        String current = "";
        try {
            if (rArffIn.ready()) {
                current = rArffIn.readLine();
            }
            for (; rArffIn.ready() && !current.startsWith("@relation "); current = rArffIn.readLine()) {
                wArffOut.write(current + "\n");
                ++copied;
            }
            if (bPrintRelation) {
                wArffOut.write(current + "\n");
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return copied;
    }

    /**
     * Reads header lines up to the {@code @data} line and collects them as attribute lines.
     *
     * <p>Empty lines and lines starting with {@code %} are ignored. A line starting with
     * {@code "@relation "} is stored in slot 0; every other line is stored in the next free slot,
     * starting at 1. The array starts with 10000 slots and is doubled when it fills up. Reading
     * stops at a line starting with {@code @data} or when the reader reports that no more input is
     * ready. I/O errors are reported with a stack trace and otherwise ignored.
     *
     * @param rArffIn reader positioned inside the ARFF header
     * @param iAttributeCountArr one-element output array; element 0 receives the number of lines
     *        stored from slot 1 onwards
     * @return the collected lines; slots beyond the count are {@code null}
     */
    private static String[] loadArffAttributes(BufferedReader rArffIn, int[] iAttributeCountArr) {
        int capacity = 10000;
        int stored = 0;
        String[] lines = new String[capacity];
        String current = "";
        try {
            if (rArffIn.ready()) {
                current = rArffIn.readLine();
            }
            while (rArffIn.ready() && !current.startsWith("@data")) {
                boolean ignorable = current.equals("") || current.charAt(0) == '%';
                if (!ignorable) {
                    if (current.startsWith("@relation ")) {
                        lines[0] = current;
                    } else {
                        ++stored;
                        // Grow one slot early so the store below always fits.
                        if (stored == capacity - 1) {
                            lines = Arff.increaseArraySize(lines, capacity, 2 * capacity);
                            capacity *= 2;
                        }
                        lines[stored] = current;
                    }
                }
                current = rArffIn.readLine();
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        iAttributeCountArr[0] = stored;
        return lines;
    }

    /**
     * Returns a larger copy of a string array, or the array itself if no growth is requested.
     *
     * @param sArray array to grow
     * @param iCurrentArraySize number of leading elements to carry over
     * @param iNewArraySize length of the new array
     * @return a new array of length {@code iNewArraySize} holding the first
     *         {@code iCurrentArraySize} elements of {@code sArray}, or {@code sArray} unchanged when
     *         {@code iNewArraySize} is not greater than {@code iCurrentArraySize}
     */
    private static String[] increaseArraySize(String[] sArray, int iCurrentArraySize, int iNewArraySize) {
        if (iNewArraySize > iCurrentArraySize) {
            String[] grown = new String[iNewArraySize];
            System.arraycopy(sArray, 0, grown, 0, iCurrentArraySize);
            return grown;
        }
        return sArray;
    }

    /**
     * Flags the columns to keep: slot 0, the last column, and up to {@code iFeaturesToSelect}
     * columns taken from the front of the ordering produced by
     * {@code Sort.quickSortNumbersDescendingViaIndex} on {@code fColIG}.
     *
     * @param fColIG per-column scores, indexed 1 to {@code iAttributeCount}
     * @param iAttributeCount number of columns
     * @param iFeaturesToSelect number of columns to flag from the front of the ordering; values of
     *        0 or less flag none
     * @param bUseCol output flags, indexed like {@code fColIG}; slots 1 to {@code iAttributeCount}
     *        are cleared first
     */
    private static void selectTopNAttributes(double[] fColIG, int iAttributeCount, int iFeaturesToSelect, boolean[] bUseCol) {
        int[] order = new int[iAttributeCount + 1];
        for (int col = 1; col <= iAttributeCount; ++col) {
            order[col] = col;
            bUseCol[col] = false;
        }
        Sort.quickSortNumbersDescendingViaIndex(fColIG, order, 1, iAttributeCount);
        bUseCol[iAttributeCount] = true;
        bUseCol[0] = true;
        if (iFeaturesToSelect <= 0) {
            return;
        }
        // The first entry of the ordering is always taken once at least one feature is requested.
        bUseCol[order[1]] = true;
        int remaining = iFeaturesToSelect - 1;
        for (int rank = 2; rank <= iAttributeCount && remaining >= 1; ++rank) {
            bUseCol[order[rank]] = true;
            --remaining;
        }
    }

    /**
     * Writes every attribute line followed by its score to a text file, in the order produced by
     * {@code Sort.quickSortNumbersDescendingViaIndex} on {@code fColIG}.
     *
     * <p>Each output line is the attribute text, a space, and the score formatted with the pattern
     * {@code #.######}, terminated by {@code \r\n}. I/O errors are reported with a stack trace and
     * otherwise ignored.
     *
     * @param fColIG per-column scores, indexed 1 to {@code iAttributeCount}
     * @param sAttributes attribute lines, indexed like {@code fColIG}
     * @param iAttributeCount number of columns
     * @param sIGListOut path of the text file to create
     */
    private static void printInformationGainValues(double[] fColIG, String[] sAttributes, int iAttributeCount, String sIGListOut) {
        DecimalFormat df = new DecimalFormat("#.######");
        int[] iIndex = new int[iAttributeCount + 1];
        for (int iCol = 1; iCol <= iAttributeCount; ++iCol) {
            iIndex[iCol] = iCol;
        }
        Sort.quickSortNumbersDescendingViaIndex(fColIG, iIndex, 1, iAttributeCount);
        // try-with-resources closes the writer even when a write fails part-way through the list.
        try (BufferedWriter wWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sIGListOut)))) {
            for (int iCol = 1; iCol <= iAttributeCount; ++iCol) {
                wWriter.write(sAttributes[iIndex[iCol]] + " " + df.format(fColIG[iIndex[iCol]]) + "\r\n");
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes, for every non-class column, the entropy of the class column minus the weighted
     * entropy of the class within each distinct value of that column, and stores it in
     * {@code fColIG}.
     *
     * <p>The class is the last column ({@code iAttributeCount}); it is used directly as an array
     * index, so class values must lie between 0 and the largest class value present. Rows and
     * columns are 1-based; the class column's slot in {@code fColIG} is not written.
     *
     * @param iData data indexed as {@code [column][row]}
     * @param iAttributeCount number of columns, the last being the class
     * @param iDataCount number of rows
     * @param fColIG output array receiving the result for columns 1 to {@code iAttributeCount - 1}
     */
    private static void calculateInformationGainOfData(int[][] iData, int iAttributeCount, int iDataCount, double[] fColIG) {
        int iClassAttribute = iAttributeCount;
        // A column holds at most iDataCount distinct values (slots 1..iDataCount), so size the buffer
        // from the data instead of a fixed limit that overflows on columns with many distinct values.
        int[] iAttributeValue = new int[iDataCount + 1];
        int iFirstClass = Arff.findFirstClassInData(iData, iDataCount, iAttributeCount);
        int iLastClass = Arff.findLastClassInData(iData, iDataCount, iAttributeCount);
        int[] iClass = new int[iLastClass + 1];
        double fOverallEntropy = Arff.calculateClassesAndEntropyOfData(iData, iAttributeCount, iDataCount, iClass, iFirstClass, iLastClass);
        int[] iAttributeValueClassCount = new int[iLastClass + 1];
        for (int iCol = 1; iCol < iAttributeCount; ++iCol) {
            // Collect the distinct values occurring in this column.
            int iAttributeValueCount = 0;
            for (int iRow = 1; iRow <= iDataCount; ++iRow) {
                boolean bFound = false;
                for (int i = 1; i <= iAttributeValueCount; ++i) {
                    if (iAttributeValue[i] == iData[iCol][iRow]) {
                        bFound = true;
                        break;
                    }
                }
                if (!bFound) {
                    iAttributeValue[++iAttributeValueCount] = iData[iCol][iRow];
                }
            }
            // Sum the class entropy within each distinct value, weighted by how often the value occurs.
            double fAttributeEntropySum = 0.0;
            for (int i = 1; i <= iAttributeValueCount; ++i) {
                int iAttributeValueFreq = 0;
                for (int j = iFirstClass; j <= iLastClass; ++j) {
                    iAttributeValueClassCount[j] = 0;
                }
                for (int iRow = 1; iRow <= iDataCount; ++iRow) {
                    if (iAttributeValue[i] == iData[iCol][iRow]) {
                        iAttributeValueClassCount[iData[iClassAttribute][iRow]]++;
                        ++iAttributeValueFreq;
                    }
                }
                double fAttributeEntropy = 0.0;
                for (int j = iFirstClass; j <= iLastClass; ++j) {
                    double p = (double)iAttributeValueClassCount[j] / (double)iAttributeValueFreq;
                    if (p > 0.0) {
                        fAttributeEntropy -= p * Math.log(p) / Math.log(2.0);
                    }
                }
                fAttributeEntropySum += fAttributeEntropy * (double)iAttributeValueFreq / (double)iDataCount;
            }
            fColIG[iCol] = fOverallEntropy - fAttributeEntropySum;
        }
    }

    /**
     * Counts how often each class value occurs and returns the base-2 entropy of that distribution.
     *
     * <p>The class is read from column {@code iAttributeCount} for rows 1 to {@code iDataCount} and
     * used directly as an index into {@code iClass}.
     *
     * @param iData data indexed as {@code [column][row]}
     * @param iAttributeCount index of the class column
     * @param iDataCount number of rows
     * @param iClass output counters indexed by class value; incremented in place
     * @param iFirstClass lowest class value included in the entropy
     * @param iLastClass highest class value included in the entropy
     * @return the entropy, in bits, over the classes from {@code iFirstClass} to {@code iLastClass}
     */
    private static double calculateClassesAndEntropyOfData(int[][] iData, int iAttributeCount, int iDataCount, int[] iClass, int iFirstClass, int iLastClass) {
        for (int row = 1; row <= iDataCount; ++row) {
            iClass[iData[iAttributeCount][row]]++;
        }
        double entropy = 0.0;
        for (int cls = iFirstClass; cls <= iLastClass; ++cls) {
            double p = (double)iClass[cls] / (double)iDataCount;
            // Classes that never occur contribute nothing.
            if (!(p > 0.0)) {
                continue;
            }
            entropy -= p * Math.log(p) / Math.log(2.0);
        }
        return entropy;
    }

    /**
     * Finds the smallest value in the class column over rows 1 to {@code iDataCount}.
     *
     * @param iData data indexed as {@code [column][row]}
     * @param iDataCount number of rows
     * @param iAttributeCount index of the class column
     * @return the smallest class value found, capped at 999999 (also the result when there are no rows)
     */
    private static int findFirstClassInData(int[][] iData, int iDataCount, int iAttributeCount) {
        int lowest = 999999;
        for (int row = 1; row <= iDataCount; ++row) {
            lowest = Math.min(lowest, iData[iAttributeCount][row]);
        }
        return lowest;
    }

    /**
     * Finds the largest value in the class column over rows 1 to {@code iDataCount}.
     *
     * @param iData data indexed as {@code [column][row]}
     * @param iDataCount number of rows
     * @param iAttributeCount index of the class column
     * @return the largest class value found, but never less than 0 (also the result when there are
     *         no rows)
     */
    private static int findLastClassInData(int[][] iData, int iDataCount, int iAttributeCount) {
        int highest = 0;
        for (int row = 1; row <= iDataCount; ++row) {
            highest = Math.max(highest, iData[iAttributeCount][row]);
        }
        return highest;
    }

    /**
     * Converts several ARFF files to text files, recording the name of each output file.
     *
     * <p>Each output name is the input name with its extension chopped, followed by
     * {@code " out.txt"}.
     *
     * @param sArffIn names of the ARFF files to convert
     * @param iArffInCount number of leading entries of {@code sArffIn} to process
     * @param sTextOut output array receiving the generated text file names at matching positions
     */
    public static void convertArffToTextMultiple(String[] sArffIn, int iArffInCount, String[] sTextOut) {
        for (int file = 0; file < iArffInCount; ++file) {
            String textName = FileOps.s_ChopFileNameExtension(sArffIn[file]) + " out.txt";
            sTextOut[file] = textName;
            Arff.convertArffToText(sArffIn[file], textName);
        }
    }

    /**
     * Converts one ARFF file to a text file by loading its attributes and data into memory and
     * writing them back out with {@code writeArffAttributesAndDataToText}.
     *
     * @param sArffIn path of the ARFF file to read
     * @param sTextOut path of the text file to write
     */
    public static void convertArffToText(String sArffIn, String sTextOut) {
        int[] counts = Arff.countAttributesAndDataInArff(sArffIn);
        int attributeCount = counts[0];
        int rowCount = counts[1];
        // Both arrays carry one extra slot beyond the counts.
        String[] attributes = new String[attributeCount + 1];
        int[][] data = new int[attributeCount + 1][rowCount + 1];
        Arff.readArffAttributesAndData(sArffIn, attributeCount, rowCount, attributes, data);
        Arff.writeArffAttributesAndDataToText(attributes, data, attributeCount, rowCount, sTextOut);
    }

    /**
     * Produces a reduced ARFF file for each input file, recording the name of each output file.
     *
     * <p>Each output name is the input name with its extension chopped, followed by a space, the
     * value of {@code iTopNAttributes} and {@code .arff}.
     *
     * @param sArffIn names of the ARFF files to reduce
     * @param iArffInCount number of leading entries of {@code sArffIn} to process
     * @param iTopNAttributes number of attributes to select, passed to
     *        {@code makeArffWithTopNAttributes}
     * @param sArffOut output array receiving the generated ARFF file names at matching positions
     */
    public static void makeArffsWithTopNAttributes(String[] sArffIn, int iArffInCount, int iTopNAttributes, String[] sArffOut) {
        for (int file = 0; file < iArffInCount; ++file) {
            String reducedName = FileOps.s_ChopFileNameExtension(sArffIn[file]) + " " + iTopNAttributes + ".arff";
            sArffOut[file] = reducedName;
            Arff.makeArffWithTopNAttributes(sArffIn[file], iTopNAttributes, reducedName);
        }
    }

    /**
     * Reads an ARFF file, computes per-column information gain, selects columns via
     * {@code selectTopNAttributes}, and writes the result to a new ARFF file.
     * The information gain values are also printed to {@code sArffOut + "_IG.txt"}.
     *
     * <p>If any step throws an {@link Exception}, a diagnostic (including the size of
     * the attribute x data matrix) is printed and the JVM is terminated.
     *
     * @param sArffIn         ARFF file to read
     * @param iTopNAttributes attribute limit passed to {@code selectTopNAttributes}
     * @param sArffOut        ARFF file to write
     */
    public static void makeArffWithTopNAttributes(String sArffIn, int iTopNAttributes, String sArffOut) {
        int[] iAttData = Arff.countAttributesAndDataInArff(sArffIn);
        int iAttributeCount = iAttData[0];
        int iDataCount = iAttData[1];
        // Number of cells in the int matrix allocated below. Computed in long so that
        // large inputs cannot overflow, and from BOTH dimensions (attributes x data rows).
        long lCellCount = (long)(iAttributeCount + 1) * (long)(iDataCount + 1);
        try {
            System.out.println("AttributeSelection: Attributes " + iAttributeCount + " data " + iDataCount + " attribute x data " + lCellCount);
            int[][] iData = new int[iAttributeCount + 1][iDataCount + 1];
            double[] fColIG = new double[iAttributeCount + 1];
            boolean[] bUseCol = new boolean[iAttributeCount + 1];
            String[] sAttributes = new String[iAttributeCount + 1];
            String sHeader = Arff.readArffAttributesAndData(sArffIn, iAttributeCount, iDataCount, sAttributes, iData);
            Arff.calculateInformationGainOfData(iData, iAttributeCount, iDataCount, fColIG);
            Arff.selectTopNAttributes(fColIG, iAttributeCount, iTopNAttributes, bUseCol);
            Arff.printInformationGainValues(fColIG, sAttributes, iAttributeCount, sArffOut + "_IG.txt");
            Arff.writeArffAttributesAndData(sHeader, sAttributes, iData, iAttributeCount, iDataCount, bUseCol, sArffOut);
        }
        catch (Exception e) {
            // NOTE(review): an OutOfMemoryError from the array allocation is an Error, not an
            // Exception, so it is not handled here even though the message below suggests it.
            System.out.println("makeArffWithTopNAttributes error - probably insufficient to create attribute x data array");
            System.out.println("attribute " + iAttributeCount + " data " + iDataCount + " attribute x data " + lCellCount);
            e.printStackTrace();
            // NOTE(review): exits with status 0 on this failure path; kept for compatibility.
            System.exit(0);
        }
    }

    /**
     * Counts the attribute declarations and data rows in an ARFF file.
     *
     * <p>Header lines are scanned until a line starting with {@code "@data"}; each line
     * starting with {@code "@attribute "} is counted as an attribute. After that, every
     * non-empty line is counted as a data row. Matching is case-sensitive.
     *
     * <p>On a read error the problem is reported to standard output and the counts
     * accumulated so far are returned.
     *
     * @param sArffIn ARFF file to scan
     * @return a two-element array: {@code [0]} attribute count, {@code [1]} data row count
     */
    private static int[] countAttributesAndDataInArff(String sArffIn) {
        int iAttCount = 0;
        int iDataCount = 0;
        // try-with-resources guarantees the reader is closed even when reading fails part-way.
        try (BufferedReader rArff = new BufferedReader(new InputStreamReader(new FileInputStream(sArffIn)))) {
            String sLine = "";
            if (rArff.ready()) {
                sLine = rArff.readLine();
            }
            // The loop condition is checked before the current line is examined, so a header
            // line that is also the last line of the file is not counted. This mirrors the
            // loop structure used by readArffAttributesAndData.
            while (rArff.ready() && !sLine.startsWith("@data")) {
                if (sLine.startsWith("@attribute ")) {
                    ++iAttCount;
                }
                sLine = rArff.readLine();
            }
            while (rArff.ready()) {
                sLine = rArff.readLine();
                if (sLine.length() > 0) {
                    ++iDataCount;
                }
            }
        }
        catch (Exception e) {
            System.out.println("[countAttributesAndDataInArff]Error reading file " + sArffIn);
            e.printStackTrace();
        }
        return new int[]{iAttCount, iDataCount};
    }

    /**
     * Loads the header and integer data of an ARFF file into caller-supplied arrays.
     *
     * <p>Header section (up to a line starting with {@code "@data"}): a line starting
     * with {@code "@relation "} is stored in {@code sAttributes[0]}; every other
     * non-empty line not starting with {@code '%'} is stored in the next slot of
     * {@code sAttributes} (starting at 1); empty lines and {@code '%'} lines are
     * appended, newline-terminated, to the returned header text.
     *
     * <p>Data section: lines of length 0 or 1 are skipped. Rows are numbered from 1.
     * A line containing <code>{</code> is parsed as a sparse row of comma-separated
     * {@code "index value"} pairs (zero-based index, stored at {@code index + 1});
     * any other line is parsed as a dense comma-separated row of
     * {@code iAttributeCount} integers.
     *
     * <p>On any error the problem is reported to standard output and whatever was read
     * so far is left in the arrays.
     *
     * @param sArffIn         ARFF file to read
     * @param iAttributeCount number of attribute columns per row
     * @param iDataCount      not read by this method; rows are counted while reading
     * @param sAttributes     receives the relation line at [0] and attribute lines from [1]
     * @param iData           receives values as {@code iData[attribute][row]}, both from 1
     * @return the accumulated comment/blank header lines
     */
    private static String readArffAttributesAndData(String sArffIn, int iAttributeCount, int iDataCount, String[] sAttributes, int[][] iData) {
        StringBuilder sbHeader = new StringBuilder();
        // try-with-resources guarantees the reader is closed even when a malformed row
        // makes parsing throw part-way through the file.
        try (BufferedReader rArff = new BufferedReader(new InputStreamReader(new FileInputStream(sArffIn)))) {
            String sLine = "";
            if (rArff.ready()) {
                sLine = rArff.readLine();
            }
            int iAttLines = 0;
            while (rArff.ready() && !sLine.startsWith("@data")) {
                if (sLine.length() > 0 && sLine.charAt(0) != '%') {
                    if (sLine.startsWith("@relation ")) {
                        sAttributes[0] = sLine;
                    } else {
                        sAttributes[++iAttLines] = sLine;
                    }
                } else {
                    sbHeader.append(sLine).append("\n");
                }
                sLine = rArff.readLine();
            }
            int iRow = 0;
            while (rArff.ready()) {
                sLine = rArff.readLine();
                if (sLine.length() <= 1) {
                    continue;
                }
                ++iRow;
                if (sLine.indexOf("{") >= 0) {
                    // Sparse row: unspecified attributes are zero.
                    for (int iAtt = 1; iAtt <= iAttributeCount; ++iAtt) {
                        iData[iAtt][iRow] = 0;
                    }
                    // A line of 4 characters or fewer is too short to hold an "index value" pair.
                    if (sLine.length() > 4) {
                        // Drops the first and last characters (the braces) before splitting.
                        String[] sPairs = sLine.substring(1, sLine.length() - 1).split(",");
                        for (String sPair : sPairs) {
                            String[] sIDValue = sPair.trim().split(" ");
                            iData[Integer.parseInt(sIDValue[0]) + 1][iRow] = Integer.parseInt(sIDValue[1]);
                        }
                    }
                } else {
                    String[] sValues = sLine.split(",");
                    for (int iAtt = 1; iAtt <= iAttributeCount; ++iAtt) {
                        iData[iAtt][iRow] = Integer.parseInt(sValues[iAtt - 1].trim());
                    }
                }
            }
        }
        catch (Exception e) {
            System.out.println("[readArffAttributesAndData]Error reading file " + sArffIn);
            e.printStackTrace();
        }
        return sbHeader.toString();
    }

    /**
     * Writes an ARFF file containing only the columns flagged in {@code bUseCol}.
     *
     * <p>Output is: {@code sHeader} verbatim, then each flagged entry of
     * {@code sAttribute} (indexes 0..iAttributeCount) on its own line, then
     * {@code "@data"}, then one line per data row 1..iDataCount. The last column
     * ({@code iAttributeCount}) is always written in each data row regardless of its flag.
     *
     * <p>When {@code bgSaveArffAsCondensed} is set, rows are written in sparse form
     * <code>{index value,...}</code> where indexes number the flagged columns from 0
     * and only values greater than 0 are emitted for the non-final columns; otherwise
     * rows are written as comma-separated values.
     *
     * <p>On any error the problem is reported to standard output.
     *
     * @param sHeader         text written at the top of the file
     * @param sAttribute      relation line at [0] and attribute lines from [1]
     * @param iData           values as {@code iData[attribute][row]}, both from 1
     * @param iAttributeCount index of the last column
     * @param iDataCount      number of data rows
     * @param bUseCol         per-column flags selecting what to write
     * @param sArffOut        file to write
     */
    private static void writeArffAttributesAndData(String sHeader, String[] sAttribute, int[][] iData, int iAttributeCount, int iDataCount, boolean[] bUseCol, String sArffOut) {
        // try-with-resources guarantees the writer is flushed and closed even if a write fails.
        try (BufferedWriter wWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sArffOut)))) {
            wWriter.write(sHeader);
            for (int iCol = 0; iCol <= iAttributeCount; ++iCol) {
                if (bUseCol[iCol]) {
                    wWriter.write(sAttribute[iCol] + "\n");
                }
            }
            wWriter.write("@data\n");
            for (int iDat = 1; iDat <= iDataCount; ++iDat) {
                if (bgSaveArffAsCondensed) {
                    wWriter.write("{");
                    // Counts flagged columns seen so far; (count - 1) is the zero-based
                    // output index of the current flagged column.
                    int iColUsed = 0;
                    for (int iCol = 1; iCol < iAttributeCount; ++iCol) {
                        if (!bUseCol[iCol]) {
                            continue;
                        }
                        ++iColUsed;
                        if (iData[iCol][iDat] > 0) {
                            wWriter.write((iColUsed - 1) + " " + iData[iCol][iDat] + ",");
                        }
                    }
                    // The final column takes the next output index after all flagged columns.
                    wWriter.write(iColUsed + " " + iData[iAttributeCount][iDat] + "}\n");
                } else {
                    for (int iCol = 1; iCol < iAttributeCount; ++iCol) {
                        if (bUseCol[iCol]) {
                            wWriter.write(iData[iCol][iDat] + ",");
                        }
                    }
                    wWriter.write(iData[iAttributeCount][iDat] + "\n");
                }
            }
        }
        catch (Exception e) {
            System.out.println("[writeArffAttributesAndData]Error writing file " + sArffOut);
            e.printStackTrace();
        }
    }

    /**
     * Writes ARFF data as tab-separated text: one line per data row containing the
     * value of the last column, a tab, and then the decoded names of every other
     * attribute whose value is non-zero (followed by {@code [n]} when the value is
     * greater than 1).
     *
     * <p>Before writing, entries 1..iAttributeCount of {@code sAttribute} are rewritten
     * IN PLACE to a decoded name: the second space-separated token of the attribute
     * line, with everything up to the first underscore removed (when that underscore is
     * not the first character), a fixed set of escape tokens replaced by their
     * characters, and {@code '+'} replaced by {@code '_'}.
     *
     * <p>On a write error the problem is reported to standard output.
     *
     * @param sAttribute      attribute lines from [1]; modified in place
     * @param iData           values as {@code iData[attribute][row]}, both from 1
     * @param iAttributeCount index of the last column
     * @param iDataCount      number of data rows
     * @param sTextOut        file to write
     */
    private static void writeArffAttributesAndDataToText(String[] sAttribute, int[][] iData, int iAttributeCount, int iDataCount, String sTextOut) {
        // Escape tokens and their replacements, applied in this exact order
        // (order matters: an earlier replacement can produce a '%' consumed by a later one).
        final String[][] sDecodeTable = {
            {"_pc", "%"},
            {"%2C", ","},
            {"%28", "("},
            {"%29", ")"},
            {"%3F", "?"},
            {"%21", "!"},
            {"%25", "%"},
            {"%26", "&"},
            {"%27", "'"},
            {"%2F", "/"},
            {"%3A", ":"},
            {"%3B", ";"}
        };
        for (int iCol = 1; iCol <= iAttributeCount; ++iCol) {
            String sName = sAttribute[iCol].split(" ")[1];
            int iPos = sName.indexOf("_");
            if (iPos > 0) {
                sName = sName.substring(iPos + 1);
            }
            for (String[] sPair : sDecodeTable) {
                // replace() leaves the string unchanged when the token is absent.
                sName = sName.replace(sPair[0], sPair[1]);
            }
            // '+' is only translated when the first '+' is not the leading character.
            if (sName.indexOf("+") > 0) {
                sName = sName.replace("+", "_");
            }
            sAttribute[iCol] = sName;
        }
        // try-with-resources guarantees the writer is flushed and closed even if a write fails.
        try (BufferedWriter wWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(sTextOut)))) {
            // NOTE(review): the header line ends with "\n" while data rows end with "\r\n";
            // kept as-is so the output stays byte-identical.
            wWriter.write(sAttribute[iAttributeCount] + "\tText\n");
            for (int iDat = 1; iDat <= iDataCount; ++iDat) {
                wWriter.write(iData[iAttributeCount][iDat] + "\t");
                for (int iCol = 1; iCol < iAttributeCount; ++iCol) {
                    int iValue = iData[iCol][iDat];
                    if (iValue > 1) {
                        wWriter.write(sAttribute[iCol] + "[" + iValue + "] ");
                    } else if (iValue == 1) {
                        wWriter.write(sAttribute[iCol] + " ");
                    }
                }
                wWriter.write("\r\n");
            }
        }
        catch (Exception e) {
            System.out.println("[writeArffAttributesAndDataToText]Error writing file " + sTextOut);
            e.printStackTrace();
        }
    }
}

