/*
 * Decompiled with CFR 0.152.
 */
package projects.schweden;

import de.jstacs.DataType;
import de.jstacs.algorithms.optimization.termination.IterationCondition;
import de.jstacs.algorithms.optimization.termination.TerminationCondition;
import de.jstacs.classifiers.differentiableSequenceScoreBased.gendismix.GenDisMixClassifier;
import de.jstacs.data.alphabets.DNAAlphabetContainer;
import de.jstacs.data.sequences.Sequence;
import de.jstacs.data.sequences.annotation.SequenceAnnotation;
import de.jstacs.parameters.AbstractSelectionParameter;
import de.jstacs.parameters.FileParameter;
import de.jstacs.parameters.Parameter;
import de.jstacs.parameters.ParameterSet;
import de.jstacs.parameters.SelectionParameter;
import de.jstacs.parameters.SimpleParameter;
import de.jstacs.parameters.SimpleParameterSet;
import de.jstacs.parameters.validation.NumberValidator;
import de.jstacs.results.Result;
import de.jstacs.results.ResultSet;
import de.jstacs.results.TextResult;
import de.jstacs.sequenceScores.statisticalModels.StatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.differentiable.DifferentiableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.differentiable.mixture.StrandDiffSM;
import de.jstacs.sequenceScores.statisticalModels.trainable.PFMWrapperTrainSM;
import de.jstacs.sequenceScores.statisticalModels.trainable.TrainableStatisticalModel;
import de.jstacs.sequenceScores.statisticalModels.trainable.mixture.AbstractMixtureTrainSM;
import de.jstacs.sequenceScores.statisticalModels.trainable.mixture.StrandTrainSM;
import de.jstacs.tools.JstacsTool;
import de.jstacs.tools.ProgressUpdater;
import de.jstacs.tools.Protocol;
import de.jstacs.tools.ToolResult;
import de.jstacs.tools.ui.cli.CLI;
import de.jstacs.utils.IntList;
import de.jstacs.utils.SafeOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import projects.dimont.AbstractSingleMotifChIPper;
import projects.inmode.models.variableStructure.parsimonious.inhomogeneous.InhomogeneousPMM;
import umontreal.iro.lecuyer.probdist.NormalDist;

/**
 * Command line tool that scans sequences (e.g., a genome) for occurrences of a single motif.
 *
 * <p>The motif may come from a Dimont classifier, a HOCOMOCO matrix file or an InMoDe model. The
 * tool first scores every window of a "negative" FASTA file to fit a normal distribution to the
 * scores, and then reports every window of the input FASTA file whose score has an upper-tail
 * probability under that distribution below the requested significance level.
 *
 * <p>NOTE: {@link #readNextSequences(BufferedReader, StringBuffer, int)} communicates its result
 * through the static fields {@link #starts} and {@link #seqs}, so neither that method nor
 * {@link #run(ParameterSet, Protocol, ProgressUpdater, int)} may be used concurrently.
 */
public class DimontGenomeScan
implements JstacsTool {
    /** Number of accepted residues after which {@link #readNextSequences} hands a chunk back to the caller. */
    private static final int MAX_CHUNK_LENGTH = 10000000;

    /** Matches maximal runs of unambiguous nucleotides (either case). */
    private static final Pattern ACGT_RUN = Pattern.compile("[ACGT]+", Pattern.CASE_INSENSITIVE);

    /** For each entry of {@link #seqs}: offset of that stretch inside the FASTA record it was cut from. */
    static IntList starts = new IntList();

    /** Sequence stretches produced by the most recent call of {@link #readNextSequences}. */
    static ArrayList<Sequence> seqs = new ArrayList<Sequence>();

    /**
     * Runs the tool through the Jstacs command line interface.
     *
     * @param args the command line arguments, passed on to {@link CLI#run(String[])}
     * @throws Exception if the CLI fails
     */
    public static void main(String[] args) throws Exception {
        CLI cl = new CLI(new DimontGenomeScan());
        cl.run(args);
    }

    /**
     * Builds the parameters of the tool. The order is relied upon by {@link #run}: motif source
     * (0), input file (1), negative file (2), best strand switch (3), significance level (4).
     *
     * @return the parameter set
     * @throws IllegalStateException if one of the parameters cannot be created; returning a
     *         shortened list instead would shift the positions that {@link #run} reads
     */
    @Override
    public ParameterSet getToolParameters() {
        ArrayList<Parameter> parameters = new ArrayList<Parameter>();
        try {
            parameters.add(new SelectionParameter(DataType.PARAMETERSET, new String[]{"Dimont", "HOCOMOCO", "InMoDe"}, new ParameterSet[]{new SimpleParameterSet(new FileParameter("Dimont classifier", "The classifier from the Dimont output for one motif", "xml", true)), new SimpleParameterSet(new FileParameter("HOCOMOCO motif", "The HOCOMOCO motif", "pwm", true)), new SimpleParameterSet(new FileParameter("InMoDe model", "The model from the InMoDe output for one motif", "xml", true))}, "Motif source", "", true));
            parameters.add(new FileParameter("Input file", "The file containing the sequences to be scanned (e.g., a genome)", "fasta,fa,fas", true));
            parameters.add(new FileParameter("Negative file", "The file containing the negative sequences to be scanned (e.g., a genome)", "fasta,fa,fas", true));
            parameters.add(new SimpleParameter(DataType.BOOLEAN, "Best Strand", "switch which allows to output at a specific position only the best strand or both strands if the corresponding score is above the threshold", true, true));
            parameters.add(new SimpleParameter(DataType.DOUBLE, "Significance Level", "The threshold on the p-values for making predictions", true, new NumberValidator<Double>(0.0, 1.0), 0.001));
        }
        catch (Exception e) {
            throw new IllegalStateException("Could not create the parameters of " + this.getToolName(), e);
        }
        return new SimpleParameterSet(parameters.toArray(new Parameter[0]));
    }

    /**
     * Scans the input file and writes all significant motif occurrences to a temporary file.
     *
     * <p>Each prediction is one tab-separated line: sequence id, position, score, strand
     * ({@code +} or {@code -}), p-value and the matched site (reverse complemented for
     * {@code -}). The position is the offset of the window within the residues of its FASTA
     * record, counted from 0.
     *
     * @param parameters the parameters as created by {@link #getToolParameters()}
     * @param protocol not used
     * @param progress not used
     * @param threads not used
     * @return a result wrapping the file with the predictions
     * @throws IllegalArgumentException if the negative file contains no window that can be scored
     * @throws Exception if a model or sequence file cannot be read or scoring fails
     */
    @Override
    public ToolResult run(ParameterSet parameters, Protocol protocol, ProgressUpdater progress, int threads) throws Exception {
        StatisticalModel model = DimontGenomeScan.loadModel((SelectionParameter)parameters.getParameterAt(0));
        String inputFile = ((FileParameter)parameters.getParameterAt(1)).getFileContents().getFilename();
        String negativeFile = ((FileParameter)parameters.getParameterAt(2)).getFileContents().getFilename();
        boolean best = (Boolean)parameters.getParameterAt(3).getValue();
        double alpha = (Double)parameters.getParameterAt(4).getValue();

        NormalDist nd = DimontGenomeScan.fitBackground(model, negativeFile, best);

        File out = File.createTempFile("dimontscan", "_dgs.temp", new File("."));
        out.deleteOnExit();
        SafeOutputStream sos = SafeOutputStream.getSafeOutputStream(new FileOutputStream(out));
        try {
            DimontGenomeScan.scan(model, inputFile, nd, best, alpha, sos);
        }
        finally {
            // Release the file handle even if scanning fails half way.
            sos.close();
        }
        return new ToolResult("predictions", "", null, new ResultSet(new TextResult("predictions", "Result", new FileParameter.FileRepresentation(out.getAbsolutePath()), "txt", this.getToolName(), null, true)), parameters, this.getToolName(), new Date());
    }

    /**
     * Creates the strand model for the selected motif source: index 0 is a Dimont classifier,
     * index 1 a HOCOMOCO matrix, anything else an InMoDe model.
     */
    private static StatisticalModel loadModel(SelectionParameter source) throws Exception {
        String content = ((FileParameter)((ParameterSet)source.getValue()).getParameterAt(0)).getFileContents().getContent();
        switch (source.getSelected()) {
            case 0: {
                GenDisMixClassifier cl = new GenDisMixClassifier(new StringBuffer(content));
                DifferentiableStatisticalModel motif = ((AbstractSingleMotifChIPper)cl.getDifferentiableSequenceScore(0)).getFunction(0);
                return new StrandDiffSM(motif, 1, true, StrandDiffSM.InitMethod.INIT_FORWARD_STRAND, 0.5);
            }
            case 1: {
                return DimontGenomeScan.wrapBothStrands(DimontGenomeScan.parseMatrix(content));
            }
            default: {
                return DimontGenomeScan.wrapBothStrands(new InhomogeneousPMM(new StringBuffer(content)));
            }
        }
    }

    /** Wraps a trainable motif model into a strand mixture using the settings shared by all sources. */
    private static StatisticalModel wrapBothStrands(TrainableStatisticalModel motif) throws Exception {
        return new StrandTrainSM(motif, 1, 0.5, 1.0, (TerminationCondition)new IterationCondition(1), AbstractMixtureTrainSM.Parameterization.LAMBDA);
    }

    /**
     * Parses a matrix file: the first line holds the name after one leading character, every
     * further line one tab-separated row of (at most four) numbers.
     */
    private static TrainableStatisticalModel parseMatrix(String content) throws Exception {
        String[] lines = content.split("\n");
        String name = lines[0].substring(1).trim();
        double[][] matrix = new double[lines.length - 1][4];
        for (int row = 1; row < lines.length; ++row) {
            String[] parts = lines[row].split("\t");
            for (int col = 0; col < parts.length; ++col) {
                matrix[row - 1][col] = Double.parseDouble(parts[col]);
            }
        }
        return new PFMWrapperTrainSM(DNAAlphabetContainer.SINGLETON, name, matrix);
    }

    /** Returns the scores of both strand components (index 0 and 1) for the window starting at {@code start}. */
    private static double[] componentScores(StatisticalModel model, Sequence seq, int start) throws Exception {
        if (model instanceof StrandDiffSM) {
            return ((StrandDiffSM)model).getComponentScores(seq, start);
        }
        StrandTrainSM strandModel = (StrandTrainSM)model;
        int end = start + model.getLength() - 1;
        return new double[]{strandModel.getLogProbFor(0, seq, start, end), strandModel.getLogProbFor(1, seq, start, end)};
    }

    /**
     * Fits a normal distribution to the scores of all windows of the negative file. With
     * {@code best} set only the better strand of each window contributes, otherwise both do.
     */
    private static NormalDist fitBackground(StatisticalModel model, String negativeFile, boolean best) throws Exception {
        int motifLength = model.getLength();
        double sum = 0.0;
        double sumSq = 0.0;
        double n = 0.0;
        StringBuffer lastHeader = new StringBuffer();
        try (BufferedReader read = new BufferedReader(new FileReader(negativeFile))) {
            while (DimontGenomeScan.readNextSequences(read, lastHeader, motifLength)) {
                for (Sequence seq : seqs) {
                    int windows = seq.getLength() - motifLength + 1;
                    for (int j = 0; j < windows; ++j) {
                        double[] score = DimontGenomeScan.componentScores(model, seq, j);
                        boolean secondIsBest = !(score[0] >= score[1]);
                        if (!best || !secondIsBest) {
                            sum += score[0];
                            sumSq += score[0] * score[0];
                            n += 1.0;
                        }
                        if (!best || secondIsBest) {
                            sum += score[1];
                            sumSq += score[1] * score[1];
                            n += 1.0;
                        }
                    }
                }
            }
        }
        if (n == 0.0) {
            // Without this check mean and sd become NaN and the scan silently reports nothing.
            throw new IllegalArgumentException("The negative file contains no stretch of A, C, G, T that is at least as long as the motif (" + motifLength + "); cannot estimate the score distribution.");
        }
        double mean = sum / n;
        double sd = Math.sqrt(sumSq / n - mean * mean);
        return new NormalDist(mean, sd);
    }

    /** Scores every window of the input file and writes those with a p-value below {@code alpha}. */
    private static void scan(StatisticalModel model, String inputFile, NormalDist nd, boolean best, double alpha, SafeOutputStream sos) throws Exception {
        int motifLength = model.getLength();
        StringBuffer lastHeader = new StringBuffer();
        try (BufferedReader read = new BufferedReader(new FileReader(inputFile))) {
            while (DimontGenomeScan.readNextSequences(read, lastHeader, motifLength)) {
                for (int i = 0; i < seqs.size(); ++i) {
                    Sequence seq = seqs.get(i);
                    String id = seq.getSequenceAnnotationByType("id", 0).getIdentifier().trim();
                    int off = starts.get(i);
                    int windows = seq.getLength() - motifLength + 1;
                    for (int j = 0; j < windows; ++j) {
                        double[] score = DimontGenomeScan.componentScores(model, seq, j);
                        int bestStrand = score[0] >= score[1] ? 0 : 1;
                        // Cheap pre-filter: if the better strand is not significant, neither is the other.
                        if (!(1.0 - nd.cdf(score[bestStrand]) < alpha)) {
                            continue;
                        }
                        for (int strand = 0; strand < 2; ++strand) {
                            if (best && strand != bestStrand) {
                                continue;
                            }
                            double pValue = 1.0 - nd.cdf(score[strand]);
                            if (!(pValue < alpha)) {
                                continue;
                            }
                            Sequence site = seq.getSubSequence(j, motifLength);
                            if (strand == 0) {
                                sos.writeln(String.valueOf(id) + "\t" + (off + j) + "\t" + score[0] + "\t+\t" + pValue + "\t" + site);
                            } else {
                                sos.writeln(String.valueOf(id) + "\t" + (off + j) + "\t" + score[1] + "\t-\t" + pValue + "\t" + site.reverseComplement());
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Reads the next chunk of FASTA records and stores its usable stretches in {@link #seqs}
     * (with their offsets inside the record in {@link #starts}); both lists are cleared first.
     *
     * <p>A record is cut into maximal runs of A, C, G, T (either case); runs shorter than
     * {@code modelLength} are dropped. Every stretch is annotated with an {@code "id"} annotation
     * holding the record header up to its first blank. Reading stops at the first record boundary
     * after more than 10,000,000 residues have been accepted, or at the end of the file.
     *
     * @param read the reader positioned inside a FASTA file
     * @param lastHeader carries the header of the record that follows the returned chunk from one
     *        call to the next; pass an empty buffer on the first call and the same buffer afterwards
     * @param modelLength the minimal length of a stretch
     * @return {@code true} if a chunk was read (it may hold no stretch if all runs were too
     *         short), {@code false} once the file is exhausted
     * @throws Exception if reading fails or a sequence cannot be created
     */
    public static boolean readNextSequences(BufferedReader read, StringBuffer lastHeader, int modelLength) throws Exception {
        starts.clear();
        seqs.clear();
        DNAAlphabetContainer con = DNAAlphabetContainer.SINGLETON;
        StringBuffer residues = new StringBuffer();
        int size = 0;
        String str;
        // Keep going after the end of the file until the buffered record has been flushed.
        while ((str = read.readLine()) != null || residues.length() > 0) {
            if (str != null) {
                str = str.trim();
                if (!str.startsWith(">")) {
                    residues.append(str);
                    continue;
                }
            }
            // Record boundary: the buffered residues belong to the header seen before this one.
            String header = lastHeader.toString();
            if (str != null) {
                lastHeader.setLength(0);
                lastHeader.append(str.substring(1).trim());
            }
            if (residues.length() == 0) {
                continue;
            }
            int blank = header.indexOf(" ");
            if (blank > 0) {
                header = header.substring(0, blank);
            }
            SequenceAnnotation annotation = new SequenceAnnotation("id", header, (Result[][])new Result[0][]);
            String seqStr = residues.toString();
            residues.setLength(0);
            Matcher match = ACGT_RUN.matcher(seqStr);
            while (match.find()) {
                int start = match.start();
                int end = match.end();
                int length = end - start;
                if (length < modelLength) {
                    continue;
                }
                Sequence seq = Sequence.create(con, seqStr.substring(start, end));
                seqs.add(seq.annotate(false, annotation));
                starts.add(start);
                size += length;
            }
            if (size > MAX_CHUNK_LENGTH || str == null) {
                return true;
            }
        }
        // A file that ends with a header without residues leaves the loop with the preceding
        // records parsed but not yet handed out; report them instead of dropping them.
        return !seqs.isEmpty();
    }

    /** @return the display name of the tool */
    @Override
    public String getToolName() {
        return "Dimont genome scan";
    }

    /** @return the version of the tool */
    @Override
    public String getToolVersion() {
        return "1.0";
    }

    /** @return the short name of the tool */
    @Override
    public String getShortName() {
        return "scan";
    }

    /** @return a one-line description of the tool */
    @Override
    public String getDescription() {
        return "scans a genome for prediction of a Dimont model";
    }

    /** @return the help text, which is empty for this tool */
    @Override
    public String getHelpText() {
        return "";
    }

    /** @return always {@code null}; this tool declares no default result infos */
    @Override
    public JstacsTool.ResultEntry[] getDefaultResultInfos() {
        return null;
    }
}

