/*
 * Decompiled with CFR 0.152.
 */
package picard.util;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.ReferenceProgramGroup;

@CommandLineProgramProperties(summary="Designs oligonucleotide baits for hybrid selection reactions.<p>This tool is used to design custom bait sets for hybrid selection experiments. The following files are input into BaitDesigner: a (TARGET) interval list indicating the sequences of interest, e.g. exons with their respective coordinates, a reference sequence, and a unique identifier string (DESIGN_NAME). </p><p>The tool will output interval_list files of both bait and target sequences as well as the actual bait sequences in FastA format. At least two baits are output for each target sequence, with greater numbers for larger intervals. Although the default values for both bait size  (120 bases) nd offsets (80 bases) are suitable for most applications, these values can be customized. Offsets represent the distance between sequential baits on a contiguous stretch of target DNA sequence. </p><p>The tool will also output a pooled set of 55,000 (default) oligonucleotides representing all of the baits redundantly. This redundancy achieves a uniform concentration of oligonucleotides for synthesis by a vendor as well as equal numbersof each bait to prevent bias during the hybrid selection reaction. </p><h4>Usage example:</h4><pre>java -jar picard.jar BaitDesigner \\<br />      TARGET=targets.interval_list \\<br />      DESIGN_NAME=new_baits \\<br />      R=reference_sequence.fasta </pre> <hr />", oneLineSummary="Designs oligonucleotide baits for hybrid selection reactions.", programGroup=ReferenceProgramGroup.class)
@DocumentedFeature
public class BaitDesigner
extends CommandLineProgram {
    // One-line tool summary. The same text is repeated verbatim as oneLineSummary in the
    // @CommandLineProgramProperties annotation on the class.
    static final String USAGE_SUMMARY = "Designs oligonucleotide baits for hybrid selection reactions.";
    // Long-form HTML usage text. The annotation's summary is USAGE_SUMMARY followed by this text.
    // NOTE(review): both USAGE_* constants are non-private and upper-case, so writeParametersFile
    // also emits them into the parameters file (it only skips a field literally named "USAGE").
    static final String USAGE_DETAILS = "<p>This tool is used to design custom bait sets for hybrid selection experiments. The following files are input into BaitDesigner: a (TARGET) interval list indicating the sequences of interest, e.g. exons with their respective coordinates, a reference sequence, and a unique identifier string (DESIGN_NAME). </p><p>The tool will output interval_list files of both bait and target sequences as well as the actual bait sequences in FastA format. At least two baits are output for each target sequence, with greater numbers for larger intervals. Although the default values for both bait size  (120 bases) nd offsets (80 bases) are suitable for most applications, these values can be customized. Offsets represent the distance between sequential baits on a contiguous stretch of target DNA sequence. </p><p>The tool will also output a pooled set of 55,000 (default) oligonucleotides representing all of the baits redundantly. This redundancy achieves a uniform concentration of oligonucleotides for synthesis by a vendor as well as equal numbersof each bait to prevent bias during the hybrid selection reaction. </p><h4>Usage example:</h4><pre>java -jar picard.jar BaitDesigner \\<br />      TARGET=targets.interval_list \\<br />      DESIGN_NAME=new_baits \\<br />      R=reference_sequence.fasta </pre> <hr />";

    // ---- Command-line arguments -------------------------------------------------------------
    // Every non-private, upper-case field below is also dumped as NAME=value by
    // writeParametersFile.

    // Interval list of targets; loaded with IntervalList.fromFile in doWork.
    @Argument(shortName="T", doc="The file with design parameters and targets")
    public File TARGETS;
    // Used as the prefix of every output file name and, when OUTPUT_DIRECTORY is unset,
    // as the output directory name.
    @Argument(doc="The name of the bait design")
    public String DESIGN_NAME;
    // Placed in front of the bait bases by getBaitSequence; validated against [ACGTacgt]*.
    @Argument(doc="The left amplification primer to prepend to all baits for synthesis")
    public String LEFT_PRIMER = "ATCGCACCAGCGTGT";
    // NOTE(review): the doc text says "prepend", but getBaitSequence places this primer
    // AFTER the bait bases.
    @Argument(doc="The right amplification primer to prepend to all baits for synthesis")
    public String RIGHT_PRIMER = "CACTGCGGCTCCTCA";
    // Selects which DesignStrategy.design implementation lays out the baits.
    @Argument(doc="The design strategy to use to layout baits across each target")
    public DesignStrategy DESIGN_STRATEGY = DesignStrategy.FixedOffset;
    // doWork throws if a strategy returns a bait whose length differs from this value.
    @Argument(doc="The length of each individual bait to design")
    public int BAIT_SIZE = 120;
    // Lower bound used by estimateBaits and by the FixedOffset strategy.
    @Argument(doc="The minimum number of baits to design per target.")
    public int MINIMUM_BAITS_PER_TARGET = 2;
    @Argument(doc="The desired offset between the start of one bait and the start of another bait for the same target.")
    public int BAIT_OFFSET = 80;
    // Applied in doWork, clamped to [1, contig length].
    @Argument(doc="Pad the input targets by this amount when designing baits. Padding is applied on both sides in this amount.")
    public int PADDING = 0;
    // A base counts as masked when it is anything other than upper-case A, C, G or T
    // (see getMaskedBaseCount).
    @Argument(doc="Baits that have more than REPEAT_TOLERANCE soft or hard masked bases will not be allowed")
    public int REPEAT_TOLERANCE = 50;
    // doWork only calls writePoolFiles when this is > 0.
    @Argument(doc="The size of pools or arrays for synthesis. If no pool files are desired, can be set to 0.")
    public int POOL_SIZE = 55000;
    @Argument(doc="If true, fill up the pools with alternating fwd and rc copies of all baits. Equal copies of all baits will always be maintained")
    public boolean FILL_POOLS = true;
    // Passed to Bait.addBases, which reverse-complements negative-strand baits when true.
    @Argument(doc="If true design baits on the strand of the target feature, if false always design on the + strand of the genome.")
    public boolean DESIGN_ON_TARGET_STRAND = false;
    @Argument(doc="If true merge targets that are 'close enough' that designing against a merged target would be more efficient.")
    public boolean MERGE_NEARBY_TARGETS = true;
    @Argument(doc="If true also output .design.txt files per pool with one line per bait sequence")
    public boolean OUTPUT_AGILENT_FILES = true;
    @Argument(shortName="O", optional=true, doc="The output directory. If not provided then the DESIGN_NAME will be used as the output directory")
    public File OUTPUT_DIRECTORY;

    // ---- Design statistics, filled in by calculateStatistics --------------------------------
    // Unique base count of the (padded/merged) targets.
    int TARGET_TERRITORY;
    // Number of targets.
    int TARGET_COUNT;
    // Unique base count of the kept baits.
    int BAIT_TERRITORY;
    // Number of kept baits.
    int BAIT_COUNT;
    // Base count of the bait/target intersections.
    int BAIT_TARGET_TERRITORY_INTERSECTION;
    // Number of targets that no bait overlaps.
    int ZERO_BAIT_TARGETS;
    // TARGET_TERRITORY / BAIT_TERRITORY.
    double DESIGN_EFFICIENCY;

    private static final Log log = Log.getInstance(BaitDesigner.class);
    // Locale-default integer formatter used by makeBaitName to render bait indices.
    private final NumberFormat fmt = NumberFormat.getIntegerInstance();

    /**
     * Always returns {@code true}.
     *
     * NOTE(review): presumably this tells the base CommandLineProgram that a reference
     * sequence argument is mandatory for this tool — confirm against the superclass.
     */
    @Override
    protected boolean requiresReference() {
        return true;
    }

    /**
     * Builds a bait name of the form {@code <targetName>_bait#<index>}.
     *
     * The index is rendered with the instance's integer formatter and left-padded with
     * zeros until it is as wide as the formatted {@code totalBaits}, so that the names of
     * all baits of one target have the same length.
     *
     * @param targetName name of the target the bait belongs to (a null name renders as "null")
     * @param baitIndex  index of this bait within the target
     * @param totalBaits number of baits planned for the target; only its formatted width is used
     * @return the composed bait name
     */
    String makeBaitName(String targetName, int baitIndex, int totalBaits) {
        int width = this.fmt.format(totalBaits).length();
        String index = this.fmt.format(baitIndex);
        // append(String) renders a null target name as "null", exactly like string concatenation.
        StringBuilder name = new StringBuilder().append(targetName).append("_bait#");
        for (int missing = width - index.length(); missing > 0; --missing) {
            name.append('0');
        }
        return name.append(index).toString();
    }

    /**
     * Counts the bases in {@code bases[from, until)} that are anything other than an
     * upper-case {@code A}, {@code C}, {@code G} or {@code T}. Lower-case bases, {@code N}
     * and every other byte value are therefore counted.
     *
     * @param bases sequence bytes to inspect
     * @param from  first index to inspect (0-based, inclusive)
     * @param until index at which to stop (0-based, exclusive)
     * @return the number of inspected bytes that are not one of {@code ACGT}
     */
    public static int getMaskedBaseCount(byte[] bases, int from, int until) {
        int masked = 0;
        for (int pos = from; pos < until; ++pos) {
            switch (bases[pos]) {
                case 'A':
                case 'C':
                case 'G':
                case 'T':
                    // plain upper-case base: not masked
                    break;
                default:
                    ++masked;
                    break;
            }
        }
        return masked;
    }

    /**
     * Command-line entry point: creates a BaitDesigner and hands the raw arguments to the
     * inherited {@code instanceMainWithExit}.
     *
     * NOTE(review): presumably that method parses the arguments, runs the tool and exits
     * the JVM with its return code — confirm against CommandLineProgram.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        new BaitDesigner().instanceMainWithExit(args);
    }

    /**
     * Checks that the configured primers, when present, consist solely of the letters
     * A, C, G and T in either case (an empty primer is accepted).
     *
     * @return one message per invalid primer, or {@code null} when both primers are acceptable
     */
    @Override
    protected String[] customCommandLineValidation() {
        Pattern basesOnly = Pattern.compile("^[ACGTacgt]*$");
        List<String> problems = new ArrayList<String>();

        boolean leftValid = this.LEFT_PRIMER == null || basesOnly.matcher(this.LEFT_PRIMER).matches();
        if (!leftValid) {
            problems.add("Left primer " + this.LEFT_PRIMER + " is not a valid primer sequence.");
        }

        boolean rightValid = this.RIGHT_PRIMER == null || basesOnly.matcher(this.RIGHT_PRIMER).matches();
        if (!rightValid) {
            problems.add("Right primer " + this.RIGHT_PRIMER + " is not a valid primer sequence.");
        }

        if (problems.isEmpty()) {
            return null;
        }
        return problems.toArray(new String[problems.size()]);
    }

    /**
     * Estimates how many baits are needed to tile the inclusive range {@code [start, end]}:
     * one bait plus one more for every (possibly partial) BAIT_OFFSET step that the range
     * extends beyond BAIT_SIZE, but never fewer than MINIMUM_BAITS_PER_TARGET.
     *
     * The rounding now happens on the quotient. Previously {@code Math.ceil} was applied to
     * the integer difference (a no-op) and the subsequent {@code (int)} cast truncated the
     * quotient, so the estimate was one bait too low whenever the excess length was not an
     * exact multiple of BAIT_OFFSET. The formula now matches the bait count computed by the
     * design strategies.
     *
     * @param start first base of the range (inclusive)
     * @param end   last base of the range (inclusive)
     * @return the estimated number of baits for the range
     */
    int estimateBaits(int start, int end) {
        int length = end - start + 1;
        int baitsToSpan = 1 + (int)Math.ceil((double)(length - this.BAIT_SIZE) / (double)this.BAIT_OFFSET);
        return Math.max(this.MINIMUM_BAITS_PER_TARGET, baitsToSpan);
    }

    /**
     * Runs the whole design: loads and pads the targets, uniques (and optionally merges)
     * them, asks the configured DESIGN_STRATEGY for baits on every target, filters baits by
     * REPEAT_TOLERANCE, computes statistics and writes all output files into
     * OUTPUT_DIRECTORY (which defaults to a directory named after DESIGN_NAME).
     *
     * @return 0 on success
     * @throws PicardException if a strategy returns a bait whose length is not BAIT_SIZE
     */
    @Override
    protected int doWork() {
        if (this.OUTPUT_DIRECTORY == null) {
            this.OUTPUT_DIRECTORY = new File(this.DESIGN_NAME);
        }
        IOUtil.assertFileIsReadable(this.TARGETS);
        IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
        if (!this.OUTPUT_DIRECTORY.exists()) {
            // A failed mkdirs is reported by the writability assertion right below.
            this.OUTPUT_DIRECTORY.mkdirs();
        }
        IOUtil.assertDirectoryIsWritable(this.OUTPUT_DIRECTORY);

        // Pad every target on both sides, clamped to the bounds of its contig.
        IntervalList originalTargets = IntervalList.fromFile(this.TARGETS);
        IntervalList padded = new IntervalList(originalTargets.getHeader());
        SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();
        for (Interval i : originalTargets.getIntervals()) {
            padded.add(new Interval(i.getContig(), Math.max(i.getStart() - this.PADDING, 1), Math.min(i.getEnd() + this.PADDING, dict.getSequence(i.getContig()).getSequenceLength()), i.isNegativeStrand(), i.getName()));
        }
        log.info("Starting with " + padded.size() + " targets.");
        // uniqued() returns a new list rather than modifying the receiver, so the result has
        // to be kept; it was previously thrown away, leaving the targets unsorted and unmerged.
        padded = padded.uniqued();
        log.info("After uniquing " + padded.size() + " targets remain.");

        IntervalList targets = this.MERGE_NEARBY_TARGETS ? this.mergeNearbyTargets(padded) : padded;

        int discardedBaits = 0;
        IntervalList baits = new IntervalList(targets.getHeader());
        ReferenceSequenceFileWalker referenceWalker = new ReferenceSequenceFileWalker(this.REFERENCE_SEQUENCE);
        try {
            SequenceUtil.assertSequenceDictionariesEqual(referenceWalker.getSequenceDictionary(), targets.getHeader().getSequenceDictionary());
            for (Interval target : targets) {
                int sequenceIndex = targets.getHeader().getSequenceIndex(target.getContig());
                ReferenceSequence reference = referenceWalker.get(sequenceIndex);
                for (Bait bait : this.DESIGN_STRATEGY.design(this, target, reference)) {
                    if (bait.length() != this.BAIT_SIZE) {
                        throw new PicardException("Bait designed at wrong length: " + bait);
                    }
                    if (bait.getMaskedBaseCount() > this.REPEAT_TOLERANCE) {
                        log.debug("Discarding bait: " + bait);
                        ++discardedBaits;
                        continue;
                    }
                    baits.add(bait);
                    for (byte b : bait.getBases()) {
                        byte upper = StringUtil.toUpperCase(b);
                        if (upper == 'A' || upper == 'C' || upper == 'G' || upper == 'T') continue;
                        log.warn("Bait contains non-synthesizable bases: " + bait);
                        // One warning per bait is enough; do not repeat it for every bad base.
                        break;
                    }
                }
            }
        }
        finally {
            // Baits hold their own copy of the bases, so the reference can be released here.
            CloserUtil.close(referenceWalker);
        }

        this.calculateStatistics(targets, baits);
        log.info("Designed and kept " + baits.size() + " baits, discarded " + discardedBaits);
        originalTargets.write(new File(this.OUTPUT_DIRECTORY, this.DESIGN_NAME + ".targets.interval_list"));
        baits.write(new File(this.OUTPUT_DIRECTORY, this.DESIGN_NAME + ".baits.interval_list"));
        this.writeParametersFile(new File(this.OUTPUT_DIRECTORY, this.DESIGN_NAME + ".design_parameters.txt"));
        this.writeDesignFastaFile(new File(this.OUTPUT_DIRECTORY, this.DESIGN_NAME + ".design.fasta"), baits);
        if (this.POOL_SIZE > 0) {
            this.writePoolFiles(this.OUTPUT_DIRECTORY, this.DESIGN_NAME, baits);
        }
        return 0;
    }

    /**
     * Collapses consecutive targets on the same contig whenever baiting them separately is
     * estimated to need at least as many baits as baiting the single span that covers both.
     * Expects the input in sorted order; an empty input yields an empty result.
     *
     * @param sortedTargets sorted, uniqued targets
     * @return a new interval list, sharing the input's header, with nearby targets merged
     */
    private IntervalList mergeNearbyTargets(IntervalList sortedTargets) {
        IntervalList merged = new IntervalList(sortedTargets.getHeader());
        Interval previous = null;
        for (Interval next : sortedTargets.getIntervals()) {
            if (previous == null) {
                previous = next;
                continue;
            }
            boolean sameContig = previous.getContig().equals(next.getContig());
            if (sameContig && this.estimateBaits(previous.getStart(), previous.getEnd()) + this.estimateBaits(next.getStart(), next.getEnd()) >= this.estimateBaits(previous.getStart(), next.getEnd())) {
                // Grow the pending target; it keeps the strand and name of its first member.
                previous = new Interval(previous.getContig(), previous.getStart(), Math.max(previous.getEnd(), next.getEnd()), previous.isNegativeStrand(), previous.getName());
            } else {
                merged.add(previous);
                previous = next;
            }
        }
        if (previous != null) {
            merged.add(previous);
        }
        log.info("After collapsing nearby targets " + merged.size() + " targets remain.");
        return merged;
    }

    /**
     * Fills in the design statistics fields from the final targets and the kept baits:
     * territory and count of both lists, DESIGN_EFFICIENCY (target territory divided by
     * bait territory), the number of targets no bait overlaps, and the number of distinct
     * bases shared between baits and targets.
     *
     * Calling this method again recomputes every statistic from scratch.
     *
     * @param targets the targets baits were designed for
     * @param baits   the baits that were kept
     */
    void calculateStatistics(IntervalList targets, IntervalList baits) {
        this.TARGET_TERRITORY = (int)targets.getUniqueBaseCount();
        this.TARGET_COUNT = targets.size();
        this.BAIT_TERRITORY = (int)baits.getUniqueBaseCount();
        this.BAIT_COUNT = baits.size();
        // Double division: an empty bait set gives Infinity/NaN rather than an exception.
        this.DESIGN_EFFICIENCY = (double)this.TARGET_TERRITORY / (double)this.BAIT_TERRITORY;
        // Reset so that a repeated call does not accumulate onto the previous result.
        this.ZERO_BAIT_TARGETS = 0;

        IntervalList intersections = new IntervalList(targets.getHeader());
        OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
        detector.addAll(baits.getIntervals(), baits.getIntervals());
        for (Interval target : targets) {
            Set<Interval> overlaps = detector.getOverlaps(target);
            if (overlaps.isEmpty()) {
                ++this.ZERO_BAIT_TARGETS;
                continue;
            }
            for (Interval bait : overlaps) {
                intersections.add(target.intersect(bait));
            }
        }
        // Neighbouring baits overlap each other, so the raw intersections overlap as well.
        // uniqued() returns a new, merged list and leaves the receiver untouched, so the
        // count must be taken from its result to avoid counting shared bases twice.
        this.BAIT_TARGET_TERRITORY_INTERSECTION = (int)intersections.uniqued().getBaseCount();
    }

    /**
     * Writes one {@code NAME=value} line for every field declared on this object's class
     * that is not private, has an all-upper-case name other than "USAGE", and currently
     * holds a non-null value.
     *
     * NOTE(review): the static USAGE_SUMMARY and USAGE_DETAILS constants pass this filter
     * and are written too; the "USAGE" exclusion looks like it was meant to keep usage text
     * out of the file — confirm before changing the output format.
     *
     * @param file destination file
     * @throws PicardException if the file cannot be opened or written, or a field cannot be read
     */
    void writeParametersFile(File file) {
        BufferedWriter out = null;
        try {
            out = IOUtil.openFileForBufferedWriting(file);
            for (Field field : this.getClass().getDeclaredFields()) {
                if (Modifier.isPrivate(field.getModifiers())) continue;
                String name = field.getName();
                if (!name.toUpperCase().equals(name) || name.equals("USAGE")) continue;
                Object value = field.get(this);
                if (value == null) continue;
                out.append(name);
                out.append("=");
                out.append(value.toString());
                out.newLine();
            }
            // Close explicitly on the success path so a failing flush is still reported.
            out.close();
        }
        catch (Exception e) {
            throw new PicardException("Error writing out parameters file.", e);
        }
        finally {
            // Releases the writer when something above threw; closing an already closed
            // BufferedWriter has no effect.
            CloserUtil.close(out);
        }
    }

    /**
     * Writes every bait, in list order and on the forward strand of its stored bases, as a
     * FASTA record to {@code file}.
     *
     * @param file  destination FASTA file
     * @param baits baits to write; every element must be a {@link Bait}
     */
    void writeDesignFastaFile(File file, IntervalList baits) {
        BufferedWriter out = IOUtil.openFileForBufferedWriting(file);
        try {
            for (Interval bait : baits) {
                this.writeBaitFasta(out, bait, false);
            }
        }
        finally {
            // Release the file handle even when writing a record fails part-way through.
            CloserUtil.close(out);
        }
    }

    /**
     * Appends one two-line FASTA record for a bait: a {@code >name} header line followed by
     * the full oligo sequence produced by {@link #getBaitSequence}.
     *
     * @param out destination writer
     * @param i   the bait to write; must actually be a {@link Bait}
     * @param rc  whether to write the reverse complement of the oligo
     * @throws PicardException if the writer reports an I/O error
     */
    private void writeBaitFasta(BufferedWriter out, Interval i, boolean rc) {
        Bait bait = (Bait)i;
        try {
            // Header line.
            out.append(">").append(bait.getName());
            out.newLine();
            // Sequence line.
            out.append(this.getBaitSequence(bait, rc));
            out.newLine();
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing out bait information.", ioe);
        }
    }

    /**
     * Assembles the oligo for a bait: LEFT_PRIMER, then the bait's bases, then RIGHT_PRIMER.
     * A null primer contributes nothing.
     *
     * @param bait the bait whose bases form the middle of the oligo
     * @param rc   when true the whole assembled oligo, primers included, is reverse complemented
     * @return the oligo sequence
     */
    private String getBaitSequence(Bait bait, boolean rc) {
        StringBuilder oligo = new StringBuilder();
        if (this.LEFT_PRIMER != null) {
            oligo.append(this.LEFT_PRIMER);
        }
        oligo.append(StringUtil.bytesToString(bait.getBases()));
        if (this.RIGHT_PRIMER != null) {
            oligo.append(this.RIGHT_PRIMER);
        }
        String forward = oligo.toString();
        return rc ? SequenceUtil.reverseComplement(forward) : forward;
    }

    /**
     * Writes the baits into pool files of at most POOL_SIZE records each, named
     * {@code <basename>.pool<N>.design.fasta}, plus a matching tab-separated
     * {@code .design.txt} file per pool when OUTPUT_AGILENT_FILES is set.
     *
     * When FILL_POOLS is set and there are fewer baits than POOL_SIZE, the whole bait set
     * is written {@code floor(POOL_SIZE / baitCount)} times, with every second copy reverse
     * complemented. Bait ids in the text files restart at 1 for every copy.
     *
     * Nothing is written when there are no baits or POOL_SIZE is not positive. Those cases
     * previously led to an effectively endless loop (the copy count became
     * Integer.MAX_VALUE) or to a modulo-by-zero failure.
     *
     * @param dir      directory to create the pool files in
     * @param basename prefix of every pool file name
     * @param baits    baits to write; every element must be a {@link Bait}
     * @throws PicardException if any pool file cannot be written
     */
    void writePoolFiles(File dir, String basename, IntervalList baits) {
        if (this.POOL_SIZE <= 0 || baits.size() == 0) {
            log.warn("No pool files written: POOL_SIZE is " + this.POOL_SIZE + " and there are " + baits.size() + " baits.");
            return;
        }
        int copies = this.FILL_POOLS && baits.size() < this.POOL_SIZE ? (int)Math.floor((double)this.POOL_SIZE / (double)baits.size()) : 1;
        int written = 0;
        int nextPool = 0;
        BufferedWriter out = null;
        BufferedWriter agilentOut = null;
        // Bait ids are built from the first (up to) eight characters of the design name.
        String prefix = this.DESIGN_NAME.substring(0, Math.min(this.DESIGN_NAME.length(), 8)) + "_";
        DecimalFormat idFormat = new DecimalFormat("000000");
        try {
            for (int copy = 0; copy < copies; ++copy) {
                boolean rc = copy % 2 == 1;
                int baitId = 1;
                for (Interval interval : baits) {
                    Bait bait = (Bait)interval;
                    if (written++ % this.POOL_SIZE == 0) {
                        // Current pool is full (or this is the first record): roll over.
                        if (out != null) {
                            out.close();
                        }
                        if (agilentOut != null) {
                            agilentOut.close();
                        }
                        String filename = basename + ".pool" + nextPool++ + ".design.";
                        out = IOUtil.openFileForBufferedWriting(new File(dir, filename + "fasta"));
                        if (this.OUTPUT_AGILENT_FILES) {
                            agilentOut = IOUtil.openFileForBufferedWriting(new File(dir, filename + "txt"));
                        }
                    }
                    this.writeBaitFasta(out, interval, rc);
                    if (!this.OUTPUT_AGILENT_FILES) continue;
                    agilentOut.append(prefix).append(idFormat.format(baitId++));
                    agilentOut.append("\t");
                    agilentOut.append(this.getBaitSequence(bait, rc).toUpperCase());
                    agilentOut.newLine();
                }
            }
        }
        catch (Exception e) {
            throw new PicardException("Error while writing pool files.", e);
        }
        finally {
            // Runs on success and on failure, so no file handle is left open.
            CloserUtil.close(out);
            CloserUtil.close(agilentOut);
        }
    }

    public static enum DesignStrategy {
        CenteredConstrained{

            /**
             * Targets no longer than one bait get a single bait centred on the target.
             * Longer targets get baits that start at the target start, end at the target
             * end, and are spread evenly in between, using the smallest bait count whose
             * spacing does not exceed BAIT_OFFSET.
             *
             * For a short target close to either end of the contig, the centred bait would
             * start before base 1 or run past the last base, which made reading its bases
             * fail. The bait is now shifted just far enough to fit on the contig; it still
             * covers the whole target.
             *
             * @param designer  supplies BAIT_SIZE, BAIT_OFFSET, strand handling and bait naming
             * @param target    the interval to cover
             * @param reference the contig the target lies on
             * @return the designed baits, in ascending start order
             */
            @Override
            List<Bait> design(BaitDesigner designer, Interval target, ReferenceSequence reference) {
                LinkedList<Bait> baits = new LinkedList<Bait>();
                int baitSize = designer.BAIT_SIZE;
                int baitOffset = designer.BAIT_OFFSET;
                if (target.length() <= baitSize) {
                    int midpoint = target.getStart() + target.length() / 2;
                    int centeredStart = midpoint - baitSize / 2;
                    // Keep the bait on the contig. The lower bound is applied last so that
                    // a contig shorter than one bait still fails as it did before.
                    int lastLegalStart = reference.length() - baitSize + 1;
                    int baitStart = Math.max(Math.min(centeredStart, lastLegalStart), 1);
                    Bait bait = new Bait(target.getContig(), baitStart, CoordMath.getEnd(baitStart, baitSize), target.isNegativeStrand(), designer.makeBaitName(target.getName(), 1, 1));
                    bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
                    baits.add(bait);
                } else {
                    // target.length() > baitSize here, so baitCount is at least 2 and the
                    // division below is safe.
                    int baitCount = 1 + (int)Math.ceil((double)(target.length() - baitSize) / (double)baitOffset);
                    int firstBaitStart = target.getStart();
                    int lastBaitStart = CoordMath.getStart(target.getEnd(), baitSize);
                    double actualShift = (double)(lastBaitStart - firstBaitStart) / (double)(baitCount - 1);
                    int baitIndex = 1;
                    int start = firstBaitStart;
                    while (start <= lastBaitStart) {
                        int end = CoordMath.getEnd(start, baitSize);
                        Bait bait = new Bait(target.getContig(), start, end, target.isNegativeStrand(), designer.makeBaitName(target.getName(), baitIndex, baitCount));
                        bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
                        baits.add(bait);
                        // Derive each start from the first one to avoid accumulating rounding error.
                        start = firstBaitStart + (int)Math.round(actualShift * (double)baitIndex);
                        ++baitIndex;
                    }
                }
                return baits;
            }
        }
        ,
        FixedOffset{

            /**
             * Lays baits out at a fixed BAIT_OFFSET from one another, centred over the
             * target, after first widening targets that are too short to hold
             * MINIMUM_BAITS_PER_TARGET baits. A bait whose window holds more than
             * REPEAT_TOLERANCE masked bases is nudged left or right by up to three quarters
             * of the offset in search of an acceptable window.
             *
             * @param designer  supplies bait size, offset, minimum bait count, repeat
             *                  tolerance, strand handling and bait naming
             * @param target    the interval to cover
             * @param reference the contig the target lies on
             * @return the designed baits, in bait-index order
             */
            @Override
            List<Bait> design(BaitDesigner designer, Interval target, ReferenceSequence reference) {
                int start;
                int end;
                Interval t2;
                LinkedList<Bait> baits = new LinkedList<Bait>();
                int baitSize = designer.BAIT_SIZE;
                int baitOffset = designer.BAIT_OFFSET;
                // Span covered by the minimum number of baits placed baitOffset apart.
                int minTargetSize = baitSize + baitOffset * (designer.MINIMUM_BAITS_PER_TARGET - 1);
                if (target.length() < minTargetSize) {
                    // Too short: widen on both sides (the odd base goes to the right),
                    // clamped to the contig bounds.
                    int addon = minTargetSize - target.length();
                    int left = addon / 2;
                    int right = addon - left;
                    t2 = new Interval(target.getContig(), Math.max(target.getStart() - left, 1), Math.min(target.getEnd() + right, reference.length()), target.isNegativeStrand(), target.getName());
                } else {
                    t2 = target;
                }
                int baitCount = 1 + (int)Math.ceil((double)(t2.length() - baitSize) / (double)baitOffset);
                // Total span of the baits; any excess over the target is split across both
                // ends, and the first start is clamped to base 1.
                int baitedBases = baitSize + baitOffset * (baitCount - 1);
                int firstBaitStart = Math.max(t2.getStart() - (baitedBases - t2.length()) / 2, 1);
                byte[] bases = reference.getBases();
                int MAX_MASKED = designer.REPEAT_TOLERANCE;
                // The loop condition also assigns start and end for bait i, and stops the
                // whole design as soon as a bait would end past the end of the contig.
                for (int i = 1; i <= baitCount && (end = CoordMath.getEnd(start = firstBaitStart + baitOffset * (i - 1), baitSize)) <= reference.length(); ++i) {
                    // start/end are 1-based inclusive; the bases array is indexed from 0 with
                    // an exclusive upper bound, hence start - 1 .. end.
                    if (BaitDesigner.getMaskedBaseCount(bases, start - 1, end) > MAX_MASKED) {
                        int maxMove = baitOffset * 3 / 4;
                        // Try growing shift distances; at each distance the left shift is
                        // tried before the right one, and the first acceptable window wins.
                        for (int move = 1; move <= maxMove; ++move) {
                            if (start - move >= 1 && BaitDesigner.getMaskedBaseCount(bases, start - move - 1, end - move) <= MAX_MASKED) {
                                start -= move;
                                end -= move;
                                break;
                            }
                            if (end + move > reference.length() || BaitDesigner.getMaskedBaseCount(bases, start + move - 1, end + move) > MAX_MASKED) continue;
                            start += move;
                            end += move;
                            break;
                        }
                        // If no shift helped, the bait stays at its original position.
                    }
                    Bait bait = new Bait(t2.getContig(), start, end, t2.isNegativeStrand(), designer.makeBaitName(t2.getName(), i, baitCount));
                    bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
                    baits.add(bait);
                }
                return baits;
            }
        }
        ,
        Simple{

            /**
             * Places baits every BAIT_OFFSET bases, beginning at the target start and
             * continuing while the bait start is still below both the target end and
             * {@code reference.length() - BAIT_SIZE}.
             *
             * NOTE(review): the bait count passed to makeBaitName comes from a floor
             * formula, while the loop bound is a strict {@code <}; when the distance to the
             * last possible start is an exact multiple of the offset the formula is one
             * higher than the number of baits produced. The count only affects the zero
             * padding of bait names — confirm which bound is intended.
             *
             * @param designer  supplies bait size, offset, strand handling and bait naming
             * @param target    the interval to cover
             * @param reference the contig the target lies on
             * @return the designed baits, in ascending start order
             */
            @Override
            List<Bait> design(BaitDesigner designer, Interval target, ReferenceSequence reference) {
                int size = designer.BAIT_SIZE;
                int step = designer.BAIT_OFFSET;
                int firstStart = target.getStart();
                int lastPossibleBaitStart = Math.min(target.getEnd(), reference.length() - size);
                int plannedCount = 1 + (int)Math.floor((double)(lastPossibleBaitStart - firstStart) / (double)step);

                List<Bait> designed = new LinkedList<Bait>();
                int ordinal = 0;
                int start = firstStart;
                while (start < lastPossibleBaitStart) {
                    ++ordinal;
                    String name = designer.makeBaitName(target.getName(), ordinal, plannedCount);
                    Bait bait = new Bait(target.getContig(), start, CoordMath.getEnd(start, size), target.isNegativeStrand(), name);
                    bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
                    designed.add(bait);
                    start += step;
                }
                return designed;
            }
        };


        abstract List<Bait> design(BaitDesigner var1, Interval var2, ReferenceSequence var3);
    }

    /**
     * An interval that additionally carries the bases of the bait it describes. The bases
     * are absent until {@link #addBases} or {@link #setBases} has been called.
     */
    static class Bait
    extends Interval {
        /** Bases of this bait, or null while none have been assigned. */
        byte[] bases;

        /**
         * @param sequence contig name
         * @param start    first base of the bait
         * @param end      last base of the bait
         * @param negative whether the bait lies on the negative strand
         * @param name     bait name
         */
        public Bait(String sequence, int start, int end, boolean negative, String name) {
            super(sequence, start, end, negative, name);
        }

        /** @return the bases of this bait (the stored array itself, not a copy) */
        public byte[] getBases() {
            return this.bases;
        }

        /** @param bases the bases to store; the array is kept as-is, not copied */
        public void setBases(byte[] bases) {
            this.bases = bases;
        }

        /**
         * Copies this bait's bases out of the reference, starting at array index
         * {@code getStart() - 1}, and stores them on the bait.
         *
         * @param reference     contig to read the bases from
         * @param useStrandInfo when true and the bait is on the negative strand, the copied
         *                      bases are reverse complemented before being stored
         */
        public void addBases(ReferenceSequence reference, boolean useStrandInfo) {
            int span = this.length();
            byte[] copied = new byte[span];
            System.arraycopy(reference.getBases(), this.getStart() - 1, copied, 0, span);
            boolean flip = useStrandInfo && this.isNegativeStrand();
            if (flip) {
                SequenceUtil.reverseComplement(copied);
            }
            this.setBases(copied);
        }

        /** @return how many of this bait's bases are not an upper-case A, C, G or T */
        public int getMaskedBaseCount() {
            byte[] all = this.bases;
            return BaitDesigner.getMaskedBaseCount(all, 0, all.length);
        }

        /** @return a short description holding the bait's name and bases */
        @Override
        public String toString() {
            String name = this.getName();
            String sequence = StringUtil.bytesToString(this.bases);
            return "Bait{name=" + name + ", bases=" + sequence + '}';
        }
    }
}

