/*
 * Decompiled with CFR 0.152.
 */
package picard.sam;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.util.function.Function;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

/**
 * Command-line tool that splits a SAM or BAM file into several BAM files.
 *
 * <p>Records are copied in input order into consecutively numbered shard files. A new shard is
 * only started when the per-file read quota has been reached <em>and</em> the read name changes,
 * so records that share a read name and are adjacent in the input stay in the same shard.
 *
 * <p>The number of shards is derived either from {@link #SPLIT_TO_N_FILES} directly or from
 * {@link #SPLIT_TO_N_READS} together with the total read count. When
 * {@link #TOTAL_READS_IN_INPUT} is not supplied (left at 0) the input is traversed once up front
 * just to count reads, which is why it must then be a regular file rather than a stream.
 */
@CommandLineProgramProperties(
        summary = SplitSamByNumberOfReads.USAGE_SUMMARY + " " + SplitSamByNumberOfReads.USAGE_DETAILS,
        oneLineSummary = SplitSamByNumberOfReads.USAGE_SUMMARY,
        programGroup = ReadDataManipulationProgramGroup.class)
@DocumentedFeature
public class SplitSamByNumberOfReads
extends CommandLineProgram {
    static final String USAGE_SUMMARY = "Splits a SAM or BAM file to multiple BAMs.";
    static final String USAGE_DETAILS = "This tool splits the input query-grouped SAM/BAM file into multiple BAM files while maintaining the sort order. This can be used to split a large unmapped BAM in order to parallelize alignment. It will traverse the bam twice unless TOTAL_READS_IN_INPUT is provided.<br /><h4>Usage example:</h4><pre>java -jar picard.jar SplitSamByNumberOfReads \\<br />     I=paired_unmapped_input.bam \\<br />     OUTPUT=out_dir \\ <br />     TOTAL_READS_IN_INPUT=800000000 \\ <br />     SPLIT_TO_N_READS=48000000</pre><hr />";

    @Argument(doc="Input SAM/BAM file to split", shortName="I")
    public File INPUT;

    @Argument(shortName="N_READS", doc="Split to have approximately N reads per output file. The actual number of reads per output file will vary by no more than the number of output files * (the maximum number of reads with the same queryname - 1).", mutex={"SPLIT_TO_N_FILES"})
    public int SPLIT_TO_N_READS;

    @Argument(shortName="N_FILES", doc="Split to N files.", mutex={"SPLIT_TO_N_READS"})
    public int SPLIT_TO_N_FILES;

    // 0 is treated as "not provided" and triggers the counting pass in doWork().
    @Argument(shortName="TOTAL_READS", doc="Total number of reads in the input file. If this is not provided, the input will be read twice, the first time to get a count of the total reads.", optional=true)
    public long TOTAL_READS_IN_INPUT;

    @Argument(shortName="O", doc="Directory in which to output the split BAM files.")
    public File OUTPUT;

    @Argument(shortName="OUT_PREFIX", doc="Output files will be named <OUT_PREFIX>_N.bam, where N enumerates the output file.")
    public String OUT_PREFIX = "shard";

    private final Log log = Log.getInstance(SplitSamByNumberOfReads.class);

    /**
     * Splits {@link #INPUT} into shard files inside {@link #OUTPUT}.
     *
     * @return 0 on success; 1 when the read count is unknown and INPUT is not a regular file
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT);
        // Without a supplied read count the input is traversed twice, which a stream cannot support.
        if (this.TOTAL_READS_IN_INPUT == 0L && !Files.isRegularFile(this.INPUT.toPath())) {
            this.log.error(String.format("INPUT is not a regular file: %s. If TOTAL_READS_IN_INPUT is not supplied, INPUT cannot be a stream.", this.INPUT));
            return 1;
        }
        IOUtil.assertDirectoryIsWritable(this.OUTPUT);

        final SamReaderFactory readerFactory = SamReaderFactory.makeDefault();
        final SamReader reader = readerFactory.referenceSequence(this.REFERENCE_SEQUENCE).open(this.INPUT);
        SAMFileWriter currentWriter = null;
        try {
            final SAMFileHeader header = reader.getFileHeader();
            if (header.getSortOrder() == SAMFileHeader.SortOrder.coordinate) {
                this.log.warn("Splitting a coordinate sorted bam may result in invalid bams that do not always contain each read's mate in the same bam.");
            }
            // Constant-first equals keeps this safe should the header carry no version.
            if (!SAMFileHeader.CURRENT_VERSION.equals(header.getVersion())) {
                this.log.warn(String.format("Input file's version is %s, but the current SAM format version is %s. Outputs will be written with current version.", header.getVersion(), SAMFileHeader.CURRENT_VERSION));
            }

            final long totalReads = this.TOTAL_READS_IN_INPUT == 0L
                    ? this.countReads(readerFactory)
                    : this.TOTAL_READS_IN_INPUT;

            final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
            final int splitToNFiles = this.SPLIT_TO_N_FILES != 0
                    ? this.SPLIT_TO_N_FILES
                    : (int)Math.ceil((double)totalReads / (double)this.SPLIT_TO_N_READS);
            // Kept as long: a per-file quota derived from a long total may not fit in an int.
            final long readsPerFile = (long)Math.ceil((double)totalReads / (double)splitToNFiles);

            long readsWritten = 0L;
            int fileIndex = 1;
            currentWriter = this.openShardWriter(writerFactory, header, fileIndex++);
            String lastReadName = "";
            final ProgressLogger progress = new ProgressLogger(this.log);
            for (final SAMRecord currentRecord : reader) {
                // Only roll over on a read-name boundary so same-named adjacent records stay together.
                if (readsWritten >= readsPerFile && !lastReadName.equals(currentRecord.getReadName())) {
                    currentWriter.close();
                    // Drop the reference first so a failed open does not re-close the finished shard.
                    currentWriter = null;
                    currentWriter = this.openShardWriter(writerFactory, header, fileIndex++);
                    readsWritten = 0L;
                }
                currentWriter.addAlignment(currentRecord);
                lastReadName = currentRecord.getReadName();
                ++readsWritten;
                progress.record(currentRecord);
            }
            // Close explicitly on the success path so a failure while finishing the last shard is not swallowed.
            currentWriter.close();
            currentWriter = null;

            if (progress.getCount() != totalReads) {
                this.log.warn(String.format("The totalReads (%d) provided does not match the reads found in the input file (%d). Files may not be split evenly or number of files may not match what was requested. There were %d files generated each with around %d reads except the last file which contained %d reads.", totalReads, progress.getCount(), fileIndex - 1, readsPerFile, readsWritten));
            }
        } finally {
            // Best-effort cleanup on the failure path; on success the writer is already closed and null.
            if (currentWriter != null) {
                CloserUtil.close(currentWriter);
            }
            CloserUtil.close(reader);
        }
        return 0;
    }

    /**
     * Traverses INPUT once with a dedicated reader purely to count its records.
     *
     * @param readerFactory factory used to open the counting reader
     * @return the number of records seen in INPUT
     */
    private long countReads(final SamReaderFactory readerFactory) {
        final ProgressLogger firstPassProgress = new ProgressLogger(this.log, 1000000, "Counted");
        final SamReader firstPassReader = readerFactory.referenceSequence(this.REFERENCE_SEQUENCE).open(this.INPUT);
        try {
            this.log.info("First pass traversal to count number of reads is beginning. If number of reads is known, use TOTAL_READS_IN_INPUT to skip first traversal.");
            for (final SAMRecord rec : firstPassReader) {
                firstPassProgress.record(rec);
            }
        } finally {
            CloserUtil.close(firstPassReader);
        }
        this.log.info(String.format("First pass traversal to count number of reads ended, found %d total reads.", firstPassProgress.getCount()));
        return firstPassProgress.getCount();
    }

    /**
     * Opens the writer for one shard, named {@code <OUT_PREFIX>_NNNN.bam} (index zero-padded to
     * four digits) inside the OUTPUT directory.
     *
     * @param writerFactory factory used to create the writer
     * @param header        header to write to the shard
     * @param index         1-based shard number
     * @return a writer for the shard file
     */
    private SAMFileWriter openShardWriter(final SAMFileWriterFactory writerFactory, final SAMFileHeader header, final int index) {
        final File shardFile = new File(this.OUTPUT, this.OUT_PREFIX + "_" + String.format("%04d", index) + ".bam");
        return writerFactory.makeSAMOrBAMWriter(header, true, shardFile);
    }

    /**
     * Validates the split arguments.
     *
     * @return a one-element array with an error message when TOTAL_READS_IN_INPUT is negative or
     *         when neither SPLIT_TO_N_FILES nor SPLIT_TO_N_READS exceeds 1; {@code null} otherwise
     */
    @Override
    protected String[] customCommandLineValidation() {
        if (this.TOTAL_READS_IN_INPUT < 0L) {
            return new String[]{String.format("Cannot set TOTAL_READS_IN_INPUT to a number less than 1, found %d.", this.TOTAL_READS_IN_INPUT)};
        }
        // The check rejects values <= 1, so the message states the threshold that is actually enforced.
        if (this.SPLIT_TO_N_FILES <= 1 && this.SPLIT_TO_N_READS <= 1) {
            return new String[]{String.format("One of SPLIT_TO_N_FILES or SPLIT_TO_N_READS must be greater than 1. Found SPLIT_TO_N_FILES is %d and SPLIT_TO_N_READS is %d.", this.SPLIT_TO_N_FILES, this.SPLIT_TO_N_READS)};
        }
        return null;
    }
}

