/*
 * Decompiled with CFR 0.152.
 */
package picard.analysis;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import java.io.File;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.SinglePassSamProgram;
import picard.analysis.directed.RnaSeqMetricsCollector;
import picard.annotation.Gene;
import picard.annotation.GeneAnnotationReader;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.RExecutor;

@CommandLineProgramProperties(summary="Produces RNA alignment metrics for a SAM or BAM file.  <p>This tool takes a SAM/BAM file containing the aligned reads from an RNAseq experiment and produces metrics describing the distribution of the bases within the transcripts.  It calculates the total numbers and the fractions of nucleotides within specific genomic regions including untranslated regions (UTRs), introns, intergenic sequences (between discrete genes), and peptide-coding sequences (exons). This tool also determines the numbers of bases that pass quality filters that are specific to Illumina data (PF_BASES).  For more information please see the corresponding GATK <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>Dictionary</a> entry.</p><p>Other metrics include the median coverage (depth), the ratios of 5 prime /3 prime-biases, and the numbers of reads with the correct/incorrect strand designation. The 5 prime /3 prime-bias results from errors introduced by reverse transcriptase enzymes during library construction, ultimately leading to the over-representation of either the 5 prime or 3 prime ends of transcripts.  Please see the CollectRnaSeqMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> for details on how these biases are calculated. </p><p>The sequence input must be a valid SAM/BAM file containing RNAseq data aligned by an RNAseq-aware genome aligner such a <a href='http://github.com/alexdobin/STAR'>STAR</a> or <a href='http://ccb.jhu.edu/software/tophat/index.shtml'>TopHat</a>. The tool also requires a REF_FLAT file, a tab-delimited file containing information about the location of RNA transcripts, exon start and stop sites, etc. For an example refFlat file for GRCh38, see refFlat.txt.gz at <a href='http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database'>http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database</a>.  The first five lines of the tab-limited text file appear as follows.</p><pre>DDX11L1\tNR_046018\tchr1\t+\t11873\t14409\t14409\t14409\t3\t11873,12612,13220,\t12227,12721,14409,WASH7P\tNR_024540\tchr1\t-\t14361\t29370\t29370\t29370\t11\t14361,14969,15795,16606,16857,17232,17605,17914,18267,24737,29320,\t14829,15038,15947,16765,17055,17368,17742,18061,18366,24891,29370,DLGAP2-AS1\tNR_103863\tchr8_KI270926v1_alt\t-\t33083\t35050\t35050\t35050\t3\t33083,33761,35028,\t33281,33899,35050,MIR570\tNR_030296\tchr3\t+\t195699400\t195699497\t195699497\t195699497\t1\t195699400,\t195699497,MIR548A3\tNR_030330\tchr8\t-\t104484368\t104484465\t104484465\t104484465\t1\t104484368,\t104484465,</pre><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectRnaSeqMetrics \\<br />      I=input.bam \\<br />      O=output.RNA_Metrics \\<br />      REF_FLAT=ref_flat.txt \\<br />      STRAND=SECOND_READ_TRANSCRIPTION_STRAND \\<br />      RIBOSOMAL_INTERVALS=ribosomal.interval_list</pre>Please see the CollectRnaSeqMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> for a complete description of the metrics produced by this tool.<hr />", oneLineSummary="Produces RNA alignment metrics for a SAM or BAM file.  ", programGroup=DiagnosticsAndQCProgramGroup.class)
@DocumentedFeature
public class CollectRnaSeqMetrics
extends SinglePassSamProgram {
    static final String USAGE_SUMMARY = "Produces RNA alignment metrics for a SAM or BAM file.  ";
    static final String USAGE_DETAILS = "<p>This tool takes a SAM/BAM file containing the aligned reads from an RNAseq experiment and produces metrics describing the distribution of the bases within the transcripts.  It calculates the total numbers and the fractions of nucleotides within specific genomic regions including untranslated regions (UTRs), introns, intergenic sequences (between discrete genes), and peptide-coding sequences (exons). This tool also determines the numbers of bases that pass quality filters that are specific to Illumina data (PF_BASES).  For more information please see the corresponding GATK <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>Dictionary</a> entry.</p><p>Other metrics include the median coverage (depth), the ratios of 5 prime /3 prime-biases, and the numbers of reads with the correct/incorrect strand designation. The 5 prime /3 prime-bias results from errors introduced by reverse transcriptase enzymes during library construction, ultimately leading to the over-representation of either the 5 prime or 3 prime ends of transcripts.  Please see the CollectRnaSeqMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> for details on how these biases are calculated. </p><p>The sequence input must be a valid SAM/BAM file containing RNAseq data aligned by an RNAseq-aware genome aligner such a <a href='http://github.com/alexdobin/STAR'>STAR</a> or <a href='http://ccb.jhu.edu/software/tophat/index.shtml'>TopHat</a>. The tool also requires a REF_FLAT file, a tab-delimited file containing information about the location of RNA transcripts, exon start and stop sites, etc. For an example refFlat file for GRCh38, see refFlat.txt.gz at <a href='http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database'>http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database</a>.  The first five lines of the tab-limited text file appear as follows.</p><pre>DDX11L1\tNR_046018\tchr1\t+\t11873\t14409\t14409\t14409\t3\t11873,12612,13220,\t12227,12721,14409,WASH7P\tNR_024540\tchr1\t-\t14361\t29370\t29370\t29370\t11\t14361,14969,15795,16606,16857,17232,17605,17914,18267,24737,29320,\t14829,15038,15947,16765,17055,17368,17742,18061,18366,24891,29370,DLGAP2-AS1\tNR_103863\tchr8_KI270926v1_alt\t-\t33083\t35050\t35050\t35050\t3\t33083,33761,35028,\t33281,33899,35050,MIR570\tNR_030296\tchr3\t+\t195699400\t195699497\t195699497\t195699497\t1\t195699400,\t195699497,MIR548A3\tNR_030330\tchr8\t-\t104484368\t104484465\t104484465\t104484465\t1\t104484368,\t104484465,</pre><p>Note: Metrics labeled as percentages are actually expressed as fractions!</p><h4>Usage example:</h4><pre>java -jar picard.jar CollectRnaSeqMetrics \\<br />      I=input.bam \\<br />      O=output.RNA_Metrics \\<br />      REF_FLAT=ref_flat.txt \\<br />      STRAND=SECOND_READ_TRANSCRIPTION_STRAND \\<br />      RIBOSOMAL_INTERVALS=ribosomal.interval_list</pre>Please see the CollectRnaSeqMetrics <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> for a complete description of the metrics produced by this tool.<hr />";
    private static final Log LOG = Log.getInstance(CollectRnaSeqMetrics.class);
    @Argument(doc="Gene annotations in refFlat form.  Format described here: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat")
    public File REF_FLAT;
    @Argument(doc="Location of rRNA sequences in genome, in interval_list format.  If not specified no bases will be identified as being ribosomal.  Format described <a href=\"http://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/util/IntervalList.html\">here</a>:", optional=true)
    public File RIBOSOMAL_INTERVALS;
    @Argument(shortName="STRAND", doc="For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.")
    public RnaSeqMetricsCollector.StrandSpecificity STRAND_SPECIFICITY;
    @Argument(doc="When calculating coverage based values (e.g. CV of coverage) only use transcripts of this length or greater.")
    public int MINIMUM_LENGTH = 500;
    @Argument(doc="The PDF file to write out a plot of normalized position vs. coverage.", shortName="CHART", optional=true)
    public File CHART_OUTPUT;
    @Argument(doc="If a read maps to a sequence specified with this option, all the bases in the read are counted as ignored bases.  These reads are not counted as ")
    public Set<String> IGNORE_SEQUENCE = new HashSet<String>();
    @Argument(doc="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA.")
    public double RRNA_FRAGMENT_PERCENTAGE = 0.8;
    @Argument(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.  ")
    public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
    private RnaSeqMetricsCollector collector;
    private String plotSubtitle = "";

    public static void main(String[] argv) {
        new CollectRnaSeqMetrics().instanceMainWithExit(argv);
    }

    @Override
    protected String[] customCommandLineValidation() {
        if (this.RIBOSOMAL_INTERVALS == null && this.RRNA_FRAGMENT_PERCENTAGE == 0.0) {
            throw new PicardException("Must use a RIBOSOMAL_INTERVALS file if RRNA_FRAGMENT_PERCENTAGE = 0.0");
        }
        return super.customCommandLineValidation();
    }

    @Override
    protected void setup(SAMFileHeader header, File samFile) {
        if (this.CHART_OUTPUT != null) {
            IOUtil.assertFileIsWritable(this.CHART_OUTPUT);
        }
        OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadRefFlat(this.REF_FLAT, header.getSequenceDictionary());
        LOG.info("Loaded " + geneOverlapDetector.getAll().size() + " genes.");
        Long ribosomalBasesInitialValue = this.RIBOSOMAL_INTERVALS != null ? Long.valueOf(0L) : null;
        OverlapDetector<Interval> ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(samFile, header, this.RIBOSOMAL_INTERVALS, LOG);
        HashSet<Integer> ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(header, this.IGNORE_SEQUENCE);
        this.collector = new RnaSeqMetricsCollector(this.METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), ribosomalBasesInitialValue, geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, this.MINIMUM_LENGTH, this.STRAND_SPECIFICITY, this.RRNA_FRAGMENT_PERCENTAGE, true);
        List<SAMReadGroupRecord> readGroups = header.getReadGroups();
        if (readGroups.size() == 1) {
            this.plotSubtitle = readGroups.get(0).getLibrary();
            if (null == this.plotSubtitle) {
                this.plotSubtitle = "";
            }
        }
    }

    @Override
    protected void acceptRead(SAMRecord rec, ReferenceSequence refSeq) {
        this.collector.acceptRecord(rec, refSeq);
    }

    @Override
    protected void finish() {
        int rResult;
        this.collector.finish();
        MetricsFile file = this.getMetricsFile();
        this.collector.addAllLevelsToFile(file);
        file.write(this.OUTPUT);
        boolean atLeastOneHistogram = false;
        for (Histogram histo : file.getAllHistograms()) {
            atLeastOneHistogram = atLeastOneHistogram || !histo.isEmpty();
        }
        if (this.CHART_OUTPUT != null && atLeastOneHistogram && (rResult = RExecutor.executeFromClasspath("picard/analysis/rnaSeqCoverage.R", this.OUTPUT.getAbsolutePath(), this.CHART_OUTPUT.getAbsolutePath(), this.INPUT.getName(), this.plotSubtitle)) != 0) {
            throw new PicardException("Problem invoking R to generate plot.");
        }
    }
}

