#!/usr/bin/env python

from __future__ import print_function

import sys

import argparse
import os
from epic2.version import __version__
from epic2.main import _main

# Command-line interface of epic2. The description embeds the package version
# and the program name is derived from this script's own file name.
parser = argparse.ArgumentParser(
    prog=os.path.basename(__file__),
    description=(
        "epic2, version: {}\n"
        "(Visit github.com/endrebak/epic2 for examples and help. "
        "Run epic2-example for a simple example command.)\n"
        "    ").format(__version__))

# Treatment (pull-down) libraries: one or more paths.
#
# The option is mandatory for a normal run, but must not be demanded when the
# user only asks for the version or for the example paths, since those
# invocations exit before any analysis runs. Because `required` has to be
# known while the parser is being built, the raw command line is inspected
# here, before parsing.
# NOTE: this is an exact-match test on sys.argv, so abbreviated spellings that
# argparse itself would accept (e.g. `--exam`) are not recognised here.
parser.add_argument(
    '--treatment',
    '-t',
    required=not any(
        flag in sys.argv
        for flag in ("--version", "-v", "-ex", "--example")),
    type=str,
    nargs='+',
    help=
    '''Treatment (pull-down) file(s) in one of these formats: bed, bedpe, bed.gz, bedpe.gz or (single-end) bam, sam. The --guess-bampe flag enables optional support for (paired-end) bampe and sampe formats. Mixing file formats is allowed.'''
)

# Control (input) libraries: optional, one or more paths.
parser.add_argument(
    "--control",
    "-c",
    nargs="+",
    type=str,
    required=False,
    help=(
        "Control (input) file(s) in one of these formats: bed, bedpe, "
        "bed.gz, bedpe.gz or (single-end) bam, sam. The --guess-bampe flag "
        "enables optional support for (paired-end) bampe and sampe formats. "
        "Mixing file formats is allowed."))

# Genome name; falls back to hg19 when not given.
parser.add_argument(
    "--genome",
    "-gn",
    type=str,
    default="hg19",
    required=False,
    help=(
        "Which genome to analyze. Default: hg19. If --chromsizes and --egf "
        "flag is given, --genome is not required."))

# Boolean switch, off unless the flag is present.
parser.add_argument(
    "--keep-duplicates",
    "-kd",
    action="store_true",
    default=False,
    required=False,
    help=(
        "Keep reads mapping to the same position on the same strand within "
        "a library. Default: False.\n"
        "                   "))

# Boolean switch, off unless the flag is present.
parser.add_argument(
    "--original-algorithm",
    "-oa",
    action="store_true",
    default=False,
    required=False,
    help=(
        "Use the original SICER algorithm, without the epic2 fix. This will "
        "use all reads in your files to compute the p-values, including "
        "those falling outside the genome boundaries."))


# Window width used to scan the genome (integer, default 200).
parser.add_argument(
    "--bin-size",
    "-bin",
    type=int,
    default=200,
    required=False,
    help=(
        "Size of the windows to scan the genome. BIN-SIZE is the smallest "
        "possible island. Default 200.\n"
        "                   "))

# Number of gaps tolerated between eligible windows (integer, default 3).
parser.add_argument(
    "--gaps-allowed",
    "-g",
    type=int,
    default=3,
    required=False,
    help=(
        "This number is multiplied by the window size to determine the number of gaps\n"
        "                   (ineligible windows) allowed between two eligible windows.\n"
        "                   Must be an integer. Default: 3. "))

# Fragment length for single-end reads (integer, default 150).
parser.add_argument(
    "--fragment-size",
    "-fs",
    type=int,
    default=150,
    required=False,
    help=(
        "(Single end reads only) Size of the sequenced fragment. Each read "
        "is extended half the fragment size from the 5' end. Default 150 "
        "(i.e. extend by 75)."))

# FDR threshold for reported islands (float, default 0.05).
parser.add_argument(
    "--false-discovery-rate-cutoff",
    "-fdr",
    type=float,
    default=0.05,
    required=False,
    help=(
        "Remove all islands with an FDR above cutoff. Default 0.05.\n"
        "                   "))

# Optional override of the effective genome fraction (float, no default here).
parser.add_argument(
    "--effective-genome-fraction",
    "-egf",
    type=float,
    required=False,
    help=(
        "Use a different effective genome fraction than the one included in "
        "epic2. The default value depends on the genome and readlength, but "
        "is a number between 0 and 1."))

# Optional path to a user-supplied chromosome sizes file.
parser.add_argument(
    "--chromsizes",
    "-cs",
    type=str,
    required=False,
    help=(
        "Set the chromosome lengths yourself in a file with two columns: "
        "chromosome names and sizes. Useful to analyze custom genomes, "
        "assemblies or simulated data. Only chromosomes included in the "
        "file will be analyzed."))

# E-value (integer, default 1000).
parser.add_argument(
    "--e-value",
    "-e",
    type=int,
    default=1000,
    required=False,
    help=(
        "The E-value controls the genome-wide error rate of identified "
        "islands under the random background assumption. Should be used "
        "when not using a control library. Default: 1000."))

# Flag bits a read must have set to be kept (integer, default 0).
parser.add_argument(
    "--required-flag",
    "-f",
    type=int,
    default=0,
    required=False,
    help=(
        "(bampe/bam only.) Keep reads with these bits set in flag. "
        "Same as `samtools view -f`. Default 0\n"
        "                   "))

# Flag bits that cause a read to be discarded (integer, default 1540).
parser.add_argument(
    "--filter-flag",
    "-F",
    type=int,
    default=1540,
    required=False,
    help=(
        "(bampe/bam only.) Discard reads with these bits set in flag. "
        "Same as `samtools view -F`. Default 1540 (hex: 0x604).\n"
        "See https://broadinstitute.github.io/picard/explain-flags.html for more info.\n"
        "                   "))

# Minimum mapping quality (integer, default 5).
parser.add_argument(
    "--mapq",
    "-m",
    type=int,
    default=5,
    required=False,
    help=(
        "(bampe/bam only.) Discard reads with mapping quality lower than "
        "this. Default 5.\n"
        "                   "))

# Boolean switch, off unless the flag is present.
#
# The help text names the companion option by the spelling it is declared
# with (--discard-chromosomes-pattern), so users can find it in the --help
# listing instead of relying on argparse's prefix matching.
parser.add_argument(
    '--autodetect-chroms',
    '-a',
    required=False,
    default=False,
    action="store_true",
    help=
    '''(bampe/bam only.) Autodetect chromosomes from bam file. Use with --discard-chromosomes-pattern flag to avoid non-canonical chromosomes.'''
)

# Pattern used to drop chromosomes (string, default "_").
parser.add_argument(
    "--discard-chromosomes-pattern",
    "-d",
    type=str,
    default="_",
    required=False,
    help=(
        "(bampe/bam only.) Discard reads from chromosomes matching this pattern. Default\n"
        "                   '_'. Note that if you are not interested in the results from\n"
        "                   non-canonical chromosomes, you should ensure they are\n"
        "                   removed with this flag, otherwise they will make the\n"
        "                   statistical analysis too stringent."))


# Destination file for the results; the value is False when the option is
# not given.
parser.add_argument(
    "--output",
    "-o",
    type=str,
    default=False,
    required=False,
    help="File to write results to. Default: stdout.")



# Boolean switch (long form only).
parser.add_argument(
    "--original-statistics",
    action="store_true",
    required=False,
    help=(
        "Use the original SICER way of computing the statistics. Like SICER "
        "itself, this method raises an error on large datasets. Only "
        "included for debugging-purposes."))

# Boolean switch (long form only), off unless the flag is present.
parser.add_argument(
    "--guess-bampe",
    action="store_true",
    default=False,
    required=False,
    help=(
        "Autodetect bampe file format based on flags from the first 100 "
        "reads. If all of them are paired, then the format is bampe. Only "
        "properly paired reads are processed by default (0x1 and 0x2 "
        "samtools flags)."))

# Boolean switch, off unless the flag is present.
parser.add_argument(
    "--quiet",
    "-q",
    action="store_true",
    default=False,
    required=False,
    help="Do not write output messages to stderr.")

# Boolean switch; when set, the entry point prints the example paths and exits.
parser.add_argument(
    "--example",
    "-ex",
    action="store_true",
    default=False,
    required=False,
    help="Show the paths of the example data and an example command.")


# Prints the package version and exits (argparse's built-in "version" action).
parser.add_argument(
    "--version",
    "-v",
    action="version",
    version=__version__)

if __name__ == "__main__":

    # Turn the parsed namespace into a plain dict of option name -> value.
    args = vars(parser.parse_args())

    if not args["example"]:
        # Normal run: hand all options to the epic2 entry point.
        _main(args)
    else:
        # --example: report where the packaged example files are located and
        # show a command that uses them, then stop without running epic2.
        import pkg_resources

        locate = pkg_resources.resource_filename
        treatment = locate("epic2", "examples/test.bed.gz")
        control = locate("epic2", "examples/control.bed.gz")

        print("Treatment: " + treatment)
        print("Control: " + control)
        example_command = "Example command: epic2 -t {} -c {} > deleteme.txt"
        print(example_command.format(treatment, control))
        sys.exit(0)
