#!/usr/bin/env python

"""
BiasAway tool originally developed to generate adapted background for motif
overrepresentation.

BiasAway offers several different ways of generating backgrounds, which fall
into two categories:
 - Creation of new random sequences:
   - k-mer shuffling using the foreground sequences
   - k-mer shuffling within a sliding window using foreground sequences
 - Extraction of sequences from a set of possible background sequences:
   - respecting the %GC distribution of the foreground (using %GC bins)
   - respecting the %GC distribution as in the previous item and also
     respecting the %GC composition within a sliding window for %GC bin

Authors: BiasAway has been originally developed by Anthony Mathelier, Luis Del
Peso, and Rebecca Worsley-Hunt at the University of British Columbia.

Modified by Peter Arner, Aziz Khan and Anthony Mathelier
"""

import argparse
from biasaway import kmer_shuffling_generator as kmer_shuff
from biasaway import kmer_window_shuffling_generator as kmer_win_shuff
from biasaway import GC_compo_matching as GC_compo
from biasaway import GC_window_compo_matching as GC_window_compo
from biasaway.utils import get_seqs
from biasaway.utils import make_gc_plot
from biasaway.utils import make_len_plot
from biasaway.utils import make_dinuc_plot
from biasaway.utils import make_dinuc_acgt_only_plot
import sys
import os
import errno
import time
from biasaway import __version__ as VERSION


def kmer_shuffling_generator(argu):
    """Run the ``k`` subcommand (k-mer shuffling of the foreground).

    The foreground is read from ``argu.fg_file`` and passed to
    ``kmer_shuff.generate_sequences`` along with ``argu.kmer``,
    ``argu.nfold`` and ``argu.random_seed``. The four QC plots are drawn
    only when ``argu.plot_filename`` is set and the generator returned a
    non-empty list of background lengths.
    """
    fg_seqs, fg_gc, fg_lens, fg_dinucs = get_seqs(argu.fg_file)
    bg_gc, bg_lens, bg_dinucs = kmer_shuff.generate_sequences(
        fg_seqs, argu.kmer, argu.nfold, argu.random_seed)

    # No plot basename requested, or nothing generated: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def kmer_shuffling_window_generator(argu):
    """Run the ``w`` subcommand (k-mer shuffling within a sliding window).

    The foreground is read from ``argu.fg_file`` and passed to
    ``kmer_win_shuff.generate_sequences`` along with ``argu.kmer``,
    ``argu.winlen``, ``argu.step``, ``argu.nfold`` and
    ``argu.random_seed``. The four QC plots are drawn only when
    ``argu.plot_filename`` is set and the generator returned a non-empty
    list of background lengths.
    """
    fg_seqs, fg_gc, fg_lens, fg_dinucs = get_seqs(argu.fg_file)
    bg_gc, bg_lens, bg_dinucs = kmer_win_shuff.generate_sequences(
        fg_seqs, argu.kmer, argu.winlen, argu.step, argu.nfold,
        argu.random_seed)

    # No plot basename requested, or nothing generated: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def test_empty_bg_dir(bg_dir):
    """Ensure ``bg_dir`` exists and is empty before it gets populated.

    Used when a background file is supplied: the directory must not
    already hold content from a previous run.

    - If ``bg_dir`` is an existing directory with entries, the program
      exits through ``sys.exit`` with an explanatory message.
    - If ``bg_dir`` is an existing empty directory, nothing happens.
    - Otherwise the directory (and any missing parents) is created.

    Raises:
        SystemExit: ``bg_dir`` exists and is not empty.
        OSError: the directory could not be created for a reason other
            than it having appeared in the meantime.
    """
    if os.path.isdir(bg_dir):
        if os.listdir(bg_dir):
            msg = "### EXITING since both a non-empty background directory "
            msg += "and a background file are given ###"
            sys.exit(msg)
    else:
        try:
            os.makedirs(bg_dir)
        except OSError as exc:
            # Tolerate the directory having been created concurrently;
            # anything else (permissions, a file in the way, ...) is fatal.
            if exc.errno == errno.EEXIST and os.path.isdir(bg_dir):
                pass
            else:
                raise


# This is a validation helper, not a unit test: stop pytest/nose from
# collecting it when the name is imported into a test module.
test_empty_bg_dir.__test__ = False


def test_non_empty_bg_dir(bg_dir):
    """Ensure ``bg_dir`` is an existing directory with at least one entry.

    Used when no background file is supplied, so the background
    directory is the only source of background data.

    Raises:
        SystemExit: ``bg_dir`` is not a directory or has no entries.
    """
    if not (os.path.isdir(bg_dir) and os.listdir(bg_dir)):
        msg = "### EXITING since the background directory does not exist "
        msg += "or is empty ###"
        sys.exit(msg)


# This is a validation helper, not a unit test: stop pytest/nose from
# collecting it when the name is imported into a test module.
test_non_empty_bg_dir.__test__ = False


def gc_compo_generator(argu):
    """Run the ``g`` subcommand, with or without length matching.

    Dispatches to ``gc_compo_len_generator`` when ``argu.len_opt`` is
    truthy, and to ``gc_compo_generator_no_len`` otherwise.
    """
    if not argu.len_opt:
        gc_compo_generator_no_len(argu)
        return
    gc_compo_len_generator(argu)


def gc_compo_generator_no_len(argu):
    """Run the ``g`` subcommand without length matching.

    Foreground %GC bins come from ``GC_compo.fg_GC_bins``. When a
    background file is given, ``argu.bg_dir`` must be empty (or absent)
    and ``GC_compo.bg_GC_bins`` is called with the file and directory;
    otherwise ``argu.bg_dir`` must already exist and be non-empty.
    Background sequences are then selected with
    ``GC_compo.generate_sequences`` and the QC plots are drawn when
    ``argu.plot_filename`` is set and background lengths were returned.
    """
    fg_gc, fg_bins, fg_lens, fg_dinucs = GC_compo.fg_GC_bins(argu.fg_file)

    # NOTE(review): the bins returned by bg_GC_bins are discarded and None
    # is handed to generate_sequences - presumably it reads the bins back
    # from bg_dir; confirm against GC_compo.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_compo.bg_GC_bins(argu.bg_file, argu.bg_dir)

    bg_gc, bg_lens, bg_dinucs = GC_compo.generate_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.nfold, argu.random_seed)

    # No plot basename requested, or nothing selected: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def gc_compo_len_generator(argu):
    """Run the ``g`` subcommand with length matching.

    Foreground bins come from ``GC_compo.fg_len_GC_bins``. When a
    background file is given, ``argu.bg_dir`` must be empty (or absent)
    and ``GC_compo.bg_len_GC_bins`` is called with the file and
    directory; otherwise ``argu.bg_dir`` must already exist and be
    non-empty. Background sequences are then selected with
    ``GC_compo.generate_len_sequences`` and the QC plots are drawn when
    ``argu.plot_filename`` is set and background lengths were returned.
    """
    fg_gc, fg_bins, fg_lens, fg_dinucs = GC_compo.fg_len_GC_bins(
        argu.fg_file)

    # NOTE(review): the bins returned by bg_len_GC_bins are discarded and
    # None is handed to generate_len_sequences - presumably it reads the
    # bins back from bg_dir; confirm against GC_compo.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_compo.bg_len_GC_bins(argu.bg_file, argu.bg_dir)

    bg_gc, bg_lens, bg_dinucs = GC_compo.generate_len_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.nfold, argu.random_seed)

    # No plot basename requested, or nothing selected: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def gc_compo_window_generator(argu):
    """Run the ``c`` subcommand, with or without length matching.

    Dispatches to ``gc_compo_len_window_generator`` when ``argu.len_opt``
    is truthy, and to ``gc_compo_window_generator_no_len`` otherwise.
    """
    if not argu.len_opt:
        gc_compo_window_generator_no_len(argu)
        return
    gc_compo_len_window_generator(argu)


def gc_compo_len_window_generator(argu):
    """Run the ``c`` subcommand with length matching.

    Foreground bins come from ``GC_window_compo.fg_len_GC_bins`` using
    ``argu.winlen`` and ``argu.step``. When a background file is given,
    ``argu.bg_dir`` must be empty (or absent) and
    ``GC_window_compo.bg_len_GC_bins`` is called with the file and
    directory; otherwise ``argu.bg_dir`` must already exist and be
    non-empty. Background sequences are then selected with
    ``GC_window_compo.generate_len_sequences`` and the QC plots are drawn
    when ``argu.plot_filename`` is set and background lengths were
    returned.
    """
    fg_stats = GC_window_compo.fg_len_GC_bins(argu.fg_file, argu.winlen,
                                              argu.step)
    fg_gc, fg_bins, fg_lens, fg_dinucs = fg_stats

    # NOTE(review): the bins returned by bg_len_GC_bins are discarded and
    # None is handed to generate_len_sequences - presumably it reads the
    # bins back from bg_dir; confirm against GC_window_compo.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_window_compo.bg_len_GC_bins(argu.bg_file, argu.bg_dir)

    bg_gc, bg_lens, bg_dinucs = GC_window_compo.generate_len_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.deviation, argu.winlen,
        argu.step, argu.nfold, argu.random_seed)

    # No plot basename requested, or nothing selected: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def gc_compo_window_generator_no_len(argu):
    """Run the ``c`` subcommand without length matching.

    Foreground bins come from ``GC_window_compo.fg_GC_bins`` using
    ``argu.winlen`` and ``argu.step``. When a background file is given,
    ``argu.bg_dir`` must be empty (or absent) and
    ``GC_window_compo.bg_GC_bins`` is called with the file and directory;
    otherwise ``argu.bg_dir`` must already exist and be non-empty.
    Background sequences are then selected with
    ``GC_window_compo.generate_sequences`` and the QC plots are drawn
    when ``argu.plot_filename`` is set and background lengths were
    returned.
    """
    fg_stats = GC_window_compo.fg_GC_bins(argu.fg_file, argu.winlen,
                                          argu.step)
    fg_gc, fg_bins, fg_lens, fg_dinucs = fg_stats

    # NOTE(review): the bins returned by bg_GC_bins are discarded and None
    # is handed to generate_sequences - presumably it reads the bins back
    # from bg_dir; confirm against GC_window_compo.
    bg_bins = None
    if not argu.bg_file:
        test_non_empty_bg_dir(argu.bg_dir)
    else:
        test_empty_bg_dir(argu.bg_dir)
        _, _, _, _ = GC_window_compo.bg_GC_bins(argu.bg_file, argu.bg_dir)

    bg_gc, bg_lens, bg_dinucs = GC_window_compo.generate_sequences(
        fg_bins, bg_bins, argu.bg_dir, argu.deviation, argu.winlen,
        argu.step, argu.nfold, argu.random_seed)

    # No plot basename requested, or nothing selected: skip the QC plots.
    if not argu.plot_filename or len(bg_lens) == 0:
        return

    basename = argu.plot_filename
    make_gc_plot(fg_gc, bg_gc, basename)
    make_len_plot(fg_lens, bg_lens, basename)
    make_dinuc_plot(fg_dinucs, bg_dinucs, basename)
    make_dinuc_acgt_only_plot(fg_dinucs, bg_dinucs, basename)


def kmer_shuffling_arg_parsing(subparsers):
    """Register the ``k`` (k-mer shuffling) subcommand on ``subparsers``.

    Options: ``-f/--foreground`` (required), ``-k/--kmer``, ``-n/--nfold``,
    ``-p/--plot_filename`` and ``-e/--seed``. The seed defaults to the
    current time, rounded to whole seconds, taken when the parser is
    built. The subcommand is bound to ``kmer_shuffling_generator``
    through the ``func`` default.
    """
    shuffler = subparsers.add_parser('k', help="k-mer shuffling generator")

    shuffler.add_argument(
        '-f', '--foreground', required=True, type=str, dest="fg_file",
        action="store", help="Foreground file in fasta format")

    shuffler.add_argument(
        "-k", "--kmer", required=False, type=int, dest="kmer",
        action="store", default=2,
        help="K-mer used for the shuffling (default: 2)")

    nfold_help = ("How many background sequences per each foreground "
                  "sequence will be generated (default: 1)")
    shuffler.add_argument(
        '-n', '--nfold', required=False, type=int, dest="nfold",
        action="store", default=1, help=nfold_help)

    plot_help = ("Basename for the QC plot and metric files "
                 "(default: no QC plot created)")
    shuffler.add_argument(
        '-p', '--plot_filename', required=False, dest="plot_filename",
        type=str, action="store", default="", help=plot_help)

    # Time-based default so unseeded runs differ from one another.
    default_seed = int(round(time.time()))
    shuffler.add_argument(
        '-e', '--seed', required=False, dest='random_seed', type=int,
        action="store", default=default_seed,
        help="Seed number to initialize the random number generator")

    shuffler.set_defaults(func=kmer_shuffling_generator)


def kmer_window_shuffling_arg_parsing(subparsers):
    """Register the ``w`` (windowed k-mer shuffling) subcommand.

    Options: ``-f/--foreground`` (required), ``-k/--kmer``,
    ``-w/--winlen``, ``-s/--step``, ``-n/--nfold``, ``-p/--plot_filename``
    and ``-e/--seed``. The seed defaults to the current time, rounded to
    whole seconds, taken when the parser is built. The subcommand is
    bound to ``kmer_shuffling_window_generator`` through the ``func``
    default.
    """
    help_str = "k-mer shuffling within a sliding window generator"
    parser_w = subparsers.add_parser('w', help=help_str)
    parser_w.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    parser_w.add_argument("-k", "--kmer", required=False, type=int,
                          dest="kmer", action="store", default=2,
                          help="K-mer used for the shuffling (default: 2)")
    parser_w.add_argument("-w", "--winlen", required=False, type=int,
                          dest="winlen", action="store", default=100,
                          help="Window length (default: 100)")
    parser_w.add_argument("-s", "--step", required=False, type=int,
                          dest="step", action="store", default=50,
                          help="Sliding step (default: 50)")
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be generated (default: 1)"
    parser_w.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_w.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_w.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_w.set_defaults(func=kmer_shuffling_window_generator)


def gc_compo_arg_parsing(subparsers):
    """Register the ``g`` (%GC distribution-based chooser) subcommand.

    Options: ``-f/--foreground`` and ``-r/--bgdirectory`` (both required),
    ``-b/--background``, ``-n/--nfold``, ``-l/--length``,
    ``-p/--plot_filename`` and ``-e/--seed``. ``--length`` is a flag that
    stores 1 in ``len_opt`` (0 when absent). The seed defaults to the
    current time, rounded to whole seconds, taken when the parser is
    built. The subcommand is bound to ``gc_compo_generator`` through the
    ``func`` default.
    """
    # argparse %-formats help strings, hence the doubled percent sign.
    help_str = "%%GC distribution-based background chooser"
    parser_g = subparsers.add_parser('g', help=help_str)
    parser_g.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    help_str = "Background directory (must be empty if --background "
    help_str += "is used). See documentation for details."
    parser_g.add_argument("-r", "--bgdirectory", required=True, type=str,
                          dest="bg_dir", action="store",
                          help=help_str)
    help_str = "Background file in fasta format. Not necessary if a "
    help_str += "background directory has already been computed previously."
    parser_g.add_argument("-b", "--background", required=False, type=str,
                          dest="bg_file", action="store",
                          help=help_str)
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be chosen (default: 1)"
    parser_g.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Try to match the length as closely as possible "
    help_str += "(not set by default)"
    parser_g.add_argument('-l', '--length', required=False, dest="len_opt",
                          action="store_const", const=1, default=0,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_g.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_g.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_g.set_defaults(func=gc_compo_generator)


def gc_compo_window_arg_parsing(subparsers):
    """Register the ``c`` (%GC distribution + window composition) subcommand.

    Options: ``-f/--foreground`` and ``-r/--bgdirectory`` (both required),
    ``-b/--background``, ``-w/--winlen``, ``-s/--step``,
    ``-d/--deviation``, ``-n/--nfold``, ``-l/--length``,
    ``-p/--plot_filename`` and ``-e/--seed``. ``--length`` is a flag that
    stores 1 in ``len_opt`` (0 when absent). The seed defaults to the
    current time, rounded to whole seconds, taken when the parser is
    built. The subcommand is bound to ``gc_compo_window_generator``
    through the ``func`` default.
    """
    # argparse %-formats help strings, hence the doubled percent signs.
    help_str = "%%GC distribution and %%GC composition within a sliding "
    help_str += "window background chooser"
    parser_c = subparsers.add_parser('c', help=help_str)
    parser_c.add_argument('-f', '--foreground', required=True, type=str,
                          dest="fg_file", action="store",
                          help="Foreground file in fasta format")
    help_str = "Background directory (must be empty if --background "
    help_str += "is used). See documentation for details."
    parser_c.add_argument("-r", "--bgdirectory", required=True, type=str,
                          dest="bg_dir", action="store",
                          help=help_str)
    help_str = "Background file in fasta format. Not necessary if a "
    help_str += "background directory has already been computed previously."
    parser_c.add_argument("-b", "--background", required=False, type=str,
                          dest="bg_file", action="store",
                          help=help_str)
    parser_c.add_argument("-w", "--winlen", required=False, type=int,
                          dest="winlen", action="store", default=100,
                          help="Window length (default: 100)")
    parser_c.add_argument("-s", "--step", required=False, type=int,
                          dest="step", action="store", default=50,
                          help="Sliding step (default: 50)")
    help_str = "Deviation from the mean (default: 2.6 for a "
    help_str += "threshold of mean + 2.6 * stdev)"
    parser_c.add_argument("-d", "--deviation", required=False, type=float,
                          dest="deviation", action="store", default=2.6,
                          help=help_str)
    help_str = "How many background sequences per each foreground sequence "
    help_str += "will be chosen (default: 1)"
    parser_c.add_argument('-n', '--nfold', required=False, type=int,
                          dest="nfold", action="store", default=1,
                          help=help_str)
    help_str = "Try to match the length as closely as possible "
    help_str += "(not set by default)"
    parser_c.add_argument('-l', '--length', required=False, dest="len_opt",
                          action="store_const", const=1, default=0,
                          help=help_str)
    help_str = "Basename for the QC plot and metric files "
    help_str += "(default: no QC plot created)"
    parser_c.add_argument('-p', '--plot_filename', required=False,
                          dest="plot_filename", type=str, action="store",
                          default="", help=help_str)
    # Time-based default so unseeded runs differ from one another.
    timeseed = int(round(time.time()))
    help_str = "Seed number to initialize the random number generator"
    parser_c.add_argument('-e', '--seed', required=False, dest='random_seed',
                          type=int, action="store", default=timeseed,
                          help=help_str)
    parser_c.set_defaults(func=gc_compo_window_generator)


class MyParser(argparse.ArgumentParser):
    """
    ArgumentParser variant that shows the full help text on a usage error.
    """
    def error(self, message):
        """Report ``message`` on stderr, print the help, exit with status 1."""
        report = 'error: %s\n' % message
        sys.stderr.write(report)
        # print_help() is called without a file argument, as in the
        # original, so the help goes to its default stream.
        self.print_help()
        raise SystemExit(1)


def arg_parsing(args=None):
    """Build the BiasAway command-line parser and parse the arguments.

    The parser has a ``-v/--version`` option and four required
    subcommands (``k``, ``w``, ``g``, ``c``), each registered by its own
    ``*_arg_parsing`` helper, which also binds the function to run as the
    ``func`` attribute of the result.

    Args:
        args: optional list of argument strings to parse; when ``None``
            (the default) argparse reads them from ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args``.
    """
    descr = '''Background generator offering the possibility of using very
    different ways of generating backgrounds lying into two categories:
        - Creation of new random sequences (generators):
            - k-mer shuffling using the foreground sequences
                -> type: `biasaway k -h`
            - k-mer shuffling within a sliding window using foreground
              sequences
                -> type: `biasaway w -h`
        - Extraction of sequences from a set of possible background sequences
          (choosers):
            - respecting the %GC distribution of the foreground (using %GC
              bins)
                -> type: `biasaway g -h`
            - respecting the %GC distribution as in the previous item and also
              respecting the %GC composition within a sliding window for %GC
              bin
                -> type: `biasaway c -h`
    '''
    # Raw formatter keeps the hand-made indentation of the description.
    parser = MyParser(description=descr,
                      formatter_class=argparse.RawDescriptionHelpFormatter)
    subparsers = parser.add_subparsers(dest='subcommand',
                                       title="Subcommands",
                                       description="Valid subcommands")
    parser.add_argument('-v', '--version', dest='version', action='version',
                        version='%(prog)s version '+VERSION)

    # Set as an attribute rather than a constructor keyword, as in the
    # original: running without a subcommand is a usage error.
    subparsers.required = True
    kmer_shuffling_arg_parsing(subparsers)
    kmer_window_shuffling_arg_parsing(subparsers)
    gc_compo_arg_parsing(subparsers)
    gc_compo_window_arg_parsing(subparsers)
    return parser.parse_args(args)


###############################################################################
#                                   MAIN
###############################################################################
if __name__ == "__main__":
    # Parse the command line, then run the function the chosen subcommand
    # bound to ``func``.
    parsed = arg_parsing()
    run_subcommand = parsed.func
    run_subcommand(parsed)
