#!/usr/bin/env python
from __future__ import division
import os
import sys
import argparse
import logging
import unittest
from PyDAIR.seq.IgSeq import IgSeq
from PyDAIR.io.PyDAIRIO import *
from PyDAIR.io.PyDAIRReport import *
from PyDAIR.utils.PyDAIRUtils import *
from PyDAIR.utils.PyDAIRArgs import *
from PyDAIR.app.PyDAIRAPP import *
from PyDAIR.stats.PyDAIRStats import *
#from PyDAIR.plot.PyDAIRPlot import *





def main_parse(args):
    """Run the ``parse`` action.

    Builds one ``PyDAIRBlastArgs`` per gene (V, D, J) from the matching
    ``args.<gene>_*`` options, wraps them together with the FASTA paths,
    the output path and the V/J motifs in a ``PyDAIRParseSeqArgs``, and then
    drives a ``PyDAIRAPPParseSeq`` instance through its steps, logging a
    progress message before each one.

    Args:
        args: Namespace produced by the ``parse`` sub-command parser.
    """
    # Suffixes of the per-gene options, in the positional order that
    # PyDAIRBlastArgs is called with.
    option_suffixes = ('blastdb', 'match_score', 'mismatch_score',
                       'gap_open_penalty', 'gap_extend_penalty',
                       'wordsize', 'evalue_cutoff')

    def blast_settings(gene):
        # Look up args.<gene>_<suffix> for every suffix, e.g. args.v_blastdb.
        values = [getattr(args, gene + '_' + suffix) for suffix in option_suffixes]
        return PyDAIRBlastArgs(*values)

    v_settings = blast_settings('v')
    d_settings = blast_settings('d')
    j_settings = blast_settings('j')
    seq_args = PyDAIRParseSeqArgs(args.q_fasta, args.v_fasta, args.d_fasta,
                                  args.j_fasta, args.output,
                                  v_settings, d_settings, j_settings,
                                  args.v_motif, args.j_motif)

    logging.info('[ 1/10] Setting up parameters.')
    app = PyDAIRAPPParseSeq(seq_args)

    # Each entry is (progress message, step to execute); order matters.
    steps = (
        ('[ 2/10] BLAST for V genes.',               lambda: app.blast('v')),
        ('[ 3/10] BLAST for J genes.',               lambda: app.blast('j')),
        ('[ 4/10] Parsing VJ BLAST results.',        lambda: app.parse_VJ()),
        ('[ 5/10] Writing temporary files.',         lambda: app.write_pydair()),
        ('[ 6/10] Writing unaligned sequences.',     lambda: app.write_fasta('unaligned_seq')),
        ('[ 7/10] BLAST for D genes.',               lambda: app.blast('d')),
        ('[ 8/10] Parsing D BLAST result.',          lambda: app.parse_VDJ()),
        ('[ 9/10] Writing result file.',             lambda: app.write_pydair()),
    )
    for message, run_step in steps:
        logging.info(message)
        run_step()
    logging.info('[10/10] Finishing.')




def main_stats(args):
    """Run the ``stats`` action.

    Collects the input files and sample names from ``args``, builds a
    ``PyDAIRStatsArgs`` and uses ``PyDAIRAPPStats`` to write the summary and
    create the report, logging a progress message before each step.

    ``args.input`` and ``args.name`` are lists of lists because the options
    are declared with ``nargs='*'`` and ``action='append'``: one inner list
    per occurrence of ``-i`` / ``-n`` on the command line.

    Args:
        args: Namespace produced by the ``stats`` sub-command parser.
    """
    # Flatten every occurrence of -i / -n.  Reading only element [0] silently
    # dropped the values of a repeated option (e.g. ``-i a -i b``); for a
    # single occurrence the result is the same list as before.
    pydair_files = [path for group in args.input for path in group]
    sample_names = [name for group in args.name for name in group]
    logging.info('[1/4] Read data files.')
    pydair_args = PyDAIRStatsArgs(sample_names, pydair_files,
                                discard_ambiguous_D = args.discard_ambiguous_D,
                                productive_only = args.productive_only,
                                estimate_vdj_combination = args.estimate_vdj_combination,
                                output_prefix = args.output)
    pydairapp = PyDAIRAPPStats(pydair_args)
    logging.info('[2/4] Write statistical data.')
    pydairapp.write_summary()
    # Step 3 was previously executed without a progress message, leaving a
    # gap between '[2/4]' and '[4/4]' in the log.
    logging.info('[3/4] Create report.')
    pydairapp.create_report()
    logging.info('[4/4] Finishing...')



def main_sim(args):
    """Run the ``sim`` action.

    Packs the simulation options from ``args`` into a ``PyDAIRSimArgs`` and
    calls ``generate_seq()`` on a ``PyDAIRAPPSim`` built from it.

    Args:
        args: Namespace produced by the ``sim`` sub-command parser.
    """
    # Positional order expected by PyDAIRSimArgs: output and sequence count,
    # then (fasta, 5'-deletions, 3'-deletions) for V, D and J, then the
    # junction insertions, mutation probability and random seed.
    sim_settings = (
        args.output, args.n,
        args.v_fasta, args.n_v_5del, args.n_v_3del,
        args.d_fasta, args.n_d_5del, args.n_d_3del,
        args.j_fasta, args.n_j_5del, args.n_j_3del,
        args.n_vd_ins, args.n_dj_ins,
        args.p_mutation, args.seed,
    )
    simulator = PyDAIRAPPSim(PyDAIRSimArgs(*sim_settings))
    simulator.generate_seq()


def main_eval(args):
    """Run the ``eval`` action.

    Wraps the simulation-condition path, the parse-result path and the output
    path in a ``PyDAIREvalArgs`` and calls ``eval()`` on a ``PyDAIRAPPEval``
    built from it.

    Args:
        args: Namespace produced by the ``eval`` sub-command parser.
    """
    eval_settings = PyDAIREvalArgs(args.sim_condition, args.parse_result, args.output)
    evaluator = PyDAIRAPPEval(eval_settings)
    evaluator.eval()



def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    One sub-command is declared per action (``parse``, ``stats``, ``sim`` and
    ``eval``).  The name of the selected sub-command is available as the
    ``action`` attribute of the returned namespace.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is the previous
            behaviour.

    Returns:
        argparse.Namespace with the options of the selected sub-command.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, including a
            missing sub-command.
    """
    parser = argparse.ArgumentParser()
    # On Python 3 a sub-command is optional unless explicitly marked as
    # required; without it the namespace has no ``action`` attribute and the
    # caller fails with AttributeError.  ``dest`` gives argparse a name to
    # use in the "required" error message and agrees with the per-parser
    # ``set_defaults(action=...)`` calls below.
    subparsers = parser.add_subparsers(help = 'actions', dest = 'action')
    subparsers.required = True

    #
    # Parse sequence.
    #
    parse_parser = subparsers.add_parser('parse', help = 'Identifying of VDJ and estimation of CDR3.')
    ## general parameters
    parse_parser.add_argument('-q', '--q-fasta',  help = 'Path to FASTA file that contains immunoglobulin heavy chain sequences.', required = True)
    parse_parser.add_argument('-o', '--output',   help = 'Path to file for writing anaysis results.', required = True)
    parse_parser.add_argument('-v', '--v-fasta', help = 'Path to FASTA file of V sequences.', required = True)
    parse_parser.add_argument('-d', '--d-fasta', help = 'Path to FASTA file of D sequences.', required = True)
    parse_parser.add_argument('-j', '--j-fasta', help = 'Path to FASTA file of J sequences.', required = True)
    ## alignment parameters V
    parse_parser.add_argument('--v-blastdb',            help = 'Path to V gene BLAST database index.', required = True)
    parse_parser.add_argument('--v-match-score',        help = 'Score (> 0) for a nucleotide match for V gene.',    type = int, default =  3)
    parse_parser.add_argument('--v-mismatch-score',     help = 'Score (< 0) for a nucleotide mismatch for V gene.', type = int, default = -3)
    parse_parser.add_argument('--v-gap-open-penalty',   help = 'Penalty (> 0) to open a gap for V gene.',           type = int, default =  6)
    parse_parser.add_argument('--v-gap-extend-penalty', help = 'Penaly (> 0 )to extend a gap for V gene.',          type = int, default =  6)
    parse_parser.add_argument('--v-wordsize',           help = 'Word size to find hotspots by BLAST for V gene.',   type = int, default = 10)
    parse_parser.add_argument('--v-evalue-cutoff',      help = 'Expectation value (e-value) threshold for assiging V gene to immunoglobulin sequence.', type = float, default = 1e-50)
    ## alignment parameters D
    parse_parser.add_argument('--d-blastdb',            help = 'Path to D gene BLAST database index.', required = True)
    parse_parser.add_argument('--d-match-score',        help = 'Score (> 0) for a nucleotide match for D gene.',    type = int, default  =  1)
    parse_parser.add_argument('--d-mismatch-score',     help = 'Score (< 0) for a nucleotide mismatch for D gene.', type = int, default  = -1)
    parse_parser.add_argument('--d-gap-open-penalty',   help = 'Penalty (> 0) to open a gap for D gene.',           type = int, default  =  0)
    parse_parser.add_argument('--d-gap-extend-penalty', help = 'Penaly (> 0 )to extend a gap for D gene.',          type = int, default  =  2)
    parse_parser.add_argument('--d-wordsize',           help = 'Word size to find hotspots by BLAST for D gene.',   type = int, default  =  4)
    parse_parser.add_argument('--d-evalue-cutoff',      help = 'Expectation value (e-value) threshold for assiging D gene to immunoglobulin sequence.', type = float, default = 1e-2)
    ## alignment parameters J
    parse_parser.add_argument('--j-blastdb',            help = 'Path to J gene BLAST database index.', required = True)
    parse_parser.add_argument('--j-match-score',        help = 'Score (> 0) for a nucleotide match for J gene.',    type = int, default  =  3)
    parse_parser.add_argument('--j-mismatch-score',     help = 'Score (< 0) for a nucleotide mismatch for J gene.', type = int, default  = -3)
    parse_parser.add_argument('--j-gap-open-penalty',   help = 'Penalty (> 0) to open a gap for J gene.',           type = int, default  =  6)
    parse_parser.add_argument('--j-gap-extend-penalty', help = 'Penaly (> 0 )to extend a gap for J gene.',          type = int, default  =  6)
    parse_parser.add_argument('--j-wordsize',           help = 'Word size to find hotspots by BLAST for J gene.',   type = int, default  =  7)
    parse_parser.add_argument('--j-evalue-cutoff',      help = 'Expectation value (e-value) threshold for assiging J gene to immunoglobulin sequence.', type = float, default = 1e-5)
    # motif
    parse_parser.add_argument('--v-motif',  help = 'The motif at V gene for detemrining CDR3 segment. Usually it is YYC.', type = str, default = 'YYC')
    parse_parser.add_argument('--j-motif',  help = 'The motif at J gene for detemrining CDR3 segment. Usually it is WG.G or FG.G.', type = str, default = 'WG.G')

    parse_parser.set_defaults(action = 'parse')

    #
    # Statistical analysis.
    #
    stat_parser = subparsers.add_parser('stats', help = 'Statistical analysis.')
    ## general parameters
    # nargs='*' combined with action='append' yields a list of lists: one
    # inner list for each occurrence of the option on the command line.
    stat_parser.add_argument('-i', '--input', help = 'Input PyDAIR files. Multiple files should be separated by a blank.', nargs = '*', action = 'append', required = True)
    stat_parser.add_argument('-n', '--name',  help = 'Sample names for each PyDAIR file. Multiple names should be separated by a blank.', nargs= '*', action = 'append', required = True)
    stat_parser.add_argument('-o', '--output', help = 'Path to file for writing anaysis results.', required = True)
    # The help text previously started with "Contain ...", which contradicted
    # the name of the flag.
    stat_parser.add_argument('--discard-ambiguous-D', help = 'Discard immunoglobulin heavy chain sequences that have unidentifiable D genes when calculating the frequencies.', action = 'store_true')
    stat_parser.add_argument('--productive-only', help = 'Only analysis the productive sequences.', action = 'store_true')
    #stat_parser.add_argument('--estimate-cdr3-population', help = 'Estimation of CDR-H3 population sizes by sampling-resampling study.', action = 'store_true')
    stat_parser.add_argument('--estimate-vdj-combination', help = 'Estimation of the numbers of VDJ combinations by rarefaction study.', action = 'store_true')
    stat_parser.set_defaults(discard_ambiguous_D = False)
    stat_parser.set_defaults(productive_only     = False)
    #stat_parser.set_defaults(estimate_cdr3_population = False)
    stat_parser.set_defaults(estimate_vdj_combination = False)
    stat_parser.set_defaults(action = 'stats')

    #
    # Generate simulation.
    #
    sim_parser = subparsers.add_parser('sim', help = 'Generate artificial IgH sequences.')
    sim_parser.add_argument('-n', help = 'Number of sequences', type = int, default = 10000)
    sim_parser.add_argument('-o', '--output', help = 'Path to file for writing artificial sequences.', required = True)
    sim_parser.add_argument('-v', '--v-fasta', help = 'Path to FASTA file of V sequences.', required = True)
    sim_parser.add_argument('-d', '--d-fasta', help = 'Path to FASTA file of D sequences.', required = True)
    sim_parser.add_argument('-j', '--j-fasta', help = 'Path to FASTA file of J sequences.', required = True)
    sim_parser.add_argument('--n-v-5del', help = 'Number of deletions on 5-end of V gene.', type = int, default = 10)
    sim_parser.add_argument('--n-v-3del', help = 'Number of deletions on 3-end of V gene.', type = int, default = 3)
    sim_parser.add_argument('--n-d-5del', help = 'Number of deletions on 5-end of D gene.', type = int, default = 3)
    sim_parser.add_argument('--n-d-3del', help = 'Number of deletions on 3-end of D gene.', type = int, default = 3)
    sim_parser.add_argument('--n-j-5del', help = 'Number of deletions on 5-end of J gene.', type = int, default = 5)
    sim_parser.add_argument('--n-j-3del', help = 'Number of deletions on 3-end of J gene.', type = int, default = 10)
    sim_parser.add_argument('--n-vd-ins', help = 'Number of insertions on VD junction.', type = int, default = 5)
    sim_parser.add_argument('--n-dj-ins', help = 'Number of insertions on DJ junction.', type = int, default = 5)
    sim_parser.add_argument('--p-mutation', help = 'Probability of mutation on each nucleotide.', type = float, default = 0.05)
    sim_parser.add_argument('--seed', help = 'random seed.', type = int, default = None)
    sim_parser.set_defaults(action = 'sim')

    #
    # Evaluation.
    #
    eval_parser = subparsers.add_parser('eval', help = 'Evaluation performances.')
    eval_parser.add_argument('-o', '--output', help = 'Path to file for writing evaluation results.', required = True)
    eval_parser.add_argument('--sim-condition', help = 'Path to the FASTA file which generated by sim action mode.', required = True)
    eval_parser.add_argument('--parse-result', help = 'Path to the PYDAIR file which generated by parse action mode.', required = True)
    eval_parser.set_defaults(action = 'eval')

    # argv=None makes argparse fall back to sys.argv[1:].
    args = parser.parse_args(argv)
    return args



if __name__ == '__main__':
    # Banner printed before argument handling.  A raw string keeps every
    # backslash of the ASCII art literal; the non-raw version relied on
    # invalid escape sequences such as '\ ', '\/' and '\_', which newer
    # Python versions report as a SyntaxWarning.  The printed text is the same.
    desc = r'''
                                         ______
                             <><        /      \  /
                O        <><           / O      \/
              O            <><         \        /\
        _    o  ___                     \______/  \
       /o\/    /o  \/         O O     __
       \_/\    \___/\           O   \/ o\
             _            __   O    /\__/      __
            /o\/        \/ o\ o               /o \/
            \_/\        /\__/     ><>         \__/\

'''
    print(desc)
    args = parse_args()
    # Entry point for each sub-command name stored in ``args.action``.
    handlers = {
        'parse': main_parse,
        'stats': main_stats,
        'sim': main_sim,
        'eval': main_eval,
    }
    # ``action`` can be absent when no sub-command was given on the command
    # line; exit with a message instead of failing with AttributeError.
    handler = handlers.get(getattr(args, 'action', None))
    if handler is None:
        sys.exit('No action given; choose one of: parse, stats, sim, eval (see -h).')
    handler(args)
    





