#!/usr/bin/env python

import argparse
import logging
import os
import sys

from caspredict.controller import Controller
from caspredict.prodigal import Prodigal
from caspredict.hmmer import HMMER
from caspredict.castyping import Typer
from caspredict.minced import Minced
from caspredict.xgb import XGB
from caspredict.crisprcas import CRISPRCas
from caspredict.plot import Map

########## Arguments ##########
# Command-line interface definition. Only the parser is built here;
# parse_args() is invoked later, when the workflow is initialised.
ap = argparse.ArgumentParser()

# Required positional arguments
ap.add_argument('input', help='Input fasta file')
ap.add_argument('output', help='Prefix for output directory')

# Optional general arguments
ap.add_argument('-t', '--threads', help='Number of parallel processes [%(default)s].', default=4, type=int)
ap.add_argument('--prodigal', help='Which mode to run prodigal in [%(default)s].', default='single', type=str, choices=['single','meta'])
ap.add_argument('--skip_check', help='Skip check of input.', action='store_true')
ap.add_argument('--keep_tmp', help='Keep temporary files (prodigal, hmmer, minced).', action='store_true')
ap.add_argument('--log_lvl', help='Logging level [%(default)s].', default='INFO', type=str, choices=['DEBUG','INFO','WARNING','ERROR'])
ap.add_argument('--redo_typing', help='Redo the typing. Skip prodigal and HMMER and load the hmmer.tab from the output dir.', action='store_true')

# Data: location of the database (defaults to an empty string)
apd = ap.add_argument_group('data arguments')
apd.add_argument('--db', help='Path to database.', default='', type=str)

# Thresholds applied to Cas operons
apt = ap.add_argument_group('cas threshold arguments')
apt.add_argument('--dist', help='Max allowed distance between genes in operon [%(default)s].', default=3, type=int)
apt.add_argument('--overall_eval', help='Overall E-value threshold [%(default)s].', default=0.01, type=float)
apt.add_argument('--overall_cov_seq', help='Overall sequence coverage threshold [%(default)s].', default=0.3, type=float)
apt.add_argument('--overall_cov_hmm', help='Overall HMM coverage threshold [%(default)s].', default=0.3, type=float)

# CRISPRs: array/operon linking and repeat subtype prediction
apc = ap.add_argument_group('crispr threshold arguments')
apc.add_argument('--ccd', help='Distance (bp) threshold to connect Cas operons and CRISPR arrays [%(default)s].', default=10000, type=int)
apc.add_argument('--pred_prob', help='Prediction probability cut-off for assigning subtype to CRISPR repeats [%(default)s].', default=0.75, type=float)
apc.add_argument('--kmer', help='kmer size. Has to match training kmer size! [%(default)s].', default=4, type=int)

# Plot: options controlling the CRISPR-Cas map
app = ap.add_argument_group('plotting arguments')
app.add_argument('--no_plot', help='Do not draw a map of CRISPR-Cas.', action='store_true')
app.add_argument('--scale', help='Scaling of plot [%(default)s].', default=10, type=int)
app.add_argument('--no_grid', help='Do not add grid to plot.', action='store_true')
# Help text fixed: the original sentence was missing the word "many".
app.add_argument('--expand', help='Expand operons with un-annotated genes. The value determines by how many genes in each end to expand. 0 only fills gaps [%(default)s].', default=0, type=int)
app.add_argument('--plot_expand', help='How many bp to expand plot in each direction per gene expanded (see argument above) [%(default)s].', default=1000, type=int)

# Workflow starts here


########## Initialize ##########
# Parse the command line and hand the resulting namespace to the Controller.
master = Controller(ap.parse_args())

########## Pipeline ##########
# Each entry is (stage class, name of the method that runs the stage).
# A stage is constructed from the object of the stage before it (the first
# one from the Controller) and is run before the next stage is constructed.
workflow = (
    (Prodigal, 'run_prod'),     # Prodigal
    (HMMER, 'main_hmm'),        # Hmmer
    (Typer, 'typing'),          # Operons
    (Minced, 'run_minced'),     # CRISPRs
    (XGB, 'xgb_run'),           # RepeatType
    (CRISPRCas, 'crisprcas'),   # CRISPR-Cas
    (Map, 'plot'),              # Plot
)

# Keep every stage object referenced so all of them live until the script ends.
stages = [master]
for stage_cls, entry_point in workflow:
    current = stage_cls(stages[-1])
    getattr(current, entry_point)()
    stages.append(current)

######### Clean ###########
master.clean()


