#!/usr/bin/env python

import argparse
import logging
import os
import sys
import pkg_resources

from cctyper.controller import Controller
from cctyper.prodigal import Prodigal
from cctyper.hmmer import HMMER
from cctyper.castyping import Typer
from cctyper.minced import Minced
from cctyper.xgb import XGB
from cctyper.crisprcas import CRISPRCas
from cctyper.plot import Map

########## Command-line interface ##########
# The version shown in the help text is read from the installed cctyper distribution.
cctyper_version = pkg_resources.require("cctyper")[0].version
ap = argparse.ArgumentParser(
    description='CRISPRCasTyper version {}'.format(cctyper_version))

# Positional arguments (both mandatory)
ap.add_argument('input', help='Input fasta file')
ap.add_argument('output', help='Prefix for output directory')

# General options
ap.add_argument(
    '-t', '--threads',
    type=int,
    default=4,
    help='Number of parallel processes [%(default)s].')
ap.add_argument(
    '--prodigal',
    type=str,
    choices=['single','meta'],
    default='single',
    help='Which mode to run prodigal in [%(default)s].')
ap.add_argument(
    '--circular',
    action='store_true',
    help='Input should be treated as circular.')
ap.add_argument(
    '--skip_check',
    action='store_true',
    help='Skip check of input.')
ap.add_argument(
    '--keep_tmp',
    action='store_true',
    help='Keep temporary files (prodigal, hmmer, minced).')
ap.add_argument(
    '--log_lvl',
    type=str,
    choices=['DEBUG','INFO','WARNING','ERROR'],
    default='INFO',
    help='Logging level [%(default)s].')
ap.add_argument(
    '--redo_typing',
    action='store_true',
    help='Redo the typing. Skip prodigal and HMMER and load the hmmer.tab from the output dir.')
ap.add_argument(
    '--simplelog',
    action='store_true',
    help='No color or progress bar in log.')

# Database location
data_opts = ap.add_argument_group('data arguments')
data_opts.add_argument(
    '--db',
    type=str,
    default='',
    help='Path to database.')

# Cut-offs applied to cas genes / operons
cas_opts = ap.add_argument_group('cas threshold arguments')
cas_opts.add_argument(
    '--dist',
    type=int,
    default=3,
    help='Max allowed number of unknown genes between cas genes in operon [%(default)s].')
cas_opts.add_argument(
    '--overall_eval',
    type=float,
    default=0.01,
    help='Overall E-value threshold [%(default)s].')
cas_opts.add_argument(
    '--overall_cov_seq',
    type=float,
    default=0.3,
    help='Overall sequence coverage threshold [%(default)s].')
cas_opts.add_argument(
    '--overall_cov_hmm',
    type=float,
    default=0.3,
    help='Overall HMM coverage threshold [%(default)s].')

# Cut-offs applied to CRISPR arrays and repeat typing
crispr_opts = ap.add_argument_group('crispr threshold arguments')
crispr_opts.add_argument(
    '--ccd',
    type=int,
    default=10000,
    help='Distance (bp) threshold to connect Cas operons and CRISPR arrays [%(default)s].')
crispr_opts.add_argument(
    '--pred_prob',
    type=float,
    default=0.75,
    help='Prediction probability cut-off for assigning subtype to CRISPR repeats [%(default)s].')
crispr_opts.add_argument(
    '--kmer',
    type=int,
    default=4,
    help='kmer size. Has to match training kmer size! [%(default)s].')
crispr_opts.add_argument(
    '--repeat_id',
    type=int,
    default=70,
    help='Minimum average sequence identity between repeats for trusted arrays [%(default)s].')
crispr_opts.add_argument(
    '--spacer_id',
    type=int,
    default=55,
    help='Maximum average sequence identity between spacers for trusted arrays [%(default)s].')
crispr_opts.add_argument(
    '--spacer_sem',
    type=float,
    default=3.5,
    help='Maximum spacer length standard error of the mean for trusted arrays [%(default)s].')

# Options controlling the CRISPR-Cas map drawing
plot_opts = ap.add_argument_group('plotting arguments')
plot_opts.add_argument(
    '--no_plot',
    action='store_true',
    help='Do not draw a map of CRISPR-Cas.')
plot_opts.add_argument(
    '--scale',
    type=int,
    default=10,
    help='Scaling of plot [%(default)s].')
plot_opts.add_argument(
    '--no_grid',
    action='store_true',
    help='Do not add grid to plot.')
plot_opts.add_argument(
    '--expand',
    type=int,
    default=0,
    help='Expand operons with un-annotated genes. The value determines by how many bp in each end to expand. 0 only fills gaps [%(default)s].')
plot_opts.add_argument(
    '--custom_hmm',
    type=str,
    default='',
    help='Path to custom HMM database to decorate plot. Warning: This overwrites plotting of low-quality matches to Cas HMMs')

# Pipeline run.
# Every stage object below is built from the object of the stage before it
# and then executed, so the order of these statements must be kept.


########## Set-up ##########
# Parse the command line and hand the resulting namespace to the controller.
args = ap.parse_args()
controller = Controller(args)

########## Stage: Prodigal ##########
prodigal_stage = Prodigal(controller)
prodigal_stage.run_prod()

########## Stage: HMMER ##########
hmmer_stage = HMMER(prodigal_stage)
hmmer_stage.main_hmm()

########## Stage: operon typing ##########
typing_stage = Typer(hmmer_stage)
typing_stage.typing()

########## Stage: minced (CRISPRs) ##########
minced_stage = Minced(typing_stage)
minced_stage.run_minced()

########## Stage: repeat typing (XGB) ##########
xgb_stage = XGB(minced_stage)
xgb_stage.xgb_run()

########## Stage: CRISPR-Cas ##########
crisprcas_stage = CRISPRCas(xgb_stage)
crisprcas_stage.crisprcas()

########## Stage: plot ##########
map_stage = Map(crisprcas_stage)
map_stage.plot()

########## Clean-up ##########
# Final call goes back to the controller created at set-up.
controller.clean()


