#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: alerojo, 0mician
"""

import argparse
import logging
import os
import shutil
import sys


from mapper import mauve, union
from summary import extract_main
from gene_predict import prokka, blast
from kmer import kmer, clustermap
from quastunmap import quast
from coverage import cvg_main
from tandem_repeats import trf

""" SASpector

SASpector is a tool that compares a short-read assembly with a reference bacterial genome (for example, one obtained via hybrid assembly) by extracting the missing (unmapped) regions from the reference and analyzing them for functional and compositional patterns.
The aim of the analysis is to explain why these regions are missed by the short-read assembly and whether important parts of the genome are lost when a resolved genome is lacking.

The tool takes as global inputs the reference genome and a short-read assembly as contigs/draft genome, both in FASTA format.

"""

def is_available(program):
    """Verify that an external program can be found on the PATH.

    The outcome is logged. When the program cannot be located the process
    is terminated with a non-zero exit status, so that calling shells and
    workflow managers can detect the failure.

    Args:
        program: Name of the executable, looked up with ``shutil.which``.

    Raises:
        SystemExit: With exit status 1 if ``program`` is not found.
    """
    if shutil.which(program) is None:
        # A missing dependency is fatal: report it as an error and make
        # the exit status reflect the failure (a bare sys.exit() means 0).
        logging.error(
            "%s installed?: Not Available on the path.. stopping SASpector.",
            program,
        )
        sys.exit(1)
    logging.info("%s installed?: OK", program)

def _build_parser():
    """Create the argparse parser describing SASpector's command line."""
    parser = argparse.ArgumentParser(prog='SASpector - Short-read Assembly inSpector', description='')
    parser.add_argument('reference', type=str, metavar='Reference FASTA file', help='Hybrid assembly FASTA file as reference genome')
    parser.add_argument('contigs', type=str, metavar='Contigs FASTA file', help='Illumina FASTA file as contigs/draft genome')
    # NOTE(review): the prefix is optional, so output names are built from
    # None when it is omitted -- confirm whether it should be required too.
    parser.add_argument('-p', '--prefix', metavar='Prefix', type=str, help='Genome ID')
    # The output directory is used unconditionally by main(), so it must be given.
    parser.add_argument('-dir', '--outdir', metavar='Output path', required=True, help='Output directory')
    parser.add_argument('--force', help='Force output directory overwrite', action='store_true')
    # A bare "-fl" selects the documented default length of 100; omitting
    # the flag altogether disables flanking regions (0).
    parser.add_argument('-fl', '--flanking', nargs='?', metavar='Length', const=100, type=int, help='Add flanking regions [Default = 100]', default=0)
    # A bare "-db" yields the placeholder 'proteindb', which main() rejects
    # with an explanatory message.
    parser.add_argument('-db', '--proteindb', nargs='?', metavar='Protein FASTA file', const='proteindb', type=str, help='BLAST protein database FASTA file')
    parser.add_argument('-trf', '--tandem_repeats', help='Run tandem repeat finder within missing regions', action='store_true')
    # There is no meaningful k for a bare "-k", so a value is mandatory.
    parser.add_argument('-k', '--kmers', metavar='k size', type=int, help='Calculate kmer frequencies', default=0)
    parser.add_argument('-q', '--quast', help='Run QUAST for unmapped regions against reference assembly', action='store_true')
    # A bare "-c" yields the placeholder 'coverage', which main() rejects.
    parser.add_argument('-c', '--coverage', nargs='?', const='coverage', metavar='BAM file', type=str, help='Run SAMtools bedcov to look at short-read coverage in the missing regions. Needs alignment of reads to the reference genome in BAM format')
    return parser


def main():
    """Run the SASpector pipeline from the command line.

    Parses the arguments, configures logging, checks that the external
    programs are on the PATH, prepares the output directory and then runs
    the analysis steps in order: union, mauve, extract_main and prokka
    always; blast, kmer/clustermap, trf, quast and cvg_main only when the
    corresponding option was given.

    Raises:
        SystemExit: With status 1 when an option was given without its
            required file, when the output directory already exists and
            ``--force`` was not passed, or (via ``is_available``) when an
            external program is missing. argparse itself exits on invalid
            command lines.
    """
    args = _build_parser().parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] %(levelname)s: %(message)s',
        datefmt='%d/%m %H:%M:%S'
    )
    logging.info("welcome to SASpector v0.0.2")

    # Reject options given without their file before any long-running step
    # starts and before the output directory is touched.
    if args.proteindb == "proteindb":
        logging.error("Please provide a protein database file or using the protein database provided with SASpector")
        sys.exit(1)
    if args.coverage == "coverage":
        logging.error("Please provide a BAM file with the reads aligned to the reference genome")
        sys.exit(1)

    # checking if all softwares are available
    for program in ("progressiveMauve", "prokka", "blastx", "quast.py",
                    "samtools", "trf", "union"):
        is_available(program)

    if os.path.exists(args.outdir):
        if not args.force:
            logging.error("Output folder already exists (use --force if you really want to overwrite it) - SASpector exit")
            sys.exit(1)
        shutil.rmtree(args.outdir, ignore_errors=True)
    os.makedirs(args.outdir)

    # union() reports whether it produced a concatenated reference; if so,
    # the remaining steps work on that file instead of the original one.
    # NOTE(review): this path carries no outdir component -- confirm where
    # union() actually writes the concatenated FASTA.
    concatenated = union(args.reference, args.prefix, args.outdir)
    if concatenated:
        args.reference = "{prefix}_concatenated.fasta".format(prefix=args.prefix)

    mauve(args.reference, args.contigs, args.prefix, args.outdir)
    # Only the mapped and conflict locations are needed later (coverage step).
    mappedlocations, _unmapped, conflictlocations, _reverse = extract_main(
        args.reference, args.prefix, args.flanking, args.outdir
    )
    prokka(args.prefix, args.outdir)

    if args.proteindb:
        blast(args.outdir, args.prefix, args.proteindb)
    if args.kmers:
        kmer(args.kmers, args.prefix, args.outdir)
        clustermap(args.prefix, args.outdir)
    if args.tandem_repeats:
        trf(args.prefix, args.outdir)
    if args.quast:
        quast(args.reference, args.outdir, args.prefix)
    if args.coverage:
        cvg_main(mappedlocations, conflictlocations, args.coverage, args.reference, args.outdir, args.prefix)

    logging.info("SASpector has completed the analysis!")

# Start the pipeline only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
