#!/usr/bin/env python
#-*- coding: utf-8 -*-
import warnings
warnings.filterwarnings("ignore")
import sys
import argparse
import datetime
import getpass
import os
import easy_prime
"""

Output
------

The output folder will contain:
1. all pegRNA + ngRNA combinations for the input file (VCF or FASTA)
2. the top-1 pegRNA + ngRNA combination for each variant
3. a visualization of the top-1 combinations [TODO]
4. a summary file with one row per variant (summary.csv)

"""

def my_args():
	"""Define the command-line interface and parse the process arguments.

	Options:
		-f/--input_file  required; help text says "vcf or fasta".
		-c/--config      optional YAML parameter file (default: None).
		-v/--version     print the easy_prime version and exit.
		-o/--output      output directory; the default name embeds the
		                 current user name and today's date.

	Returns:
		argparse.Namespace: the parsed arguments.
	"""
	parser = argparse.ArgumentParser(
		description="easy_prime for pegRNA design",
		formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	)

	# Per-user, per-day default so repeated runs do not collide by accident.
	default_output = "easy_prime_%s_%s_result_dir" % (getpass.getuser(), str(datetime.date.today()))

	parser.add_argument('-f', '--input_file', required=True, help="vcf or fasta")
	parser.add_argument('-c', '--config', default=None, help="A YAML file specifying parameters")
	parser.add_argument(
		'-v', '--version',
		action='version',
		version='Easy-Prime version: %s' % (easy_prime.__version__),
	)
	parser.add_argument('-o', '--output', default=default_output, help="output dir")

	##------- add parameters above ---------------------
	return parser.parse_args()

def run_steps(t,**kwargs):
	"""Run the three processing stages on one target and gather its results.

	Args:
		t: object exposing ``init``, ``search`` and ``predict`` methods; each
			is called once, in that order, with ``**kwargs``.
		**kwargs: parameters forwarded unchanged to every stage.

	Returns:
		list: ``[topX, rawX, X_p, found_PE3b, found_PE3, found_dPAM,
		found_PE2, N_sgRNA_found]`` read from ``t`` after the last stage.
	"""
	# Look each stage up right before calling it, in the fixed pipeline order.
	for stage_name in ("init", "search", "predict"):
		getattr(t, stage_name)(**kwargs)

	result_fields = (
		"topX",
		"rawX",
		"X_p",
		"found_PE3b",
		"found_PE3",
		"found_dPAM",
		"found_PE2",
		"N_sgRNA_found",
	)
	return [getattr(t, field) for field in result_fields]

def main():
	"""Run the Easy-Prime command-line workflow end to end.

	Steps:
		1. Parse the CLI arguments and load parameters from the optional config.
		2. Read the input as a tab-separated VCF; if that fails for any
		   reason, retry it as FASTA via ``fasta2vcf``.
		3. Build one ``target_mutation`` per row and run ``run_steps`` on each,
		   serially when ``n_jobs == 1`` and through joblib otherwise.
		4. Write ``summary.csv``, ``topX_pegRNAs.csv``, ``rawX_pegRNAs.csv.gz``
		   and ``X_p_pegRNAs.csv.gz`` into the output directory.

	Exits:
		status 1 if the input cannot be read as VCF or FASTA, or contains no
		variants; status 0 (after writing ``summary.csv``) if no pegRNA was
		found for any variant.

	Raises:
		OSError: if the output directory cannot be created.
	"""
	args = my_args()

	# ------------------------- get parameters ----------------------------------------------
	from easy_prime.utils import get_parameters, print_parameters,vcf2fasta,fasta2vcf
	parameters = get_parameters(args.config)
	print_parameters(parameters)

	# ------------------------- get variant input ----------------------------------------------
	from easy_prime import target_mutation
	import pandas as pd
	# The input format is detected by trial: VCF first, FASTA as the fallback.
	# `except Exception` (not a bare except) keeps Ctrl-C and SystemExit working.
	try:
		vcf = pd.read_csv(args.input_file,comment="#",sep="\t",header=None)
		vcf[1] = vcf[1].astype(int)
		vcf = vcf.drop_duplicates(2) # remove duplicated names
		vcf[3] = [x.upper() for x in vcf[3]]
		vcf[4] = [x.upper() for x in vcf[4]]
		vcf[5] = vcf2fasta(vcf,**parameters)
		vcf = vcf[list(range(6))]
	except Exception:
		try:
			print ("Reading fasta file: %s"%(args.input_file))
			vcf = fasta2vcf(args.input_file)
			print (vcf)
		except Exception:
			print ("Can't read %s as vcf or fasta. Please check input. Exit..."%(args.input_file))
			# sys.exit is always available (the `exit` builtin comes from `site`),
			# and a non-zero status lets calling pipelines detect the failure.
			sys.exit(1)

	# For each target, create a target_mutation instance.
	variant_list = vcf[2].tolist()
	my_targets = [target_mutation(*r) for i,r in vcf.iterrows()]
	if not my_targets:
		# Without this guard the summary construction below fails with an
		# opaque pandas length-mismatch error.
		print ("no variants were found in the input file: %s"%(args.input_file))
		sys.exit(1)

	# ------------------------- find best pegRNAs ----------------------------------------------
	# The joblib backend can affect this parallelization; if it causes
	# problems, the user should set n_jobs=1.
	if parameters['n_jobs'] == 1:
		df_list = [run_steps(t,**parameters) for t in my_targets]
	else:
		from joblib import Parallel, delayed
		df_list = Parallel(n_jobs=parameters['n_jobs'],verbose=10)(delayed(run_steps)(t,**parameters) for t in my_targets)

	# ------------------------- save output ----------------------------------------------
	# TODO, possibly updating the format to be more user-friendly
	# Create the directory without a shell so paths containing spaces or shell
	# metacharacters are handled literally. expanduser keeps a leading "~"
	# working, as it did under the previous `mkdir -p` shell call.
	output_dir = os.path.expanduser(args.output)
	os.makedirs(output_dir, exist_ok=True)

	# Elements 3..7 of each run_steps result are the per-variant found_* flags
	# and the sgRNA count.
	summary = pd.DataFrame([x[3:8] for x in df_list]).astype(int)
	summary.columns = ['found_PE3b','found_PE3','found_dPAM','found_PE2',"N_sgRNA_found"]
	summary.index = variant_list
	summary.to_csv(os.path.join(output_dir, "summary.csv"),index=True)

	df_top = pd.concat([x[0] for x in df_list])
	if df_top.shape[0]==0:
		# The summary is already on disk; there is nothing further to rank.
		print ("no pegRNA were found for the input file: %s"%(args.input_file))
		sys.exit()
	df_top = df_top.sort_values("predicted_efficiency",ascending=False)
	df_top.to_csv(os.path.join(output_dir, "topX_pegRNAs.csv"),index=False)

	df_all = pd.concat([x[1] for x in df_list])
	df_all = df_all.sort_values("predicted_efficiency",ascending=False)
	df_all.to_csv(os.path.join(output_dir, "rawX_pegRNAs.csv.gz"),index=False,compression="gzip")

	X_p = pd.concat([x[2] for x in df_list])
	X_p = X_p.sort_values("predicted_efficiency",ascending=False)
	X_p.to_csv(os.path.join(output_dir, "X_p_pegRNAs.csv.gz"),index=True,compression="gzip")
	


# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
	main()


























