#!/usr/bin/env python3.7

# Author: Roujia Li
# email: Roujia.li@mail.utoronto.ca
import argparse
import os
import pandas as pd
from TileSeqMut.parse_clinvar import parse_clinvar_gnomad
from TileSeqMut.parse_clinvar import get_varity
from TileSeqMut.parse_clinvar import get_clinvar
from TileSeqMut.parse_clinvar import get_provean


def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser with the required ``-s/--scores`` and
        ``-g/--gene`` options plus the optional ClinVar, output, range,
        VARITY and PROVEAN options.
    """
    parser = argparse.ArgumentParser(description='Make PRC curve using DMS scores')

    # required arguments
    parser.add_argument(
        "-s", "--scores", type=str, required=True,
        help="Input score file to make prc curve. score file ends with "
             "_simple_aa.csv",
    )
    parser.add_argument("-g", "--gene", help="Gene symbol", type=str, required=True)

    # optional arguments
    parser.add_argument(
        "-c", "--clinvar",
        help="Path to Clinvar data, if not provided, clinvar data will be "
             "taken from maveQuest (might not be the newest version)",
    )
    parser.add_argument(
        "--downloadClinvar", action="store_true",
        help="if provided, raw clinvar data will be downloaded from NCBI",
    )
    parser.add_argument(
        "-o", "--output", type=str,
        help="Output folder, if not specified, output plots will be saved with "
             "score file",
    )
    parser.add_argument(
        "-r", "--range", nargs=2, type=int,
        help="Two integers to indicate the start/end of the targeted region. If "
             "specified, only variants in this range will be included. e.g -r 0 "
             "180 means variants in the range of (0, 180] will be included for "
             "PRC curve",
    )
    parser.add_argument(
        "-v", "--varity", type=str,
        help="File contains hgvsp and VARITY scores (VARITY_R and VARITY_ER) must "
             "be in columns.",
    )
    parser.add_argument("--provean", help="file contains provean predictions", type=str)
    return parser


def resolve_output_dir(scores, output=None):
    """Return the folder that output should be written to.

    Args:
        scores: Path of the input score file.
        output: Folder given with ``-o/--output``, or None when it was omitted.

    Returns:
        ``output`` when it was given, otherwise the folder containing
        ``scores``.
    """
    if output is not None:
        return output
    # os.path.dirname("scores.csv") is "" for a bare filename; an empty string
    # is not a usable folder name, so name the current directory explicitly.
    return os.path.dirname(scores) or os.curdir


def main(argv=None):
    """Parse the command line and run ``parse_clinvar_gnomad``.

    Args:
        argv: List of argument strings to parse. None (the default) makes
            argparse read ``sys.argv[1:]``.

    Exits through ``parser.error`` (status 2) when the arguments are invalid.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # process input range; an empty tuple is passed on when no range was given
    if args.range is None:
        aa_range = ()
    else:
        start, end = args.range
        # The range is documented as (start, end], so start >= end selects
        # nothing; report it here instead of failing later on empty data.
        if start >= end:
            parser.error(
                "--range START END requires START < END (got {} {})".format(start, end)
            )
        aa_range = (start, end)

    # define output directory
    output_dir = resolve_output_dir(args.scores, args.output)

    # An empty string is passed on when no ClinVar path was given.
    clinvar_data = "" if args.clinvar is None else args.clinvar

    # --downloadClinvar takes precedence over -c/--clinvar when both are given.
    # NOTE(review): get_clinvar presumably downloads into output_dir and returns
    # the location of the downloaded data -- confirm against parse_clinvar.
    if args.downloadClinvar:
        clinvar_data = get_clinvar(output_dir)

    # Empty data frames stand in for predictor files that were not supplied.
    if args.varity is not None:
        varity_data = get_varity(args.varity)
    else:
        varity_data = pd.DataFrame({})

    if args.provean is not None:
        provean_data = get_provean(args.provean)
    else:
        provean_data = pd.DataFrame({})

    parse_clinvar_gnomad(clinvar_data, args.gene, args.scores, output_dir, aa_range,
                         varity_data, provean_data)


if __name__ == "__main__":
    main()
