#!/usr/bin/env python

import argparse
import pandas as pd
import numpy as np
import json
import sys
import os

import matplotlib
matplotlib.use('Agg')
from miner2 import preprocess, causal_inference, util
from miner2 import GIT_SHA, __version__ as pkg_version


# Banner text handed to argparse as the program description: the tool name on
# the first line, then the installed package version and the Git SHA with the
# literal '$Id: ' and ' $' substrings removed.
DESCRIPTION = (
    "miner-causalinf-post - MINER post-compute causal inference\n"
    "MINER Version %(version)s (Git SHA %(sha)s)"
) % {
    'version': pkg_version,
    'sha': GIT_SHA.replace('$Id: ', '').replace(' $', ''),
}


if __name__ == '__main__':
    # Command-line driver for the post-NEO stage of causal inference.
    #
    # Steps, in order:
    #   1. parse arguments and verify that every input path exists
    #   2. create the output directory and record dependency info in run_info.txt
    #   3. load expression data, NEO results, mutations, TF expression, eigengenes
    #   4. post-process the causal results and dump them to regulonNetworkPValues.json
    #   5. build the causal mechanistic network and save it as
    #      regulonCausalMechanisticNetwork.csv
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     description=DESCRIPTION)

    parser.add_argument('expfile', help="input matrix")
    parser.add_argument('mapfile', help="identifier mapping file")
    parser.add_argument('eigengenes', help="eigengenes.csv file generated by miner-causalinf-pre")
    parser.add_argument('mutations', help="path to mutations CSV file")
    parser.add_argument('neoresults', help="NEO results directory")
    parser.add_argument('datadir', help="data directory")
    parser.add_argument('outdir', help="output directory")
    # Optional so that existing invocations keep working; the default matches
    # the value that used to be hard-coded in the post-processing call.
    parser.add_argument('--numcores', type=int, default=5,
                        help="number of cores passed to post-processing (default: %(default)s)")

    args = parser.parse_args()

    if args.numcores < 1:
        parser.error("--numcores must be a positive integer")

    # Validate inputs up front, in the same order and with the same messages
    # as before, so the first missing path is the one that gets reported.
    required_inputs = [
        (args.expfile, "expression file not found"),
        (args.mapfile, "identifier mapping file not found"),
        (args.datadir, "data directory not found"),
        (args.eigengenes, "Eigengenes file not found"),
        (args.mutations, "Mutations CSV file not found"),
        (args.neoresults, "NEO results directory not found"),
    ]
    for path, message in required_inputs:
        if not os.path.exists(path):
            sys.exit(message)

    # Create the output directory without a check-then-create race: attempt the
    # creation and only tolerate the failure when the directory is already there.
    try:
        os.makedirs(args.outdir)
    except OSError:
        if not os.path.isdir(args.outdir):
            raise

    with open(os.path.join(args.outdir, 'run_info.txt'), 'w') as outfile:
        util.write_dependency_infos(outfile)

    exp_data, conv_table = preprocess.main(args.expfile, args.mapfile)

    # After running NEO (in R), proceed with the following
    preprocessed_causal_results = causal_inference.process_causal_results(causalPath=args.neoresults)

    # load complete mutation matrix with default filters for use in postprocessing
    # WW: TODO: I just picked something that I had, we need to ask Matt where to get those
    # or let the user decide
    filtered_mutations = causal_inference.mutation_matrix([args.mutations], minNumMutations=None)

    # load tf expression matrix for use in postprocessing; the motif pickle is
    # looked up inside the data directory
    tf_exp = causal_inference.tf_expression(exp_data,
                                            motifPath=os.path.join(args.datadir, "all_tfs_to_motifs.pkl"))

    # load eigengenes matrix for use in postprocessing
    eigengenes = pd.read_csv(args.eigengenes, index_col=0, header=0)

    # generate p values for all network edges for use in filtering
    postprocessed_causal_analysis = causal_inference.post_process_causal_results(
        preprocessed_causal_results,
        filtered_mutations,
        tf_exp, eigengenes,
        mechanisticOutput=None,
        numCores=args.numcores)

    # write post-processed analysis to json file
    with open(os.path.join(args.outdir, "regulonNetworkPValues.json"), 'w') as outfile:
        json.dump(postprocessed_causal_analysis, outfile)

    # generate causal mechanistic network in dataframe format
    causal_mechanistic_network = causal_inference.causal_mechanistic_network_dictionary(
        postprocessed_causal_analysis,
        biclusterRegulatorPvalue=0.05,
        regulatorMutationPvalue=0.05,
        mutationFrequency=0.01,
        requireCausal=True)

    # save causalMechanisticNetwork to csv
    causal_mechanistic_network.to_csv(os.path.join(args.outdir, "regulonCausalMechanisticNetwork.csv"))
