#!/usr/bin/env python

import argparse
import pandas as pd
import numpy as np
import json
import sys
import os

import matplotlib
matplotlib.use('Agg')
from miner2 import preprocess, util, biclusters
from miner2 import GIT_SHA, __version__ as pkg_version


# Banner text handed to argparse as the program description: the tool name on
# the first line, then the package version and the Git SHA with any '$Id: '
# and ' $' substrings removed.
DESCRIPTION = (
    "miner-bcmembers - MINER compute bicluster membership inference.\n"
    "MINER Version %s (Git SHA %s)"
) % (
    pkg_version,
    GIT_SHA.replace('$Id: ', '').replace(' $', ''),
)

if __name__ == '__main__':
    # Command line: three input files, one output directory, and an optional
    # switch to bypass the TPM preprocessing step.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     description=DESCRIPTION)
    parser.add_argument('expfile', help="input matrix")
    parser.add_argument('mapfile', help="identifier mapping file")
    parser.add_argument('regulons', help="regulons.json file from miner-mechinf")
    parser.add_argument('outdir', help="output directory")
    # The flag is forwarded (negated) as do_preprocess_tpm below, so the help
    # text describes exactly that.
    parser.add_argument('--skip_tpm', action="store_true",
                        help="skip TPM preprocessing of the expression data")

    args = parser.parse_args()

    # Fail early with a readable message if any input file is missing.
    if not os.path.exists(args.expfile):
        sys.exit("expression file not found")
    if not os.path.exists(args.mapfile):
        sys.exit("identifier mapping file not found")
    if not os.path.exists(args.regulons):
        sys.exit("regulon file not found")

    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # Record dependency information next to the results.
    with open(os.path.join(args.outdir, 'run_info.txt'), 'w') as outfile:
        util.write_dependency_infos(outfile)

    # preprocess.main returns a pair; only the expression data is used here,
    # the second element (the conversion table) is not needed by this tool.
    exp_data, _ = preprocess.main(args.expfile, args.mapfile,
                                  do_preprocess_tpm=(not args.skip_tpm))

    with open(args.regulons) as infile:
        regulon_modules = json.load(infile)

    # One membership dictionary per category, all derived from the same
    # background frame. Only the over-/underexpressed calls pass an explicit
    # p value; the other two rely on the callee's default.
    bkgd = preprocess.background_df(exp_data)
    overexpressed_members = biclusters.make_membership_dictionary(regulon_modules,
                                                                  bkgd, label=2, p=0.05)
    underexpressed_members = biclusters.make_membership_dictionary(regulon_modules,
                                                                   bkgd, label=0, p=0.05)
    dysregulated_members = biclusters.make_membership_dictionary(regulon_modules,
                                                                 bkgd, label="excluded")
    coherent_members = biclusters.make_membership_dictionary(regulon_modules,
                                                             bkgd, label="included")

    # Write the overexpressed/underexpressed members as JSON so that tools
    # later in the pipeline can access them more easily.
    with open(os.path.join(args.outdir, 'overExpressedMembers.json'), 'w') as out:
        json.dump(overexpressed_members, out)
    with open(os.path.join(args.outdir, 'underExpressedMembers.json'), 'w') as out:
        json.dump(underexpressed_members, out)

    # Convert every membership dictionary to an incidence matrix and save it
    # as CSV, in the same order as before: over, under, dysregulated, coherent.
    csv_outputs = [
        (overexpressed_members, "overExpressedMembers.csv"),
        (underexpressed_members, "underExpressedMembers.csv"),
        (dysregulated_members, "dysregulatedMembers.csv"),
        (coherent_members, "coherentMembers.csv"),
    ]
    for members, csv_name in csv_outputs:
        incidence_matrix = biclusters.membership_to_incidence(members, exp_data)
        incidence_matrix.to_csv(os.path.join(args.outdir, csv_name))
