#!/usr/bin/env python
import argparse
from pyphe.interpret import interpret 
import pandas as pd

def build_parser():
    """Build the argparse parser for the pyphe-interpret command line tool.

    Returns:
        argparse.ArgumentParser: parser exposing the options --ld, --out,
        --axis_column, --grouping_column, --values_column, --control,
        --ld_encoding, --circularity and --set_missing_na.
    """
    description = (
        'Welcome to pyphe-interpret, part of the pyphe toolbox. '
        'Written by stephan.kamrad@crick.ac.uk and maintained at '
        'https://github.com/Bahler-Lab/pyphe. '
        'Setting your column names correctly is crucial. '
        'Let us assume you have measured many strains in many conditions. '
        'Now you would like to know for each strain-condition pair if it is '
        'significant. There are essentially two ways of doing this. '
        '(1) Check for each condition separately '
        '(--axis_column <condition_column>) if there is a significant '
        'difference in means between a mutant strain and a control strain '
        '(--grouping_column <strain_id_column>). '
        'Or (2) Check for each strain separately '
        '(--axis_column <strain_id_column>) if there is a significant '
        'difference in the means of the strain in the assay condition versus '
        'the control condition (--grouping_column <condition_column>). '
        'The second option tests for condition-specific growth effects '
        '(i.e. it does not return significant results if a strain is always '
        'faster or always slower growing than the grid strain). '
        'In both cases you need to specify the control against which to test '
        'using --control and this has to be a value that appears in the axis '
        'column. You should define the dependent variable of the t-test '
        'using --values_column.'
    )
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        '--ld', type=str, required=True,
        help="Path to the Data Report Table produced by pyphe-analyse.")
    # NOTE(review): the default prefix says "quantify" although this is the
    # interpret tool; kept unchanged so existing pipelines find their files.
    parser.add_argument(
        '--out', type=str, default='pyphe-quantify-report',
        help='Specifies the path where to save the output data result. '
             'By default, a table with all replicates will be saved as '
             'pyphe-quantify-report_reps.csv and the statistic table will be '
             'saved as pyphe-quantify-report_summaryStats.csv in the current '
             'working directory. Existing files will be overwritten.')
    parser.add_argument(
        '--axis_column', type=str, required=True,
        help='Name of the column in the data report to repeat t-tests along. '
             'Data will be grouped by the grouping_column and differences '
             'between all unique values found in the axis column versus the '
             'specified control will be tested for.')
    parser.add_argument(
        '--grouping_column', type=str, required=True,
        help='Name of the column in the data report to use as the grouping '
             'variable for t-tests.')
    parser.add_argument(
        '--values_column', type=str, default='Colony_size_corr_checked',
        help='Name of the column in the data report to use as fitness '
             'values. This will be the dependent variable for t-tests. '
             'Defaults to "Colony_size_corr_checked".')
    parser.add_argument(
        '--control', type=str, required=True,
        help='Name of the control to compare against. This must be a value '
             'found in the axis column.')
    parser.add_argument(
        '--ld_encoding', default='utf-8', type=str,
        help='Encoding of the data report table to be passed to '
             'pandas.read_csv().')
    parser.add_argument(
        '--circularity', type=float, default=None,
        help='Exclude colonies from the analysis with a circularity below '
             'the one specified. A circularity of 1 corresponds to a perfect '
             'circle. We recommend a threshold around 0.85.')
    parser.add_argument(
        '--set_missing_na', action='store_true', default=False,
        help='Set 0-sized colonies to NA. This is recommended if you expect '
             'no missing colonies in your data, which means these are '
             'probably due to pinning errors.')
    return parser


def main(argv=None):
    """Parse the command line, load the data report and run ``interpret``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # Echo the effective parameters so they end up in the run log.
    print('Interpretation is starting, with following parameters:')
    for key, value in vars(args).items():
        print('%s: %s' % (key, str(value)))

    # Load the data report; its first column is used as the index.
    ld = pd.read_csv(args.ld, index_col=0, encoding=args.ld_encoding)

    interpret(
        ld,
        args.axis_column,
        args.grouping_column,
        args.values_column,
        args.control,
        args.out,
        circularity=args.circularity,
        set_missing_na=args.set_missing_na,
    )


if __name__ == '__main__':
    main()
