#!python

import argparse
import json
import logging
import sys

from sgsuite.ClusterNews import ClusterNews
from sgsuite.util import dumpJsonToFile
from sgsuite.util import genericErrorInfo
from sgsuite.util import get_entities_frm_links
from sgsuite.util import parse_inpt_for_links

# Configure the root logger with an INFO threshold and an explicitly empty
# format string.
# NOTE(review): an empty format appears to make logging fall back to its
# message-only default rather than "LEVEL:name:message" - confirm.
logging.basicConfig(format='', level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

def get_generic_args():
    """Build (but do not run) the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: a parser with one positional ``input``
        argument accepting one or more values, followed by logging options,
        similarity/threading options and output options.
    """

    def wide_help_formatter(prog):
        # Standard help formatter, with the help-text column allowed to start
        # as far right as column 30.
        return argparse.HelpFormatter(prog, max_help_position=30)

    parser = argparse.ArgumentParser(
        formatter_class=wide_help_formatter,
        description="Run StoryGraph's (https://storygraph.cs.odu.edu/) news similarity graph generator algorithm on input news articles.",
    )

    # (flags, keyword arguments) pairs, registered in exactly this order so
    # the generated help text keeps its layout.
    argument_specs = (
        # Positional input.
        (('input',), {'nargs': '+', 'help': 'Multiple news links and/or JSON or text files containing links'}),

        # Logging options.
        (('--log-file',), {'default': '', 'help': 'Log output filename'}),
        (('--log-format',), {'default': '', 'help': 'Log print format, see: https://docs.python.org/3/howto/logging-cookbook.html'}),
        (('--log-level',), {'default': 'info', 'choices': ['critical', 'error', 'warning', 'info', 'debug', 'notset'], 'help': 'Log level'}),

        # Similarity, threading and pipeline options.
        (('-j', '--jaccard-weight'), {'default': 0.3, 'type': float, 'help': 'Jaccard weight [0, 1] in weighted-jaccard-overlap similarity equation.'}),
        (('-m', '--min-sim'), {'default': 0.3, 'type': float, 'help': 'The minimum similarity threshold for linking a pair of nodes.'}),
        (('--thread-count',), {'default': 5, 'type': int, 'help': 'Count of threads to use for parallel tasks (Deref. URIs & NER).'}),
        (('--no-storygraph',), {'action': 'store_true', 'help': 'Do not run graph generation algorithm, stop at boilerplate removal.'}),

        # Remaining options, in alphabetical order.
        (('-d', '--graph-description'), {'default': 'Graph description', 'help': 'Graph description.'}),
        (('-g', '--graph-name'), {'default': 'Untitled graph', 'help': 'Graph name.'}),
        (('-o', '--output'), {'help': 'Output file'}),
        (('--pretty-print',), {'help': 'Pretty print JSON output', 'action': 'store_true'}),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    return parser

def run_get_entities_frm_links(only_links, args):
    """Run entity extraction on ``only_links`` and write the result as JSON Lines.

    Args:
        only_links: links handed unchanged to ``get_entities_frm_links``.
        args: parsed command-line namespace; ``args.thread_count`` and
            ``args.output`` are read.

    The extraction is always performed. When ``args.output`` is None a usage
    hint is printed and nothing is written. Otherwise every item returned by
    ``get_entities_frm_links`` is serialised as one JSON document per line.
    An item that cannot be serialised or written is reported through
    ``genericErrorInfo`` and skipped, so one bad record does not abort the
    whole export.
    """
    links = get_entities_frm_links(only_links, thread_count=args.thread_count)
    if args.output is None:
        print('Use -o output/file/path.jsonl.txt to write output')
        return

    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; relying on the locale default encoding makes
    # the write fail on non-UTF-8 platforms.
    with open(args.output, 'w', encoding='utf-8') as outfile:
        for entry in links:
            try:
                outfile.write(json.dumps(entry, ensure_ascii=False) + '\n')
            except Exception:
                # Best effort per record. Catching Exception (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                genericErrorInfo()

    print(f'wrote: {args.output}')

def run_cluster_news(all_link_details, only_links, output, params, args):
    """Generate a StoryGraph from ``only_links`` and optionally save it.

    Args:
        all_link_details: one mapping per input link; any entry holding more
            than one key is merged into the graph node at the same position.
        only_links: the links passed to ``ClusterNews.gen_storygraph``.
        output: destination path for the graph JSON, or None to skip writing.
        params: keyword options for ``ClusterNews``. ``min_sim``,
            ``jaccard_weight``, ``graph_name`` and ``graph_description`` are
            required; every remaining entry is forwarded as an extra keyword
            argument. The mapping is not modified.
        args: parsed command-line namespace; only ``args.pretty_print`` is read.

    Raises:
        KeyError: if one of the four required entries is missing from ``params``.
    """
    # Pop from a private copy: the caller passes vars(args), and popping from
    # that dict directly would delete attributes from the args namespace.
    params = dict(params)

    sgc = ClusterNews(
        min_sim=params.pop('min_sim'),
        jaccard_weight=params.pop('jaccard_weight'),
        sim_metric='weighted-jaccard-overlap',
        graph_name=params.pop('graph_name'),
        graph_description=params.pop('graph_description'),
        **params
    )
    sg_nodes = sgc.gen_storygraph(only_links)

    nodes = sg_nodes['nodes']
    # Merge positionally only when there is exactly one node per input entry;
    # otherwise the two sequences cannot be matched up by index.
    if len(all_link_details) == len(nodes):
        for details, node in zip(all_link_details, nodes):
            # An entry with a single key carries nothing beyond the link.
            if len(details) > 1:
                for key, value in details.items():
                    node[key] = value

    if output is None:
        print('use -o output/file/path.json to write output')
    else:
        dumpJsonToFile(output, sg_nodes, indentFlag=args.pretty_print)
        print(f'{output} may be visualized by uploading at: https://storygraph.cs.odu.edu/')

def proc_req(args):
    """Dispatch a parsed command line to the requested workflow.

    Args:
        args: parsed command-line namespace; ``args.input``,
            ``args.no_storygraph`` and ``args.output`` are read here and the
            namespace itself is handed on to the selected workflow.

    The inputs are resolved to link records via ``parse_inpt_for_links``.
    With ``--no-storygraph`` only entity extraction runs; otherwise the
    graph generation workflow runs.
    """
    params = vars(args)

    all_link_details = parse_inpt_for_links(args.input)
    only_links = [details['link'] for details in all_link_details]

    if not args.no_storygraph:
        run_cluster_news(all_link_details, only_links, args.output, params, args)
        return

    run_get_entities_frm_links(only_links, args)

def main():
    """Command-line entry point.

    If the first command-line argument is ``-v`` or ``--version`` the
    application version is printed and nothing else happens. Otherwise the
    arguments are parsed and handed to ``proc_req``.
    """
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] in ('-v', '--version'):
        # Handled before argparse, which would otherwise demand the
        # required positional input.
        from sgsuite.version import __appversion__
        print(__appversion__)
        return

    args = get_generic_args().parse_args()
    proc_req(args)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()