#!/usr/bin/env python
#
# Copyright (C) 2013-2015 DNAnexus, Inc.
#
# This file is part of dx-toolkit (DNAnexus platform client libraries).
#
#   Licensed under the Apache License, Version 2.0 (the "License"); you may not
#   use this file except in compliance with the License. You may obtain a copy
#   of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#   License for the specific language governing permissions and limitations
#   under the License.

import sys, os, argparse, json, pipes, shutil
from collections import OrderedDict
import re
import dxpy

from dxpy.utils.resolver import ResolutionError, resolve_existing_path, get_app_from_path
from dxpy.utils.printing import *

# Deprecation banner shown at the top of the --help description.
warning = ''.join([
    BOLD('WARNING:'),
    ''' dx-workflow-to-applet is deprecated, and will be removed
in a future release. Your workflows have been migrated. Please use
''',
    BOLD('dx run <workflow>'),
    ' to run them. See ',
    BOLD('dx run <workflow> --help'),
    '''.

''',
])

# The description is made of three independently wrapped paragraphs that are
# separated by blank lines; RawTextHelpFormatter keeps that layout intact.
_description_paragraphs = [
    fill('Takes a ' + BOLD() + 'workflow' + ENDC() + ' on the DNAnexus platform and makes a local directory out of it which can be built as an ' + BOLD() + 'applet' + ENDC() + ' using the "dx build" command.'),
    fill('When built, any referenced applets and data objects will be bundled with the applet (copying the applet from one project to another will also copy any dependencies).  If the applet is run without its dependencies in the same project, it may fail.'),
    fill('You may also choose to build an app instead of an applet using the command "dx build --create-app --no-temp-build-project", but make sure all applet and data dependencies are present in your current project at the time you run the command.  Otherwise, the dependencies will not be packaged with the app.'),
]

parser = argparse.ArgumentParser(
    description=warning + '\n\n'.join(_description_paragraphs),
    formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
    'path',
    help=fill('DNAnexus path to a workflow, e.g. projectName:/folder/workflowname', width_adjustment=-24))
parser.add_argument(
    '--name',
    help=fill('Name of applet to be created (default is the workflow name)', width_adjustment=-24))
parser.add_argument(
    '--overwrite', '-f',
    action='store_true',
    help=fill('Whether to overwrite a local directory with the same name', width_adjustment=-24))

# With no command-line arguments at all, show the help text and exit with
# status 3 instead of letting argparse complain about the missing path.
if len(sys.argv) < 2:
    parser.print_help()
    parser.exit(3)

args = parser.parse_args()

def _exit_with_error(message):
    # Emit a wrapped message through argparse and exit with status 3.
    parser.exit(3, fill(message) + '\n')


# Resolve the user-supplied path to a platform object.
try:
    project, folder, result = resolve_existing_path(args.path, expected='entity')
except ResolutionError as resolution_error:
    _exit_with_error(str(resolution_error))

if result is None:
    _exit_with_error('Error: could not resolve the given path to a workflow')

# The object must be a record that carries the "pipeline" type.
_described = result['describe']
if _described['class'] != 'record':
    _exit_with_error('Error: given workflow path does not resolve to a record')
if 'pipeline' not in _described['types']:
    _exit_with_error('Error: given workflow path does not resolve to a record with the correct type "pipeline"')

# Default applet name: the record name plus "_applet", with every character
# outside the allowed set replaced by an underscore.
if args.name is None:
    args.name = re.sub("[^a-zA-Z0-9.\_\-]", "_", _described['name'] + '_applet')

if "/" in args.name:
    _exit_with_error('Error: the new name for the applet must not include the "/" character')

# The workflow definition is stored in the record's details.
handler = dxpy.DXRecord(dxid=result['id'], project=project)
workflow = handler.get_details()

_workflow_version = workflow.get('version')
if _workflow_version < 5:
    sys.stderr.write(fill('WARNING: workflow version is less than 5; if any errors occur, try opening the workflow in the website before rerunning the tool') + '\n')

# With --overwrite, clear out any previous output directory first.  This is
# deliberately best-effort: the directory may simply not exist yet, and if the
# removal fails for any other filesystem reason the os.mkdir() below reports
# the problem.  Only OSError is ignored so that KeyboardInterrupt and
# SystemExit are no longer swallowed here.
if args.overwrite:
    try:
        shutil.rmtree(args.name)
    except OSError:
        pass

# Create the (fresh) applet directory; exit with status 3 and an explanatory
# message if that is not possible (e.g. it already exists).
try:
    os.mkdir(args.name)
except OSError as e:
    parser.exit(3, fill('Error: could not create local directory with name "' + args.name + '": ' + str(e)) + '\n')

# Skeleton of the dxapp.json that will be written into the applet directory.
# Key order is preserved in the written file.  The input/output specs and the
# bundled dependencies start out empty and are filled in stage by stage; the
# workflow details are stored under "details".
dxapp_json = OrderedDict([
    ('name', args.name),
    ('title', args.name),
    ('dxapi', '1.0.0'),
    ('version', '0.0.1'),
    ('runSpec', {
        'file': 'run.py',
        'interpreter': 'python2.7',
        'bundledDepends': []
    }),
    ('inputSpec', []),
    ('outputSpec', []),
    ('details', workflow),
])

# Beginning of the generated run.py source; the body of main() is appended to
# this string piece by piece.
_code_header = '''#!/usr/bin/env python
#
# Generated by the dx-workflow-to-applet tool from the workflow
# {workflow_name} ({workflow_id})

import dxpy

@dxpy.entry_point('main')
def main(**kwargs):
    output = {{}}
'''
code = _code_header.format(workflow_name=result['describe']['name'],
                           workflow_id=result['describe']['id'])

# Maps a stage ID (as found in the workflow) to the index of that stage in
# applet_stages.
stage_ids = {}

# One dict of aggregated metadata per stage of the new applet, with keys:
# - name: name of the stage as used internally by the applet
# - exec_id: ID of the app or applet to be run
# - inputSpec: subset of the original executable's input spec (with group
#   and default values) that goes into the applet's input spec; keyed by
#   the original executable's input names
# - outputSpec: output specs of the original executable; keyed by the
#   original executable's output names
# - connections: maps original input names to values that contain
#   connection hashes (i.e. dicts with the "connectedTo" key)
applet_stages = []

def is_connection(thing):
    """Return True if *thing* is a connection hash.

    A connection hash is a dict that has a 'connectedTo' key; anything else
    (other dicts, lists, scalars, None) yields False.
    """
    if not isinstance(thing, dict):
        return False
    return 'connectedTo' in thing

def stringify_thing(thing):
    """Render an input value as source text for the generated run.py.

    A connection hash becomes an expression that fetches the named output
    field from the job variable of the stage it is connected to (looked up
    through stage_ids and applet_stages).  Any other value is rendered with
    repr().
    """
    if not is_connection(thing):
        return repr(thing)

    link = thing['connectedTo']
    source_stage = applet_stages[stage_ids[link['stage']]]
    template = "{stage_name}_job.get_output_ref('{field}')"
    return template.format(stage_name=source_stage['name'],
                           field=link['output'])

# Walk the workflow's stages in order.  For each stage this:
#   * drops inputs bound to the empty string,
#   * works out which executable (app or applet) the stage runs,
#   * records the stage in stage_ids / applet_stages,
#   * registers applets and linked data objects as bundled dependencies,
#   * turns bound inputs into defaults, or into connections when they refer
#     to another stage's output,
#   * appends the stage's input/output parameters to the new applet's specs.
for i, workflow_stage in enumerate(workflow['stages']):
    # A stage is allowed to have no 'inputs' at all.  Inputs bound to the
    # empty string are removed; iterate over a snapshot of the keys because
    # deleting from a dict while iterating over it raises RuntimeError.
    bound_input = workflow_stage.get('inputs', {})
    for j in list(bound_input):
        if bound_input[j] == "":
            del bound_input[j]

    # The executable reference is either a hash with an 'id' key or the
    # reference itself.  The isinstance() check keeps a bare ID string from
    # being subjected to a substring test for "id".
    stage_app = workflow_stage['app']
    if isinstance(stage_app, dict) and 'id' in stage_app:
        exec_id = stage_app['id']
    else:
        exec_id = stage_app
    if dxpy.is_dxlink(exec_id):
        exec_id = exec_id['$dnanexus_link']
    # References starting with "app-" are looked up through
    # get_app_from_path and replaced by the ID it reports.
    if exec_id.startswith('app-'):
        exec_id = get_app_from_path(exec_id)['id']

    exec_handler = dxpy.get_handler(exec_id)
    exec_desc = exec_handler.describe()

    # Sanitizing the app/applet name as an input name prefix
    exec_name_prefix = re.sub("[^a-zA-Z0-9\_]", "", exec_desc['name']).lstrip('0123456789')

    stage_ids[workflow_stage['id']] = i
    applet_stages.append({"name": exec_name_prefix + "_" + str(i),
                          "inputSpec": OrderedDict(),
                          "outputSpec": OrderedDict(),
                          "exec_id": exec_id,
                          "connections": {}})
    applet_stage = applet_stages[i]
    stage_name = applet_stage['name']

    # Applets are listed as bundled dependencies of the new applet.
    if exec_desc['class'] == 'applet':
        dxapp_json['runSpec']['bundledDepends'].append({'name': stage_name + '_applet',
                                                        'id': {'$dnanexus_link': exec_id}})

    # Index the executable's parameters by their original names.
    for spec in ['inputSpec', 'outputSpec']:
        for param in exec_desc[spec]:
            applet_stage[spec][param["name"]] = param

    # Set bound inputs as default values in the appropriate stage
    for input_name, input_val in bound_input.iteritems():
        # Find 'connectedTo' hashes (in arrays or otherwise); if any
        # exist, then remove the input from the new applet's spec
        # entirely.

        if isinstance(input_val, list):
            list_of_vals = input_val
        else:
            list_of_vals = [input_val]

        has_connections = False

        # NOTE: Does not support arbitrary depth, but the workflow
        # doesn't link to arbitrary depth anyway.
        default_input_index = 0
        for val in list_of_vals:
            if is_connection(val):
                has_connections = True
            elif isinstance(val, dict) and '$dnanexus_link' in val:
                if isinstance(val['$dnanexus_link'], basestring):
                    dxapp_json['runSpec']['bundledDepends'].append({'name': stage_name + '_' + input_name + '_' + str(default_input_index),
                                                                    'id': val})
                    default_input_index += 1
                elif isinstance(val['$dnanexus_link'], dict) and isinstance(val['$dnanexus_link'].get('id'), basestring):
                    dxapp_json['runSpec']['bundledDepends'].append({'name': stage_name + '_' + input_name + '_' + str(default_input_index),
                                                                    'id': {'$dnanexus_link': val['$dnanexus_link']['id']}})
                    # Delete any project references if present (but keep symbolic references intact).
                    # pop() with a default tolerates links that carry no 'project' key.
                    val['$dnanexus_link'].pop('project', None)
                    # If only the 'id' is left, collapse the link to its plain form.
                    if len(val['$dnanexus_link']) == 1:
                        val['$dnanexus_link'] = val['$dnanexus_link']['id']
                    default_input_index += 1
        if has_connections:
            # Connected inputs are wired up inside the generated code, so
            # they are not exposed as inputs of the new applet.
            del applet_stage["inputSpec"][input_name]
            applet_stage['connections'][input_name] = input_val
        else:
            applet_stage["inputSpec"][input_name]["default"] = input_val

    # Add each stage's input and output spec to the new applet's spec.
    # Parameter names are prefixed with the stage name, and the stage name
    # is used as the parameter group.
    for spec in ['inputSpec', 'outputSpec']:
        for name, param in applet_stage[spec].iteritems():
            new_param = param.copy()
            new_param['name'] = applet_stage['name'] + '_' + name
            new_param['group'] = applet_stage["name"]
            dxapp_json[spec].append(new_param)

# Templates for the pieces of generated run.py source.  All of them are
# indented to sit inside the body of the generated main() function.

# Creates a handler for a stage's executable and verifies it can be described.
_HANDLER_SNIPPET = '''
    {stage_name} = dxpy.get_handler('{exec_id}')
    try:
        {stage_name}.describe()
    except:
        raise dxpy.AppError("Unable to find required executable for stage {stage_name} with ID {exec_id}. You may need to request access to it or copy it (if it is an applet) to the same project as this applet before running it again.")
'''

# Opens a stage section and initialises its input hash.
_STAGE_HEADER_SNIPPET = '''
    # Stage {stage_name}

    {stage_name}_input_hash = {{}}

'''

# Forwards an applet-level input to the stage when the caller supplied it.
_KWARG_INPUT_SNIPPET = '''    if '{effective_name}' in kwargs:
        {stage_name}_input_hash['{input_name}'] = kwargs['{effective_name}']
'''

# Sets a stage input from the output of an earlier stage.
_CONNECTED_INPUT_SNIPPET = '''    {stage_name}_input_hash['{input_name}'] = {stringified_input}
'''

# Launches the stage's job.
_RUN_SNIPPET = '''
    {stage_name}_job = {stage_name}.run({stage_name}_input_hash)

'''

# Exposes one stage output as an output of the applet.
_OUTPUT_SNIPPET = '''    output['{stage_name}_{output_name}'] = {stage_name}_job.get_output_ref('{output_name}')
'''

code += '''
    # Make handlers for running the stages of the app and make sure
    # each executable is available
'''
# Check whether user has access to required applets and resource files
code += ''.join(_HANDLER_SNIPPET.format(stage_name=stage['name'],
                                        exec_id=stage['exec_id'])
                for stage in applet_stages)

code += '''
    # Run each stage of the workflow
'''

for stage in applet_stages:
    current_name = stage['name']
    pieces = [_STAGE_HEADER_SNIPPET.format(stage_name=current_name)]

    # Inputs that remain part of the applet's own input spec
    for spec_input in stage["inputSpec"]:
        pieces.append(_KWARG_INPUT_SNIPPET.format(stage_name=current_name,
                                                  effective_name=current_name + '_' + spec_input,
                                                  input_name=spec_input))

    # Now add the jbors if there are any
    for connected_input in stage['connections']:
        connection = stage['connections'][connected_input]
        if isinstance(connection, list):
            rendered = "[" + ",".join(stringify_thing(item) for item in connection) + "]"
        else:
            rendered = stringify_thing(connection)
        pieces.append(_CONNECTED_INPUT_SNIPPET.format(stage_name=current_name,
                                                      input_name=connected_input,
                                                      stringified_input=rendered))

    # And run the job
    pieces.append(_RUN_SNIPPET.format(stage_name=current_name))

    # Every output of the stage becomes an output of the applet
    for stage_output in stage['outputSpec']:
        pieces.append(_OUTPUT_SNIPPET.format(stage_name=current_name,
                                             output_name=stage_output))

    code += ''.join(pieces)

code += '''
    return output

dxpy.run()
'''

with open(os.path.join(args.name, 'dxapp.json'), 'w') as fd:
    json.dump(dxapp_json, fd, indent=4)
    fd.write('\n')

with open(os.path.join(args.name, 'run.py'), 'w') as fd:
    fd.write(code)

print 'Created applet directory successfully'
if re.search("[^a-zA-Z0-9.\_\-]", args.name) is None and re.match("app-", args.name) is None:
    print 'You can also build this as an ' + BOLD() + 'app' + ENDC() + ' with:'
    print '  dx build --create-app --no-temp-build-project ' + pipes.quote(args.name)
else:
    print fill('NOTE: The "name" field in your ' + pipes.quote(os.path.join(args.name, 'dxapp.json')) + ' file contains disallowed characters and cannot be built as an ' + BOLD() + 'app' + ENDC() + ' unless this is changed.')
print 'You can build this as an ' + BOLD() + 'applet' + ENDC() + ' with:'
print '  dx build ' + pipes.quote(args.name)
