#!/usr/bin/env python3
#
# (c) 2020 Fetal-Neonatal Neuroimaging & Developmental Science Center
#                   Boston Children's Hospital
#
#              http://childrenshospital.org/FNNDSC/
#                        dev@babyMRI.org
#

import  pudb
import sys, os
sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../pfdo_run'))

import  pfdo_run
from    argparse            import RawTextHelpFormatter
from    argparse            import ArgumentParser

import  pfmisc
from    pfmisc._colors      import Colors
from    pfmisc              import other

str_version = "1.0.8"
str_desc = Colors.CYAN + """

                              __    _                         
                             / _|  | |                        
                       _ __ | |_ __| | ___   _ __ _   _ _ __  
                      | '_ \|  _/ _` |/ _ \ | '__| | | | '_ \ 
                      | |_) | || (_| | (_) || |  | |_| | | | |
                      | .__/|_| \__,_|\___/ |_|   \__,_|_| |_|
                      | |               ______                
                      |_|              |______|               
                      


                          Path-File Do Run

        Recursively walk down a directory tree and run a CLI spec'd app
        on files in each directory (optionally filtered by some simple
        expression). Results of each operation are saved in an output tree
        that preserves the input directory structure.


                             -- version """ + \
             Colors.YELLOW + str_version + Colors.CYAN + """ --

        'pfdo_run' demonstrates how to use ``pftree`` to transverse
        directory trees and execute some user specified CLI operation
        at each directory level (that optionally contains files of interest).

        As part of the "pf*" suite of applications, it is geared to IO as
        directories. Nested directory trees within some input directory
        are reconstructed in an output directory, preserving directory
        structure.


""" + Colors.NO_COLOUR

def synopsis(ab_shortOnly = False):
    scriptName = os.path.basename(sys.argv[0])
    shortSynopsis =  """
    NAME

	    pfdo_run

    SYNOPSIS

        pfdo_run                                                \\
                     -I|--inputDir <inputDir>                   \\
                     -O|--outputDir <outputDir>                 \\
                    --exec <CLIcmdToExec>                       \\
                    [-i|--inputFile <inputFile>]                \\
                    [--analyzeFileIndex <someIndex>]            \\
                    [--threads <numThreads>]                    \\
                    [--noJobLogging]                            \\
                    [--test]                                    \\
                    [-x|--man]                                  \\
                    [-y|--synopsis]                             \\
                    [--followLinks]                             \\
                    [--json]

    BRIEF EXAMPLE

        pfdo_run                                                \\
            -I /var/www/html/data --filter jpg                  \\
            -O /var/www/html/png                                \\
            --exec "convert %inputWorkingDir/%inputWorkingFile
            %outputWorkingDir/%_rmext_inputWorkingFile.png"     \\
            --threads 0 --printElapsedTime
    """

    description =  '''
    DESCRIPTION

        ``pfdo_run`` runs some user specified CLI at each path/file location
        in an input directory, storing results (and logs) at a corresponding 
        dir location rooted in the output directory.

    ARGS

        -I|--inputDir <inputDir>
        Input base directory to traverse.

        -O|--outputDir <outputDir>
        The output root directory that will contain a tree structure identical
        to the input directory, and each "leaf" node will contain the analysis
        results.

        --exec <CLIcmdToExec>
        The command line expression to apply at each directory node of the
        input tree. See the CLI SPECIFICATION section for more information.

        [-i|--inputFile <inputFile>]
        An optional <inputFile> specified relative to the <inputDir>. If
        specified, then do not perform a directory walk, but convert only
        this file.

        [-f|--filterExpression <someFilter>]
        An optional string to filter the files of interest from the
        <inputDir> tree.

        [--analyzeFileIndex <someIndex>]
        An optional string to control which file(s) in a specific directory
        to which the analysis is applied. The default is "-1" which implies
        *ALL* files in a given directory. Other valid <someIndex> are:

            'm':   only the "middle" file in the returned file list
            "f":   only the first file in the returned file list
            "l":   only the last file in the returned file list
            "<N>": the file at index N in the file list. If this index
                   is out of bounds, no analysis is performed.

            "-1" means all files.

        [--outputLeafDir <outputLeafDirFormat>]
        If specified, will apply the <outputLeafDirFormat> to the output
        directories containing data. This is useful to blanket describe
        final output directories with some descriptive text, such as
        'anon' or 'preview'.

        This is a formatting spec, so

            --outputLeafDir 'preview-%%s'

        where %%s is the original leaf directory node, will prefix each
        final directory containing output with the text 'preview-' which
        can be useful in describing some features of the output set.

        [--threads <numThreads>]
        If specified, break the innermost analysis loop into <numThreads>
        threads.

        [--noJobLogging]
        If specified, then suppress the logging of per-job output. Usually
        each job that is run will have, in the output directory, three
        additional files:

                %inputWorkingFile-returncode
                %inputWorkingFile-stderr
                %inputWorkingFile-stdout

        By specifying this option, the above files are not recorded.

        [-x|--man]
        Show full help.

        [-y|--synopsis]
        Show brief help.

        [--json]
        If specified, output a JSON dump of final return.

        [--followLinks]
        If specified, follow symbolic links.

        -v|--verbosity <level>
        Set the app verbosity level.

            0: No internal output;
            1: Run start / stop output notification;
            2: As with level '1' but with simpleProgress bar in 'pftree';
            3: As with level '2' but with list of input dirs/files in 'pftree';
            5: As with level '3' but with explicit file logging for
                    - read
                    - analyze
                    - write

    CLI SPECIFICATION

    Any text in the CLI prefixed with a percent char '%' is interpreted in one
    of two ways.

    First, any CLI to the ``pfdo_run`` itself can be accessed via '%'. Thus,
    for example a ``%outputDir`` in the ``--exec`` string will be expanded
    to the ``outputDir`` of the ``pfdo_run``.

    Secondly, three internal '%' variables are available:

        * '%inputWorkingDir'  - the current input tree working directory
        * '%outputWorkingDir' - the current output tree working directory
        * '%inputWorkingFile' - the current file being processed

    These internal variables allow for contextual specification of values. For
    example, a simple CLI touch command could be specified as

        --exec "touch %outputWorkingDir/%inputWorkingFile"

    or a command to convert an input ``png`` to an output ``jpg`` using the
    ImageMagick ``convert`` utility

        --exec "convert %inputWorkingDir/%inputWorkingFile
                        %outputWorkingDir/%inputWorkingFile.jpg"

    SPECIAL FUNCTIONS

    Furthermore, `pfdo_run` offers the ability to apply some interal functions
    to a tag. The template for specifying a function to apply is:

        %_<functionName>[|arg1|arg2|...]_<tag>

    thus, a function is identified by a function name that is prefixed and
    suffixed by an underscore and appears in front of the tag to process.
    Possible args to the <functionName> are separated by pipe "|" characters.

    For example a string snippet that contains

        %_strrepl|.|-_inputWorkingFile.txt

    will replace all occurences of '.' in the %inputWorkingFile with '-'.
    Also of interest, the trailing ".txt" is preserved in the final pattern
    for the result.

    The following functions are available:

        %_md5[|<len>]_<tagName>
        Apply an 'md5' hash to the value referenced by <tagName> and optionally
        return only the first <len> characters.

        %_strmsk|<mask>_<tagName>
        Apply a simple mask pattern to the value referenced by <tagName>. Chars
        that are "*" in the mask are passed through unchanged. The mask and its
        target should be the same length.

        %_strrepl|<target>|<replace>_<tagName>
        Replace the string <target> with <replace> in the value referenced by
        <tagName>.

        %_rmext_<tagName>
        Remove the "extension" of the value referenced by <tagName>. This
        of course only makes sense if the <tagName> denotes something with
        an extension!

        %_name_<tag>
        Replace the value referenced by <tag> with a name generated by the
        faker module.

    Functions cannot currently be nested.


    EXAMPLES

    Perform a `pfdo_run` down some input directory and convert all input
    ``jpg`` files to ``png`` in the output tree:

        pfdo_run                                                \\
            -I /var/www/html/data --filter jpg                  \\
            -O /var/www/html/png                                \\
            --exec "convert %inputWorkingDir/%inputWorkingFile
            %outputWorkingDir/%_rmext_inputWorkingFile.png"     \\
            --threads 0 --printElapsedTime

    The above will find all files in the tree structure rooted at
    /var/www/html/data that also contain the string "jpg" anywhere
    in the filename. For each file found, a `convert` conversion
    will be called, storing a converted file in the same tree location
    in the output directory as the original input.

    Note the special construct, %_rmext_inputWorkingFile.png -- the
    %_<func>_ designates a built in funtion to apply to the
    tag value. In this case, to "remove the extension" from the
    %inputWorkingFile string.

    Finally the elapsed time and a JSON output are printed.

    '''

    if ab_shortOnly:
        return shortSynopsis
    else:
        return shortSynopsis + description



parser  = ArgumentParser(description = str_desc, formatter_class = RawTextHelpFormatter)

parser.add_argument("-I", "--inputDir",
                    help    = "input dir",
                    dest    = 'inputDir')
parser.add_argument("-i", "--inputFile",
                    help    = "input file",
                    dest    = 'inputFile',
                    default = '')
parser.add_argument("-e", "--exec",
                    help    = "command line execution string to perform",
                    dest    = 'exec',
                    default = '')
parser.add_argument("--filterExpression",
                    help    = "string file filter",
                    dest    = 'filter',
                    default = '')
parser.add_argument("--analyzeFileIndex",
                    help    = "file index per directory to analyze",
                    dest    = 'analyzeFileIndex',
                    default = '-1')
parser.add_argument("-O", "--outputDir",
                    help    = "output image directory",
                    dest    = 'outputDir',
                    default = '')
parser.add_argument("--printElapsedTime",
                    help    = "print program run time",
                    dest    = 'printElapsedTime',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--threads",
                    help    = "number of threads for innermost loop processing",
                    dest    = 'threads',
                    default = "0")
parser.add_argument("--outputLeafDir",
                    help    = "formatting spec for output leaf directory",
                    dest    = 'outputLeafDir',
                    default = "")
parser.add_argument("--test",
                    help    = "test",
                    dest    = 'test',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--noJobLogging",
                    help    = "Turn off per-job logging to file system",
                    dest    = 'noJobLogging',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-x", "--man",
                    help    = "man",
                    dest    = 'man',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-y", "--synopsis",
                    help    = "short synopsis",
                    dest    = 'synopsis',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--json",
                    help    = "output final return in json",
                    dest    = 'json',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--overwrite",
                    help    = "overwrite files if already existing",
                    dest    = 'overwrite',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--followLinks",
                    help    = "follow symbolic links",
                    dest    = 'followLinks',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-v", "--verbosity",
                    help    = "verbosity level for app",
                    dest    = 'verbosity',
                    default = "1")
parser.add_argument('--version',
                    help    = 'if specified, print version number',
                    dest    = 'b_version',
                    action  = 'store_true',
                    default = False)

args = parser.parse_args()

if args.man or args.synopsis:
    print(str_desc)
    if args.man:
        str_help     = synopsis(False)
    else:
        str_help     = synopsis(True)
    print(str_help)
    sys.exit(1)

if args.b_version:
    print("Version: %s" % str_version)
    sys.exit(1)

args.str_version    = str_version
args.str_desc       = synopsis(True)

pf_do_shell         = pfdo_run.pfdo_run(vars(args))

# And now run it!
# pudb.set_trace()
d_pfdo_shell        = pf_do_shell.run(timerStart = True)

if args.printElapsedTime:
    pf_do_shell.dp.qprint(
            "Elapsed time = %f seconds" %
            d_pfdo_shell['runTime']
    )

sys.exit(0)
