#!/usr/bin/env python3
#
# (c) 2021 Fetal-Neonatal Neuroimaging & Developmental Science Center
#                   Boston Children's Hospital
#
#              http://childrenshospital.org/FNNDSC/
#                        dev@babyMRI.org
#

import  pudb
import sys, os
sys.path.insert(1, os.path.join(os.path.dirname(__file__), '../pfdo_run'))

import  pfdo_run
from    argparse            import RawTextHelpFormatter
from    argparse            import ArgumentParser

import  pfmisc
from    pfmisc._colors      import Colors
from    pfmisc              import other

str_version = "2.0.2"
str_desc = Colors.CYAN + """

                          __    _
                         / _|  | |
                   _ __ | |_ __| | ___   _ __ _   _ _ __
                  | '_ \|  _/ _` |/ _ \ | '__| | | | '_ \ 
                  | |_) | || (_| | (_) || |  | |_| | | | |
                  | .__/|_| \__,_|\___/ |_|   \__,_|_| |_|
                  | |               ______
                  |_|              |______|



                          Path-File Do Run

        Recursively walk down a directory tree and run a CLI spec'd app
        on files in each directory (optionally filtered by some simple
        expression). Results of each operation are saved in an output tree
        that preserves the input directory structure.


                             -- version """ + \
             Colors.YELLOW + str_version + Colors.CYAN + """ --

        'pfdo_run' demonstrates how to use ``pftree`` to transverse
        directory trees and execute some user specified CLI operation
        at each directory level (that optionally contains files of interest).

        As part of the "pf*" suite of applications, it is geared to IO as
        directories. Nested directory trees within some input directory
        are reconstructed in an output directory, preserving directory
        structure.


""" + Colors.NO_COLOUR

def synopsis(ab_shortOnly = False):
    scriptName = os.path.basename(sys.argv[0])
    shortSynopsis =  """
    NAME

	    pfdo_run

    SYNOPSIS

        pfdo_run                                                \\
                     -I|--inputDir <inputDir>                   \\
                     -O|--outputDir <outputDir>                 \\
                    --exec <CLIcmdToExec>                       \\
                    [-i|--inputFile <inputFile>]                \\
                    [-f|--fileFilter <filter1,filter2,...>]     \\
                    [-d|--dirFilter <filter1,filter2,...>]      \\
                    [--analyzeFileIndex <someIndex>]            \\
                    [--outputLeafDir <outputLeafDirFormat>]     \\
                    [--threads <numThreads>]                    \\
                    [--noJobLogging]                            \\
                    [--test]                                    \\
                    [-x|--man]                                  \\
                    [-y|--synopsis]                             \\
                    [--followLinks]                             \\
                    [--json]

    BRIEF EXAMPLE

        pfdo_run                                                \\
            -I /var/www/html/data --filter jpg                  \\
            -O /var/www/html/png                                \\
            --exec "convert %inputWorkingDir/%inputWorkingFile
            %outputWorkingDir/%_rmext_inputWorkingFile.png"     \\
            --threads 0 --printElapsedTime
    """

    description =  '''
    DESCRIPTION

        ``pfdo_run`` runs some user specified CLI at each path/file location
        in an input directory, storing results (and logs) at a corresponding
        dir location rooted in the output directory.

    ARGS

        -I|--inputDir <inputDir>
        Input base directory to traverse.

        -O|--outputDir <outputDir>
        The output root directory that will contain a tree structure identical
        to the input directory, and each "leaf" node will contain the analysis
        results.

        --exec <CLIcmdToExec>
        The command line expression to apply at each directory node of the
        input tree. See the CLI SPECIFICATION section for more information.

        [-i|--inputFile <inputFile>]
        An optional <inputFile> specified relative to the <inputDir>. If
        specified, then do not perform a directory walk, but convert only
        this file.

        [-f|--fileFilter <someFilter1,someFilter2,...>]
        An optional comma-delimated string to filter out files of interest
        from the <inputDir> tree. Each token in the expression is applied in
        turn over the space of files in a directory location, and only files
        that contain this token string in their filename are preserved.

        [-d|--dirFilter <someFilter1,someFilter2,...>]
        Similar to the `fileFilter` but applied over the space of leaf node
        in directory paths. A directory must contain at least one file
        to be considered.

        If a directory leaf node contains a string that corresponds to any of
        the filter tokens, a special "hit" is recorded in the file hit list,
        "%d-<leafnode>". For example, a directory of

                            /some/dir/in/the/inputspace/here1234

        with a `dirFilter` of `1234` will create a "special" hit entry of
        "%d-here1234" to tag this directory for processing.

        In addition, if a directory is filtered through, all the files in
        that directory will be added to the filtered file list. If no files
        are to be added, passing an explicit file filter with an "empty"
        single string argument, i.e. `--fileFilter " "`, is advised.

        [--analyzeFileIndex <someIndex>]
        An optional string to control which file(s) in a specific directory
        to which the analysis is applied. The default is "-1" which implies
        *ALL* files in a given directory. The space of valid <someIndex> are:

            'm':   only the "middle" file in the returned file list
            "f":   only the first file in the returned file list
            "l":   only the last file in the returned file list
            "<N>": the file at index N in the file list. If this index
                   is out of bounds, no analysis is performed.
            "-1":  all files.

        [--outputLeafDir <outputLeafDirFormat>]
        If specified, will apply the <outputLeafDirFormat> to the output
        directories containing data. This is useful to blanket describe
        final output directories with some descriptive text, such as
        'anon' or 'preview'.

        This is a formatting spec, so

            --outputLeafDir 'preview-%s'

        where %s is the original leaf directory node, will prefix each
        final directory containing output with the text 'preview-' which
        can be useful in describing some features of the output set.

        [--threads <numThreads>]
        If specified, break the innermost analysis loop into <numThreads>
        threads.

        [--noJobLogging]
        If specified, then suppress the logging of per-job output. Usually
        each job that is run will have, in the output directory, three
        additional files:

                %inputWorkingFile-returncode
                %inputWorkingFile-stderr
                %inputWorkingFile-stdout

        By specifying this option, the above files are not recorded.

        [-x|--man]
        Show full help.

        [-y|--synopsis]
        Show brief help.

        [--json]
        If specified, output a JSON dump of final return.

        [--followLinks]
        If specified, follow symbolic links.

        -v|--verbosity <level>
        Set the app verbosity level.

            0: No internal output;
            1: Run start / stop output notification;
            2: As with level '1' but with simpleProgress bar in 'pftree';
            3: As with level '2' but with list of input dirs/files in 'pftree';
            5: As with level '3' but with explicit file logging for
                    - read
                    - analyze
                    - write

    CLI SPECIFICATION

    Any text in the CLI prefixed with a percent char '%' is interpreted in one
    of two ways.

    First, any CLI to the ``pfdo_run`` itself can be accessed via '%'. Thus,
    for example a ``%outputDir`` in the ``--exec`` string will be expanded
    to the ``outputDir`` of the ``pfdo_run``.

    Secondly, three internal '%' variables are available:

        * '%inputWorkingDir'  - the current input tree working directory
        * '%outputWorkingDir' - the current output tree working directory
        * '%inputWorkingFile' - the current file being processed

    These internal variables allow for contextual specification of values. For
    example, a simple CLI touch command could be specified as

        --exec "touch %outputWorkingDir/%inputWorkingFile"

    or a command to convert an input ``png`` to an output ``jpg`` using the
    ImageMagick ``convert`` utility

        --exec "convert %inputWorkingDir/%inputWorkingFile
                        %outputWorkingDir/%inputWorkingFile.jpg"

    SPECIAL FUNCTIONS

    Furthermore, `pfdo_run` offers the ability to apply some interal functions
    to a tag. The template for specifying a function to apply is:

        %_<functionName>[|arg1|arg2|...]_<tag>

    thus, a function is identified by a function name that is prefixed and
    suffixed by an underscore and appears in front of the tag to process.
    Possible args to the <functionName> are separated by pipe "|" characters.

    For example a string snippet that contains

        %_strrepl|.|-_inputWorkingFile.txt

    will replace all occurences of '.' in the %inputWorkingFile with '-'.
    Also of interest, the trailing ".txt" is preserved in the final pattern
    for the result.

    The following functions are available:

        %_md5[|<len>]_<tagName>
        Apply an 'md5' hash to the value referenced by <tagName> and optionally
        return only the first <len> characters.

        %_strmsk|<mask>_<tagName>
        Apply a simple mask pattern to the value referenced by <tagName>. Chars
        that are "*" in the mask are passed through unchanged. The mask and its
        target should be the same length.

        %_strrepl|<target>|<replace>_<tagName>
        Replace the string <target> with <replace> in the value referenced by
        <tagName>.

        %_rmext_<tagName>
        Remove the "extension" of the value referenced by <tagName>. This
        of course only makes sense if the <tagName> denotes something with
        an extension!

        %_name_<tag>
        Replace the value referenced by <tag> with a name generated by the
        faker module.

    Functions cannot currently be nested.


    EXAMPLES

    Perform a `pfdo_run` down some input directory and convert all input
    ``jpg`` files to ``png`` in the output tree:

        pfdo_run                                                \\
            -I /var/www/html/data --filter jpg                  \\
            -O /var/www/html/png                                \\
            --exec "convert %inputWorkingDir/%inputWorkingFile
            %outputWorkingDir/%_rmext_inputWorkingFile.png"     \\
            --threads 0 --printElapsedTime

    The above will find all files in the tree structure rooted at
    /var/www/html/data that also contain the string "jpg" anywhere
    in the filename. For each file found, a `convert` conversion
    will be called, storing a converted file in the same tree location
    in the output directory as the original input.

    Note the special construct, %_rmext_inputWorkingFile.png -- the
    %_<func>_ designates a built in funtion to apply to the
    tag value. In this case, to "remove the extension" from the
    %inputWorkingFile string.

    Consider an example where only one file in a branched inputdir
    space is to be preserved:

        pfdo_run                                                \\
            -I (pwd)/raw -O (pwd)/out                           \\
            -d 100307 -f " "                                    \\
            --exec "cp %inputWorkingDir/brain.mgz
            %outputWorkingDir/brain.mgz"                        \\
            --threads 0 --verbosity 3 --noJobLogging

    Here, the input directory space is pruned for a directory leaf
    node that contains the string 100307. The exec command essentially
    copies the file `brain.mgz` in that target directory to the
    corresponding location in the output tree.

    Finally the elapsed time and a JSON output are printed.

    '''

    if ab_shortOnly:
        return shortSynopsis
    else:
        return shortSynopsis + description



parser  = ArgumentParser(description = str_desc, formatter_class = RawTextHelpFormatter)

parser.add_argument("-I", "--inputDir",
                    help    = "input dir",
                    dest    = 'inputDir')
parser.add_argument("-i", "--inputFile",
                    help    = "input file",
                    dest    = 'inputFile',
                    default = '')
parser.add_argument("-e", "--exec",
                    help    = "command line execution string to perform",
                    dest    = 'exec',
                    default = '')
parser.add_argument("-f", "--fileFilter",
                    help    = "a list of comma separated string filters to apply across the input file space",
                    dest    = 'fileFilter',
                    default = '')
parser.add_argument("-d", "--dirFilter",
                    help    = "a list of comma separated string filters to apply across the input dir space",
                    dest    = 'dirFilter',
                    default = '')
parser.add_argument("--analyzeFileIndex",
                    help    = "file index per directory to analyze",
                    dest    = 'analyzeFileIndex',
                    default = '-1')
parser.add_argument("-O", "--outputDir",
                    help    = "output image directory",
                    dest    = 'outputDir',
                    default = '')
parser.add_argument("--printElapsedTime",
                    help    = "print program run time",
                    dest    = 'printElapsedTime',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--threads",
                    help    = "number of threads for innermost loop processing",
                    dest    = 'threads',
                    default = "0")
parser.add_argument("--outputLeafDir",
                    help    = "formatting spec for output leaf directory",
                    dest    = 'outputLeafDir',
                    default = "")
parser.add_argument("--test",
                    help    = "test",
                    dest    = 'test',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--noJobLogging",
                    help    = "Turn off per-job logging to file system",
                    dest    = 'noJobLogging',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-x", "--man",
                    help    = "man",
                    dest    = 'man',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-y", "--synopsis",
                    help    = "short synopsis",
                    dest    = 'synopsis',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--json",
                    help    = "output final return in json",
                    dest    = 'json',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--overwrite",
                    help    = "overwrite files if already existing",
                    dest    = 'overwrite',
                    action  = 'store_true',
                    default = False)
parser.add_argument("--followLinks",
                    help    = "follow symbolic links",
                    dest    = 'followLinks',
                    action  = 'store_true',
                    default = False)
parser.add_argument("-v", "--verbosity",
                    help    = "verbosity level for app",
                    dest    = 'verbosity',
                    default = "1")
parser.add_argument('--version',
                    help    = 'if specified, print version number',
                    dest    = 'b_version',
                    action  = 'store_true',
                    default = False)

args = parser.parse_args()

if args.man or args.synopsis:
    print(str_desc)
    if args.man:
        str_help     = synopsis(False)
    else:
        str_help     = synopsis(True)
    print(str_help)
    sys.exit(1)

if args.b_version:
    print("Version: %s" % str_version)
    sys.exit(1)

args.str_version    = str_version
args.str_desc       = synopsis(True)

pf_do_shell         = pfdo_run.pfdo_run(vars(args))

# And now run it!
# pudb.set_trace()
d_pfdo_shell        = pf_do_shell.run(timerStart = True)

if args.printElapsedTime:
    pf_do_shell.dp.qprint(
            "Elapsed time = %f seconds" %
            d_pfdo_shell['runTime']
    )

sys.exit(0)
