#!/usr/bin/env python

'''
  tgentools
  Authored by Rob Jansen, 2015
  See LICENSE for licensing information
'''

import sys, os, argparse, logging, re, datetime
from itertools import cycle
from socket import gethostname
from multiprocessing import cpu_count

from tgentools._version import __version__
import tgentools.util as util

# Description text for the top-level argument parser.
DESC_MAIN = """
TGenTools is a utility to analyze (parse) and visualize (plot) TGen output.
TGenTools must be run with a subcommand to specify a mode of operation.

For more information, see https://github.com/shadow/tgen.
"""
# Help text attached to the group of subcommands of the top-level parser.
HELP_MAIN = """
Use 'tgentools <subcommand> --help' for more info
"""

# Description and one-line help for the 'parse' subcommand.
DESC_ANALYZE = """
Parse results from TGen traffic generator log files.

This subcommand processes TGen log files and stores the processed
data in json format for plotting. It was written so that the log files
need never be stored on disk decompressed, which is useful when log file
sizes reach tens of gigabytes.

The standard way to run this subcommand is to give the path to a TGen
file (e.g., those produced after running `tgen`) or to a directory
containing such files, and the statistics parsed during the analysis
will be dumped to `tgen.analysis.json.xz`.
"""
HELP_ANALYZE = """
Analyze TGen output
"""

# Description and one-line help for the 'plot' subcommand.
DESC_VISUALIZE = """
Loads a previously parsed TGen json file and plots various interesting
performance metrics to PDF files.
"""
HELP_VISUALIZE = """
Visualize TGen analysis results
"""

# Description and one-line help for the 'edit' subcommand.
DESC_EDIT = """
Edit TGen configuration files in place to change or add options (i.e.,
set the values of graphml attributes) on the specified graphml vertices
or edges. Be careful not to set unsupported attribute names or values!
"""
HELP_EDIT = """
Edit TGen configuration files in place
"""

# Configure the root logger at import time: INFO level, and every record
# carries both the formatted wall-clock time and the raw epoch timestamp
# (%(created)f), followed by a fixed [tgentools] tag, the level, and the message.
logging.basicConfig(format='%(asctime)s %(created)f [tgentools] [%(levelname)s] %(message)s', level=logging.INFO, datefmt='%Y-%m-%d %H:%M:%S')

class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that shows argument defaults and keeps descriptions raw.

    The defaults behavior is inherited from ArgumentDefaultsHelpFormatter;
    overriding `_fill_text` stops description text from being re-wrapped.
    """

    def _fill_text(self, text, width, indent):
        """Return `text` with `indent` prepended to each line.

        Line endings are preserved and `width` is ignored, so no wrapping
        takes place.
        """
        pieces = []
        for raw_line in text.splitlines(True):
            pieces.append(indent + raw_line)
        return ''.join(pieces)

def main():
    """Build the tgentools command line, parse it, and run the chosen subcommand.

    Three subcommands are registered: 'parse' (handled by `analyze`), 'plot'
    (handled by `visualize`) and 'edit' (handled by `edit`). Each subparser
    stores its handler in the `func` default, which is then called with the
    parsed arguments.

    When `-v/--version` is given before any subcommand, the version is logged
    and the function returns without running a handler. When neither the
    version flag nor a subcommand is given, the parser reports a usage error
    (which exits the process) instead of failing on a missing `func`.

    Returns:
        Whatever the selected handler returns (None for the version path).
    """
    # keeps description text raw and also shows argument defaults in the help
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    main_parser.add_argument('-v', '--version',
        help="""Prints the version of the toolkit and exits.""",
        action="store_true", dest="do_version",
        default=False)

    # The subcommand is deliberately not marked as required on the subparsers
    # object: doing so would make `tgentools -v` (no subcommand) an error.
    sub_parser = main_parser.add_subparsers(help=HELP_MAIN)

    # analyze
    analyze_parser = sub_parser.add_parser('parse',
        description=DESC_ANALYZE,
        help=HELP_ANALYZE,
        formatter_class=my_formatter_class)
    analyze_parser.set_defaults(func=analyze, formatter_class=my_formatter_class)

    analyze_parser.add_argument(
        help="""The PATH to a TGen log file, or to a directory that will be
recursively searched for TGen log files; may be '-' for STDIN; each log file
may end in '.xz' to enable inline xz decompression""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="tgen_path")

    analyze_parser.add_argument('-p', '--prefix',
        help="""A directory PATH prefix where the processed data
files generated by this script will be written""",
        metavar="PATH", type=type_str_dir_path_out,
        action="store", dest="prefix",
        default=os.getcwd())

    analyze_parser.add_argument('-m', '--multiproc',
        help="""Enable multiprocessing with N worker processes, which may be '0'
to use the number of available processor cores""",
        metavar="N", type=type_nonnegative_integer,
        action="store", dest="nprocesses",
        default=1)

    analyze_parser.add_argument('-c', '--complete',
        help="""Parse and export a more complete set of statistics that is more computationally expensive to obtain""",
        action="store_true", dest="do_complete",
        default=False)

    analyze_parser.add_argument('-a', '--address',
        help="""An IP address STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=type_str_ip_in,
        action="store", dest="ip_address",
        default=None)

    analyze_parser.add_argument('-n', '--nickname',
        help="""A nickname STRING that identifies the machine where the input logfiles were produced""",
        metavar="STRING", type=str,
        action="store", dest="nickname",
        default=None)

    analyze_parser.add_argument('-d', '--date-filter',
        help="""A DATE string in the form YYYY-MM-DD, all log messages that did not occur on this date will be filtered out of the analysis""",
        metavar="DATE", type=type_str_date_in,
        action="store", dest="date_filter",
        default=None)

    # the backslash is doubled so the help text shows 'tgen.*\.log' without
    # relying on an invalid escape sequence in a non-raw string literal
    analyze_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
re.search to find log file names in the search path. The custom list of patterns
will override the default pattern 'tgen.*\\.log'.""",
        metavar="PATTERN", type=str,
        action="append", dest="patterns",
        default=[])

    # visualize
    visualize_parser = sub_parser.add_parser('plot', description=DESC_VISUALIZE, help=HELP_VISUALIZE,
        formatter_class=my_formatter_class)
    visualize_parser.set_defaults(func=visualize, formatter_class=my_formatter_class)

    visualize_parser.add_argument('-d', '--data',
        help="""Append a PATH to a tgen.analysis.json results file, and a LABEL
        that we should use for the graph legend for this dataset""",
        metavar=("PATH", "LABEL"),
        nargs=2,
        required=True,
        action=PathStringArgsAction, dest="datasets")

    visualize_parser.add_argument('-p', '--prefix',
        help="a STRING filename prefix for graphs we generate",
        metavar="STRING", type=str,
        action="store", dest="prefix",
        default=None)

    visualize_parser.add_argument('-c', '--counter-cdfs',
        help="""Plot the tgen heartbeat counters as cumulative distributions across
        nodes in addition to the timeseries that are plotted by default.""",
        action="store_true", dest="do_heartbeat_cdfs",
        default=False)

    visualize_parser.add_argument('-b', '--bytes',
        help="""Plot a set of time to first and last byte graphs for every file size.
        Warning: may result in a PDF with hundreds or thousands of pages!""",
        action="store_true", dest="do_bytes",
        default=False)

    visualize_parser.add_argument('-s', '--stats',
        help="""Plot a set of additional cumulative distributions showing
        mean, median, and max per-client statistics.""",
        action="store_true", dest="do_stats",
        default=False)

    visualize_parser.add_argument('-f', '--format',
        help="""A comma-separated LIST of color/line format strings to cycle to
                matplotlib's plot command (see matplotlib.pyplot.plot)""",
        metavar="LIST", type=str,
        action="store", dest="lineformats",
        default=util.LINEFORMATS)

    visualize_parser.add_argument('-e', '--expression',
        help="""Append a regex PATTERN to a custom list of strings used with
re.search to select which host names in the analysis results get plotted.
By default, results from all hosts in the analysis file will get plotted.""",
        metavar="PATTERN", type=str,
        action="append", dest="hostpatterns",
        default=[])

    # edit
    edit_parser = sub_parser.add_parser('edit', description=DESC_EDIT, help=HELP_EDIT,
        formatter_class=my_formatter_class)
    edit_parser.set_defaults(func=edit, formatter_class=my_formatter_class)

    edit_parser.add_argument(
        help="""The PATH to a TGen configuration file in graphml format""",
        metavar="PATH", type=type_str_path_in,
        action="store", dest="config_path")

    edit_parser.add_argument('-a', '--action',
        help="""Set the regex PATTERN that will be used to match actions (vertices)
        for which the given attribute and value will be set. Examples include:
        'start', 'stream', 'flow', 'pause', 'end'.""",
        metavar="PATTERN", type=str,
        action="store", dest="action_pattern",
        default=None)

    edit_parser.add_argument('-s', '--source',
        help="""Set the regex PATTERN that will be used to match the name of the edge
        source for which the given attribute and value will be set. An edge matches
        the pattern if the name of the action on the source of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_source_pattern",
        default=None)

    edit_parser.add_argument('-t', '--target',
        help="""Set the regex PATTERN that will be used to match the name of the edge
        target for which the given attribute and value will be set. An edge matches
        the pattern if the name of the action on the target of the edge matches.""",
        metavar="PATTERN", type=str,
        action="store", dest="edge_target_pattern",
        default=None)

    edit_parser.add_argument('-n', '--name',
        help="A STRING name for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="name",
        default=None)

    edit_parser.add_argument('-v', '--value',
        help="A STRING value for the attribute that we set on matched vertices/edges.",
        metavar="STRING", type=str,
        action="store", dest="value",
        default=None)

    # get args and call the command handler for the chosen mode
    args = main_parser.parse_args()
    if args.do_version:
        logging.info("TGenTools version {}".format(__version__))
        return

    # `func` is only set by a subparser; without a subcommand it is absent, so
    # report a normal usage error rather than raising AttributeError
    handler = getattr(args, "func", None)
    if handler is None:
        main_parser.error("no subcommand given; choose one of 'parse', 'plot', or 'edit'")
    return handler(args)

def analyze(args):
    """Handle the 'parse' subcommand: analyze TGen logs and save the results.

    `args.tgen_path` is either a single file or a directory. A directory is
    searched through `util.find_file_paths` using `args.patterns`, or the
    default pattern when no custom patterns were given. The files found are
    analyzed serially, or in parallel when more than one worker process is
    requested, and the analysis is saved under `args.prefix`.

    A warning is logged and the function returns early, without analyzing or
    saving anything, when the path is neither a file nor a directory or when
    no files are found.

    Args:
        args: parsed arguments providing `patterns`, `tgen_path`, `nprocesses`,
            `nickname`, `ip_address`, `do_complete`, `date_filter` and `prefix`.
            `args.nprocesses` is overwritten with the CPU count when it is 0.
    """
    # raw string so that '\.' reaches the regex engine as an escaped dot
    searchexp = args.patterns if args.patterns else [r"tgen.*\.log"]

    if os.path.isdir(args.tgen_path):
        # need to search
        paths = util.find_file_paths(args.tgen_path, searchexp)
    elif os.path.isfile(args.tgen_path):
        # just one file
        paths = [args.tgen_path]
    else:
        # NOTE(review): the command-line help says PATH may be '-' for STDIN,
        # but '-' ends up here and is rejected -- confirm whether the analysis
        # classes accept '-' before allowing it through.
        logging.warning("No valid tgen paths were given, nothing will be analyzed")
        return

    if len(paths) < 1:
        logging.warning("No valid tgen files found at path {}, nothing will be analyzed".format(args.tgen_path))
        return

    # imported only once there is something to analyze, so that invalid
    # invocations return without loading the analysis modules
    from tgentools.analysis import ParallelAnalysis, SerialAnalysis

    if args.nprocesses == 0:
        args.nprocesses = cpu_count()

    if args.nprocesses > 1:
        analysis = ParallelAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        # never start more workers than there are files to process
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter,
            num_subprocs=min(args.nprocesses, len(paths)))
    else:
        analysis = SerialAnalysis(nickname=args.nickname, ip_address=args.ip_address)
        analysis.analyze(paths, do_complete=args.do_complete, date_filter=args.date_filter)

    analysis.save(output_prefix=args.prefix)

def visualize(args):
    """Handle the 'plot' subcommand: load each dataset and plot them together.

    Args:
        args: parsed arguments providing `lineformats` (a comma-separated
            string), `hostpatterns`, `do_bytes`, `do_heartbeat_cdfs`,
            `do_stats`, `datasets` (a list of (path, label) pairs) and `prefix`.
    """
    from tgentools.visualization import TGenVisualization
    from tgentools.analysis import Analysis

    # line formats are reused from the start once the list is exhausted
    formats = cycle(args.lineformats.strip().split(","))

    plotter = TGenVisualization(args.hostpatterns, args.do_bytes, args.do_heartbeat_cdfs, args.do_stats)

    for path, label in args.datasets:
        # a format is consumed for every dataset, even if loading yields None
        lineformat = next(formats)

        loaded = Analysis.load(filename=path)
        if loaded is None:
            continue
        plotter.add_dataset(loaded, label, lineformat)

    plotter.plot_all(args.prefix)

def edit(args):
    """Handle the 'edit' subcommand: set an attribute on matching graph elements.

    Nothing is edited, and an informational message is logged, unless at
    least one of the action/source/target patterns is set and both an
    attribute name and value are given.

    Args:
        args: parsed arguments providing `config_path`, `action_pattern`,
            `edge_source_pattern`, `edge_target_pattern`, `name` and `value`.
    """
    selectors = (args.action_pattern, args.edge_source_pattern, args.edge_target_pattern)
    if not any(pattern is not None for pattern in selectors):
        logging.info("You did not set any regex patterns, so no elements were selected")
        return

    has_name_and_value = args.name is not None and args.value is not None
    if not has_name_and_value:
        logging.info("You did not set a name and value, so no attributes were changed")
        return

    from tgentools.edit import edit_config

    edit_config(
        args.config_path,
        args.action_pattern,
        args.edge_source_pattern,
        args.edge_target_pattern,
        args.name,
        args.value)

def type_nonnegative_integer(value):
    """Convert `value` to an int for argparse, rejecting negative numbers.

    Raises:
        argparse.ArgumentTypeError: if the converted integer is below zero.
        ValueError: from `int()` if `value` is not an integer literal.
    """
    number = int(value)
    if number >= 0:
        return number
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)

def type_supported_analysis(value):
    """Return `value` lower-cased if it names a supported analysis type.

    The accepted names, compared case-insensitively, are "all", "tgen" and
    "tor".

    Raises:
        argparse.ArgumentTypeError: for any other name.
    """
    kind = value.lower()
    if kind in ("all", "tgen", "tor"):
        return kind
    raise argparse.ArgumentTypeError("'%s' is an invalid Analysis type" % value)

def type_str_file_path_out(value):
    """Normalize an output file path given on the command line.

    "-" is returned unchanged. Any other value is expanded (`~`) and made
    absolute, and its parent directory is passed to `util.make_dir_path`
    before the absolute path is returned.
    """
    text = str(value)
    if text != "-":
        text = os.path.abspath(os.path.expanduser(text))
        util.make_dir_path(os.path.dirname(text))
    return text

def type_str_dir_path_out(value):
    """Normalize an output directory path given on the command line.

    The value is expanded (`~`) and made absolute, passed to
    `util.make_dir_path`, and then returned.
    """
    directory = os.path.abspath(os.path.expanduser(str(value)))
    util.make_dir_path(directory)
    return directory

def type_str_path_in(value):
    """Validate an input path given on the command line.

    "-" is returned unchanged. Any other value is expanded (`~`) and made
    absolute, and must exist on disk.

    Raises:
        argparse.ArgumentTypeError: if the resolved path does not exist.
    """
    text = str(value)
    if text == "-":
        return text
    resolved = os.path.abspath(os.path.expanduser(text))
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError("path '%s' does not exist" % text)

def type_str_ip_in(value):
    """Validate a dotted-quad IPv4 address given on the command line.

    The whole string must consist of four decimal octets separated by dots,
    each in the range 0-255. Trailing text after the fourth octet is an
    error; it is not silently dropped.

    Returns:
        The address string, unchanged.

    Raises:
        argparse.ArgumentTypeError: if the string is not a valid address.
    """
    s = str(value)
    # \Z anchors the match at the very end, so '1.2.3.4.5' or '1.2.3.4junk'
    # cannot pass as a truncated '1.2.3.4'
    match = re.match(r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\Z', s)
    if match is None or any(int(octet) > 255 for octet in match.groups()):
        raise argparse.ArgumentTypeError("IP address '%s' is not a valid address" % s)
    return s

def type_str_date_in(value):
    """Parse a YYYY-MM-DD string given on the command line into a date.

    Returns:
        A `datetime.date` for the given year, month and day.

    Raises:
        argparse.ArgumentTypeError: if the string does not have three
            integer fields separated by '-', if a field is out of range, or
            if the combination is not a real calendar date (e.g. 2015-02-30).
    """
    s = str(value)

    fields = None
    parts = s.split('-')
    if len(parts) == 3:
        try:
            fields = [int(part) for part in parts]
        except ValueError:
            # at least one field is not an integer
            fields = None
    if fields is None:
        raise argparse.ArgumentTypeError("date '%s' is not in the valid YYYY-MM-DD format" % s)

    y, m, d = fields
    if y < datetime.MINYEAR or y > datetime.MAXYEAR:
        # all three values must be supplied as one tuple to the format string
        raise argparse.ArgumentTypeError("the year portion of date '%s' must be in the range [%d, %d]" % (s, datetime.MINYEAR, datetime.MAXYEAR))
    if m < 1 or m > 12:
        raise argparse.ArgumentTypeError("the month portion of date '%s' must be in the range [1, 12]" % s)
    if d < 1 or d > 31:
        raise argparse.ArgumentTypeError("the day portion of date '%s' must be in the range [1, 31]" % s)

    try:
        return datetime.date(y, m, d)
    except ValueError as err:
        # the day does not exist in that month, e.g. February 30th
        raise argparse.ArgumentTypeError("date '%s' is not a valid calendar date: %s" % (s, err))

# a custom action for passing in experimental data directories when plotting
class PathStringArgsAction(argparse.Action):
    """Argparse action that collects (path, label) pairs into a list.

    Each use of the option supplies two values: a path, which is expanded,
    made absolute and required to exist, and a label. The pairs accumulate
    under the action's `dest` in the order given.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate the path in `values` and append (path, label) to the dest list.

        Raises:
            argparse.ArgumentError: if the resolved path does not exist.
        """
        data_path = os.path.abspath(os.path.expanduser(values[0]))
        label = values[1]

        if not os.path.exists(data_path):
            raise argparse.ArgumentError(self, "The supplied path does not exist: '{0}'".format(data_path))

        # on first use, replace whatever default is stored with a fresh list;
        # the marker attribute on the namespace records that this was done
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            namespace._didremovedefault = True

        collected = getattr(namespace, self.dest)
        collected.append((data_path, label))

# run the command-line interface when executed as a script, using the value
# returned by main() as the process exit status
if __name__ == '__main__':
    sys.exit(main())
