#!/usr/bin/env python3
"""
Construct a tree data structure for fast subtree queries.

The tree information is obtained in one of two ways:

- simplified mode for NCBI taxonomy:
  by using the NCBI taxonomy dump files and the --ntdump option

- generic mode:
  by using the specified Python module "idsmod" (see below)
  and passing the specified positional or keyword arguments
  (if --keyargs is used)

Usage:
  fastsubtrees-construct [options] <outfname> <idsmod> [<idsmod_data>...]
  fastsubtrees-construct [options] <outfname> --ntdump <ntdumpdir>

Common arguments:
  outfname     desired name for the output file

Arguments for NCBI taxonomy simplified mode:
  ntdumpdir    directory containing NCBI taxonomy dump files

Arguments for generic mode:
  idsmod       Python module defining a function element_parent_ids()
               which may take arguments (<idsmod_data>) and yield pairs
               of IDs for all tree nodes (element_id, parent_id).
               For the root node, the parent_id shall be equal to the
               element_id.
  idsmod_data  [optional] arguments to be passed to the element_parent_ids()
               function of the module specified as <idsmod>; to pass keyword
               arguments, use the syntax "key=value" and the option --keyargs

Options:
  --ntdump     use the NCBI taxonomy simplified mode (see above)
               to construct the tree from NCBI taxonomy dump files
  --keyargs    split the arguments specified in <idsmod_data> into
               keywords and values by splitting on the first instance of '=';
               arguments which do not contain '=' are passed as positional,
               before any keyword argument
  --quiet      disable log messages
  --debug      print debug information
  --help       show this help message and exit
  --version    show program's version number and exit
"""

import importlib
from docopt import docopt
from pathlib import Path
from fastsubtrees import Tree, logger, _scripts_support

def _load_ids_module(path):
  """
  Load the Python source file at `path` as a module and return it.

  The module is executed directly from its file location; its module name
  is the file name without the suffix.

  Raises ValueError if no import spec/loader can be determined for the file
  (e.g. unrecognized file suffix) or if the loaded module does not define
  a callable named element_parent_ids.
  """
  # Import the submodule explicitly: a bare "import importlib" does not
  # guarantee that "importlib.util" has been loaded as an attribute.
  import importlib.util

  spec = importlib.util.spec_from_file_location(Path(path).stem, path)
  if spec is None or spec.loader is None:
    # spec_from_file_location() returns None when it cannot pick a loader
    # for the file; fail with a clear message instead of an AttributeError
    raise ValueError("The specified file {} cannot be loaded as a "
                     "Python module".format(path))
  module = importlib.util.module_from_spec(spec)
  spec.loader.exec_module(module)
  if not callable(getattr(module, "element_parent_ids", None)):
    raise ValueError("The specified Python module {} does not define a "
                     "function element_parent_ids()".format(path))
  return module

def _split_idsmod_data(idsmod_data, use_keyargs):
  """
  Split the <idsmod_data> arguments into positional and keyword arguments.

  If `use_keyargs` is false, all arguments are positional. Otherwise,
  arguments containing '=' are split on its first instance into keyword
  and value; the remaining ones are positional.

  Returns the tuple (posargs, keyargs).
  """
  if not use_keyargs:
    return idsmod_data, {}
  posargs = [a for a in idsmod_data if "=" not in a]
  keyargs = dict(a.split("=", 1) for a in idsmod_data if "=" in a)
  return posargs, keyargs

def main(args):
  """
  Construct the tree and write it to the file args["<outfname>"].

  `args` is the dictionary of command line arguments, as parsed by docopt
  from the usage message of this script.

  If args["--ntdump"] is set, the tree is constructed from the file
  nodes.dmp in the directory args["<ntdumpdir>"]. Otherwise the Python
  file args["<idsmod>"] is loaded and the tree is constructed from the
  output of its element_parent_ids() function, called with the arguments
  given in args["<idsmod_data>"].

  Raises ValueError if the ids module cannot be loaded or does not define
  a callable element_parent_ids.
  """
  if args['--ntdump']:
    filename = Path(args['<ntdumpdir>']) / 'nodes.dmp'
    logger.info(f'Constructing tree from NCBI taxonomy dump file {filename}')
    # "\t|\t" is the field separator; 0 and 1 are presumably the indices of
    # the element ID and parent ID columns (see Tree.construct_from_csv)
    tree = Tree.construct_from_csv(str(filename), "\t|\t", 0, 1)
  else:
    logger.debug("Loading Python module '{}'".format(args['<idsmod>']))
    m = _load_ids_module(args["<idsmod>"])
    logger.success("Ids module loaded, found generator element_parent_ids()")
    logger.info("Constructing tree using IDs yielded by the generator...")
    posargs, keyargs = _split_idsmod_data(args["<idsmod_data>"],
                                          args["--keyargs"])
    if posargs:
      logger.debug(f"Positional arguments passed to the generator: {posargs}")
    if keyargs:
      logger.debug(f"Keyword arguments passed to the generator: {keyargs}")
    tree = Tree.construct(m.element_parent_ids(*posargs, **keyargs))
  logger.success("Tree constructed")
  tree.to_file(args["<outfname>"])

if __name__ == "__main__":
  # Script entry point: parse the command line against the usage message
  # in the module docstring, pass the parsed options to the verbosity
  # setup helper, then run the tree construction.
  cli_args = docopt(__doc__, version="0.1")
  _scripts_support.setup_verbosity(cli_args)
  main(cli_args)
