#!/usr/bin/env python
#
# Copyright (C) 2006--2010,2012,2014,2016,2017  Kipp Cannon
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#


"""
Print things from LIGO LW XML files.  Inspired by lwtprint from LIGOTools.
"""


from optparse import OptionParser


from lal.utils.cache import CacheEntry
from ligo.lw import __date__, __version__
from ligo.lw import array as ligolw_array
from ligo.lw import ligolw
from ligo.lw import lsctables
from ligo.lw import table as ligolw_table
from ligo.lw import types as ligolw_types
from ligo.lw import utils as ligolw_utils
from ligo.segments import utils as segmentsUtils


__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"


#
# =============================================================================
#
#                                 Command Line
#
# =============================================================================
#


def parse_command_line():
	"""
	Parse the command line and return the tuple (options, urls).

	Post-processing applied to the parsed options:

	- URLs read from every --input-cache file are appended to the
	  URLs given as positional arguments.
	- --table and --array names are normalized with
	  Table.TableName and Array.ArrayName respectively.
	- options.rows is replaced with the result of
	  segmentsUtils.from_range_strings() applied to the
	  comma-separated pieces of the --rows string.

	If no URLs were obtained from the command line or the caches the
	returned URL list is [None].

	Raises ValueError if the --rows string cannot be parsed.
	"""
	parser = OptionParser(
		version = "Name: %%prog\n%s" % __version__,
		usage = "%prog [options] [url ...]",
		description = "Prints the contents of table elements from one or more LIGO Light Weight XML files to stdout in delimited ASCII format.  In addition to regular files, the program can read from many common URLs such as http:// and ftp://.  Gzipped files will be automatically detected and decompressed.  If no filenames or URLs are given, then input is read from stdin."
	)
	parser.add_option("--ilwdchar-compat", action = "store_true", help = "Use obsolete ilwd:char based table definitions and ID reassignment algorithm (default = use new int_8s based table definitions and ID reassignment algorithm).")
	parser.add_option("-i", "--input-cache", metavar = "name", action = "append", default = [], help = "Get URLs from the LAL cache file.  Can be provided multiple times to name several caches to iterate over.")
	parser.add_option("-c", "--column", metavar = "name", action = "append", help = "Print only the contents of the given column.  Can be provided multiple times to print multiple columns.  The default is to print all columns from each table.")
	parser.add_option("-d", "--delimiter", metavar = "string", default = ",", help = "Delimit output with the given string.  The default is \",\".")
	parser.add_option("-r", "--rows", metavar = "rowspec", default = ":", help = "Print rows in the given range(s).  The format is first:last[,first:last,...].  Rows are numbered from 0.  A single first:last pair requests rows in the range [first, last).  If the first or last value of a pair is omited it means 0 or infinity respectively.  The default is \":\", or to print all rows.")
	parser.add_option("-t", "--table", metavar = "name", action = "append", default = [], help = "Print rows from this table.  Can be provided multiple times to print rows from multiple tables.  The default is to print the contents of all tables.")
	parser.add_option("-a", "--array", metavar = "name", action = "append", default = [], help = "Print the contents of this array.  Can be provided multiple times to print the elements from multiple arrays.  The default is to print the contents of all arrays.")
	parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
	parser.add_option("--constrain-lsc-tables", action = "store_true", help = "Impose additional constraints on official LSC tables.  Provides format validation and allows RAM requirements to be reduced.")
	options, urls = parser.parse_args()

	# add urls from cache files.  each cache is read inside a with
	# block so its file handle is closed as soon as it has been
	# consumed instead of being left for the garbage collector
	for cache in options.input_cache:
		with open(cache) as cachefile:
			urls.extend(CacheEntry(line).url for line in cachefile)

	# strip table names
	if options.table:
		options.table = list(map(ligolw_table.Table.TableName, options.table))

	# strip array names
	if options.array:
		options.array = list(map(ligolw_array.Array.ArrayName, options.array))

	# turn row requests into a segment list
	try:
		options.rows = segmentsUtils.from_range_strings(options.rows.split(","))
	except ValueError as e:
		raise ValueError("invalid rowspec: %s" % str(e))

	# success.  [None] stands in for "no inputs named"
	return options, (urls or [None])


#
# =============================================================================
#
#                         How to find things to print
#
# =============================================================================
#


#
# How to print a table
#


def print_table(table_elem, columns, rows):
	"""
	Print rows of table_elem to stdout, one line per row, with the
	column values joined by options.delimiter.

	columns is the sequence of column names to print;  if it is empty
	or None all of table_elem.columnnames are printed.  rows is a
	container tested with "in" against each row's zero-based index;
	only rows whose index it contains are printed.  Values that are
	None are printed as empty fields.
	"""
	names = columns if columns else table_elem.columnnames

	# one formatting function per column, selected by the column's
	# declared type
	formatters = tuple(ligolw_types.FormatFunc[table_elem.getColumnByName(name).Type] for name in names)

	for index, row in enumerate(table_elem):
		if index not in rows:
			continue
		fields = []
		for formatter, name in zip(formatters, names):
			value = getattr(row, name)
			if value is None:
				fields.append("")
			else:
				fields.append(formatter(value))
		print(options.delimiter.join(fields))


#
# How to print an array
#


def print_array(array_elem):
	"""
	Print the contents of array_elem.array to stdout.

	A one-dimensional array is printed one element per line.  A
	two-dimensional array is transposed first, so each output line
	holds the elements that share the same index along the array's
	second axis, joined by options.delimiter.

	Elements are formatted with str() rather than repr():  since NumPy
	2.0 the repr() of a NumPy scalar includes its type, e.g.
	"np.float64(1.0)", which is not wanted in delimited ASCII output.
	NOTE(review):  this assumes the array holds numeric values, for
	which str() yields the plain number -- confirm no other element
	types can occur.

	Raises ValueError if the array is neither one- nor two-dimensional.
	"""
	a = array_elem.array
	ndim = len(a.shape)
	if ndim == 1:
		# a one-dimensional array
		for value in a:
			print(str(value))
	elif ndim == 2:
		# a two-dimensional array
		for row in a.transpose():
			print(options.delimiter.join(map(str, row)))
	else:
		# a three or more dimensional array (an array whose shape
		# is empty also ends up here)
		raise ValueError("array has more than 2 dimensions")


#
# =============================================================================
#
#                                     Main
#
# =============================================================================
#


options, urls = parse_command_line()


#
# If --ilwdchar-compat was given, rebind the module-level library names to
# their glue.ligolw counterparts.  This has to happen before any of the
# names are used below.  print_table() looks up ligolw_types as a global
# when it is called, so it sees the rebinding as well.
#


if options.ilwdchar_compat:
	from glue.ligolw import array as ligolw_array
	from glue.ligolw import ligolw
	from glue.ligolw import lsctables
	from glue.ligolw import table as ligolw_table
	from glue.ligolw import types as ligolw_types
	from glue.ligolw import utils as ligolw_utils


#
# don't quote strings when printing them
#


for typ in ligolw_types.StringTypes:
	ligolw_types.FormatFunc[typ] = str


#
# Enable appropriate level of parsing.  If specific table or array names
# have been asked for, don't parse other elements so as to improve parsing
# speed and reduce memory requirements.  Because we do this, we can assume
# later that we should print all the tables and arrays that can be found in
# the document.
#


if not (options.table or options.array):
	# parse the entire document
	ContentHandler = ligolw.LIGOLWContentHandler
else:
	def element_filter(name, attrs):
		"""
		Return True for Table elements named by --table and Array
		elements named by --array.  Each kind of element is checked
		only against its own list of requested names so that, for
		example, a Table whose name coincides with a requested
		array name is not loaded by mistake.
		"""
		if name == ligolw.Table.tagName:
			return ligolw_table.Table.TableName(attrs["Name"]) in options.table
		if name == ligolw.Array.tagName:
			return ligolw_array.Array.ArrayName(attrs["Name"]) in options.array
		return False

	class ContentHandler(ligolw.PartialLIGOLWContentHandler):
		def __init__(self, xmldoc):
			super(ContentHandler, self).__init__(xmldoc, element_filter)

ligolw_array.use_in(ContentHandler)

if options.constrain_lsc_tables:
	lsctables.use_in(ContentHandler)
else:
	ligolw_table.use_in(ContentHandler)


#
# If specific columns have been requested, don't load any others.
#


if options.column is not None:
	ligolw_table.Table.loadcolumns = set(options.column)


#
# Loop over input URLs.  url is None when no inputs were named on the
# command line or in a cache (see the return value of parse_command_line());
# per the program's usage text input is then read from stdin.
#


for url in urls:
	xmldoc = ligolw_utils.load_url(url, verbose = options.verbose, contenthandler = ContentHandler)
	for elem in ligolw.WalkChildren(xmldoc):
		if elem.tagName == ligolw.Table.tagName:
			print_table(elem, options.column, options.rows)
		elif elem.tagName == ligolw.Array.tagName:
			print_array(elem)
	# done with this document, discard it before loading the next one
	xmldoc.unlink()
