#!/usr/bin/env python

import sys
import os
import argparse
import logging
import requests
from urlparse import urlparse
from progressbar import FileTransferSpeed, Percentage, Bar, ETA, ProgressBar
import loompy
import StringIO
import pandas as pd
import numpy as np

class VerboseArgParser(argparse.ArgumentParser):
	def error(self, message):
		self.print_help()
		sys.stderr.write('\nerror: %s\n' % message)
		sys.exit(2)

def connect_loom(dataset_path, filename):
	logging.debug("Looking for " + filename)
	if os.path.exists(filename):
		return loompy.connect(filename)
	logging.debug("Looking for " + os.path.join(dataset_path, filename))
	if os.path.exists(os.path.join(dataset_path, filename)):
		return loompy.connect(os.path.join(dataset_path, filename))
	return None

def tile_command(dataset_path, filename):
	ds = connect_loom(dataset_path, filename)
	if ds == None:
		logging.error("File not found")
		sys.exit(1)
	logging.info("Precomputing heatmap tiles (stored inside the .loom file)")
	ds.prepare_heatmap()

def stats_command(dataset_path, filename):
	ds = connect_loom(dataset_path, filename)
	if ds == None:
		logging.error("File not found")
		sys.exit(1)
	logging.info("Computing statistics")
	ds.compute_stats()

def project_command(dataset_path, filename, perplexity):
	ds = connect_loom(dataset_path, filename)
	if ds == None:
		logging.error("File not found")
		sys.exit(1)
	logging.info("Projecting to 2D")
	ds.project_to_2d(axis=2, perplexity=perplexity)

def backspin_command(dataset_path, filename, n_genes, levels):
	ds = connect_loom(dataset_path, filename)
	if ds == None:
		logging.error("File not found")
		sys.exit(1)
	ds.feature_selection(n_genes)
	ds.backspin(numLevels=levels)
	logging.info("Permuting rows")
	ds.permute(np.argsort(ds.row_attrs['BackSPIN_level_%i_group' % levels]),axis=0)
	logging.info("Permuting columns")
	ds.permute(np.argsort(ds.col_attrs['BackSPIN_level_%i_group' % levels]),axis=1)

def list_command(dataset_path, server, username, password):
	if server != None:
		try:
			if server.startswith("http://"):
				url = server + "/loom"
			else:
				url = "http://" + server + "/loom"

			logging.debug(url)
			response = requests.get(url , stream=True, auth=(username, password))

			if not response.ok:
				logging.error("Server error: " + str(response.status_code))
				sys.exit(1)
			ds_list = response.json()
		except requests.ConnectionError:
			logging.error("Connection with the server could not be established")
			sys.exit(1)
		except requests.Timeout:
			logging.error("Connection timed out")
			sys.exit(1)
		except requests.TooManyRedirects:
			logging.error("Too many redirects")
			sys.exit(1)
	else:
		cache = loompy.LoomCache(dataset_path)
		ds_list = cache.list_datasets(username, password)

	datasets = {}
	for ds in ds_list:
		if not datasets.has_key(ds["project"]):
			datasets[ds["project"]] = [ds["filename"]]
		else:
			datasets[ds["project"]].append(ds["filename"])
	for p in datasets.keys():
		print p + ":"
		for f in datasets[p]:
			print "   " + f

def clone_command(dataset_path, project, url, username, password):
	url_parts = urlparse(url)
	if project == None and len(url_parts.path.split("/")) < 3:
		logging.error("Project name was not given and URL did not include a project name")
		sys.exit(1)
	if project == None:
		temp = url_parts.path.split("/")
		if len(temp) != 4 or temp[1] != "clone":
			logging.error("Could not infer project name from URL (try with --project flag)")
			sys.exit(1)
		(_, _, project, fname) = temp
		project = project + "@" + url_parts.netloc.split(":")[0]
	else:
		fname = url_parts.path.split("/")[-1]
	if not fname.endswith(".loom"):
		logging.error("Not a valid .loom filename: " + fname)
		sys.exit(1)
	projdir = os.path.join(dataset_path, project)
	if not os.path.exists(projdir):
		os.mkdir(projdir)
	fpath = os.path.join(projdir, fname)

	logging.debug("Cloning from " + url)
	logging.debug("Cloning to " + fpath)
	try:
		response = requests.get(url, stream=True, auth=(username, password))

		if not response.ok:
			if response.status_code == 404:
				logging.warn("File not found")
				sys.exit(1)
			else:
				logging.error("Server error: " + str(response.status_code))
				sys.exit(1)
		total_bytes = int(response.headers.get('content-length'))

		widgets = [fname, ": ", Percentage(), ' ', Bar(), ' ', ETA(), ' ', FileTransferSpeed()]
		pbar = ProgressBar(widgets=widgets, maxval=total_bytes).start()
		i = 1024
		with open(fpath, 'wb') as f:
			for block in response.iter_content(1024):
				pbar.update(min(i, total_bytes))
				i += 1024
				f.write(block)
		pbar.finish()

	except requests.ConnectionError:
		logging.error("Connection with the server could not be established")
		sys.exit(1)
	except requests.Timeout:
		logging.error("Connection timed out")
		sys.exit(1)
	except requests.TooManyRedirects:
		logging.error("Too many redirects")
		sys.exit(1)

def fromcef_command(dataset_path, infile, outfile, project):
	if project != None:
		outfile = os.path.join(dataset_path, project, outfile)
	logging.info("Converting %s to %s" % (infile, outfile))
	loompy.create_from_cef(infile, outfile)

def fromcellranger_command(dataset_path, infolder, outfile, project):
	if project != None:
		outfile = os.path.join(dataset_path, project, outfile)
	logging.info("Converting %s to %s" % (infolder, outfile))
	loompy.create_from_cellranger(infile, outfile)

def csv_to_dict(s):
	stringFile = StringIO.StringIO(s)
	data = pd.DataFrame.from_csv(stringFile, sep=",", parse_dates=False, index_col=None)
	dataDict = data.to_dict(orient="list")
	return {key: np.array(dataDict[key]) for key in dataDict}

def fromsql_command(dataset_path, outfile, project, row_attrs, col_attrs, sql, txid):
	if project != None:
		outfile = os.path.join(dataset_path, project, outfile)
	logging.info("Creating " + outfile + " from SQL")

	with open(row_csv, 'r') as rf:
		row_attrs = csv_to_dict(rf.read())

	with open(col_csv, 'r') as cf:
		col_attrs = csv_to_dict(cf.read())

	pipeline = loompy.LoomPipeline()
	logging("Uploading row and col attrs to SQL")
	pipeline.upload(project, filename, transcriptome, col_attrs, row_attrs)
	logging("Creating loom file from SQL")
	pipeline.create_loom(dataset_path, project, filename, transcriptome)
	logging("Done")

class Empty(object):
	pass

if __name__ == '__main__':
	def_dir = os.environ.get('LOOM_PATH')
	if def_dir == None:
		def_dir = os.path.join(os.path.expanduser("~"),"loom-datasets")

	# Handle the special case of no arguments, and create a fake args object with default settings
	if len(sys.argv) == 1:
		args = Empty()
		setattr(args, "debug", False)
		setattr(args, "dataset_path", def_dir)
		setattr(args, "port", 8003)
		setattr(args, "command", "server")
		setattr(args, "show_browser", True)
	else:
		parser = VerboseArgParser(description='Loom command-line tool.')
		parser.add_argument('--debug', action="store_true")
		parser.add_argument('--dataset-path', help="Path to datasets directory (default: %s)" % def_dir , default=def_dir)
		subparsers = parser.add_subparsers(title="subcommands", dest="command")

		# loom version
		version_parser = subparsers.add_parser('version', help="Print version")

		# loom server
		server_parser = subparsers.add_parser('server', help="Launch loom server (default command)")
		server_parser.add_argument('--show-browser', help="Automatically launch browser", action="store_true")
		server_parser.add_argument('-p','--port', help="Port", type=int, default=80)

		# loom list
		list_parser = subparsers.add_parser('list', help="List datasets")
		list_parser.add_argument('--server', help="Remote server hostname")
		list_parser.add_argument('-u','--username', help="Username")
		list_parser.add_argument('-p','--password', help="Password")

		# loom put
		put_parser = subparsers.add_parser('put', help="Submit dataset to remote server")
		put_parser.add_argument("file", help="Loom file to upload")
		put_parser.add_argument('--project', help="Project name", required=True)
		put_parser.add_argument('--server', help="Remote server hostname", required=True)
		put_parser.add_argument('-u','--username', help="Username")
		put_parser.add_argument('-p','--password', help="Password")

		# loom clone
		clone_parser = subparsers.add_parser('clone', help="Clone a remote dataset")
		clone_parser.add_argument("url", help="URL of the loom file to clone")
		clone_parser.add_argument('--project', help="Project name")
		clone_parser.add_argument('-u','--username', help="Username")	
		clone_parser.add_argument('-p','--password', help="Password")	
		
		# loom tsne
		tsne_parser = subparsers.add_parser('tsne', help="Compute t-SNE projection to 2D")
		tsne_parser.add_argument("file", help="Loom input file")
		tsne_parser.add_argument('--perplexity', help="Perplexity", type=int, default=20)

		# loom tile
		tile_parser = subparsers.add_parser('tile', help="Precompute heatmap tiles")
		tile_parser.add_argument("file", help="Loom input file")

		# loom stats
		stats_parser = subparsers.add_parser('stats', help="Compute standard aggregate statistics")
		stats_parser.add_argument("file", help="Loom input file")

		# loom backspin
		backspin_parser = subparsers.add_parser('backspin', help="Perform clustering using BackSPIN")
		backspin_parser.add_argument("file", help="Loom input file")
		backspin_parser.add_argument('-n','--n-genes', help="Number of genes to use for clustering", type=int, default=500)
		backspin_parser.add_argument('-l','--levels', help="Number of levels", type=int, default=2)

		# loom from-cef
		cef_parser = subparsers.add_parser('from-cef', help="Create loom file from data in CEF format")
		cef_parser.add_argument('-o','--outfile', help="Name of output file", required=True)
		cef_parser.add_argument('-i','--infile', help="Name of input file in CEF format", required=True)
		cef_parser.add_argument('--project', help="Project name")

		# loom from-cellranger
		cellranger_parser = subparsers.add_parser('from-cellranger', help="Create loom file from data in cellranger format")
		cellranger_parser.add_argument('-o','--outfile', help="Name of output file", required=True)
		cellranger_parser.add_argument('-i','--infolder', help="Folder containing the cellranger files", required=True)
		cellranger_parser.add_argument('--project', help="Project name")

		# loom from-sql
		sql_parser = subparsers.add_parser('from-sql')
		sql_parser.add_argument('-o','--outfile', help="Name of output file", required=True)
		sql_parser.add_argument('-c','--col-attrs', help="Column (cell) attributes CSV file", required=True)
		sql_parser.add_argument('-r','--row-attrs', help="Row (gene) attributes CSV file")
		sql_parser.add_argument('-s','--sql', help="SQL server hostname", required=True)
		sql_parser.add_argument('-t','--transcriptome', help="Transcriptome", required=True)
		sql_parser.add_argument('--project', help="Project name")

		args = parser.parse_args()

	if args.debug:
		logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
	else:
		logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

	if not os.path.exists(args.dataset_path):
		logging.info("Creating dataset directory: " + args.dataset_path)
		os.mkdir(args.dataset_path)

	if args.command == "version":
		print "loom v" + str(loompy.__version__)
		sys.exit(0)
	if args.command == "list":
		list_command(args.dataset_path, args.server, args.username, args.password)
	elif args.command == "clone":
		clone_command(args.dataset_path, args.project, args.url, args.username, args.password)
	elif args.command == "server":
		loompy.start_server(args.dataset_path, args.show_browser, args.port, args.debug)
	elif args.command == "stats":
		stats_command(args.dataset_path, args.file)
	elif args.command == "tsne":
		tsne_command(args.dataset_path, args.file, args.perplexity)
	elif args.command == "tile":
		tile_command(args.dataset_path, args.file)
	elif args.command == "backspin":
		backspin_command(args.dataset_path, args.file, args.n_genes, args.levels)
	elif args.command == "from_cef":
		fromcef_command(args.dataset_path, args.infile, args.outfile, args.project)
	elif args.command == "from_cellranger":
		fromcellranger_command(args.dataset_path, args.infolder, args.outfile, args.project)
	elif args.command == "from_sql":
		fromsql_command(args.dataset_path, args.outfile, args.project, args.row_attrs, args.col_attrs, args.sql, args.transcriptome)
