#!/usr/bin/env python
#
# PyPPL command line tool:
# 1. List all work directories under <ppldir>
# 2. Compare the difference between processes from different directories
# 3. Clean work directories
#    - by date/time
#    - by status
#

import sys, re
from os import path
from glob import glob
from shutil import rmtree
from datetime import date, timedelta, datetime
from multiprocessing import Lock
from pyppl import PyPPL, Proc, commands
from pyppl.logger import COLORS
from pyppl.utils import parallel

# Python 2/3 compatibility shim: when `raw_input` exists, rebind `input`
# to it; when it does not (NameError), the builtin `input` is kept as is.
try:
	input = raw_input
except NameError:
	pass

# Command line interface declaration. Three sub-commands are declared
# below: `list`, `clean` and `compare`.
commands._desc = 'PyPPL command line tool'

# ---- list -------------------------------------------------------------
commands.list             = 'list work directories under <wdir>'
# NOTE(review): `_hbald` presumably controls whether help is shown when the
# command is given without any argument — confirm against pyppl's parameters.
commands.list._hbald      = False
commands.list.wdir        = './workdir'
commands.list.wdir.desc   = 'The <ppldir> containing process work directories.'
commands.list.proc.desc   = 'The process name to show or to compare.'
commands.list.ago.type    = 'int'
commands.list.ago.desc    = 'Work directories to be removed when modified N days ago.'
commands.list.before.desc = [
	'Before when the work directories to be removed.',
	'Supported format: m/d, m-d, m/d/y and y-m-d'
]
commands.list.all          = False
commands.list.all.desc     = 'List all processes if # processes > 100.'
commands.list.nocheck      = False
commands.list.nocheck.desc = 'Don`t check failure of processes.'
commands.list.error        = False
commands.list.error.desc   = 'Remove directories if any job failed or do error check when listing them.'

# ---- clean ------------------------------------------------------------
# Options shared with `list` are assigned from the `list` definitions
# above, so they must be declared after them.
commands.clean              = 'remove some work directories'
commands.clean.wdir         = commands.list.wdir
commands.clean.nthread      = 1
commands.clean.nthread.desc = 'Number of threads used to clean up the work directories.'
commands.clean.proc         = commands.list.proc
commands.clean.ago          = commands.list.ago
commands.clean.before       = commands.list.before
commands.clean.nocheck      = commands.list.nocheck
commands.clean.force        = False
commands.clean.force.desc   = 'Don`t ask when remove work directories.'
commands.clean.error        = commands.list.error

# ---- compare ----------------------------------------------------------
commands.compare            = 'compare two processes from different directories'
commands.compare.wdir       = commands.list.wdir
commands.compare.proc       = commands.list.proc
commands.compare.proc1.desc = 'The first full process name to compare.'
commands.compare.proc2.desc = 'The second full process name to compare.'

# Parse the command line: `command` is the chosen sub-command name and
# `params` holds its option values (used as `params.<option>` below).
command, params = commands.parse()

def streamout(msg, decors = None, end = '\n'):
	"""
	Write a message to stdout, wrapped in color codes.
	@params:
		`msg`   : The message; converted with `str()`.
		`decors`: A key of `COLORS` or a list of keys, emitted before the
			message. Any falsy value means no decoration.
		`end`   : The string written after the color reset. Default: newline.
	"""
	if not decors:
		decors = []
	elif not isinstance(decors, list):
		# a single decoration name was given
		decors = [decors]

	prefix = ''
	for decor in decors:
		prefix += COLORS[decor]
	sys.stdout.write(prefix + str(msg) + COLORS['end'] + end)

# params check
# Collect all validation errors first, then show them with the help page
# of the selected command.
error = []
if command == 'compare':
	if params.proc and (params.proc1 or params.proc2):
		error.append('-proc and -proc1/proc2 are mutually exclusive for command "compare"')
	elif not (params.proc1 and params.proc2) and (params.proc1 or params.proc2):
		error.append('Missing either -proc1 or -proc2 for command "compare"')
	elif not (params.proc1 and params.proc2) and not params.proc:
		error.append('Nothing to compare, expecting -proc or -proc1/-proc2.')

if command == 'clean' or command == 'list':
	if params.error:
		# -error needs the failure check, so it overrides -nocheck
		params.nocheck = False

	# before and ago cannot be used at the same time
	if params.ago and params.before:
		error.append('-ago and -before are mutually exclusive for command "{}"'.format(command))
	elif params.before:
		# m/d[/y] and [y-]m-d; the day part must accept 30 and 31 as well
		dateregx1 = r'^(0?[1-9]|1[012])/(0?[1-9]|[12][0-9]|3[01])(?:/(\d{4}))?$'
		dateregx2 = r'^(?:(\d{4})-)?(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01])$'
		matched = re.match(dateregx1, params.before)
		if matched:
			m, d, y = matched.groups()
		else:
			matched = re.match(dateregx2, params.before)
			if matched:
				y, m, d = matched.groups()

		if not matched:
			error.append('Malformat -before for command "{}"'.format(command))
		else:
			try:
				# the year is optional and defaults to the current year
				params.before = date(int(y or date.today().year), int(m), int(d))
			except ValueError:
				# syntactically fine but not a real calendar day, e.g. 2/30
				error.append('Malformat -before for command "{}"'.format(command))

if error:
	commands[command].help(error, True)

def proc_mtime(proc):
	"""
	Get the modification time of a process work directory.
	@params:
		`proc`: The path of the process work directory.
	@returns:
		The mtime of `<proc>/proc.settings` as a `datetime`; the epoch
		(`datetime.fromtimestamp(0)`) if that file does not exist.
	"""
	settings = path.join(proc, 'proc.settings')
	stamp    = path.getmtime(settings) if path.isfile(settings) else 0
	return datetime.fromtimestamp(stamp)

def proc_failed(proc):
	"""
	Tell whether any job of a process failed.
	Only entries of `<proc>` whose name consists of digits are treated as
	jobs. A job counts as failed when its `job.rc` file is missing or its
	stripped content is not '0'.
	@params:
		`proc`: The path of the process work directory.
	@returns:
		`True` if at least one job failed, otherwise `False`.
	"""
	def job_failed(jobdir):
		rcfile = path.join(jobdir, 'job.rc')
		if not path.isfile(rcfile):
			return True
		with open(rcfile) as handle:
			return handle.read().strip() != '0'

	jobdirs = (
		entry for entry in glob(path.join(proc, '*'))
		if path.basename(entry).isdigit()
	)
	# any() stops at the first failed job
	return any(job_failed(jobdir) for jobdir in jobdirs)

# list
def list_procs(procs, listall):
	"""
	Print the given process work directories, grouped by process name.
	@params:
		`procs`  : A list of `(workdir, mtime, failed)` tuples.
		`listall`: List everything even if there are more than 100 items.
	Exits with code 1 if `procs` is empty.
	"""
	if not procs:
		streamout('WARNING: No process found in workdir: {}.'.format(params.wdir), 'yellow')
		sys.exit(1)

	if not listall and len(procs) > 100:
		streamout('WARNING: Got {} processes, listing first 100. Use -all to list all.'.format(len(procs)), 'yellow')
		procs = procs[:100]

	# group by the 2nd and 3rd dot-separated parts of the directory name
	grouped = {}
	for workdir, modified, failed in procs:
		name = '.'.join(path.basename(workdir).split('.')[1:3])
		grouped.setdefault(name, []).append((workdir, modified, failed))

	streamout('WORKDIR: {} ({} query processes)'.format(params.wdir, len(procs)), 'yellow')
	for name in sorted(grouped):
		streamout('\nPROCESS: {}'.format(name))
		# oldest first
		members = sorted(grouped[name], key = lambda info: info[1])
		for workdir, modified, failed in members:
			if failed:
				mark, color = 'x', 'red'
			else:
				mark, color = '-', 'green'
			line = '{} {}: {}'.format(
				mark,
				path.basename(workdir),
				modified.strftime("%Y-%m-%d %H:%M:%S")
			)
			streamout(line, color)

def read_sections (sfile):
	"""
	Read a file and group its lines by `[section]` headers.
	@params:
		`sfile`: The path of the file.
	@returns:
		A dict mapping section name to the list of raw lines (line endings
		kept) that follow its header. Lines before the first header and
		sections without any line are not included.
	"""
	sections = {}
	current  = ''
	with open(sfile) as handle:
		for row in handle:
			if row.startswith('['):
				# header line: drop the surrounding brackets
				current = row.strip()[1:-1]
				continue
			if not current:
				continue
			sections.setdefault(current, []).append(row)
	return sections

def compare_procs(proc1, proc2):
	"""
	Print the per-section differences between the `proc.settings` files of
	two process work directories.
	Lines only in the first file are shown in green, lines only in the
	second one in red, and hunk headers in yellow. Sections without any
	difference are not printed.
	@params:
		`proc1`: The path of the first process work directory.
		`proc2`: The path of the second process work directory.
	"""
	from difflib import unified_diff
	setfile1 = path.join(proc1, 'proc.settings')
	setfile2 = path.join(proc2, 'proc.settings')
	streamout('1. {}'.format(setfile1), 'green')
	streamout('2. {}'.format(setfile2), 'red')
	streamout('-' * (max(len(setfile1), len(setfile2), 80) + 3))

	if not (path.isfile(setfile1) and path.isfile(setfile2)):
		streamout('ERROR: proc.settings of either processes not exists.', 'red')
		return

	seclines1 = read_sections(setfile1)
	seclines2 = read_sections(setfile2)

	# Walk the union of the sections: a section that exists in only one of
	# the files is compared against an empty one instead of raising a
	# KeyError or being skipped.
	for sec in sorted(set(seclines1) | set(seclines2)):
		lines1 = seclines1.get(sec, [])
		lines2 = seclines2.get(sec, [])
		diff   = unified_diff(lines1, lines2, n=0)
		ds     = []
		for i, d in enumerate(diff):
			if i==0 and d.strip() == '---':
				# file header of the diff, names are printed above already
				pass
			elif i==1 and d.strip() == '+++':
				pass
			elif d.startswith('-'):
				ds.append((d, 'green'))
			elif d.startswith('+'):
				ds.append((d, 'red'))
			elif d.startswith('@'):
				ds.append((d, 'yellow'))
			else:
				ds.append((d, ''))
		if ds:
			streamout('\n[{}]'.format(sec))
			for d, color in ds:
				# diff lines usually carry their own newline; the last line
				# of a file may not, so make sure output lines don't merge
				streamout(d, color, end = '' if d.endswith('\n') else '\n')

def remove_proc(proc, nthread = 1, msg = '', lock = None):
	"""
	Remove a process work directory.
	The sub-directories are removed first, using `nthread` threads, then
	the directory itself is removed.
	@params:
		`proc`   : The path of the process work directory.
		`nthread`: Number of threads used to remove the sub-directories.
		`msg`    : A message to print before removing. Default: no message.
		`lock`   : An optional lock held while printing `msg`, so messages
			from concurrent callers don't interleave.
	"""
	if msg:
		if lock:
			with lock:
				streamout(msg)
		else:
			# no lock supplied: still print the message instead of dropping it
			streamout(msg)
	parallel.run(rmtree, [(d, ) for d in glob(path.join(proc, '*')) if path.isdir(d)], nthread, 'thread')
	rmtree(proc)

def clean_procs(procs, nthread, force):
	"""
	Remove process work directories.
	With `force`, all of them are removed without asking, `nthread`
	directories at a time. Otherwise they are listed grouped by process
	name and the user is asked for each one.
	@params:
		`procs`  : A list of `(workdir, mtime, failed)` tuples.
		`nthread`: Number of threads used for removal.
		`force`  : Don't ask before removing.
	Exits with code 1 if `procs` is empty.
	"""
	if not procs:
		streamout('WARNING: No query processes found in workdir: {}.'.format(params.wdir), 'yellow')
		sys.exit(1)

	if force:
		lock     = Lock()
		lenprocs = len(procs)
		# progress is 1-based, so the last message reads [N/N]
		jobs = [
			(procinfo[0], 1, 'Removing [{}/{}]: {}'.format(i, lenprocs, procinfo[0]), lock)
			for i, procinfo in enumerate(procs, 1)
		]
		parallel.run(remove_proc, jobs, nthread, 'thread')
	else:
		# group by the 2nd and 3rd dot-separated parts of the directory name
		procgroups = {}
		for proc, mtime, fail in procs:
			pname = '.'.join(path.basename(proc).split('.')[1:3])
			procgroups.setdefault(pname, []).append((proc, mtime, fail))

		ans = ['', 'Y', 'y', 'N', 'n']
		streamout('WORKDIR: {} ({} query processes)'.format(params.wdir, len(procs)), 'yellow')
		for pname in sorted(procgroups.keys()):
			streamout('\nPROCESS: {}'.format(pname))
			for proc, mtime, fail in sorted(procgroups[pname], key = lambda p: p[1]):
				streamout('{} {}: {}'.format(
					'x' if fail else '-',
					path.basename(proc),
					mtime.strftime("%Y-%m-%d %H:%M:%S")
				), 'red' if fail else 'green')
				# keep asking until a valid answer is given; empty means yes
				r = input('  Remove it? [Y/n] ')
				while r not in ans:
					r = input('  Remove it? [Y/n] ')
				if r in ['', 'Y', 'y']:
					remove_proc(proc, nthread)
					# '\x1b[1A' moves the cursor up to overwrite the prompt line
					streamout('\x1b[1A  Removed!           ', 'green')

if __name__ == '__main__':
	# Entry point: dispatch on the sub-command parsed at module level.

	if command == 'compare':
		if params.proc:
			# compare the first two work directories of the same process
			pattern = path.join(params.wdir, 'PyPPL.{}.*'.format(params.proc))
			procs   = glob(pattern)
			if len(procs) < 2:
				streamout('ERROR: Not enough processes to compare: {}'.format(params.proc), 'red')
				sys.exit(1)
			procs = procs[:2]
			compare_procs(*procs)
		else:
			# compare two explicitly named processes
			pattern1 = path.join(params.wdir, 'PyPPL.{}*'.format(params.proc1))
			proc1 = glob(pattern1)
			if not proc1:
				streamout('ERROR: No such process: {}'.format(params.proc1), 'red')
				sys.exit(1)
			proc1 = proc1[0]

			pattern2 = path.join(params.wdir, 'PyPPL.{}*'.format(params.proc2))
			proc2 = glob(pattern2)
			if not proc2:
				streamout('ERROR: No such process: {}'.format(params.proc2), 'red')
				sys.exit(1)
			proc2 = proc2[0]
			compare_procs(proc1, proc2)

	else:
		# list / clean: collect the work directories matching the query
		if params.proc:
			pattern = path.join(params.wdir, 'PyPPL.{}.*'.format(params.proc))
		else:
			pattern = path.join(params.wdir, 'PyPPL.*')

		procs   = glob(pattern)
		before  = None
		if params.ago:
			before = date.today() - timedelta(days = params.ago)
			before = datetime(before.year, before.month, before.day)
		elif params.before:
			# -before was parsed into a `date`, while mtimes are `datetime`s
			# and the two cannot be compared: promote it to midnight of that day
			before = datetime(params.before.year, params.before.month, params.before.day)

		procs2 = []
		for proc in procs:
			mtime = proc_mtime(proc)
			# keep only directories modified strictly before the cutoff
			if before and mtime >= before:
				continue

			if params.nocheck:
				procs2.append((proc, mtime, False))
			else:
				fail = proc_failed(proc)
				# with -error only failed processes are of interest
				if params.error and not fail:
					continue
				procs2.append((proc, mtime, fail))

		if command == 'clean':
			clean_procs(procs2, params.nthread, params.force)
		else:
			list_procs(procs2, params.all)
		
