#!/usr/bin/python

# Module metadata; __version__ is what main() reports for the --version option.
__author__		= "Sander Granneman"
__copyright__	= "Copyright 2019"
__version__		= "0.0.4"
__credits__		= ["Sander Granneman"]
__maintainer__	= "Sander Granneman"
__email__		= "sgrannem@ed.ac.uk"
__status__		= "beta"

##################################################################################
#
#	pybed2GTF.py
#
#
#	Copyright (c) Sander Granneman 2019
#
#	Permission is hereby granted, free of charge, to any person obtaining a copy
#	of this software and associated documentation files (the "Software"), to deal
#	in the Software without restriction, including without limitation the rights
#	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#	copies of the Software, and to permit persons to whom the Software is
#	furnished to do so, subject to the following conditions:
#
#	The above copyright notice and this permission notice shall be included in
#	all copies or substantial portions of the Software.
#
#	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#	THE SOFTWARE.
#
##################################################################################

import sys
import time
from collections import defaultdict
from optparse import *

from pyCRAC.Classes.NGSFormatReaders import NGSFileReader
from pyCRAC.Classes.NGSFormatWriters import NGSFileWriter
from pyCRAC.Methods import numpy_overlap
from pyCRAC.Parsers import GTF2

def processBed(gtf,datafile,outfile):
	""" Reads a bed file and writes it out as GTF, annotating each interval with the overlapping features.

	For every bed interval the genes on the same chromosome that overlap it are
	looked up in the annotation and only the genes on the interval's strand are
	kept. Their names are joined with commas and written as both gene_name and
	gene_id. Intervals without any same-strand overlap are labelled "no_matches".

	Arguments:
		gtf      -- annotation object; it has to provide 'chromosomes',
		            'chromosomeGeneCoordIterator(chromosome,numpy=True)' and 'strand(gene)'.
		datafile -- path to the bed file. Standard input is read when this is empty or None.
		outfile  -- writable file object; receives the header lines and is handed to NGSFileWriter.
	"""
	outfile.write("##gff-version 2\n# generated by pybed2GTF.py, %s\n# %s\n" % (time.ctime(),' '.join(sys.argv)))

	# gene coordinate tables per chromosome, built once so that every interval only needs a lookup
	genedict = dict()
	for chromosome in gtf.chromosomes:
		genedict[chromosome] = gtf.chromosomeGeneCoordIterator(chromosome,numpy=True)

	opened_here = bool(datafile)
	if opened_here:
		infile = open(datafile,"r")
	else:
		infile = sys.stdin

	try:
		# The reader has to consume the bed input, not the annotation object.
		# NOTE(review): assumes NGSFileReader accepts an open file object - confirm against pyCRAC.
		bed_file_reader = NGSFileReader(infile)
		# NOTE(review): assumes NGSFileWriter accepts an open file object - confirm against pyCRAC.
		gtf_file_writer = NGSFileWriter(outfile)

		while bed_file_reader.readBedLine():
			chromosome = bed_file_reader.chromosome
			start = bed_file_reader.start
			end = bed_file_reader.end
			strand = bed_file_reader.strand
			score = bed_file_reader.score
			name = bed_file_reader.name

			same_strand = list()
			# chromosomes that are absent from the annotation cannot have overlapping genes
			if chromosome in genedict:
				genes = numpy_overlap(genedict[chromosome],start,end,overlap=1)
				if genes:
					same_strand = [i for i in genes if gtf.strand(i) == strand]

			# overlaps that are all on the opposite strand count as "no_matches" too
			if same_strand:
				gene = ",".join(same_strand)
			else:
				gene = "no_matches"

			# NOTE(review): bed files are 0-based, whereas GTF files are 1-based; start and end are
			# passed on unchanged here - confirm that writeGTF takes care of the conversion.
			gtf_file_writer.writeGTF(chromosome,
									 "interval",
									 name,
									 start,
									 end,
									 score=score,
									 strand=strand,
									 frame=".",
									 gene_name=gene,
									 transcript_name=None,
									 gene_id=gene,
									 transcript_id=None,
									 exon_number=None,
									 comments=None)
	finally:
		# only close the handle that was opened here; sys.stdin is left alone
		if opened_here:
			infile.close()

def main():
	""" Command line entry point: parses the options, loads the GTF annotation and converts the bed file.

	Options:
		--bed         -- bed file to convert; passed to processBed, which reads standard input when it is not given.
		--gtf         -- GTF annotation file; passed to GTF2.Parse_GTF().read_GTF().
		-o, --outfile -- file to write the GTF output to; standard output is used when it is not given.
	"""
	parser = OptionParser(usage="usage: %prog [options] --bed=myfile.bed --gtf=yeast.gtf -o myfile.gtf", version="%s" % __version__)
	files = OptionGroup(parser, "File input options")
	files.add_option("--bed",dest="bed_file",metavar="Yourfavoritebed.bed",help="type the path to the bed file that you want to convert. Default is standard input",default=None)
	files.add_option("--gtf",dest="gtf_file",metavar="annotations.gtf",help="type the path to the gtf file that you want to use as annotation file. Default is yeast",default=None)
	files.add_option("-o","--outfile",dest="outfile",help="type the name and path of the file you want to write the output to. Default is standard output",default=None)
	parser.add_option_group(files)
	(options, args) = parser.parse_args()

	outfile = sys.stdout
	if options.outfile:
		outfile = open(options.outfile,"w")

	try:
		gtf = GTF2.Parse_GTF()
		# NOTE(review): --gtf defaults to None although its help text promises a yeast default;
		# confirm how read_GTF behaves when no annotation file is given.
		gtf.read_GTF(options.gtf_file)

		processBed(gtf,options.bed_file,outfile)
	finally:
		# close the output only when it was opened here, so sys.stdout stays usable
		if outfile is not sys.stdout:
			outfile.close()


# Run the converter only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()
