# -*- coding: utf-8 -*-
import sys
import pandas as pd 
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from collections import defaultdict


def _assemble_cds(db, genome, transcript):
    """Return the spliced CDS of ``transcript`` as a ``Seq``.

    The CDS children of ``transcript`` are fetched in ascending start order
    and their sequences concatenated. Each segment is extracted without
    strand handling; for a transcript on the '-' strand the whole
    concatenation is reverse-complemented once at the end, rather than
    reverse-complementing every segment separately.
    """
    segments = [
        cds.sequence(genome, use_strand=False)
        for cds in db.children(transcript, featuretype='CDS', order_by='start')
    ]
    spliced = Seq(''.join(segments))
    if transcript.strand == '-':
        spliced = spliced.reverse_complement()
    return spliced


def get_cds(args, db, genome, transcript_id, output):
    """Extract spliced CDS sequences for mRNA features.

    Two modes, selected by ``transcript_id``:

    * Falsy ``transcript_id``: every 'mRNA' feature in ``db`` is processed and
      a comma-separated table with the columns TranscriptID, Chrom, Start,
      End, Strand, CDS is written to ``args.output``.
    * Otherwise: the first 'mRNA' feature (ordered by start) whose id contains
      ``transcript_id`` is written as a single FASTA record, to stdout when
      ``args.print`` is truthy and to ``args.output`` otherwise. If nothing
      matches, a warning is written to stderr and no output is produced.

    parameters:
     args: object providing the ``output`` and ``print`` attributes
     db: database generated by gffutils
     genome: genome fasta, handed to each CDS feature's ``sequence()`` call
     transcript_id: transcript id; matched as a substring of the feature id
     output: output file. NOTE: not read by this function -- the destination
             is always taken from ``args.output``; kept for caller
             compatibility.
    """
    if not transcript_id:
        # Collect rows first and build the frame once; growing a DataFrame
        # row by row re-allocates on every insert.
        rows = [
            # Store the sequence as a plain string so pandas does not have to
            # deal with a sequence-like Seq object inside a cell.
            [t.id, t.chrom, t.start, t.end, t.strand,
             str(_assemble_cds(db, genome, t))]
            for t in db.features_of_type('mRNA', order_by='start')
        ]
        cds_table = pd.DataFrame(
            rows,
            columns=['TranscriptID', 'Chrom', 'Start', 'End', 'Strand', 'CDS'],
        )
        cds_table.to_csv(args.output, sep=',', index=False)
        return

    for t in db.features_of_type('mRNA', order_by='start'):
        # Substring match: an id that merely contains transcript_id also
        # qualifies, and only the first hit is reported.
        if transcript_id not in t.id:
            continue
        seq = _assemble_cds(db, genome, t)
        cds_record = SeqRecord(
            seq,
            id=t.id,
            description='strand %s start %d end %d length=%d'
                        % (t.strand, t.start, t.end, len(seq)),
        )
        destination = sys.stdout if args.print else args.output
        SeqIO.write([cds_record], destination, "fasta")
        return

    # No mRNA feature matched: say so instead of finishing silently.
    sys.stderr.write("No mRNA feature found matching '%s'\n" % transcript_id)
