#!/usr/bin/env jython
# -*- python -*-

"""
enstag_describe.py -- describe enstags in a tab-delimited text file

ensdescribe.py COL... < inputfile

Each COL is a column in the tab-delimited input file that contains an
Ensembl accession ID.

Example:

$ echo egHsa2zqa | ensdescribe.py 1
egHsa2zqa	ENSG00000139618	BRCA2   BREAST CANCER TYPE 2 SUSCEPTIBILITY PROTEIN. [Source:SWISSPROT;Acc:P51587]
"""

# Kept below the usage text on purpose: a string literal only becomes the
# module docstring (__doc__) when it is the first statement in the file.
__version__ = "$Revision: 1.5 $"

import re
import sys
from xreadlines import xreadlines

import org

import ensembl
import enstag

def enumerate_list(seq):
    """
    Pair every item of seq with its zero-based position, for example:

    enumerate_list(["a", "b", "c"]) -> [(0, "a"), (1, "b"), (2, "c")]
    """
    return [(index, seq[index]) for index in xrange(len(seq))]

def enstag_describe(column_indexes):
    """
    enstag_describe([0]) -- describe the enstags found in the marked columns

    Reads tab-delimited lines from sys.stdin.  Every column whose zero-based
    index is in column_indexes is replaced by four tab-separated fields:
    the original column text, the Ensembl accession ID, the display name
    and the description.  All other columns are passed through unchanged,
    and each resulting line is written to sys.stdout.

    When the object returned by enstag.fetch() has no accessionID attribute,
    the ID is taken from enstag.decode() instead and the name and description
    fields are written as the string "None".
    """
    for line in xreadlines(sys.stdin):  # this is the same as python 2.2 "for line in sys.stdin:"
        # Remove only the line terminator.  A bare rstrip() also strips
        # trailing tabs, which silently drops empty columns at the end of a
        # row and changes the shape of the table.  Slicing is used instead of
        # rstrip("\r\n") because old interpreters' rstrip() takes no argument.
        if line[-1:] == "\n":
            line = line[:-1]
        if line[-1:] == "\r":
            line = line[:-1]

        cols = line.split("\t")
        for col_index, col in enumerate_list(cols):
            if col_index not in column_indexes:
                continue

            feature = enstag.fetch(col)

            try:
                ensid = feature.accessionID
            except AttributeError:
                # Nothing usable was fetched: fall back to the decoded ID
                # and leave the descriptive fields empty.
                ensid = enstag.decode(col)
                name = None
                description = None
            else:
                if isinstance(feature, ensembl.datamodel.Translation):
                    # A translation is described through its transcript
                    # and that transcript's gene.
                    name = feature.transcript.displayName
                    description = feature.transcript.gene.description
                elif isinstance(feature, ensembl.datamodel.Transcript):
                    # A transcript borrows the description of its gene.
                    name = feature.displayName
                    description = feature.gene.description
                else:
                    name = feature.displayName
                    description = feature.description

            # The marked column expands in place into four output fields.
            out_cols = map(str, [col, ensid, name, description])
            cols[col_index] = "\t".join(out_cols)

        sys.stdout.write("\t".join(cols) + "\n")

def main(args):
    """
    main(["1", "3"]) -- describe the enstags in columns 1 and 3 of stdin

    args are the command-line arguments without the script name; each one
    is a 1-based column number.  They are converted to zero-based indexes
    and handed to enstag_describe(), whose return value is returned.

    Raises ValueError if an argument is not an integer or is less than 1.
    """
    marked_column_indexes = []
    for arg in args:
        column_number = int(arg)  # ValueError for non-numeric arguments
        if column_number < 1:
            # 0 or a negative number would become an index that never matches
            # any column, so the request would be silently ignored.
            raise ValueError("column numbers start at 1: %r" % (arg,))
        marked_column_indexes.append(column_number - 1)

    return enstag_describe(marked_column_indexes)

if __name__ == "__main__":
    # Run as a script: pass main() the command-line arguments with the
    # script name dropped, and use its return value as the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

