import argparse, os
import pandas as pd
import margo as amg


# Command-line interface of the script: two required positional paths and two
# optional settings (-m / -t).
parser = argparse.ArgumentParser(
    description="Generate astir marker",
)

# Positional arguments, in the order they must appear on the command line.
parser.add_argument(
    "expr_csv",
    help="Path to CSV expression matrix which is to be used for astir classification.",
)
parser.add_argument(
    "marker_yaml",
    help="Path to output marker yaml file.",
)

# Optional integer threshold; defaults to 1 when the flag is omitted.
parser.add_argument(
    "-m",
    "--min_marker_per_celltype",
    help="Minimum marker per cell type needed to have to be included.",
    default=1,
    type=int,
)

# Help text for -t/--tissue. The backslash continuations are inside the string
# literal, so the leading whitespace of every continuation line is part of the
# text itself; keep those lines exactly as they are.
_TISSUE_HELP = "Tissue specified to be searched within. If meant to include more than one tissue, \
                        seperate them with comma (e.g. '-t Blood,Breast'). If specified with 'all' ('-t all')\
                         or not specified (flag not included), the whole dataset is to be searched within.\
                        ---------------------------------------------------- Available tissues includes: \
                        'Abdominal adipose tissue', 'Adipose tissue', 'Adrenal gland', 'Adventitia', 'Airway epithelium',\
                         'Alveolus', 'Amniotic fluid', 'Amniotic membrane', 'Antecubital vein', 'Anterior cruciate ligament',\
                         'Artery', 'Ascites', 'Bladder', 'Blood', 'Blood vessel', 'Bone', 'Bone marrow', 'Brain', 'Breast',\
                         'Bronchoalveolar system', 'Brown adipose tissue', 'Cartilage', 'Chorionic villus', 'Colon', 'Colorectum',\
                         'Cornea', 'Corneal endothelium', 'Corneal epithelium', 'Corpus luteum', 'Decidua', 'Deciduous tooth',\
                         'Dental pulp', 'Dermis', 'Dorsolateral prefrontal cortex', 'Duodenum', 'Embryo', 'Embryoid body',\
                         'Embryonic brain', 'Embryonic prefrontal cortex', 'Embryonic stem cell', 'Endometrium',\
                         'Endometrium stroma', 'Epithelium', 'Esophagus', 'Eye', 'Fat pad', 'Fetal brain', 'Fetal gonad',\
                         'Fetal kidney', 'Fetal liver', 'Fetal pancreas', 'Foreskin', 'Gall bladder', 'Gastric corpus',\
                         'Gastric epithelium', 'Gastric gland', 'Gastrointestinal tract', 'Germ', 'Gingiva', 'Gonad',\
                         'Gut', 'Hair follicle', 'Heart', 'Hippocampus', 'Inferior colliculus', 'Intervertebral disc',\
                         'Intestinal crypt', 'Intestine', 'Jejunum', 'Kidney', 'Lacrimal gland', 'Large intestine',\
                         'Laryngeal squamous epithelium', 'Larynx', 'Ligament', 'Limbal epithelium', 'Liver', 'Lung',\
                         'Lymph', 'Lymph node', 'Lymphoid tissue', 'Mammary epithelium', 'Mammary gland', 'Meniscus',\
                         'Midbrain', 'Molar', 'Muscle', 'Myocardium', 'Myometrium', 'Nasal concha', 'Nasal epithelium',\
                         'Nerve', 'Nucleus pulposus', 'Optic nerve', 'Oral cavity', 'Oral mucosa', 'Osteoarthritic cartilage',\
                         'Ovarian cortex', 'Ovarian follicle', 'Ovary', 'Oviduct', 'Pancreas', 'Pancreatic acinar tissue',\
                         'Pancreatic islet', 'Parotid gland', 'Periodontal ligament', 'Periosteum', 'Peripheral blood',\
                         'Placenta', 'Plasma', 'Pleura', 'Pluripotent stem cell', 'Premolar', 'Primitive streak',\
                         'Prostate', 'Pyloric gland', 'Rectum', 'Renal glomerulus', 'Retina', 'Retinal pigment epithelium',\
                         'Salivary gland', 'Scalp', 'Sclerocorneal tissue', 'Seminal plasma', 'Serum', 'Sinonasal mucosa',\
                         'Skeletal muscle', 'Skin', 'Small intestinal crypt', 'Small intestine', 'Spinal cord', 'Spleen',\
                         'Splenic red pulp', 'Sputum', 'Stomach', 'Subcutaneous adipose tissue', 'Submandibular gland',\
                         'Sympathetic ganglion', 'Synovial fluid', 'Synovium', 'Tendon', 'Testis', 'Thymus', 'Thyroid',\
                         'Tongue', 'Tonsil', 'Tooth', 'Umbilical cord', 'Umbilical cord blood', 'Umbilical vein',\
                         'Undefined', 'Urine', 'Uterus', 'Vagina', 'Venous blood', 'Visceral adipose tissue', 'Vocal fold',\
                         'Whartons jelly', 'White adipose tissue'"

# Optional tissue filter; the empty-string default means the flag was not given.
parser.add_argument(
    "-t",
    "--tissue",
    help=_TISSUE_HELP,
    default="",
    type=str,
)
def list_marker_databases(db_dir):
    """Return the paths of the marker database CSV files found in *db_dir*.

    Only entries whose name ends with ``.csv`` are kept; a substring test
    would also pick up names such as ``markers.csv.bak``. ``os.listdir``
    returns entries in arbitrary order, so the names are sorted to keep the
    database order reproducible between runs.

    Args:
        db_dir: Directory to scan.

    Returns:
        A list of ``"<db_dir>/<name>"`` strings, sorted by file name.

    Raises:
        OSError: If *db_dir* cannot be listed.
    """
    return [
        f"{db_dir}/{name}"
        for name in sorted(os.listdir(db_dir))
        if name.endswith(".csv")
    ]


def normalize_tissue(tissue):
    """Translate the ``--tissue`` value into the filter passed downstream.

    Args:
        tissue: Raw value of the ``-t/--tissue`` option.

    Returns:
        ``None`` when the option was omitted (``""``) or given as ``"all"``;
        otherwise the value unchanged.
    """
    # NOTE(review): a comma-separated value such as "Blood,Breast" is forwarded
    # as one string; presumably construct_marker_mat_from_db splits it - confirm.
    if tissue in ("", "all"):
        return None
    return tissue


def main(argv=None):
    """Generate a marker YAML file from an expression CSV.

    Arguments are parsed here rather than at module level, so importing this
    file neither consumes ``sys.argv`` nor exits the interpreter.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)

    # The marker database is located relative to this file, not the
    # current working directory.
    cur_path = os.path.dirname(os.path.realpath(__file__))
    db_dir = f"{cur_path}/../margo/marker_database"
    cell_marker = list_marker_databases(db_dir)
    alias_marker = f"{db_dir}/alias.yml"

    # The first CSV column is used as the row index; the remaining column
    # names are the features looked up in the database.
    exp_df = pd.read_csv(args.expr_csv, index_col=0)
    features = exp_df.columns

    marker_mat = amg.construct_marker_mat_from_db(
        features=features,
        database=cell_marker,
        alias_marker=alias_marker,
        tissue=normalize_tissue(args.tissue),
        min_marker=args.min_marker_per_celltype,
    )
    amg.to_yaml(marker_mat, args.marker_yaml)

    print(f"Corresponding marker file generated as {args.marker_yaml}.")


if __name__ == "__main__":
    main()

# Example invocation:
#   margo ../BaselTMA_SP43_115_X4Y8.csv ./marker_output/test_marker.yml -t Breast,Blood -m 3