import argparse
import os
import shutil
import sys
import zipfile
from pathlib import Path
from urllib.request import Request, urlopen

from .run import classify_peptides

# Reference assembly identifiers accepted by the CLI and by the downloader.
SUPPORTED_REFERENCES = ("hg19", "hg38", "mm10", "mm39")

# Base URL of the hosted reference bundles; the downloader appends
# "darkprofiler_<reference>.zip" to it.
URL_PREFIX = "https://elledge.hms.harvard.edu/wp-content/uploads/2025/12/"


def _get_package_root() -> Path:
    """Return the absolute, symlink-resolved directory containing this module."""
    module_file = Path(__file__).resolve()
    return module_file.parents[0]


def _download_reference(reference: str, timeout: float = 60.0) -> None:
    """Download the bundle for *reference* and unpack it into the package.

    The archive ``darkprofiler_<reference>.zip`` is fetched from
    ``URL_PREFIX``, stored as ``genome/<reference>.zip`` under the package
    root, and then extracted into the package root. Progress messages are
    written to stderr.

    Args:
        reference: Assembly name; matched case-insensitively against
            ``SUPPORTED_REFERENCES``.
        timeout: Socket timeout in seconds passed to ``urlopen``. It bounds
            each blocking network operation, not the total download time.

    Raises:
        SystemExit: If *reference* is unsupported, the download fails, or
            the archive cannot be extracted.
    """
    reference = reference.lower()
    if reference not in SUPPORTED_REFERENCES:
        raise SystemExit(
            f"Unsupported reference '{reference}'. Must be one of: "
            f"{', '.join(SUPPORTED_REFERENCES)}"
        )

    pkg_root = _get_package_root()
    genome_dir = pkg_root / "genome"
    genome_dir.mkdir(exist_ok=True)

    url = f"{URL_PREFIX.rstrip('/')}/darkprofiler_{reference}.zip"
    zip_path = genome_dir / f"{reference}.zip"
    # Download next to the final location and rename only once the transfer
    # has completed, so an interrupted or failed download never leaves a
    # truncated archive behind or destroys one from an earlier run.
    part_path = genome_dir / f"{reference}.zip.part"

    print(f"[darkprofiler] Downloading {url} ...", file=sys.stderr)

    # NOTE(review): a wget-style User-Agent is sent instead of urllib's
    # default; presumably the host rejects the default one -- confirm.
    req = Request(url, headers={"User-Agent": "Wget/1.21.4 (linux-gnu)"})

    try:
        # Without a timeout a stalled connection would block forever.
        with urlopen(req, timeout=timeout) as resp:
            with open(part_path, "wb") as out_fh:
                shutil.copyfileobj(resp, out_fh)
        part_path.replace(zip_path)
    except Exception as e:
        raise SystemExit(f"Failed to download {url}: {e}") from e
    finally:
        # After a successful rename the partial file no longer exists; on
        # any failure (including Ctrl-C) this removes the leftover.
        if part_path.exists():
            part_path.unlink()

    print(f"[darkprofiler] Extracting to {pkg_root} ...", file=sys.stderr)
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
            zf.extractall(path=pkg_root)
    except Exception as e:
        raise SystemExit(f"Failed to extract {zip_path}: {e}") from e

    print(
        f"[darkprofiler] Finished. Reference '{reference}' is now available.",
        file=sys.stderr,
    )

def cmd_download(args: argparse.Namespace) -> None:
    """Handle the ``download`` subcommand by fetching ``args.reference``."""
    requested = args.reference
    _download_reference(requested)


def cmd_run(args: argparse.Namespace) -> None:
    """Handle the ``run`` subcommand.

    Forwards the parsed command-line values to ``classify_peptides`` as
    keyword arguments of the same names.
    """
    forwarded = (
        "reference",
        "peptide_fasta",
        "output_dir",
        "vcf_path",
        "database_path",
        "num_threads",
    )
    classify_peptides(**{name: getattr(args, name) for name in forwarded})


def build_parser() -> argparse.ArgumentParser:
    """Construct the ``darkprofiler`` command-line parser.

    Two subcommands are registered, ``download`` and ``run``. Each one stores
    its handler under the ``func`` default so the caller can dispatch on it.

    Returns:
        The fully configured top-level parser.
    """
    description = (
        "DarkProfiler: classify peptides into canonical, alternative, "
        "mutant, and dark proteome categories."
    )
    root = argparse.ArgumentParser(prog="darkprofiler", description=description)
    commands = root.add_subparsers(dest="command", required=True)

    # "download" subcommand: a single positional naming the assembly.
    download = commands.add_parser(
        "download",
        help="Download a reference genome bundle (hg19/hg38/mm10/mm39).",
    )
    download.add_argument(
        "reference",
        choices=SUPPORTED_REFERENCES,
        help="Reference assembly version to download.",
    )
    download.set_defaults(func=cmd_download)

    # "run" subcommand: three positionals followed by optional flags.
    run = commands.add_parser(
        "run",
        help="Run DarkProfiler classification pipeline.",
    )
    run.add_argument(
        "reference",
        choices=SUPPORTED_REFERENCES,
        help="Reference assembly version to use (must be downloaded first).",
    )
    run.add_argument("peptide_fasta", help="Path to peptide FASTA file.")
    run.add_argument("output_dir", help="Output directory.")

    database_help = (
        "Optional path to existing database directory containing "
        "canonicalProteome.fa, alternativeSplicing.fa, mutanome.fa, "
        "mutatedCanonicalTranscriptome.fa, mutatedAlternativeTranslatome.fa, "
        "mutatedAlternativeORFeome.fa."
    )
    # Optional flags, registered in the order they appear in --help.
    optional_flags = (
        (
            "--vcf-path",
            {
                "default": None,
                "help": "Optional path to VCF or VCF.GZ file with SNVs.",
            },
        ),
        (
            "--database-path",
            {"default": None, "help": database_help},
        ),
        (
            "--num-threads",
            {
                "type": int,
                "default": 1,
                "help": "Threads for amino acid misincorporation search.",
            },
        ),
    )
    for flag, options in optional_flags:
        run.add_argument(flag, **options)
    run.set_defaults(func=cmd_run)

    return root


def main(argv=None) -> None:
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
            fall back to the process arguments.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    handler(namespace)

