from __future__ import annotations

import argparse
import logging
from pathlib import Path
from typing import Optional, Sequence

from .config import ConfigLoadError, load_configs
from .document_discovery import iter_documents
from .file_dispatcher import FileDispatcher
from .llm_client import LLMClient
from .pdf_parser import PDFParser
from .pipeline import Pipeline


def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the classifier pipeline.

    Options:
        --config: path to the configuration file (default ``config.json``).
        --input: input directory (default ``input_docs``).
        --output: output directory (default ``classified``).
        --logs: logs directory (default ``logs``).
        --log-level: one of DEBUG, INFO, WARNING, ERROR (default ``INFO``).

    Returns:
        A configured ``argparse.ArgumentParser``; all path options are parsed
        into ``pathlib.Path`` objects.
    """
    parser = argparse.ArgumentParser(description="Sustainability report classifier pipeline")
    parser.add_argument("--config", type=Path, default=Path("config.json"), help="Path to config.json")
    parser.add_argument("--input", type=Path, default=Path("input_docs"), help="Input directory")
    parser.add_argument("--output", type=Path, default=Path("classified"), help="Output directory")
    # The default is a Path (not a bare string) so that it matches the other
    # path options and parser.get_default("logs") has the same type as a
    # parsed value.
    parser.add_argument("--logs", type=Path, default=Path("logs"), help="Logs directory")
    parser.add_argument(
        "--log-level",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging verbosity",
    )
    return parser


def configure_logging(level: str) -> None:
    """Set up root logging via ``logging.basicConfig``.

    Args:
        level: Name of a logging level; it is upper-cased and looked up as an
            attribute of the ``logging`` module. Names that are not attributes
            of ``logging`` fall back to ``logging.INFO``.
    """
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    record_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=resolved_level, format=record_format)


def main(args: Optional[Sequence[str]] = None) -> int:
    """Run the classifier pipeline from the command line.

    Parses the arguments, configures logging, loads the configuration, wires
    up the parser / LLM client / dispatcher / pipeline objects, runs the
    pipeline over the documents found under the input path, and logs a
    summary of the resulting counters.

    Args:
        args: Argument strings to parse. Passed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        ``0`` after the pipeline run completes, ``2`` if ``load_configs``
        raises ``ConfigLoadError``, ``3`` if constructing ``PDFParser`` raises
        ``RuntimeError``.
    """
    options = build_arg_parser().parse_args(args=args)
    configure_logging(options.log_level)

    try:
        app_config, api_configs = load_configs(options.config)
    except ConfigLoadError as exc:
        logging.error("%s", exc)
        return 2

    # NOTE(review): build_arg_parser gives --logs a non-empty default, so this
    # condition appears to be always true and any logs_path coming from the
    # config file is always overridden -- confirm that this is intended.
    if options.logs:
        app_config.logs_path = options.logs

    # Negative page counts from the config are clamped to zero.
    clamped_start = max(0, app_config.start_pages)
    clamped_end = max(0, app_config.end_pages)

    try:
        pdf_parser = PDFParser(start_pages=clamped_start, end_pages=clamped_end)
    except RuntimeError as exc:
        logging.error("PDF parser unavailable: %s", exc)
        return 3

    llm_client = LLMClient(configs=api_configs, timeout_seconds=app_config.request_timeout)
    dispatcher = FileDispatcher(
        options.output,
        app_config.logs_path,
        app_config.write_log,
        app_config.llm_certainty_threshold,
    )
    pipeline = Pipeline(
        parser=pdf_parser,
        llm_client=llm_client,
        dispatcher=dispatcher,
        output_root=options.output,
        # Enabled as soon as at least one API config has a truthy `activity`.
        activity_enabled=any(cfg.activity for cfg in api_configs),
        min_total_pages=app_config.min_total_pages,
        min_keywords_hit=app_config.min_keywords_hit,
    )

    stats = pipeline.run(iter_documents(options.input))
    logging.info(
        "Completed. processed=%s parsed=%s candidates=%s classified=%s moved=%s",
        stats.processed,
        stats.parsed,
        stats.candidates,
        stats.classified,
        stats.moved,
    )
    return 0
