from __future__ import annotations

import hashlib
import json
import logging
import re
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Sequence

from .heuristics import HeuristicResult
from .llm_client import LLMResponse
from .pdf_parser import ParsedDocument




def safe_segment(value: Optional[str], fallback: str) -> str:
    """Turn *value* into a string usable as a single path segment.

    Every run of characters outside ``A-Z``, ``a-z`` and ``0-9`` is collapsed
    into one underscore, and underscores at either end are dropped.

    Args:
        value: Raw text to sanitise; may be ``None`` or empty.
        fallback: Segment to use when *value* is falsy or nothing survives
            the clean-up.

    Returns:
        The sanitised segment, or *fallback*.
    """
    if value:
        collapsed = re.sub(r"[^A-Za-z0-9]+", "_", value.strip()).strip("_")
        if collapsed:
            return collapsed
    return fallback


class FileDispatcher:
    """Move parsed documents into a folder tree chosen from their classification.

    The target folder under ``output_root`` is selected from the LLM verdict:

    * ``unknown/`` -- no LLM response is available;
    * ``uncertain/`` -- the certainty is below ``llm_certainty_threshold``;
    * ``NFD/<company>/<year>/`` -- flagged as a sustainability report;
    * ``trash/rule-base/`` -- rejected by the ``"rule-based"`` configuration;
    * ``trash/llm/`` -- rejected by any other configuration.

    When ``write_log`` is enabled, one JSON record per dispatched document is
    written under ``logs_root``.
    """

    def __init__(self, output_root: Path, logs_root: Path, write_log: bool, llm_certainty_threshold: float) -> None:
        """Store the configuration and create the log folder when logging is on.

        Args:
            output_root: Root folder under which documents are sorted.
            logs_root: Folder that receives the per-document JSON logs.
            write_log: Whether a JSON log is written for every dispatch.
            llm_certainty_threshold: Responses whose certainty is below this
                value are routed to ``uncertain/``.
        """
        # Both roots are coerced so that plain string paths work with ``/``.
        self.output_root = Path(output_root)
        self.logs_root = Path(logs_root)
        self.write_log = write_log
        self.llm_certainty_threshold = llm_certainty_threshold
        if self.write_log:
            self.logs_root.mkdir(parents=True, exist_ok=True)

    def _log_path_for(self, source_path: Path) -> Path:
        """Return the JSON log path used for *source_path*.

        The file name combines the sanitised stem with the first 8 hex digits
        of the SHA-1 of the full source path, so that files sharing a name but
        located at different paths do not share a log file.
        """
        safe_stem = safe_segment(source_path.stem, "document")
        path_hash = hashlib.sha1(str(source_path).encode("utf-8")).hexdigest()[:8]
        filename = f"{safe_stem}_{path_hash}.json"
        return self.logs_root / filename

    def _write_log(
        self,
        parsed: ParsedDocument,
        heuristics: HeuristicResult,
        llm_response: Optional[LLMResponse],
        chunks: Sequence[str],
        activity: bool,
        destination: Optional[Path],
    ) -> None:
        """Write the JSON record describing one dispatch decision.

        Args:
            parsed: The parsed document; its path and page excerpts are logged.
            heuristics: Heuristic findings copied into the record.
            llm_response: LLM verdict, or ``None`` when no call was made
                (``"llm_call"`` is then ``null`` in the record).
            chunks: Text chunks stored under ``extracted_text.chunks``.
            activity: ``True`` when files are really moved, ``False`` for a
                dry run.
            destination: Planned destination path, if any.
        """
        # Aware UTC timestamp (``datetime.utcnow`` is deprecated); the offset
        # is rewritten so the output keeps its trailing "Z".
        timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
        log_data = {
            "timestamp": timestamp,
            "source_path": str(parsed.path),
            "activity_enabled": activity,
            "planned_destination": str(destination) if destination else None,
            "heuristics": {
                "language": heuristics.language,
                "markers_found": heuristics.markers_found,
                "sustainability_hits": heuristics.sustainability_hits,
                "page_count": heuristics.page_count,
                "candidate": heuristics.candidate,
            },
            "extracted_text": {
                "start_pages": parsed.start_pages,
                "end_pages": parsed.end_pages,
                "chunks": list(chunks),
            },
            "llm_call": None,
        }
        # Same None test as the routing logic, independent of truthiness.
        if llm_response is not None:
            log_data["llm_call"] = {
                "config_name": llm_response.config_name,
                "model": llm_response.model,
                "api_base": llm_response.api_base,
                "messages": llm_response.messages,
                "response": llm_response.raw_content,
                "raw_api_response": llm_response.raw_api_response,
                "is_sustainability_report": llm_response.is_sustainability_report,
                "certainty": llm_response.certainty,
                "company": llm_response.company,
                "year": llm_response.year,
            }

        log_file = self._log_path_for(parsed.path)
        with log_file.open("w", encoding="utf-8") as f:
            json.dump(log_data, f, ensure_ascii=False, indent=2)

    def _target_dir_for(self, llm_response: Optional[LLMResponse]) -> Path:
        """Return the folder a document belongs in for the given LLM verdict."""
        if llm_response is None:
            return self.output_root / "unknown"
        if llm_response.certainty < self.llm_certainty_threshold:
            return self.output_root / "uncertain"
        if llm_response.is_sustainability_report:
            company_segment = safe_segment(llm_response.company, "unknown_company")
            year_segment = safe_segment(llm_response.year, "unknown_year")
            return self.output_root / "NFD" / company_segment / year_segment
        if llm_response.config_name == "rule-based":
            # NOTE(review): the folder is "rule-base" while the config name is
            # "rule-based"; kept as-is to preserve the existing output layout.
            return self.output_root / "trash" / "rule-base"
        return self.output_root / "trash" / "llm"

    @staticmethod
    def _available_destination(source: Path, destination: Path) -> Path:
        """Return *destination*, or a numbered sibling if that name is taken.

        An existing destination that is the source file itself is returned
        unchanged. Otherwise ``_1``, ``_2``, ... is appended to the stem until
        an unused name is found, so a move never overwrites another file.
        """
        if not destination.exists():
            return destination
        try:
            if destination.samefile(source):
                return destination
        except OSError:
            # The source cannot be stat'ed; treat the name as taken.
            pass
        counter = 1
        while True:
            candidate = destination.with_name(f"{destination.stem}_{counter}{destination.suffix}")
            if not candidate.exists():
                return candidate
            counter += 1

    def dispatch(
        self,
        parsed: ParsedDocument,
        heuristics: HeuristicResult,
        llm_response: Optional[LLMResponse],
        activity: bool,
        chunks: Sequence[str],
    ) -> bool:
        """Route one document to its target folder.

        Args:
            parsed: The parsed document to move.
            heuristics: Heuristic findings, used for logging only.
            llm_response: LLM verdict, or ``None`` when unavailable.
            activity: ``True`` to really move the file; ``False`` performs a
                dry run that only logs the planned action.
            chunks: Text chunks recorded in the JSON log.

        Returns:
            ``True`` when the file was moved, ``False`` for a dry run.
        """
        target_dir = self._target_dir_for(llm_response)
        # Resolve name clashes up front so the logged destination is the one
        # actually used and an existing file is never overwritten.
        destination = self._available_destination(parsed.path, target_dir / parsed.path.name)

        if self.write_log:
            self._write_log(parsed, heuristics, llm_response, chunks, activity, destination)

        if not activity:
            logging.info(
                "[DRY-RUN] %s | candidate=%s | heuristics_markers=%s | llm=%s",
                parsed.path,
                heuristics.candidate,
                heuristics.markers_found,
                llm_response.raw_content if llm_response is not None else None,
            )
            return False

        target_dir.mkdir(parents=True, exist_ok=True)
        logging.info("Moving %s → %s", parsed.path, destination)
        shutil.move(str(parsed.path), str(destination))
        return True
