"""Multi-region agent discovery for Idealista scraper.

Discovers agents across multiple regions using the zoneexperts API.
Creates a single ScrapflyClient and reuses it across all regions to reduce cost.
"""
from __future__ import annotations

import asyncio
import json
from pathlib import Path
from typing import Callable, Optional

from loguru import logger
from scrapfly import ScrapflyClient

from idealista_scraper.cli.config import ScraperConfig
from idealista_scraper.scraping.agent_orchestrator import AgentScraper
from idealista_scraper.utils.config import get_env
from idealista_scraper.utils.paths import get_output_dir


def _resolve_regions(config: ScraperConfig) -> tuple[list[Optional[str]], list[str]]:
    """Return parallel lists of region codes and display names to process.

    Falls back to a single default region (code ``None``, name ``"Default"``)
    when no regions are configured, when "all regions" mode is requested, or
    when the configured code list is empty. The two returned lists always
    have the same length.
    """
    regions = config.regions
    if not regions or regions.all_regions:
        logger.warning(
            "All regions mode not fully implemented yet. "
            "Using default region for country."
        )
        return [None], ["Default"]

    codes = list(regions.codes or [])
    if not codes:
        logger.warning("No regions configured, using default region")
        return [None], ["Default"]

    names = list(regions.names or [])
    if len(names) < len(codes):
        # zip() would silently drop the unnamed regions; label them with
        # their code instead so every configured region is still processed.
        logger.warning(
            f"Only {len(names)} region name(s) for {len(codes)} region code(s); "
            "using the region code as the name where missing"
        )
        names.extend(str(code) for code in codes[len(names):])

    return codes, names[:len(codes)]


def _tag_region(agent: dict, region_name: str, region_code: Optional[str]) -> dict:
    """Annotate an agent record in place with its region and return it."""
    agent["region"] = region_name
    agent["region_code"] = region_code or "default"
    return agent


async def _close_client(scrapfly: ScrapflyClient) -> None:
    """Close the shared client on a best-effort basis, never raising."""
    import inspect

    try:
        outcome = scrapfly.close()
        # NOTE(review): close() may be synchronous in the installed SDK
        # version - confirm. Only await when it actually returns an awaitable.
        if inspect.isawaitable(outcome):
            await outcome
    except Exception as exc:
        # Cleanup failure must not mask scraping results or errors.
        logger.debug(f"Ignoring error while closing ScrapflyClient: {exc}")


def _write_agents_jsonl(agents: list, output_file: Path) -> None:
    """Write one JSON document per line, overwriting any existing file."""
    with output_file.open('w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(agent, ensure_ascii=False) + '\n' for agent in agents
        )


async def discover_agents_for_regions(
    config: ScraperConfig,
    progress_callback: Optional[Callable[[str, int, int], None]] = None,
) -> Path:
    """Discover agents across all configured regions using the zoneexperts API.

    Creates a single ScrapflyClient and reuses it for all regions. For each
    region the first page is fetched to read the pagination info, then the
    remaining pages are fetched concurrently. Every collected agent record is
    tagged with ``region`` and ``region_code`` and the combined result is
    written as JSON Lines.

    Failures are isolated: an error in one region is logged and the next
    region is processed; a failed page is logged and the other pages of that
    region are kept.

    Args:
        config: ScraperConfig with region selection, country and
            ``pages_per_region`` (0 = all pages).
        progress_callback: Optional callback function(region_name, current, total)
            invoked before each region is processed.

    Returns:
        Path to the generated property_agents.jsonl file

    Raises:
        ValueError: If SCRAPFLY_KEY is not set in the environment.
    """
    output_dir = get_output_dir()
    output_file = output_dir / "property_agents.jsonl"
    output_dir.mkdir(parents=True, exist_ok=True)

    region_codes, region_names = _resolve_regions(config)
    total_regions = len(region_codes)
    logger.info(f"Starting agent discovery for {total_regions} region(s) in {config.country}")

    # Create single ScrapflyClient to reuse across all regions
    api_key = get_env("SCRAPFLY_KEY")
    if not api_key:
        raise ValueError("SCRAPFLY_KEY not found in environment")

    scrapfly = ScrapflyClient(key=api_key)
    all_agents = []

    try:
        for idx, (region_code, region_name) in enumerate(zip(region_codes, region_names), start=1):
            logger.info(f"Processing region {idx}/{total_regions}: {region_name} ({region_code})")

            if progress_callback:
                progress_callback(region_name, idx, total_regions)

            # Reuse the same scrapfly client
            scraper = AgentScraper(scrapfly=scrapfly, country=config.country)

            try:
                # First page gives us pagination info
                first_result = await scraper.scrapfly.async_scrape(
                    scraper.get_scrape_config(page_number=1, location=region_code)
                )
                # `or {}` / `or 1` also cover keys that are present but null.
                body = json.loads(first_result.content).get('body') or {}
                total_pages = (body.get('pagination') or {}).get('pages') or 1

                # Limit pages if configured (0 = all pages)
                if config.pages_per_region > 0:
                    total_pages = min(total_pages, config.pages_per_region)

                logger.info(f"Found {total_pages} page(s) for region {region_name}")

                # Process first page results immediately
                agencies = (body.get('agenciesListing') or {}).get('matchingAgencies') or []
                first_page_agents = []
                for agency in agencies:
                    agent_data = await scraper.process_agency_data(agency)
                    first_page_agents.append(_tag_region(agent_data, region_name, region_code))

                all_agents.extend(first_page_agents)
                region_total = len(first_page_agents)

                # Fetch remaining pages in parallel
                if total_pages > 1:
                    page_numbers = range(2, total_pages + 1)
                    # NOTE(review): unlike page 1, scrape_page() is not given
                    # the region code - confirm it targets the right region.
                    # return_exceptions=True keeps the pages that succeeded
                    # instead of discarding the whole region on one failure.
                    page_results = await asyncio.gather(
                        *(scraper.scrape_page(page) for page in page_numbers),
                        return_exceptions=True,
                    )

                    for page_number, page_agents in zip(page_numbers, page_results):
                        if isinstance(page_agents, BaseException):
                            logger.error(
                                f"Error fetching page {page_number} "
                                f"for region {region_name}: {page_agents}"
                            )
                            continue
                        for agent in page_agents:
                            all_agents.append(_tag_region(agent, region_name, region_code))
                            region_total += 1

                logger.info(f"Collected {region_total} agent(s) from region {region_name}")

            except Exception as e:
                # Isolate failures per region; agents already appended to
                # all_agents for this region are kept.
                logger.error(f"Error processing region {region_name}: {e}")

    finally:
        await _close_client(scrapfly)

    logger.info(f"Writing {len(all_agents)} agent(s) to {output_file}")
    _write_agents_jsonl(all_agents, output_file)
    logger.info(f"Agent discovery complete. Output: {output_file}")

    return output_file
