"""
Region discovery module for Idealista.

Provides functions to discover available regions/cities from Idealista
for multi-country support.
"""
from __future__ import annotations

import json
import re
from dataclasses import dataclass
from typing import List, Dict, Optional, TYPE_CHECKING

from scrapfly import ScrapflyClient, ScrapeConfig
from bs4 import BeautifulSoup
from loguru import logger

if TYPE_CHECKING:
    from ..utils.countries import CountryConfig

from ..utils.countries import get_country, CountryConfig
from ..utils.config import get_env
from ..client.tiers import get_config_for_tier, TIER_CACHED


@dataclass
class Region:
    """A single region or city for which Idealista exposes listings.

    ``str(region)`` gives the name alone, or the name followed by the
    thousands-separated listing count when that count is non-zero.
    """

    # Human-readable display name.
    name: str
    # Identifier taken from the listing URL path or the zones payload.
    slug: str
    # Location code for API (e.g., "0-EU-ES-28" for Madrid); may be empty.
    code: str
    # Number of listings for the region; 0 when no count was obtained.
    property_count: int = 0
    # Code of the country the region belongs to (e.g. "es"); empty if unset.
    country_code: str = ""

    def __str__(self) -> str:
        # A zero count carries no information, so only the name is shown.
        if not self.property_count:
            return self.name
        listings = f"{self.property_count:,}"
        return f"{self.name} ({listings} listings)"


async def discover_regions(
    scrapfly: ScrapflyClient,
    country: CountryConfig | str | None = None,
    property_type: str = "rental"
) -> List[Region]:
    """
    Discover the regions Idealista offers for a country.

    Resolution order:

    1. Countries flagged with ``has_static_regions`` return their
       pre-configured regions; no request is made.
    2. Countries without a ``regions_endpoint`` return the hard-coded list
       from ``get_common_regions``.
    3. Otherwise the geo zones endpoint is fetched and parsed as JSON.
    4. If that yields nothing, the main listings page is scraped and region
       links are extracted from its HTML.

    Failures while fetching or parsing are logged and swallowed, so the
    result may be an empty list.

    Args:
        scrapfly: Configured Scrapfly client
        country: Country configuration, or a name / None to be resolved
            through ``get_country``.
        property_type: Type of property listings ('rental' or 'sale')

    Returns:
        List of Region objects with name, slug, and code
    """
    if country is None or isinstance(country, str):
        country = get_country(country)

    # Static countries: convert the configured entries, no scraping involved.
    if country.has_static_regions:
        logger.info(f"Using static regions for {country.code}")
        preconfigured: List[Region] = []
        for entry in country.get_regions():
            preconfigured.append(Region(
                name=entry.name,
                slug=entry.slug,
                code=entry.code,
                property_count=0,
                country_code=country.code
            ))
        return preconfigured

    # Nothing to query for this country: use the hard-coded list instead.
    if not country.regions_endpoint:
        logger.warning(f"No regions_endpoint configured for {country.code}")
        # NOTE(review): get_common_regions documents a country *name*
        # argument but receives the country code here — confirm that
        # get_country accepts codes.
        return get_common_regions(country.code)

    regions: List[Region] = []

    # First attempt: the geo search zones endpoint, expected to answer JSON.
    try:
        cached_tier = get_config_for_tier(TIER_CACHED)
        url = f"{country.domain}{country.regions_endpoint}"

        logger.info(f"Fetching regions from {url}")

        geo_request = ScrapeConfig(url=url, **cached_tier)
        result = await scrapfly.async_scrape(geo_request)

        if result.content:
            try:
                payload = json.loads(result.content)
            except json.JSONDecodeError:
                logger.debug("Geo endpoint did not return JSON, trying HTML parsing")
            else:
                regions = _parse_geo_zones(payload, country.code)
                if regions:
                    logger.info(f"Found {len(regions)} regions from geo endpoint")
                    return regions

    except Exception as e:
        # Best effort: any failure here just moves on to the HTML fallback.
        logger.warning(f"Failed to fetch from geo endpoint: {e}")

    # Second attempt: read region links off the main listings page.
    try:
        listing_path = country.get_property_type_path(property_type)
        url = f"{country.base_url}/{listing_path}/"

        logger.info(f"Fetching regions from listings page: {url}")

        cached_tier = get_config_for_tier(TIER_CACHED)
        page_request = ScrapeConfig(url=url, **cached_tier)
        result = await scrapfly.async_scrape(page_request)

        if result.content:
            regions = _parse_regions_from_html(result.content, country)
            logger.info(f"Found {len(regions)} regions from HTML")

    except Exception as e:
        logger.error(f"Failed to discover regions: {e}")

    return regions


def _parse_geo_zones(data: dict, country_code: str) -> List[Region]:
    """Parse regions from the geo search zones API response."""
    regions = []

    # The structure may vary, but typically has a list of zones
    zones = data.get("zones", data.get("body", {}).get("zones", []))

    for zone in zones:
        if isinstance(zone, dict):
            name = zone.get("name", zone.get("label", ""))
            slug = zone.get("slug", zone.get("id", ""))
            code = zone.get("locationId", zone.get("code", ""))
            count = zone.get("propertyCount", zone.get("count", 0))

            if name and slug:
                regions.append(Region(
                    name=name,
                    slug=slug,
                    code=str(code) if code else "",
                    property_count=count,
                    country_code=country_code
                ))

    return regions


def _parse_regions_from_html(html_content: str, country: CountryConfig) -> List[Region]:
    """
    Parse region links from the main listings HTML page.

    Every ``<a href>`` is checked against
    ``/<language>/<type_path>/<slug>/`` for each configured property type
    path (e.g. /en/alquiler-viviendas/madrid/). The first matching type
    path wins. Only the first link seen for a given slug produces a region.

    The name is the link text, or a title-cased form of the slug when the
    text is empty. A trailing count such as "Madrid (12,345)" is split off
    into ``property_count``.

    Args:
        html_content: HTML of the listings page.
        country: Country configuration supplying ``language``,
            ``property_types`` and ``code``.

    Returns:
        Regions in document order, without location codes (the HTML does
        not carry them).
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Language and type paths are literal URL segments, so they are escaped
    # before interpolation. Patterns are compiled once, not per link.
    language = re.escape(f"{country.language}")
    link_patterns = [
        re.compile(f"/{language}/{re.escape(f'{type_path}')}/([^/]+)/")
        for type_path in country.property_types.values()
    ]
    count_pattern = re.compile(r'\((\d[\d,.]*)\)')
    count_suffix_pattern = re.compile(r'\s*\(\d[\d,.]*\)')

    regions: List[Region] = []
    seen_slugs = set()  # O(1) duplicate check instead of rescanning `regions`

    for link in soup.find_all('a', href=True):
        href = link.get('href', '')

        # Same order as the configured type paths; first match wins.
        match = None
        for pattern in link_patterns:
            match = pattern.search(href)
            if match:
                break
        if not match:
            continue

        slug = match.group(1)
        if slug in seen_slugs:
            continue
        seen_slugs.add(slug)

        slug_name = slug.replace("-", " ").title()
        name = link.get_text(strip=True) or slug_name

        # Extract property count if available (e.g., "Madrid (12,345)")
        count = 0
        count_match = count_pattern.search(name)
        if count_match:
            digits = count_match.group(1).replace(",", "").replace(".", "")
            count = int(digits)
            # Link text that was only a count would leave an empty name.
            name = count_suffix_pattern.sub('', name).strip() or slug_name

        regions.append(Region(
            name=name,
            slug=slug,
            code="",  # Code not available from HTML
            property_count=count,
            country_code=country.code
        ))

    return regions


async def list_regions(
    country_name: str | None = None,
    property_type: str = "rental"
) -> List[Region]:
    """
    Public function to list all regions for a country.

    Countries with static regions return their pre-configured regions
    without creating a Scrapfly client or reading the API key. Otherwise a
    client is created from the ``SCRAPFLY_KEY`` environment setting, used
    for ``discover_regions`` and closed afterwards.

    Args:
        country_name: Country name (spain, portugal); None is resolved by
            ``get_country``.
        property_type: Type of property listings ('rental' or 'sale')

    Returns:
        List of Region objects

    Raises:
        ValueError: If API discovery is needed and SCRAPFLY_KEY is not set.
    """
    import inspect

    country = get_country(country_name)

    # For countries with static regions, return directly without API
    if country.has_static_regions:
        return [
            Region(
                name=r.name,
                slug=r.slug,
                code=r.code,
                property_count=0,
                country_code=country.code
            )
            for r in country.get_regions()
        ]

    # Only create ScrapflyClient for API-based region discovery
    api_key = get_env("SCRAPFLY_KEY")
    if not api_key:
        raise ValueError("SCRAPFLY_KEY not found in environment")

    scrapfly = ScrapflyClient(key=api_key)
    try:
        return await discover_regions(scrapfly, country, property_type)
    finally:
        # NOTE(review): ScrapflyClient.close() appears to be a plain
        # synchronous method in the Scrapfly SDK; awaiting its None result
        # would raise TypeError here and hide the outcome of
        # discover_regions. Await only when the call returns an awaitable.
        pending_close = scrapfly.close()
        if inspect.isawaitable(pending_close):
            await pending_close


def get_common_regions(country_name: str | None = None) -> List[Region]:
    """
    Return a hard-coded list of well-known regions; no requests are made.

    Serves as a fallback when regions cannot be discovered. Lists exist for
    country codes "es" and "pt"; any other resolved country code gives an
    empty list.

    Args:
        country_name: Country identifier passed to ``get_country``
            (e.g. spain, portugal).

    Returns:
        List of Region objects for common cities
    """
    country = get_country(country_name)

    # country code -> (name, slug, location code) rows
    catalogue = {
        "es": (
            ("Madrid", "madrid", "0-EU-ES-28"),
            ("Barcelona", "barcelona", "0-EU-ES-08"),
            ("Valencia", "valencia", "0-EU-ES-46"),
            ("Sevilla", "sevilla", "0-EU-ES-41"),
            ("Málaga", "malaga", "0-EU-ES-29"),
            ("Alicante", "alicante", "0-EU-ES-03"),
            ("Bilbao", "bilbao", "0-EU-ES-48"),
            ("Zaragoza", "zaragoza", "0-EU-ES-50"),
            ("Palma de Mallorca", "palma-de-mallorca", "0-EU-ES-07"),
            ("Las Palmas", "las-palmas-de-gran-canaria", "0-EU-ES-35"),
        ),
        "pt": (
            ("Lisboa", "lisboa", "0-EU-PT-11"),
            ("Porto", "porto", "0-EU-PT-13"),
            ("Faro", "faro", "0-EU-PT-08"),
            ("Braga", "braga", "0-EU-PT-03"),
            ("Coimbra", "coimbra", "0-EU-PT-06"),
            ("Setúbal", "setubal", "0-EU-PT-15"),
            ("Aveiro", "aveiro", "0-EU-PT-01"),
            ("Funchal", "funchal", "0-EU-PT-31"),
        ),
    }

    # Fresh Region objects on every call, so callers may mutate the result.
    regions_by_country = {
        code: [
            Region(name, slug, location, country_code=code)
            for name, slug, location in rows
        ]
        for code, rows in catalogue.items()
    }

    return regions_by_country.get(country.code, [])
