"""
Optimized Scrapfly client wrapper with tiered configurations and shared sessions.

Key optimizations:
1. Uses tiered configurations to minimize credit usage
2. Shares cookies across all requests (no per-client cookie fetching)
3. Implements request deduplication
4. Auto-retries with tier escalation on failure
5. All configurable values loaded from src.utils.config.settings
"""

import asyncio
import os
from datetime import datetime, timedelta, timezone
from typing import Dict, Optional, Set, TYPE_CHECKING
from urllib.parse import urlparse

import backoff
from loguru import logger
from scrapfly import ScrapeConfig, ScrapflyClient
import scrapfly.errors

from .tiers import (
    ScrapflyTier,
    get_config_for_tier,
    get_tier_for_url,
    TIER_CACHED,
    TIER_PROTECTED,
)
from ..utils.config import settings
from ..utils.countries import CountryConfig, get_country

if TYPE_CHECKING:
    from ..utils.countries import CountryConfig


class OptimizedScrapflyClient:
    """
    Wrapper around ScrapflyClient with cost optimization features.

    Features:
    - Tiered configurations (1x, 10x, 25x credits)
    - Shared cookie management (not per-client)
    - Request deduplication
    - Automatic tier escalation on failure
    - Credit usage tracking
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        max_concurrency: Optional[int] = None,
        default_tier: ScrapflyTier = TIER_CACHED,
        country: str | CountryConfig | None = None,
    ):
        """
        Create the wrapper and its underlying ScrapflyClient.

        Args:
            api_key: Scrapfly API key; falls back to the SCRAPFLY_KEY
                environment variable when omitted or empty.
            max_concurrency: Maximum number of simultaneous scrape requests;
                falls back to ``settings.max_concurrency`` when omitted or 0.
            default_tier: Stored on the instance as ``default_tier``.
            country: A CountryConfig instance, or any value accepted by
                ``get_country`` (including None).

        Raises:
            ValueError: If no API key is given and SCRAPFLY_KEY is unset.
        """
        self.api_key = api_key or os.getenv("SCRAPFLY_KEY")
        if not self.api_key:
            raise ValueError("SCRAPFLY_KEY not found in environment")

        self.client = ScrapflyClient(key=self.api_key)
        # Use provided value or fall back to settings
        self.max_concurrency = max_concurrency or settings.max_concurrency
        self.default_tier = default_tier
        self.semaphore = asyncio.Semaphore(self.max_concurrency)

        # Set up country configuration
        if isinstance(country, CountryConfig):
            self.country = country
        else:
            self.country = get_country(country)

        # Shared state
        self._cookies: Optional[Dict[str, str]] = None
        self._cookies_last_refresh: Optional[datetime] = None
        self._cookies_lock = asyncio.Lock()
        self._cookie_refresh_interval = timedelta(hours=4)

        # Deduplication
        self._fetched_urls: Set[str] = set()
        self._fetched_lock = asyncio.Lock()

        # Credit tracking
        self._credits_used = 0
        self._requests_made = 0

        logger.info(
            f"Initialized OptimizedScrapflyClient (concurrency={self.max_concurrency}, "
            f"default_tier={default_tier.value}, country={self.country.code})"
        )

    async def close(self):
        """Close the underlying client."""
        outcome = self.client.close()
        # NOTE(review): the SDK's close() appears to be synchronous; awaiting
        # its None return would raise TypeError. Await only when the call
        # actually hands back an awaitable, so both flavours work.
        if hasattr(outcome, "__await__"):
            await outcome

    async def get_cookies(self) -> Dict[str, str]:
        """
        Get shared cookies, refreshing if needed.

        Cookies are shared across ALL requests (not per-client like before).
        This saves 93% of cookie fetch credits (was 15 clients x 25 ASP = 375 credits).

        Returns:
            The cached cookie mapping, or an empty dict if none are available.
        """
        async with self._cookies_lock:
            # Compare against None rather than truthiness: a successful
            # refresh that yielded zero cookies is still a valid cache entry
            # and must not trigger a new (paid) fetch on every request.
            cache_is_fresh = (
                self._cookies is not None
                and self._cookies_last_refresh is not None
                and datetime.now(timezone.utc) - self._cookies_last_refresh
                < self._cookie_refresh_interval
            )

            if not cache_is_fresh:
                await self._refresh_cookies()

            return self._cookies or {}

    async def _fetch_cookies(self, tier: ScrapflyTier) -> Dict[str, str]:
        """Scrape the country's base URL with the given tier and return its cookies."""
        response = await self.client.async_scrape(
            ScrapeConfig(
                url=f"{self.country.base_url}/",
                **get_config_for_tier(tier),
            )
        )
        return dict(response.cookies) if response.cookies else {}

    async def _refresh_cookies(self):
        """
        Fetch fresh cookies from Idealista.

        Tries the CACHED tier first and escalates to PROTECTED if that fails.
        When both attempts fail the cookie cache is set to an empty dict and
        the refresh timestamp is left untouched, so the next ``get_cookies``
        call tries again.
        """
        logger.info(f"Refreshing shared cookies for {self.country.code}...")

        try:
            # Use CACHED tier for cookie fetch (not PROTECTED unless blocked)
            fresh = await self._fetch_cookies(TIER_CACHED)
            logger.info(f"Refreshed cookies: {len(fresh)} cookies obtained")
        except Exception as e:
            logger.warning(f"Failed to refresh cookies with CACHED tier: {e}")
            # Escalate to PROTECTED tier
            try:
                fresh = await self._fetch_cookies(TIER_PROTECTED)
                logger.info(
                    f"Refreshed cookies with PROTECTED tier: {len(fresh)} cookies"
                )
            except Exception as e2:
                logger.error(f"Failed to refresh cookies with PROTECTED tier: {e2}")
                self._cookies = {}
                return

        self._cookies = fresh
        self._cookies_last_refresh = datetime.now(timezone.utc)

    async def is_already_fetched(self, url: str) -> bool:
        """Check if URL was already fetched (deduplication)."""
        normalized = self._normalize_url(url)
        async with self._fetched_lock:
            return normalized in self._fetched_urls

    async def mark_fetched(self, url: str):
        """Mark URL as fetched."""
        normalized = self._normalize_url(url)
        async with self._fetched_lock:
            self._fetched_urls.add(normalized)

    def _normalize_url(self, url: str) -> str:
        """
        Normalize URL for deduplication.

        The key is host + path with trailing slashes stripped; scheme, query
        string and fragment are ignored, so URLs differing only in those
        parts are treated as the same page.
        """
        parsed = urlparse(url)
        # Remove trailing slashes and query params for comparison
        path = parsed.path.rstrip("/")
        return f"{parsed.netloc}{path}"

    def _credit_cost(self, tier: ScrapflyTier) -> int:
        """Return the credit multiplier charged for one request at ``tier``."""
        # PROTECTED tier costs 25x only if ASP is actually enabled
        protected_cost = 25 if settings.asp_enabled else 1
        credit_multipliers = {
            ScrapflyTier.MINIMAL: 1,
            ScrapflyTier.STANDARD: 1,
            ScrapflyTier.CACHED: 1,
            ScrapflyTier.PROTECTED: protected_cost,
            ScrapflyTier.JAVASCRIPT: 10,
        }
        return credit_multipliers.get(tier, 1)

    @backoff.on_exception(
        backoff.expo,
        (scrapfly.errors.ScrapflyError,),
        max_tries=3,
        max_time=60,
    )
    async def scrape(
        self,
        url: str,
        tier: Optional[ScrapflyTier] = None,
        skip_if_fetched: bool = True,
        cookies: Optional[Dict[str, str]] = None,
        **kwargs,
    ):
        """
        Scrape a URL with cost-optimized configuration.

        Args:
            url: URL to scrape
            tier: Scrapfly tier to use (auto-detected if None)
            skip_if_fetched: Skip if URL was already fetched
            cookies: Optional cookies to use (uses shared cookies if None)
            **kwargs: Additional ScrapeConfig options

        Returns:
            ScrapeApiResponse or None if skipped

        Raises:
            scrapfly.errors.ScrapflyError: If the request fails and is not
                (or cannot be) escalated; the backoff decorator retries
                before the error reaches the caller.
        """
        # Deduplication check
        if skip_if_fetched and await self.is_already_fetched(url):
            logger.debug(f"Skipping already fetched URL: {url}")
            return None

        # Determine tier
        if tier is None:
            tier = get_tier_for_url(url)

        # Build a fresh dict for this request so caller overrides never leak
        # into whatever mapping get_config_for_tier hands back.
        config = {**get_config_for_tier(tier), **kwargs}

        # Get cookies
        if cookies is None:
            cookies = await self.get_cookies()

        # Optional request delay (Scrapfly handles rate limiting, but available if needed)
        if settings.request_delay > 0:
            await asyncio.sleep(settings.request_delay)

        async with self.semaphore:
            logger.debug(f"Scraping [{tier.value}]: {url}")

            try:
                result = await self.client.async_scrape(
                    ScrapeConfig(
                        url=url,
                        cookies=cookies,
                        **config,
                    )
                )
            except scrapfly.errors.ScrapflyError as e:
                # Check if we should escalate tier (only if enabled in settings)
                can_escalate = (
                    settings.asp_escalate_on_block
                    and tier != TIER_PROTECTED
                    and self._should_escalate(e)
                )
                if not can_escalate:
                    raise
                # Keep only the text: the exception name is unbound when the
                # except block ends, and the retry happens after that.
                escalation_reason = str(e)
            else:
                # Track metrics
                self._requests_made += 1
                self._credits_used += self._credit_cost(tier)

                # Mark as fetched
                await self.mark_fetched(url)

                return result

        # The semaphore slot is released before retrying. Escalating while
        # still holding it would make the nested call wait for a slot that
        # this very task occupies, deadlocking once every slot is held by a
        # task that wants to escalate.
        logger.warning(
            f"Escalating to PROTECTED tier for {url} due to: {escalation_reason}"
        )
        return await self.scrape(
            url,
            tier=TIER_PROTECTED,
            skip_if_fetched=False,
            cookies=cookies,
            **kwargs,
        )

    def _should_escalate(self, error: Exception) -> bool:
        """Determine if we should escalate to a higher tier based on error."""
        error_str = str(error).lower()
        escalation_indicators = [
            "blocked",
            "captcha",
            "access denied",
            "403",
            "rate limit",
        ]
        return any(indicator in error_str for indicator in escalation_indicators)

    async def scrape_many(
        self,
        urls: list,
        tier: Optional[ScrapflyTier] = None,
        skip_if_fetched: bool = True,
    ):
        """
        Scrape multiple URLs concurrently.

        Results are yielded in completion order, each paired with the URL
        that produced it. A URL whose scrape raised is logged and yielded
        with a ``None`` result.

        Args:
            urls: List of URLs to scrape
            tier: Tier to use for all URLs (auto-detected if None)
            skip_if_fetched: Skip already fetched URLs

        Yields:
            (url, result) tuples
        """

        async def scrape_tagged(target: str):
            # Carry the URL alongside its own outcome: as_completed yields in
            # completion order, so pairing by input position would attribute
            # results to the wrong URL.
            try:
                outcome = await self.scrape(
                    target, tier=tier, skip_if_fetched=skip_if_fetched
                )
            except Exception as e:
                logger.error(f"Failed to scrape {target}: {e}")
                return target, None
            return target, outcome

        pending = [scrape_tagged(url) for url in urls]

        for finished in asyncio.as_completed(pending):
            yield await finished

    def get_stats(self) -> dict:
        """
        Get usage statistics.

        Returns:
            Dict with ``requests_made``, ``credits_used``, ``urls_fetched``
            and ``avg_credits_per_request`` (0 when no request was made).
        """
        return {
            "requests_made": self._requests_made,
            "credits_used": self._credits_used,
            "urls_fetched": len(self._fetched_urls),
            "avg_credits_per_request": (
                self._credits_used / self._requests_made
                if self._requests_made > 0
                else 0
            ),
        }

    def reset_stats(self):
        """Reset usage statistics."""
        self._credits_used = 0
        self._requests_made = 0

    def clear_fetched_urls(self):
        """Clear the fetched URLs cache."""
        self._fetched_urls.clear()
