"""
Tiered Scrapfly configuration system for credit optimization.

Cost Reference (Scrapfly pricing):
- Base request: 1 credit
- ASP (Anti-Scraping Protection): 25x multiplier
- render_js (JavaScript rendering): 10x multiplier
- Screenshots: Additional credits based on size
- Residential proxy: Higher cost than datacenter

Tiers are designed to use the minimum credits necessary for each request type.
All configurable values are loaded from src.utils.config.settings.
"""

from collections import Counter
from dataclasses import dataclass
from enum import Enum
from typing import Dict, Any, Optional

from ..utils.config import settings


class ScrapflyTier(Enum):
    """
    Request cost tiers for the Scrapfly API.

    The multipliers below are the nominal costs used by estimate_credits();
    the options actually sent for each tier live in TIER_CONFIGS.

    MINIMAL: 1x credits - Basic HTML fetch, no anti-bot, no caching
    STANDARD: 1x credits - Basic HTML fetch with a sticky proxy session
    CACHED: 1x credits - Same as standard, with Scrapfly's cache when
        settings.cache_enabled is on
    PROTECTED: 25x credits when settings.asp_enabled is on (1x otherwise) -
        Uses ASP for sites with anti-bot protection
    JAVASCRIPT: 10x credits - For pages requiring JS rendering
    """

    # The string values are what estimate_credits() reports in its output.
    MINIMAL = "minimal"
    STANDARD = "standard"
    CACHED = "cached"
    PROTECTED = "protected"
    JAVASCRIPT = "javascript"


# Module-level aliases for the ScrapflyTier members. These are enum members,
# not configuration objects; the per-tier settings are stored in TIER_CONFIGS.
TIER_MINIMAL = ScrapflyTier.MINIMAL
TIER_STANDARD = ScrapflyTier.STANDARD
TIER_CACHED = ScrapflyTier.CACHED
TIER_PROTECTED = ScrapflyTier.PROTECTED
TIER_JAVASCRIPT = ScrapflyTier.JAVASCRIPT


# Default HTTP request headers. TierConfig copies this mapping whenever it is
# created without explicit headers, so this dict is the template for every
# tier defined in this module.
BASE_HEADERS = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "accept-language": "en-US,en;q=0.9,es;q=0.8",
    # Desktop Chrome 130 on macOS user-agent string.
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
    "cache-control": "no-cache",
}


@dataclass
class TierConfig:
    """Request options for a single Scrapfly tier.

    Every field maps one-to-one onto a key produced by :meth:`to_dict`,
    which is the form handed to ScrapeConfig.

    Attributes:
        asp: Enable Anti-Scraping Protection (25x credit multiplier).
        render_js: Enable JavaScript rendering (10x credit multiplier).
        cache: Enable Scrapfly's response cache.
        cache_ttl: Cache lifetime; only emitted when ``cache`` is on and the
            value is truthy.
        retry: Value passed through as the ``retry`` option.
        country: Value passed through as the ``country`` option.
        proxy_pool: Name of the proxy pool to use.
        session_sticky_proxy: Value passed through as the
            ``session_sticky_proxy`` option.
        headers: Request headers. ``None`` (the default) is replaced by a
            private copy of ``BASE_HEADERS`` after initialisation.
        dns: Value passed through as the ``dns`` option.
        ssl: Value passed through as the ``ssl`` option.
    """

    asp: bool = False
    render_js: bool = False
    cache: bool = False
    cache_ttl: Optional[int] = None
    retry: bool = True
    country: str = "ES"
    proxy_pool: str = "public_datacenter_pool"
    session_sticky_proxy: bool = False
    # None is a sentinel resolved in __post_init__; a dict default would be
    # shared between instances.
    headers: Optional[Dict[str, str]] = None
    dns: bool = True
    ssl: bool = True

    def __post_init__(self) -> None:
        """Fill in default headers with a per-instance copy of BASE_HEADERS."""
        if self.headers is None:
            self.headers = BASE_HEADERS.copy()

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a keyword dictionary for ScrapeConfig.

        Returns:
            A new dict on every call. ``headers`` is a copy, so callers may
            add per-request headers without altering this (usually shared,
            module-level) tier configuration. ``cache_ttl`` is included only
            when caching is enabled and a truthy TTL is set.
        """
        result: Dict[str, Any] = {
            "asp": self.asp,
            "render_js": self.render_js,
            "cache": self.cache,
            "retry": self.retry,
            "country": self.country,
            "proxy_pool": self.proxy_pool,
            "session_sticky_proxy": self.session_sticky_proxy,
            # Copy: returning self.headers directly would let a caller's
            # mutation leak into every later request built from this tier.
            "headers": dict(self.headers),
            "dns": self.dns,
            "ssl": self.ssl,
        }
        if self.cache and self.cache_ttl:
            result["cache_ttl"] = self.cache_ttl
        return result


# Per-tier request options, keyed by ScrapflyTier. The credit figures in the
# comments are the nominal multipliers that estimate_credits() applies.
# NOTE: the settings.* values are read once, when this module is imported;
# later changes to settings are not reflected in these entries.
TIER_CONFIGS: Dict[ScrapflyTier, TierConfig] = {
    # 1x credits - Absolute minimum, for trusted/simple pages.
    # The only tier without a sticky proxy session.
    ScrapflyTier.MINIMAL: TierConfig(
        asp=False,
        render_js=False,
        cache=False,
        retry=True,
        session_sticky_proxy=False,
    ),
    # 1x credits - Standard configuration for most pages
    # (MINIMAL plus a sticky proxy session).
    ScrapflyTier.STANDARD: TierConfig(
        asp=False,
        render_js=False,
        cache=False,
        retry=True,
        session_sticky_proxy=True,
    ),
    # 1x credits - With Scrapfly caching (saves credits on repeat requests).
    # Caching is only active when settings.cache_enabled is true; otherwise
    # this tier sends the same options as STANDARD.
    ScrapflyTier.CACHED: TierConfig(
        asp=False,
        render_js=False,
        cache=settings.cache_enabled,
        cache_ttl=settings.cache_ttl,
        retry=True,
        session_sticky_proxy=True,
    ),
    # 25x credits - For pages with strong anti-bot protection.
    # ASP is configurable via the SCRAPFLY_ASP_ENABLED environment variable
    # (surfaced here as settings.asp_enabled); with ASP off this tier sends
    # the same options as CACHED and is estimated at 1x.
    ScrapflyTier.PROTECTED: TierConfig(
        asp=settings.asp_enabled,
        render_js=False,
        cache=settings.cache_enabled,
        cache_ttl=settings.cache_ttl,
        retry=True,
        session_sticky_proxy=True,
    ),
    # 10x credits - For pages requiring JavaScript rendering (ASP stays off).
    ScrapflyTier.JAVASCRIPT: TierConfig(
        asp=False,
        render_js=True,
        cache=settings.cache_enabled,
        cache_ttl=settings.cache_ttl,
        retry=True,
        session_sticky_proxy=True,
    ),
}


def get_config_for_tier(tier: ScrapflyTier) -> Dict[str, Any]:
    """Return the ScrapeConfig keyword dictionary registered for ``tier``.

    Raises:
        KeyError: If ``tier`` has no entry in TIER_CONFIGS.
    """
    tier_config = TIER_CONFIGS[tier]
    return tier_config.to_dict()


def get_tier_for_url(url: str) -> ScrapflyTier:
    """
    Pick the tier to use for a URL by matching known path fragments.

    Matching is case-insensitive and the first rule that matches wins.
    Every rule currently resolves to CACHED; the table exists so individual
    page types can be moved to another tier based on the results from
    test_asp.py.
    """
    normalized = url.lower()

    # (path fragments, tier) pairs, checked in order.
    rules = (
        # Property detail pages: try cached first, upgrade if blocked.
        (("/inmueble/",), ScrapflyTier.CACHED),
        # Listing pages are usually less protected.
        (("/alquiler-", "/venta-", "/obra-nueva/"), ScrapflyTier.CACHED),
        # Agent pages.
        (("/agencia/", "/profesional/"), ScrapflyTier.CACHED),
    )

    for fragments, matched_tier in rules:
        for fragment in fragments:
            if fragment in normalized:
                return matched_tier

    # Anything unrecognised falls back to the cached tier.
    return ScrapflyTier.CACHED


def estimate_credits(urls: list, tier: Optional[ScrapflyTier] = None) -> dict:
    """
    Estimate credit usage for a list of URLs.

    The estimate multiplies the number of URLs by a fixed per-tier cost;
    cache hits and retries are not modelled.

    Args:
        urls: URLs to be scraped. Must support ``len()``.
        tier: Tier to assume for every URL. When ``None`` (the default) the
            tier is chosen per URL with get_tier_for_url().

    Returns:
        With an explicit ``tier``: a dict with ``total_urls``, ``tier``,
        ``credits_per_request`` and ``estimated_total``.
        With auto-detection: a dict with ``total_urls``, ``breakdown``
        (URL count per tier value) and ``estimated_total``.

    Raises:
        KeyError: If ``tier`` is not a known ScrapflyTier member.
    """
    # PROTECTED tier costs 25x only if ASP is enabled, otherwise 1x.
    # Read at call time so the estimate follows the current setting.
    protected_cost = 25 if settings.asp_enabled else 1

    credit_multipliers = {
        ScrapflyTier.MINIMAL: 1,
        ScrapflyTier.STANDARD: 1,
        ScrapflyTier.CACHED: 1,
        ScrapflyTier.PROTECTED: protected_cost,
        ScrapflyTier.JAVASCRIPT: 10,
    }

    # Explicit None check: "no tier given" is the only case that should
    # trigger auto-detection.
    if tier is not None:
        # All URLs use the same tier
        multiplier = credit_multipliers[tier]
        return {
            "total_urls": len(urls),
            "tier": tier.value,
            "credits_per_request": multiplier,
            "estimated_total": len(urls) * multiplier,
        }

    # Auto-detect tier for each URL. Counter keeps first-seen order, so the
    # breakdown lists tiers in the order they were first encountered.
    tier_counts = Counter(get_tier_for_url(url) for url in urls)

    total_credits = sum(
        count * credit_multipliers[t] for t, count in tier_counts.items()
    )

    return {
        "total_urls": len(urls),
        "breakdown": {t.value: count for t, count in tier_counts.items()},
        "estimated_total": total_credits,
    }
