"""
Agent scraper orchestrator module.

Coordinates the scraping of real estate agent data from Idealista,
including agent listings and detailed information.

Both Spain and Portugal use the zoneexperts API:
- Spain: POST https://www.idealista.com/en/zoneexperts
- Portugal: POST https://www.idealista.pt/pt/zoneexperts
"""

import json
import os
import asyncio
import codecs
from typing import List, Dict, TYPE_CHECKING

from scrapfly import ScrapflyClient, ScrapeConfig
from bson import ObjectId
from loguru import logger

from .agent_details import AgentDetailsScraper
from ..utils.config import settings, get_env, require_env
from ..utils.countries import CountryConfig, get_country

if TYPE_CHECKING:
    from ..utils.countries import CountryConfig

# Get API key from environment - never hardcode credentials
# Note: Key is retrieved lazily in __init__ to avoid import-time errors
def _get_scrapfly_key() -> str:
    """
    Return the Scrapfly API key looked up via ``get_env("SCRAPFLY_KEY")``.

    Returns:
        The configured API key.

    Raises:
        ValueError: If the lookup yields a falsy value (missing or empty key).
    """
    api_key = get_env("SCRAPFLY_KEY")
    if api_key:
        return api_key
    raise ValueError("SCRAPFLY_KEY not found in .env.local or .env")

# Base headers template shared by every request. It is never sent as-is:
# get_headers_for_country() copies it and adds the country-specific
# "origin" and "referer" entries, so this dict itself must stay unmodified.
BASE_HEADERS = {
    "accept": "*/*",
    "content-type": "application/json",
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}


def get_headers_for_country(country: CountryConfig) -> Dict[str, str]:
    """
    Build the request headers for one country.

    Args:
        country: Country configuration providing ``domain`` and ``language``.

    Returns:
        A new dict holding every entry of ``BASE_HEADERS`` plus ``origin``
        (the country domain) and ``referer`` (the agencies page under that
        domain and language). ``BASE_HEADERS`` itself is left untouched.
    """
    site = country.domain
    return {
        **BASE_HEADERS,
        "origin": site,
        "referer": f"{site}/{country.language}/agencias-inmobiliarias/",
    }


def generate_mongodb_id() -> ObjectId:
    """
    Create a fresh MongoDB ObjectId.

    Returns:
        A newly constructed ``bson.ObjectId``.
    """
    fresh_id = ObjectId()
    return fresh_id


class AgentScraper:
    """
    Orchestrates agent data scraping from Idealista using the zoneexperts API.

    Both Spain and Portugal use the same API pattern with country-specific endpoints.

    Example:
        >>> scraper = AgentScraper(country="spain")
        >>> await scraper.run()

        >>> scraper = AgentScraper(country="portugal")
        >>> await scraper.run()
    """

    # Default location codes per country code (can be overridden per call).
    _DEFAULT_LOCATIONS = {
        "es": "0-EU-ES-03",  # Spain - Alicante
        "pt": "0-EU-PT-11",  # Portugal - Lisboa
    }
    # Location used when the country code has no entry in _DEFAULT_LOCATIONS.
    _FALLBACK_LOCATION = "0-EU-ES-03"

    def __init__(
        self,
        api_key: str | None = None,
        scrapfly: ScrapflyClient | None = None,
        country: str | CountryConfig | None = None,
    ):
        """
        Initialize the agent scraper.

        Args:
            api_key: Optional Scrapfly API key. If not provided,
                    uses SCRAPFLY_KEY from environment.
            scrapfly: Optional existing ScrapflyClient to reuse. Reduces cost by
                     sharing sessions across multiple scrapers.
            country: Country name or CountryConfig. Defaults to IDEALISTA_DEFAULT_COUNTRY
                    env var or 'spain'.

        Raises:
            ValueError: If no client and no ``api_key`` are given and
                SCRAPFLY_KEY cannot be found.
        """
        if scrapfly is not None:
            self.scrapfly = scrapfly
            # The client was supplied by the caller, so this instance did not create it.
            self._owns_client = False
        else:
            key = api_key or _get_scrapfly_key()
            self.scrapfly = ScrapflyClient(key=key)
            self._owns_client = True

        if isinstance(country, CountryConfig):
            self.country = country
        else:
            # Names (or None) are resolved by the shared country lookup.
            self.country = get_country(country)

        self.agent_details_scraper = AgentDetailsScraper(self.scrapfly, country=self.country)
        self.headers = get_headers_for_country(self.country)

        logger.info(f"AgentScraper initialized for {self.country.code}")

    async def process_agency_data(self, agency: Dict) -> Dict:
        """
        Process agency data and fetch additional details.

        Args:
            agency: Raw agency data from Idealista API

        Returns:
            Processed agency data with additional details. ``owner`` and
            ``superAgent`` both reference the same freshly generated ObjectId.
        """
        # `or {}` also covers keys that are present but hold JSON null.
        commercial_data = agency.get('commercialData') or {}
        microsite = commercial_data.get('microsite') or {}

        # Generate MongoDB compatible ObjectIDs
        shared_id = generate_mongodb_id()

        # Get base data
        base_data = {
            "firstname": commercial_data.get('name', ''),
            "url": microsite.get('urlWithParams', ''),
            "agencyReference": microsite.get('shortName', ''),
            "logo": commercial_data.get('logo', ''),
            "agentID": commercial_data.get('id', ''),
            "owner": {"$oid": str(shared_id)},
            "superAgent": {"$oid": str(shared_id)}
        }

        # Details can only be fetched when both the id and the microsite URL exist.
        if base_data['agentID'] and base_data['url']:
            additional_details = await self.agent_details_scraper.get_agent_details(
                str(base_data['agentID']),
                base_data['url']
            )
            base_data.update(additional_details)

        return base_data

    def get_scrape_config(self, page_number: int, location: str | None = None) -> ScrapeConfig:
        """
        Create scrape config for a page of agent listings.

        Args:
            page_number: Page number to fetch
            location: Optional location code (e.g., "0-EU-ES-03" for Alicante, Spain).
                     If not provided, uses a default for the country.

        Returns:
            Configured ScrapeConfig for the request
        """
        # Build URL using country's agents_endpoint from config
        url = f"{self.country.domain}{self.country.agents_endpoint}"

        loc = location or self._DEFAULT_LOCATIONS.get(
            self.country.code, self._FALLBACK_LOCATION
        )

        return ScrapeConfig(
            url=url,
            proxy_pool="public_residential_pool",
            asp=settings.asp_enabled,
            headers=self.headers,
            method="POST",
            body=json.dumps({
                "location": loc,
                "operation": "SALE",
                "typology": "HOUSING",
                "minPrice": 0,
                "maxPrice": None,
                "languages": [],
                "pageNumber": page_number
            }, ensure_ascii=False)
        )

    async def _agencies_from_response(self, page_number: int, response_data: Dict) -> List[Dict]:
        """
        Turn one decoded listing response into processed agency records.

        Args:
            page_number: Page the response belongs to (used for error logging only)
            response_data: Decoded JSON payload of a listing request

        Returns:
            List of processed agency data dictionaries, or an empty list if
            processing fails (the error is logged).
        """
        try:
            # `or {}` / `or []` also covers keys that hold JSON null.
            body = response_data.get('body') or {}
            listing = body.get('agenciesListing') or {}
            agencies = listing.get('matchingAgencies') or []

            # Process each agency with additional details
            return await asyncio.gather(
                *(self.process_agency_data(agency) for agency in agencies)
            )
        except Exception as e:
            logger.error(f"Error scraping page {page_number}: {str(e)}")
            return []

    async def scrape_page(self, page_number: int, location: str | None = None) -> List[Dict]:
        """
        Scrape a single page of agent listings.

        Args:
            page_number: Page number to scrape
            location: Optional location code forwarded to ``get_scrape_config``

        Returns:
            List of processed agency data dictionaries. Any failure is logged
            and results in an empty list rather than an exception.
        """
        try:
            result = await self.scrapfly.async_scrape(
                self.get_scrape_config(page_number, location)
            )
            response_data = json.loads(result.content)
        except Exception as e:
            logger.error(f"Error scraping page {page_number}: {str(e)}")
            return []

        return await self._agencies_from_response(page_number, response_data)

    async def run(
        self,
        output_file: str = 'agent_properties.jsonl',
        max_pages: int = 0,
        location: str | None = None,
    ) -> None:
        """
        Run the full agent scraping process using the zoneexperts API.

        Args:
            output_file: Path to output JSONL file
            max_pages: Maximum pages to scrape (0 = all)
            location: Optional location code forwarded to ``get_scrape_config``;
                     when omitted the country default is used.
        """
        logger.info(f"Using zoneexperts API for {self.country.code}")

        # First, get the total number of pages. A failure here is not caught:
        # without pagination info there is nothing to scrape.
        first_page = await self.scrapfly.async_scrape(self.get_scrape_config(1, location))
        first_page_data = json.loads(first_page.content)
        pagination = (first_page_data.get('body') or {}).get('pagination') or {}
        total_pages = pagination.get('pages') or 0

        if max_pages > 0:
            total_pages = min(total_pages, max_pages)

        logger.info(f"Found {total_pages} pages to scrape")

        results = []
        if total_pages >= 1:
            # Page 1 has already been downloaded above, so reuse that response
            # instead of paying for a second identical request.
            tasks = [self._agencies_from_response(1, first_page_data)]
            tasks.extend(
                self.scrape_page(page, location) for page in range(2, total_pages + 1)
            )

            # Execute tasks
            results = await asyncio.gather(*tasks)

        # Write results as UTF-8; newline='\n' keeps line endings untranslated
        # on every platform.
        with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
            for page_results in results:
                for agency_data in page_results:
                    f.write(json.dumps(agency_data, ensure_ascii=False) + '\n')

        logger.info(f"Completed scraping {total_pages} pages. Data saved to {output_file}")


async def main():
    """Build an AgentScraper with default settings and run it to completion."""
    await AgentScraper().run()


# Script entry point: drive main() on a fresh asyncio event loop.
# NOTE(review): this module uses relative imports, so it presumably has to be
# launched as part of its package (e.g. with `python -m`) - confirm.
if __name__ == "__main__":
    asyncio.run(main())
