#!/usr/bin/env python3
"""idealistaScraper CLI - Unified command-line interface for Idealista property scraping.

A modern CLI built with Typer for managing all aspects of the Idealista scraper project.
"""
from __future__ import annotations

import asyncio
import json
import os
import pickle
from enum import Enum
from pathlib import Path
from typing import Optional

import typer
from dotenv import load_dotenv
from rich import print as rprint
from rich.console import Console
from rich.panel import Panel
from rich.progress import (
    BarColumn,
    Progress,
    SpinnerColumn,
    TaskProgressColumn,
    TextColumn,
    TimeRemainingColumn,
)
from rich.prompt import Confirm, Prompt
from rich.table import Table

from idealista_scraper.utils.paths import get_env_file, get_output_dir
from idealista_scraper.utils.countries import get_country, list_available_countries
from idealista_scraper.cli.config import ScraperConfig
from idealista_scraper.cli.presets import load_preset, list_presets, delete_preset

# Load environment variables from the .env file located by the project's
# centralized path helper. Nothing is loaded when get_env_file() returns a
# falsy value (the walrus binds the path and tests it in one step).
if env_file := get_env_file():
    load_dotenv(env_file)

# Root Typer application. Commands below register themselves on it through
# @app.command() / app.add_typer(). add_completion=False is passed so Typer
# does not add its shell-completion install/show options.
app = typer.Typer(
    name="idealista-scraper",
    help="Unified CLI for Idealista property scraping and data processing",
    add_completion=False,
)

# Shared Rich console; every command in this module prints through it.
console = Console()


@app.callback(invoke_without_command=True)
def main_callback(
    ctx: typer.Context,
    preset: Optional[str] = typer.Option(
        None, "--preset", "-p", help="Run with a saved preset configuration"
    ),
) -> None:
    """Main callback that launches the wizard or runs a preset if no command is given.

    If no subcommand is provided:
    - With --preset: Loads and runs the specified preset configuration
    - Without --preset: Launches the interactive configuration wizard
    """
    # A subcommand was requested, so this callback has nothing to do.
    if ctx.invoked_subcommand is not None:
        return

    if not preset:
        # Interactive path: the wizard module is imported only when needed.
        from idealista_scraper.cli.interactive import InteractiveWizard

        console.print("\n[bold cyan]Welcome to Idealista Scraper![/bold cyan]\n")
        console.print("No command specified. Launching interactive wizard...\n")

        try:
            interactive_wizard = InteractiveWizard(console=console)
            wizard_config = asyncio.run(interactive_wizard.run())

            # Run the pipeline with whatever configuration the wizard produced.
            asyncio.run(execute_pipeline(wizard_config))
        except KeyboardInterrupt:
            # Ctrl-C during the wizard or the pipeline is a clean exit.
            console.print("\n[yellow]Cancelled by user[/yellow]")
            raise typer.Exit(code=0)
        return

    # Preset path: load the named preset and run it directly.
    console.print(f"\n[cyan]Loading preset '{preset}'...[/cyan]\n")
    preset_config = load_preset(preset)

    if preset_config is not None:
        asyncio.run(execute_pipeline(preset_config))
        return

    # Unknown preset: report it, list what is available, and fail.
    console.print(f"[red]Error: Preset '{preset}' not found[/red]")
    console.print("\n[yellow]Available presets:[/yellow]")
    known_presets = list_presets()
    if known_presets:
        for preset_name in known_presets:
            console.print(f"  - {preset_name}")
    else:
        console.print("  (none)")
    raise typer.Exit(code=1)


class PropertyType(str, Enum):
    """Kind of listing to work with.

    Mixing in ``str`` makes every member compare equal to its plain string
    value; the commands in this module expose it as the ``--type`` option.
    """

    # Properties offered for rent.
    rental = "rental"
    # Properties offered for sale.
    sale = "sale"


class OutputFormat(str, Enum):
    """Names of the supported output formats.

    Mixing in ``str`` makes every member compare equal to its plain string
    value.
    """

    # Value "jsonl".
    jsonl = "jsonl"
    # Value "json".
    json = "json"


# ============================================================================
# PIPELINE EXECUTION FUNCTION
# ============================================================================


async def execute_pipeline(config: ScraperConfig) -> None:
    """Execute the pipeline steps selected in ``config``.

    Steps run in a fixed order. ``discover_agents``, ``scrape``, ``transform``
    and ``clean`` run only when their name appears in
    ``config.pipeline_steps``; the S3 upload runs whenever
    ``config.s3_bucket`` is truthy, independent of ``pipeline_steps``.

    Failure handling is deliberately different per step:

    * agent discovery and scraping are fatal - the error is printed and the
      command exits with code 1;
    * transform, clean and upload are best-effort - the error is printed and
      the pipeline carries on with the next step.

    Args:
        config: ScraperConfig containing all pipeline settings

    Raises:
        typer.Exit: With code 1 if agent discovery or scraping fails.
    """
    from idealista_scraper.cli.agent_discovery import discover_agents_for_regions
    from idealista_scraper.orchestration.agent_manager import main as scraper_main
    from idealista_scraper.transform.cleaner import MongoCleaner

    console.print(
        Panel(
            f"[bold]Executing Pipeline[/bold]\n"
            f"Country: {config.country.title()}\n"
            f"Property Type: {config.property_type.value.title()}\n"
            f"Steps: {', '.join(config.pipeline_steps)}",
            title="Pipeline Configuration",
            border_style="cyan",
        )
    )

    output_dir = get_output_dir()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeRemainingColumn(),
        console=console,
    ) as progress:
        # Step 1: Discover Agents
        if "discover_agents" in config.pipeline_steps:
            # Dedicated name: the callback below closes over this task id, so
            # it must not be rebound by the later steps.
            discover_task = progress.add_task(
                "[cyan]Step 1: Discovering agents across regions...", total=100
            )
            try:
                # Progress callback for agent discovery
                def agent_progress(region_name: str, current: int, total: int) -> None:
                    # Guard against total == 0 (e.g. no regions), which would
                    # otherwise abort discovery with a ZeroDivisionError.
                    percent = int((current / total) * 100) if total else 0
                    progress.update(
                        discover_task,
                        completed=percent,
                        description=f"[cyan]Step 1: Discovering agents in {region_name}...",
                    )

                agents_file = await discover_agents_for_regions(
                    config, progress_callback=agent_progress
                )
                progress.update(discover_task, completed=100)
                console.print(f"[green]Agent discovery complete: {agents_file}[/green]")
            except Exception as e:
                console.print(f"[red]Agent discovery failed: {e}[/red]")
                raise typer.Exit(code=1) from e

        # Step 2: Scrape Listings
        if "scrape" in config.pipeline_steps:
            task = progress.add_task("[cyan]Step 2: Scraping property listings...", total=100)
            try:
                await scraper_main(config=config)
                progress.update(task, completed=100)
                console.print("[green]Scraping complete[/green]")
            except Exception as e:
                console.print(f"[red]Scraping failed: {e}[/red]")
                raise typer.Exit(code=1) from e

        # Step 3: Transform Data
        if "transform" in config.pipeline_steps:
            task = progress.add_task("[cyan]Step 3: Transforming data...", total=100)
            try:
                # For now, log that transform requires manual intervention
                # In the future, this could be automated
                console.print(
                    "[yellow]Note: Transform step currently requires raw HTML files[/yellow]"
                )
                console.print(
                    "[yellow]Use 'idealista-scraper transform' command for HTML processing[/yellow]"
                )
                progress.update(task, completed=100)
            except Exception as e:
                # Best-effort step: report and continue.
                console.print(f"[red]Transform failed: {e}[/red]")

        # Step 4: Clean Data
        if "clean" in config.pipeline_steps:
            task = progress.add_task("[cyan]Step 4: Cleaning MongoDB data...", total=100)
            try:
                # Look for rental/resale properties files
                rental_file = output_dir / "propli_rental_properties.jsonl"
                resale_file = output_dir / "propli_resale_properties.jsonl"

                cleaner = MongoCleaner()

                if rental_file.exists():
                    cleaned_rental = output_dir / "rental_properties_cleaned.jsonl"
                    cleaner.convert_file(str(rental_file), str(cleaned_rental))
                    console.print(f"[green]Cleaned rental data: {cleaned_rental}[/green]")

                if resale_file.exists():
                    cleaned_resale = output_dir / "resale_properties_cleaned.jsonl"
                    cleaner.convert_file(str(resale_file), str(cleaned_resale))
                    console.print(f"[green]Cleaned resale data: {cleaned_resale}[/green]")

                if not rental_file.exists() and not resale_file.exists():
                    console.print("[yellow]No property files found to clean[/yellow]")

                progress.update(task, completed=100)
            except Exception as e:
                # Best-effort step: report and continue.
                console.print(f"[red]Cleaning failed: {e}[/red]")

        # Step 5: Upload to S3 (if configured)
        if config.s3_bucket:
            task = progress.add_task("[cyan]Step 5: Uploading to S3...", total=100)
            try:
                images_file = output_dir / "image_urls.jsonl"
                if images_file.exists():
                    _upload_images_to_s3(str(images_file), config.s3_bucket, resume=False)
                    progress.update(task, completed=100)
                    console.print(f"[green]Upload complete to {config.s3_bucket}[/green]")
                else:
                    console.print("[yellow]No image URLs file found[/yellow]")
                    progress.update(task, completed=100)
            except Exception as e:
                # Best-effort step: report and continue.
                console.print(f"[red]Upload failed: {e}[/red]")

    console.print("\n[bold green]Pipeline execution complete![/bold green]\n")


# ============================================================================
# TEST ASP COMMAND
# ============================================================================


@app.command()
def test_asp() -> None:
    """Test if ASP (Anti-Scraping Protection) is required for Idealista.

    ASP costs 25x credits per request. This test determines if we can
    save 96% of credits by disabling it.
    """
    console.print("\n[bold cyan]Testing ASP Requirement...[/bold cyan]\n")

    try:
        # The integration test module is imported only when the command runs.
        from tests.integration.test_asp import run_tests

        asyncio.run(run_tests())
    except Exception as exc:
        # Import problems get their own message; anything else is reported as
        # a failure of the test run itself. Both exit with code 1.
        if isinstance(exc, ImportError):
            failure = f"[red]Error: Could not import test_asp module: {exc}[/red]"
        else:
            failure = f"[red]Error running ASP test: {exc}[/red]"
        console.print(failure)
        raise typer.Exit(code=1)


# ============================================================================
# REGIONS COMMAND
# ============================================================================


@app.command()
def regions(
    country: str = typer.Option(
        "spain", "--country", "-c", help="Country: spain or portugal"
    ),
    list_countries: bool = typer.Option(
        False, "--list-countries", help="List all supported countries"
    ),
    property_type: PropertyType = typer.Option(
        PropertyType.rental, "--type", "-t", help="Property type: rental or sale"
    ),
    common_only: bool = typer.Option(
        False, "--common", help="Show common regions only (no API call)"
    ),
) -> None:
    """List available regions for a country.

    Examples:
        idealista-scraper regions --list-countries
        idealista-scraper regions --country spain
        idealista-scraper regions --country portugal --type sale
        idealista-scraper regions --country spain --common
    """
    # --list-countries short-circuits everything else.
    if list_countries:
        console.print("\n[bold cyan]Supported Countries[/bold cyan]\n")
        for country_name in list_available_countries():
            domain = get_country(country_name).domain
            console.print(f"  • [green]{country_name}[/green] ({domain})")
        console.print()
        return

    console.print(f"\n[bold cyan]Regions in {country.title()}[/bold cyan]\n")

    # Called for validation only: an unknown country raises ValueError.
    try:
        get_country(country)
    except ValueError as e:
        console.print(f"[red]Error: {e}[/red]")
        raise typer.Exit(code=1)

    if not common_only:
        from idealista_scraper.scraping.regions import list_regions

        console.print("[cyan]Fetching regions from Idealista...[/cyan]\n")
        try:
            regions_list = asyncio.run(list_regions(country, property_type.value))
        except Exception as e:
            # Fetching failed: fall back to the common-regions list.
            console.print(f"[yellow]Could not fetch regions: {e}[/yellow]")
            console.print("[yellow]Falling back to common regions...[/yellow]\n")
            from idealista_scraper.scraping.regions import get_common_regions

            regions_list = get_common_regions(country)
    else:
        from idealista_scraper.scraping.regions import get_common_regions

        regions_list = get_common_regions(country)
        console.print("[yellow]Showing common regions (no API call)[/yellow]\n")

    if not regions_list:
        console.print("[yellow]No regions found.[/yellow]")
        return

    table = Table(title=f"Available Regions ({country.title()})")
    column_specs = (
        ("Region", {"style": "cyan"}),
        ("Slug", {"style": "green"}),
        ("Code", {"style": "yellow"}),
        ("Listings", {"justify": "right", "style": "magenta"}),
    )
    for heading, column_options in column_specs:
        table.add_column(heading, **column_options)

    for entry in regions_list:
        # Falsy code / property_count values are rendered as a dash.
        code_cell = entry.code or "-"
        if entry.property_count:
            listings_cell = f"{entry.property_count:,}"
        else:
            listings_cell = "-"
        table.add_row(entry.name, entry.slug, code_cell, listings_cell)

    console.print(table)
    example_slug = regions_list[0].slug if regions_list else "region-slug"
    console.print(f"\n[dim]Use --location {example_slug} when scraping[/dim]\n")


# ============================================================================
# SCRAPE COMMANDS
# ============================================================================

# Sub-application grouping the scraping commands; it is mounted on the root
# app under the name "scrape", so its commands are invoked as
# `idealista-scraper scrape <command>`.
scrape_app = typer.Typer(help="Scraping commands for listings, properties, and agents")
app.add_typer(scrape_app, name="scrape")


@scrape_app.command("listings")
def scrape_listings(
    location: str = typer.Option(
        "madrid", "--location", "-l", help="Location to scrape (e.g., madrid, barcelona, lisboa)"
    ),
    country: str = typer.Option(
        "spain", "--country", help="Country: spain or portugal"
    ),
    property_type: PropertyType = typer.Option(
        PropertyType.rental, "--type", "-t", help="Property type: rental or sale"
    ),
    pages: int = typer.Option(10, "--pages", "-p", help="Number of pages to scrape"),
    max_concurrent: int = typer.Option(50, "--concurrent", "-c", help="Max concurrent requests"),
    upload_s3: bool = typer.Option(False, "--upload-s3", help="Upload images to S3 after scraping"),
    bucket: Optional[str] = typer.Option(None, "--bucket", "-b", help="S3 bucket name for upload"),
) -> None:
    """Scrape property listings from Idealista.

    Example:
        idealista-scraper scrape listings --location madrid --type rental --pages 10
        idealista-scraper scrape listings --country portugal --location lisboa --type sale
        idealista-scraper scrape listings --location madrid --upload-s3 --bucket my-bucket
    """
    from idealista_scraper.cli.config import PropertyType as ConfigPropertyType, RegionSelection

    # Reject unknown countries before printing anything else.
    try:
        country_config = get_country(country)
    except ValueError as e:
        console.print(f"[red]Error: {e}[/red]")
        raise typer.Exit(code=1)

    summary_lines = [
        f"[bold]Scraping {property_type.value} listings[/bold]",
        f"Country: {country.title()} ({country_config.domain})",
        f"Location: {location}",
        f"Pages: {pages}",
        f"Concurrency: {max_concurrent}",
    ]
    console.print(
        Panel(
            "\n".join(summary_lines),
            title="Idealista Listings Scraper",
            border_style="cyan",
        )
    )

    output_dir = get_output_dir()

    try:
        from idealista_scraper.orchestration.agent_manager import main as scraper_main

        # Translate the CLI options into the ScraperConfig the scraper expects.
        region_selection = RegionSelection(codes=[location], names=[location.title()])
        config = ScraperConfig(
            country=country,
            regions=region_selection,
            property_type=ConfigPropertyType(property_type.value),
            pages_per_region=pages,
            max_concurrent=max_concurrent,
            pipeline_steps=["scrape"],
            s3_bucket=bucket if upload_s3 else None,
        )

        asyncio.run(scraper_main(config=config))

        console.print("\n[green]Scraping completed successfully![/green]")

        # Without --upload-s3 the user is asked interactively (default: yes).
        want_upload = upload_s3
        if not want_upload:
            want_upload = Confirm.ask(
                "\n[cyan]Do you want to upload images to S3?[/cyan]", default=True
            )

        if want_upload:
            # Bucket resolution order: --bucket, then S3_BUCKET_NAME, then a prompt.
            target_bucket = (
                bucket
                or os.getenv("S3_BUCKET_NAME")
                or Prompt.ask("[cyan]Enter S3 bucket name[/cyan]")
            )

            console.print("\n[cyan]Starting S3 image upload...[/cyan]")
            _upload_images_to_s3(
                str(output_dir / "image_urls.jsonl"), target_bucket, resume=False
            )

    except Exception as e:
        console.print(f"[red]Error during scraping: {e}[/red]")
        raise typer.Exit(code=1)


@scrape_app.command("properties")
def scrape_properties(
    input_file: Path = typer.Option(
        ..., "--input", "-i", help="Input file with property URLs (one per line)"
    ),
    output_file: Path = typer.Option(
        "properties.jsonl", "--output", "-o", help="Output JSONL file"
    ),
    max_concurrent: int = typer.Option(50, "--concurrent", "-c", help="Max concurrent requests"),
) -> None:
    """Scrape detailed property information from a list of URLs.

    Example:
        idealista-scraper scrape properties --input urls.txt --output properties.jsonl
    """
    # is_file() rather than exists(): a directory path would pass exists() and
    # then make open() raise an uncaught IsADirectoryError.
    if not input_file.is_file():
        console.print(f"[red]Error: Input file {input_file} not found[/red]")
        raise typer.Exit(code=1)

    # Explicit encoding so the result does not depend on the platform locale.
    with open(input_file, encoding="utf-8") as f:
        urls = [stripped for line in f if (stripped := line.strip())]

    console.print(
        Panel(
            f"[bold]Scraping property details[/bold]\n"
            f"URLs to process: {len(urls)}\n"
            f"Output: {output_file}\n"
            f"Concurrency: {max_concurrent}",
            title="Property Details Scraper",
            border_style="cyan",
        )
    )

    try:
        # The class is not used yet; the import is kept so a missing scraping
        # module is still reported as an error by this command.
        from idealista_scraper.scraping import PropertyDetailsScraper  # noqa: F401

        console.print(
            "[yellow]Note: Property scraping from URL file not yet implemented.[/yellow]"
        )
        console.print("[yellow]Use the main scraper which processes from agent URLs.[/yellow]")

    except Exception as e:
        console.print(f"[red]Error during property scraping: {e}[/red]")
        raise typer.Exit(code=1) from e


@scrape_app.command("agents")
def scrape_agents(
    limit: Optional[int] = typer.Option(
        None, "--limit", "-l", help="Limit number of agents to scrape"
    ),
    country: str = typer.Option(
        "spain", "--country", help="Country: spain or portugal"
    ),
    output_file: Path = typer.Option(
        "agent_properties.jsonl", "--output", "-o", help="Output file"
    ),
) -> None:
    """Scrape agent data from Idealista.

    Example:
        idealista-scraper scrape agents --limit 100
        idealista-scraper scrape agents --country portugal --limit 50
    """
    # Reject unknown countries before doing any work.
    try:
        country_config = get_country(country)
    except ValueError as e:
        console.print(f"[red]Error: {e}[/red]")
        raise typer.Exit(code=1) from e

    console.print(
        Panel(
            f"[bold]Scraping agent data[/bold]\n"
            f"Country: {country.title()} ({country_config.domain})\n"
            f"Limit: {limit or 'All agents'}\n"
            f"Output: {output_file}",
            title="Agent Scraper",
            border_style="cyan",
        )
    )

    # This command never hands `limit` to AgentScraper, so the value shown in
    # the panel has no effect on the run. Say so explicitly instead of letting
    # the user assume the scrape is bounded.
    # TODO(review): forward the limit once the AgentScraper API for it is confirmed.
    if limit is not None:
        console.print(
            "[yellow]Warning: --limit is currently not passed to the agent scraper "
            "and will be ignored.[/yellow]"
        )

    try:
        from idealista_scraper.scraping.agent_orchestrator import AgentScraper

        console.print("\n[cyan]Starting agent scraper...[/cyan]\n")
        scraper = AgentScraper(country=country)
        asyncio.run(scraper.run(output_file=str(output_file)))

        console.print(f"\n[green]Agent data saved to {output_file}[/green]")
    except Exception as e:
        console.print(f"[red]Error during agent scraping: {e}[/red]")
        raise typer.Exit(code=1) from e


# ============================================================================
# TRANSFORM COMMAND
# ============================================================================


@app.command()
def transform(
    input_file: Path = typer.Argument(..., help="Input HTML file to transform"),
    output_file: Path = typer.Option("output.jsonl", "--output", "-o", help="Output JSONL file"),
    agents_file: Path = typer.Option(
        "property_agents.jsonl", "--agents", "-a", help="Agents data file"
    ),
) -> None:
    """Transform raw HTML to MongoDB-compatible JSONL format.

    Example:
        idealista-scraper transform raw.html --output mongo.jsonl
    """
    if not input_file.exists():
        console.print(f"[red]Error: Input file {input_file} not found[/red]")
        raise typer.Exit(code=1)

    console.print(
        Panel(
            f"[bold]Transforming HTML to MongoDB format[/bold]\n"
            f"Input: {input_file}\n"
            f"Output: {output_file}\n"
            f"Agents: {agents_file}",
            title="Data Transformer",
            border_style="cyan",
        )
    )

    try:
        from idealista_scraper.parsing import clean_html_content
        from idealista_scraper.transform.properties import load_agent_info, transform_to_mongodb_format

        content = clean_html_content(str(input_file))
        if not content:
            console.print("[red]Failed to process HTML[/red]")
            raise typer.Exit(code=1)

        # Agent lookup key: agency reference first, advertiser name as fallback.
        agency_ref = content.get("agencyReference") or content.get("advertiser_name") or "Unknown"
        agent_info = load_agent_info(str(agents_file), agency_ref)

        advertiser_name = content.get("advertiser_name", "Unknown Advertiser")
        development_name = content.get("development_name", "Unknown Development")

        transformed_data = transform_to_mongodb_format(
            content, agent_info, advertiser_name, development_name
        )
        if not transformed_data:
            console.print("[red]Failed to transform data[/red]")
            raise typer.Exit(code=1)

        # Append mode: each run adds one JSON line to the output file.
        with open(output_file, "a", encoding="utf-8") as f:
            json.dump(transformed_data, f, ensure_ascii=False)
            f.write("\n")

        console.print(f"\n[green]Successfully transformed and saved to {output_file}[/green]")

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so the generic handler below
        # would catch the deliberate exits raised above and print a second,
        # empty "Error during transformation:" line. Let them through as-is.
        raise
    except Exception as e:
        console.print(f"[red]Error during transformation: {e}[/red]")
        raise typer.Exit(code=1) from e


# ============================================================================
# STATUS COMMAND
# ============================================================================


@app.command()
def status() -> None:
    """Check scraping progress and statistics.

    Shows:
    - Total properties processed
    - Failed URLs
    - Client usage statistics
    - Output file sizes
    """
    console.print("\n[bold cyan]Scraping Progress Status[/bold cyan]\n")

    output_dir = get_output_dir()
    if not output_dir.exists():
        console.print("[yellow]No output directory found. Run a scrape first.[/yellow]")
        return

    table = Table(title="Output Files", show_header=True, header_style="bold magenta")
    table.add_column("File", style="cyan")
    table.add_column("Lines", justify="right", style="green")
    table.add_column("Size", justify="right", style="yellow")

    output_files = [
        "rental_properties.jsonl",
        "raw_listings.jsonl",
        "image_urls.jsonl",
        "agent_properties.jsonl",
    ]

    for filename in output_files:
        filepath = output_dir / filename
        if filepath.exists():
            # Only the line count matters here, so decode as UTF-8 and replace
            # undecodable bytes instead of relying on the platform default
            # encoding, which can raise UnicodeDecodeError on non-ASCII data.
            with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                lines = sum(1 for _ in f)

            size = filepath.stat().st_size
            if size < 1024 * 1024:
                size_str = f"{size / 1024:.2f} KB"
            else:
                size_str = f"{size / (1024 * 1024):.2f} MB"

            table.add_row(filename, str(lines), size_str)
        else:
            # Missing files are still listed so the table always has every row.
            table.add_row(filename, "0", "0 KB")

    console.print(table)

    progress_file = output_dir / "rental_progress.pkl"
    if progress_file.exists():
        try:
            # SECURITY: pickle.load can execute arbitrary code from the file.
            # NOTE(review): presumably this checkpoint is only ever written by
            # the scraper itself - confirm, and never point the output
            # directory at untrusted content.
            with open(progress_file, "rb") as f:
                progress_data = pickle.load(f)

            console.print("\n[bold]Progress Summary:[/bold]")
            console.print(f"  Processed URLs: {len(progress_data.get('processed_urls', set()))}")
            console.print(f"  Failed URLs: {len(progress_data.get('failed_urls', set()))}")

            if progress_data.get("client_progress"):
                console.print("\n[bold]Client Statistics:[/bold]")
                for client_key, stats in progress_data["client_progress"].items():
                    console.print(f"  Client {client_key}:")
                    console.print(f"    Processed: {stats.get('processed_count', 0)}")
                    console.print(f"    Failed: {stats.get('failed_count', 0)}")
        except Exception as e:
            # A corrupt or incompatible progress file must not break `status`.
            console.print(f"[yellow]Could not read progress file: {e}[/yellow]")

    console.print()


# ============================================================================
# RESUME COMMAND
# ============================================================================


@app.command()
def resume() -> None:
    """Resume an interrupted scraping session.

    Continues from the last saved progress checkpoint.
    """
    console.print("\n[bold cyan]Resuming Scraping Session...[/bold cyan]\n")

    progress_file = get_output_dir() / "rental_progress.pkl"

    if not progress_file.exists():
        # This command does not start a scrape itself, so the message only
        # points the user at the command that does.
        console.print("[yellow]No progress file found. Start a fresh scrape instead.[/yellow]")
        console.print("[cyan]Use: idealista-scraper scrape listings[/cyan]")
        return

    try:
        from idealista_scraper.orchestration.agent_manager import main as scraper_main

        console.print("[green]Progress file found. Resuming...[/green]\n")
        # NOTE(review): called without a config, unlike the other call sites in
        # this module - presumably the scraper restores its state from the
        # progress file; confirm against agent_manager.main.
        asyncio.run(scraper_main())

        console.print("\n[green]Scraping session completed![/green]")
    except Exception as e:
        console.print(f"[red]Error resuming scrape: {e}[/red]")
        raise typer.Exit(code=1) from e


# ============================================================================
# ESTIMATE COMMAND
# ============================================================================


@app.command()
def estimate(
    urls_file: Optional[Path] = typer.Option(
        None, "--urls", "-u", help="File with URLs to estimate"
    ),
    pages: Optional[int] = typer.Option(None, "--pages", "-p", help="Number of pages to estimate"),
    asp: bool = typer.Option(
        True, "--asp/--no-asp", help="Use ASP (25x credits) or not (1x credits)"
    ),
) -> None:
    """Estimate Scrapfly credit usage for a scraping job.

    Examples:
        idealista-scraper estimate --urls urls.txt --asp
        idealista-scraper estimate --pages 100 --no-asp
    """
    console.print("\n[bold cyan]Credit Usage Estimation[/bold cyan]\n")

    url_count = 0
    if urls_file:
        if not urls_file.exists():
            console.print(f"[red]Error: File {urls_file} not found[/red]")
            raise typer.Exit(code=1)
        # Only non-blank lines are counted; decode leniently so an odd byte in
        # the file cannot abort the estimate.
        with open(urls_file, encoding="utf-8", errors="replace") as f:
            url_count = sum(1 for line in f if line.strip())
    elif pages:
        # The estimate assumes 30 requests per page.
        url_count = pages * 30
    else:
        console.print("[yellow]Please provide either --urls or --pages[/yellow]")
        raise typer.Exit(code=1)

    base_credits = 1
    asp_multiplier = 25 if asp else 1
    credits_per_request = base_credits * asp_multiplier
    total_credits = url_count * credits_per_request

    table = Table(title="Credit Estimation", show_header=True, header_style="bold magenta")
    table.add_column("Parameter", style="cyan")
    table.add_column("Value", justify="right", style="green")

    table.add_row("URLs/Requests", f"{url_count:,}")
    table.add_row("ASP Enabled", "Yes" if asp else "No")
    table.add_row("Credits per Request", str(credits_per_request))
    table.add_row("Total Credits", f"{total_credits:,}")

    # Cost conversion uses a fixed rate of 1000 credits per dollar.
    credits_per_dollar = 1000
    estimated_cost = total_credits / credits_per_dollar

    table.add_row("Estimated Cost", f"${estimated_cost:.2f}")

    console.print(table)

    if asp:
        savings = total_credits - (url_count * base_credits)
        # An empty URL file yields total_credits == 0; avoid dividing by zero.
        savings_percent = (savings / total_credits) * 100 if total_credits else 0.0
        console.print(
            f"\n[yellow]Potential savings without ASP: {savings:,} credits "
            f"({savings_percent:.1f}%)[/yellow]"
        )
        console.print("[cyan]Run 'idealista-scraper test-asp' to check if ASP is required[/cyan]")

    console.print()


# ============================================================================
# CLEAN COMMAND
# ============================================================================


@app.command()
def clean(
    all_data: bool = typer.Option(False, "--all", "-a", help="Remove all output data"),
    cache: bool = typer.Option(False, "--cache", "-c", help="Remove cache files only"),
    progress: bool = typer.Option(False, "--progress", "-p", help="Remove progress files only"),
) -> None:
    """Clean output files and cached data.

    Examples:
        idealista-scraper clean --cache
        idealista-scraper clean --progress
        idealista-scraper clean --all
    """
    console.print("\n[bold yellow]Cleaning Data...[/bold yellow]\n")

    output_dir = get_output_dir()

    if not output_dir.exists():
        console.print("[yellow]No output directory found.[/yellow]")
        return

    removed_files: list[str] = []

    def remove_matching(directory: Path, pattern: str) -> None:
        """Unlink every file in ``directory`` matching ``pattern`` and record it."""
        for entry in directory.glob(pattern):
            # unlink() raises on directories, which previously aborted the
            # whole clean midway. Directories are skipped rather than removed
            # recursively.
            if not (entry.is_file() or entry.is_symlink()):
                continue
            entry.unlink()
            removed_files.append(str(entry))

    if all_data or cache:
        cache_dir = output_dir / "cache"
        if cache_dir.exists():
            remove_matching(cache_dir, "*")

    if all_data or progress:
        remove_matching(output_dir, "rental_progress.pkl")

    if all_data:
        for pattern in ["*.jsonl", "*.log", "*.html"]:
            remove_matching(output_dir, pattern)

    if removed_files:
        console.print(f"[green]Removed {len(removed_files)} file(s):[/green]")
        # Only the first ten paths are listed to keep the output short.
        for removed in removed_files[:10]:
            console.print(f"  - {removed}")
        if len(removed_files) > 10:
            console.print(f"  ... and {len(removed_files) - 10} more")
    else:
        console.print("[yellow]No files to remove.[/yellow]")

    console.print()


# ============================================================================
# INFO COMMAND
# ============================================================================


@app.command()
def info() -> None:
    """Display project information and configuration.

    Shows environment setup, API keys configured, and directory structure.
    """
    from idealista_scraper import __version__
    from idealista_scraper.utils.paths import get_config_dir, get_project_root

    console.print("\n[bold cyan]idealistaScraper Project Info[/bold cyan]\n")

    # Numbered keys SCRAPFLY_KEY_1 .. SCRAPFLY_KEY_15 take precedence in the
    # report; the un-numbered SCRAPFLY_KEY is only mentioned when none is set.
    numbered_keys = sum(1 for idx in range(1, 16) if os.getenv(f"SCRAPFLY_KEY_{idx}"))
    fallback_key = os.getenv("SCRAPFLY_KEY")
    if numbered_keys > 0:
        keys_label = str(numbered_keys)
    elif fallback_key:
        keys_label = "1 (SCRAPFLY_KEY)"
    else:
        keys_label = "0"

    # Resolve the default country; an unknown name is shown as "(invalid)".
    country_name = os.getenv("IDEALISTA_DEFAULT_COUNTRY", "spain")
    try:
        country_label = f"{country_name} ({get_country(country_name).domain})"
    except ValueError:
        country_label = f"{country_name} (invalid)"

    summary = Table(show_header=True, header_style="bold magenta")
    summary.add_column("Configuration", style="cyan")
    summary.add_column("Value", style="green")

    rows = [
        ("Version", __version__),
        ("Project Directory", str(get_project_root())),
        ("Config Directory", str(get_config_dir())),
        ("Default Country", country_label),
        ("Available Countries", ", ".join(list_available_countries())),
        ("Scrapfly Keys Found", keys_label),
        ("Output Directory", str(get_output_dir())),
    ]
    for label, value in rows:
        summary.add_row(label, value)

    # Report how many of the expected core source files exist under the root.
    core_modules = (
        "idealista_scraper/scraping/agent_orchestrator.py",
        "idealista_scraper/transform/properties.py",
        "idealista_scraper/parsing/html_parser.py",
    )
    root = get_project_root()
    present = len([rel for rel in core_modules if (root / rel).exists()])
    summary.add_row("Core Modules", f"{present}/{len(core_modules)}")

    console.print(summary)

    # List the JSONL outputs with their sizes, if an output directory exists.
    results_dir = get_output_dir()
    if results_dir.exists():
        console.print("\n[bold]Output Files:[/bold]")
        for jsonl_path in sorted(results_dir.glob("*.jsonl")):
            kilobytes = jsonl_path.stat().st_size / 1024
            console.print(f"  - {jsonl_path.name} ({kilobytes:.2f} KB)")

    console.print()


# ============================================================================
# UPLOAD-IMAGES COMMAND
# ============================================================================


def _upload_images_to_s3(input_file: str, bucket_name: str, resume: bool = False) -> None:
    """Upload the images referenced by a JSONL file to an S3 bucket.

    Args:
        input_file: Path of the JSONL file. Every line is counted as one
            property to size the progress bar.
        bucket_name: Name of the destination S3 bucket.
        resume: When True, ``load_progress()`` is called on the uploader before
            the upload starts, and the upload runs on that same instance.
    """
    from idealista_scraper.upload.s3 import S3ImageUploader

    with open(input_file, "r", encoding="utf-8") as f:
        total_properties = sum(1 for _ in f)

    console.print(f"\n[cyan]Found {total_properties} properties to process[/cyan]")

    async def run_upload() -> None:
        # Use ONE uploader for both the checkpoint load and the upload. The
        # previous code loaded progress on one instance and then uploaded with
        # a freshly constructed one, discarding whatever load_progress() had
        # restored on the first instance.
        # NOTE(review): whether process_all_properties() honours the restored
        # state depends on S3ImageUploader - confirm against its implementation.
        # The instance is built inside the coroutine, as the uploading instance
        # was before, so it is created under the running event loop.
        uploader = S3ImageUploader(bucket_name=bucket_name, max_concurrency=50)

        if resume:
            start_index = uploader.load_progress()
            console.print(f"[yellow]Resuming from property {start_index}[/yellow]")

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            TimeRemainingColumn(),
            console=console,
        ) as progress:
            task = progress.add_task(
                f"[cyan]Uploading images to {bucket_name}...", total=total_properties
            )
            await uploader.process_all_properties(input_file)
            # The bar is only advanced once, after the whole upload finishes.
            progress.update(task, completed=total_properties)

    asyncio.run(run_upload())

    console.print(f"\n[green]Successfully uploaded images to S3 bucket: {bucket_name}[/green]")


@app.command()
def upload_images(
    input_file: Optional[Path] = typer.Option(
        None, "--input", "-i", help="Input JSONL file with image URLs"
    ),
    bucket: Optional[str] = typer.Option(None, "--bucket", "-b", help="S3 bucket name"),
    resume_upload: bool = typer.Option(
        False, "--resume", "-r", help="Resume from last checkpoint"
    ),
) -> None:
    """Upload property images to S3 bucket.

    The input defaults to image_urls.jsonl in the output directory. The bucket
    is taken from --bucket, then the S3_BUCKET_NAME environment variable, and
    is prompted for as a last resort.

    Example:
        idealista-scraper upload-images --input image_urls.jsonl --bucket my-bucket
        idealista-scraper upload-images --bucket my-bucket --resume
    """
    if input_file is None:
        input_file = get_output_dir() / "image_urls.jsonl"

    if not input_file.exists():
        console.print(f"[red]Error: Input file {input_file} not found[/red]")
        raise typer.Exit(code=1)

    # Short-circuiting keeps the prompt as the last resort only.
    bucket = (
        bucket
        or os.getenv("S3_BUCKET_NAME")
        or Prompt.ask("[cyan]Enter S3 bucket name[/cyan]")
    )
    # The prompt returns an empty string when the user just presses Enter;
    # stop here rather than handing an empty bucket name to the uploader.
    if not bucket.strip():
        console.print("[red]Error: S3 bucket name is required[/red]")
        raise typer.Exit(code=1)

    console.print(
        Panel(
            f"[bold]Uploading Images to S3[/bold]\n"
            f"Input: {input_file}\n"
            f"Bucket: {bucket}\n"
            f"Resume: {'Yes' if resume_upload else 'No'}",
            title="S3 Image Uploader",
            border_style="cyan",
        )
    )

    try:
        _upload_images_to_s3(str(input_file), bucket, resume_upload)
    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero,
        # keeping the original exception chained for debugging.
        console.print(f"[red]Error uploading images: {e}[/red]")
        raise typer.Exit(code=1) from e


# ============================================================================
# CLEAN-MONGO COMMAND
# ============================================================================


@app.command()
def clean_mongo(
    input_file: Path = typer.Argument(..., help="Input JSONL file to clean"),
    output_file: Path = typer.Option(
        "properties_cleaned.jsonl", "--output", "-o", help="Output JSONL file"
    ),
) -> None:
    """Clean MongoDB JSONL data (convert UUIDs to ObjectIDs, fix BSON issues).

    Example:
        idealista-scraper clean-mongo properties.jsonl --output properties_cleaned.jsonl
    """
    if not input_file.exists():
        console.print(f"[red]Error: Input file {input_file} not found[/red]")
        raise typer.Exit(code=1)

    # One line per record; used for the progress total and the statistics.
    with open(input_file, "r", encoding="utf-8") as f:
        total_lines = sum(1 for _ in f)

    console.print(
        Panel(
            f"[bold]Cleaning MongoDB Data[/bold]\n"
            f"Input: {input_file} ({total_lines} records)\n"
            f"Output: {output_file}",
            title="MongoDB Data Cleaner",
            border_style="cyan",
        )
    )

    try:
        from idealista_scraper.transform.cleaner import convert_file

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("[cyan]Cleaning records...", total=total_lines)
            convert_file(str(input_file), str(output_file))
            # The bar is only advanced once, after the conversion has finished.
            progress.update(task, completed=total_lines)

        console.print(f"\n[green]Successfully cleaned data and saved to {output_file}[/green]")

        with open(output_file, "r", encoding="utf-8") as f:
            output_lines = sum(1 for _ in f)

        # An empty input has no meaningful rate. Dividing by zero here used to
        # be caught below and reported as a cleaning failure (exit code 1) even
        # though the conversion itself had already succeeded.
        if total_lines:
            success_rate = f"{(output_lines / total_lines * 100):.1f}%"
        else:
            success_rate = "N/A"

        stats_table = Table(title="Cleaning Statistics")
        stats_table.add_column("Metric", style="cyan")
        stats_table.add_column("Count", justify="right", style="green")
        stats_table.add_row("Input Records", str(total_lines))
        stats_table.add_row("Output Records", str(output_lines))
        stats_table.add_row("Success Rate", success_rate)
        console.print(stats_table)

    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero,
        # keeping the original exception chained for debugging.
        console.print(f"[red]Error cleaning MongoDB data: {e}[/red]")
        raise typer.Exit(code=1) from e


# ============================================================================
# PIPELINE COMMAND
# ============================================================================


class PipelinePreset(str, Enum):
    """Named step selections accepted by the ``pipeline`` command's --preset option.

    Subclasses ``str`` so each member compares equal to its plain string value.
    """

    # Enables every step: discover agents, scrape, transform, clean, upload.
    full = "full"
    # Enables the scrape step only.
    quick = "quick"
    # Enables the clean and upload steps only.
    export = "export"


@app.command()
def pipeline(
    preset: Optional[PipelinePreset] = typer.Option(
        None, "--preset", "-p", help="Pipeline preset: full, quick, or export"
    ),
    location: str = typer.Option("madrid", "--location", "-l", help="Location to scrape"),
    country: str = typer.Option("spain", "--country", help="Country to scrape"),
    pages: int = typer.Option(10, "--pages", help="Number of pages to scrape"),
    bucket: Optional[str] = typer.Option(None, "--bucket", "-b", help="S3 bucket name"),
) -> None:
    """Run automated data pipeline with interactive or preset modes.

    Presets:
        full  - Discover Agents -> Scrape -> Transform -> Clean -> Upload
        quick - Scrape only (skip discovery, transform, cleaning and upload)
        export - Clean -> Upload (process existing data)

    When upload is selected and --bucket is not given, the bucket is read from
    the S3_BUCKET_NAME environment variable.

    Example:
        idealista-scraper pipeline                    # Interactive mode
        idealista-scraper pipeline --preset full      # Full automated pipeline
        idealista-scraper pipeline --preset quick --location barcelona --pages 20
    """
    from rich.prompt import IntPrompt

    from idealista_scraper.cli.config import PropertyType as ConfigPropertyType, RegionSelection

    console.print(
        Panel(
            "[bold cyan]Idealista Data Pipeline[/bold cyan]\n"
            "Automated workflow for scraping, transforming, cleaning, and uploading data",
            border_style="cyan",
        )
    )

    # Determine which steps to run
    if preset is None:
        console.print("\n[bold]Select Pipeline Steps:[/bold]")
        run_discover = Confirm.ask("[cyan]1. Discover agents?[/cyan]", default=True)
        run_scrape = Confirm.ask("[cyan]2. Scrape listings?[/cyan]", default=True)
        run_transform = Confirm.ask("[cyan]3. Transform data?[/cyan]", default=False)
        run_clean = Confirm.ask("[cyan]4. Clean MongoDB data?[/cyan]", default=True)
        run_upload = Confirm.ask("[cyan]5. Upload to S3?[/cyan]", default=False)

        if run_scrape or run_discover:
            location = Prompt.ask("[cyan]Location to scrape[/cyan]", default=location)
            country = Prompt.ask("[cyan]Country[/cyan]", default=country)
            # IntPrompt re-asks on non-numeric input instead of crashing with
            # the ValueError that int(Prompt.ask(...)) would raise.
            pages = IntPrompt.ask("[cyan]Number of pages[/cyan]", default=pages)
    else:
        # (discover, scrape, transform, clean, upload) per preset.
        # NOTE(review): "quick" was previously documented as
        # "Scrape -> Transform" but has only ever enabled scrape here; the
        # docstring now describes this behaviour - confirm which was intended.
        preset_flags = {
            PipelinePreset.full: (True, True, True, True, True),
            PipelinePreset.quick: (False, True, False, False, False),
            PipelinePreset.export: (False, False, False, True, True),
        }
        run_discover, run_scrape, run_transform, run_clean, run_upload = preset_flags[preset]

    # Build pipeline steps list. Upload is not a named step; it is conveyed to
    # the pipeline through the config's s3_bucket value below.
    flagged_steps = (
        (run_discover, "discover_agents"),
        (run_scrape, "scrape"),
        (run_transform, "transform"),
        (run_clean, "clean"),
    )
    pipeline_steps = [step for enabled, step in flagged_steps if enabled]

    # Same bucket fallback as the upload-images command, so a requested upload
    # is not dropped just because --bucket was omitted.
    if run_upload and not bucket:
        bucket = os.getenv("S3_BUCKET_NAME")
        if not bucket:
            console.print(
                "[yellow]Warning: upload selected but no S3 bucket was provided "
                "via --bucket or S3_BUCKET_NAME[/yellow]"
            )

    # Create config
    config = ScraperConfig(
        country=country,
        regions=RegionSelection(codes=[location], names=[location.title()]),
        property_type=ConfigPropertyType.rental,
        pages_per_region=pages,
        max_concurrent=50,
        pipeline_steps=pipeline_steps,
        s3_bucket=bucket if run_upload else None,
    )

    # Execute the pipeline
    asyncio.run(execute_pipeline(config))


# ============================================================================
# PRESET COMMANDS
# ============================================================================


# Sub-application that groups the preset management commands. It is mounted on
# the main app under the name "preset", so its commands are invoked as
# `idealista-scraper preset <command>`.
preset_app = typer.Typer(help="Manage configuration presets")
app.add_typer(preset_app, name="preset")


@preset_app.command("list")
def preset_list() -> None:
    """List all saved presets.

    Example:
        idealista-scraper preset list
    """
    console.print("\n[bold cyan]Saved Presets[/bold cyan]\n")

    names = list_presets()
    if not names:
        console.print("[yellow]No presets found.[/yellow]")
        console.print("\n[dim]Create a preset by running the wizard and choosing to save[/dim]\n")
        return

    from idealista_scraper.cli.presets import get_preset_info

    overview = Table(title="Configuration Presets", show_header=True, header_style="bold magenta")
    for heading, colour in (
        ("Name", "cyan"),
        ("Country", "green"),
        ("Property Type", "yellow"),
        ("Regions", "blue"),
        ("Steps", "magenta"),
    ):
        overview.add_column(heading, style=colour)

    for name in names:
        details = get_preset_info(name)
        if not details:
            # No data came back for this preset: show a placeholder row.
            overview.add_row(name, "Error", "Error", "Error", "Error")
            continue

        country = details.get("country", "N/A")
        property_type = details.get("property_type", "N/A")
        regions = details.get("regions", {})

        # Regions cell: "All regions", up to three names plus an overflow
        # count, or "Default" when nothing is specified.
        if regions.get("all_regions"):
            regions_cell = "All regions"
        elif regions.get("names"):
            region_names = regions["names"]
            hidden_regions = len(region_names) - 3
            regions_cell = ", ".join(region_names[:3])
            if hidden_regions > 0:
                regions_cell += f" (+{hidden_regions} more)"
        else:
            regions_cell = "Default"

        # Steps cell: the first two steps plus an overflow count.
        steps = details.get("pipeline_steps", [])
        hidden_steps = len(steps) - 2
        steps_cell = ", ".join(steps[:2])
        if hidden_steps > 0:
            steps_cell += f" (+{hidden_steps})"

        overview.add_row(name, country, property_type, regions_cell, steps_cell)

    console.print(overview)
    console.print(f"\n[dim]Total: {len(names)} preset(s)[/dim]\n")


@preset_app.command("delete")
def preset_delete(
    name: str = typer.Argument(..., help="Name of the preset to delete"),
    force: bool = typer.Option(False, "--force", "-f", help="Skip confirmation"),
) -> None:
    """Delete a saved preset.

    Example:
        idealista-scraper preset delete my-preset
        idealista-scraper preset delete my-preset --force
    """
    from idealista_scraper.cli.presets import get_preset_info

    # Refuse early when there is nothing to delete.
    details = get_preset_info(name)
    if not details:
        console.print(f"[red]Error: Preset '{name}' not found[/red]")
        raise typer.Exit(code=1)

    # Without --force, show what is about to be removed and ask first.
    if not force:
        console.print(f"\n[yellow]Preset '{name}' details:[/yellow]")
        console.print(f"  Country: {details.get('country', 'N/A')}")
        console.print(f"  Property Type: {details.get('property_type', 'N/A')}")
        console.print()

        go_ahead = Confirm.ask(
            f"[red]Are you sure you want to delete preset '{name}'?[/red]",
            default=False,
        )
        if not go_ahead:
            console.print("\n[yellow]Deletion cancelled[/yellow]")
            return

    # delete_preset reports success through its truthiness.
    if not delete_preset(name):
        console.print(f"\n[red]Failed to delete preset '{name}'[/red]\n")
        raise typer.Exit(code=1)

    console.print(f"\n[green]Preset '{name}' deleted successfully[/green]\n")


@preset_app.command("run")
def preset_run(
    name: str = typer.Argument(..., help="Name of the preset to run"),
) -> None:
    """Run the scraper with a saved preset.

    Example:
        idealista-scraper preset run my-preset
    """
    console.print(f"\n[cyan]Loading preset '{name}'...[/cyan]\n")

    config = load_preset(name)

    if config is None:
        # Unknown preset: list what is available, then exit non-zero.
        console.print(f"[red]Error: Preset '{name}' not found[/red]")
        console.print("\n[yellow]Available presets:[/yellow]")
        saved = list_presets()
        listing = [f"  - {entry}" for entry in saved] if saved else ["  (none)"]
        for line in listing:
            console.print(line)
        raise typer.Exit(code=1)

    # Summarise the loaded configuration before starting the run.
    summary = "\n".join(
        [
            f"[bold]Preset:[/bold] {name}",
            f"[bold]Country:[/bold] {config.country.title()}",
            f"[bold]Property Type:[/bold] {config.property_type.value.title()}",
            f"[bold]Pipeline Steps:[/bold] {', '.join(config.pipeline_steps)}",
        ]
    )
    console.print(Panel(summary, title="Running with Preset", border_style="cyan"))

    # Hand the configuration to the pipeline and block until it completes.
    asyncio.run(execute_pipeline(config))


# ============================================================================
# MAIN ENTRY POINT
# ============================================================================


def main() -> None:
    """Entry point for the CLI.

    Invokes the module-level Typer ``app``, which parses the command line and
    dispatches to the matching command.
    """
    app()


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
