"""
Cached configuration loader that loads configs once at module level.

This solves the performance issue in html_parser.py where config files
were being loaded on every parse call. Instead, each config is loaded
from disk the first time it is requested and is then served from an
in-memory cache.

Features:
- Lazy loading (loads on first access)
- Module-level caching (loads once, reused everywhere)
- Automatic reload on file change (optional)
- Thread-safe operations
- Support for JSON and JSON5 (JSON5 requires the optional json5 package)
"""

import json
import logging
import os
import threading
import time
from pathlib import Path
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

# Try to import json5 for extended JSON support
try:
    import json5
    HAS_JSON5 = True
except ImportError:
    HAS_JSON5 = False
    logger.debug("json5 not available, using standard json")


class ConfigLoader:
    """
    Cached configuration loader with lazy loading and optional change detection.

    Each config file is parsed the first time it is requested and kept in
    memory. Subsequent calls return the cached object without disk I/O.
    When ``auto_reload`` is enabled, cached files whose modification time
    has changed on disk are dropped from the cache (at most once per
    ``check_interval``) so the next request re-reads them.

    All access to the cache is serialized through a re-entrant lock.

    Args:
        config_dir: Directory containing config files (default: configs/)
        auto_reload: Enable automatic reloading on file change
        check_interval: How often to check for changes (seconds)

    Example:
        loader = ConfigLoader()
        patterns = loader.get_config('regex_patterns.json')
        settings = loader.get_config('settings_config.json')
    """

    def __init__(
        self,
        config_dir: Optional[str] = None,
        auto_reload: bool = False,
        check_interval: float = 60.0
    ):
        # RLock: get_config() may be re-entered by the same thread
        # (e.g. via preload()) while the lock is held.
        self._lock = threading.RLock()
        # filename -> parsed config
        self._cache: Dict[str, Dict[str, Any]] = {}
        # filename -> st_mtime observed just before the file was read
        self._file_mtimes: Dict[str, float] = {}
        self._auto_reload = auto_reload
        self._check_interval = check_interval
        # time.time() of the last change scan; 0.0 means "never scanned"
        self._last_check = 0.0

        # Set up config directory
        if config_dir is None:
            # Default to configs/ two levels above this file
            base_dir = Path(__file__).parent.parent
            self._config_dir = base_dir / 'configs'
        else:
            self._config_dir = Path(config_dir)

        if not self._config_dir.exists():
            logger.warning("Config directory does not exist: %s", self._config_dir)

        logger.info("ConfigLoader initialized with dir: %s", self._config_dir)

    def _get_file_mtime(self, filepath: Path) -> float:
        """Return the file's modification time, or 0.0 if it cannot be stat'ed."""
        try:
            return filepath.stat().st_mtime
        except (OSError, ValueError):
            # OSError: missing file, permissions, etc.
            # ValueError: malformed path (e.g. embedded null byte).
            return 0.0

    def _check_for_updates(self) -> None:
        """
        Drop cached configs whose file on disk has a different mtime.

        Does nothing unless ``auto_reload`` is enabled, and scans at most
        once per ``check_interval`` seconds. Dropped entries are re-read
        lazily by the next ``get_config`` call.
        """
        if not self._auto_reload:
            return

        with self._lock:
            # The interval gate is evaluated under the lock so that
            # concurrent callers cannot both pass it and scan twice.
            now = time.time()
            if now - self._last_check < self._check_interval:
                return
            self._last_check = now

            # Iterate over a snapshot because entries are removed in the loop.
            for filename, cached_mtime in list(self._file_mtimes.items()):
                current_mtime = self._get_file_mtime(self._config_dir / filename)

                # 0.0 means the stat failed; keep serving the cached copy
                # rather than discarding a config that was valid.
                # Use != (not >) so a file replaced by one with an older
                # timestamp (backup restore, cp -p, VCS checkout) is
                # still detected as changed.
                if current_mtime and current_mtime != cached_mtime:
                    logger.info("Config file changed, reloading: %s", filename)
                    # Remove from cache to force reload
                    self._cache.pop(filename, None)
                    self._file_mtimes.pop(filename, None)

    def _load_file(self, filename: str) -> Dict[str, Any]:
        """
        Read and parse a configuration file from the config directory.

        Args:
            filename: Name of the file, relative to the config directory

        Returns:
            The parsed content

        Raises:
            FileNotFoundError: If the file does not exist
            json.JSONDecodeError: If the content cannot be parsed
        """
        filepath = self._config_dir / filename

        try:
            content = filepath.read_text(encoding='utf-8')
        except FileNotFoundError:
            raise FileNotFoundError(f"Config file not found: {filepath}") from None

        # .json5 files go straight to the json5 parser when it is available.
        if filename.endswith('.json5') and HAS_JSON5:
            return json5.loads(content)

        # Everything else: try the fast stdlib parser first. Strict JSON is
        # the common case and json5 is a much slower pure-Python parser.
        try:
            return json.loads(content)
        except json.JSONDecodeError as strict_error:
            # Only .json files get the lenient fallback (comments,
            # trailing commas), and only when json5 is installed.
            if HAS_JSON5 and filename.endswith('.json'):
                try:
                    return json5.loads(content)
                except Exception:
                    # Report the stdlib error so callers see the same
                    # exception type whether or not json5 is installed.
                    raise strict_error from None
            raise

    def get_config(
        self,
        filename: str,
        default: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Get a configuration file, loading from cache or disk.

        The returned object is the cached instance itself, so mutations
        made by a caller are visible to every later caller. Failed loads
        are logged and are not cached, so a later call retries the disk.

        Args:
            filename: Name of the config file (e.g., 'regex_patterns.json')
            default: Value returned if the file is missing or cannot be
                parsed (default: a new empty dict)

        Returns:
            The configuration dictionary, or ``default`` on failure
        """
        # Check for updates if auto_reload is enabled
        if self._auto_reload:
            self._check_for_updates()

        with self._lock:
            # Return cached version if available
            if filename in self._cache:
                return self._cache[filename]

            # Record the mtime BEFORE reading. If the file is modified
            # between the stat and the read, the stored mtime is older than
            # the file's and the next change scan reloads it. Recording it
            # after the read could pair old content with the new mtime and
            # hide the change permanently.
            mtime = self._get_file_mtime(self._config_dir / filename)

            try:
                data = self._load_file(filename)
            except FileNotFoundError:
                logger.warning("Config file not found: %s", filename)
                return default if default is not None else {}
            except Exception as e:
                logger.error("Error loading config %s: %s", filename, e)
                return default if default is not None else {}

            # Cache the loaded data
            self._cache[filename] = data
            self._file_mtimes[filename] = mtime

            logger.debug("Loaded and cached config: %s", filename)
            return data

    def reload(self, filename: Optional[str] = None) -> None:
        """
        Drop a config file (or all configs) from the cache.

        Nothing is read here; the next ``get_config`` call for a dropped
        file loads it from disk again.

        Args:
            filename: Specific file to reload, or None for all
        """
        with self._lock:
            if filename is not None:
                self._cache.pop(filename, None)
                self._file_mtimes.pop(filename, None)
                logger.info("Cleared cache for: %s", filename)
            else:
                self._cache.clear()
                self._file_mtimes.clear()
                logger.info("Cleared all config cache")

    def preload(self, filenames: list) -> None:
        """
        Preload multiple config files into cache.

        Files that fail to load are logged by ``get_config`` and skipped.

        Args:
            filenames: List of config filenames to preload
        """
        for filename in filenames:
            self.get_config(filename)
        logger.info("Preloaded %s config files", len(filenames))

    def get_stats(self) -> dict:
        """
        Get loader statistics.

        Returns:
            Dict with the cached filenames, their count, the config
            directory (as a string) and the auto_reload flag
        """
        with self._lock:
            return {
                'cached_files': list(self._cache.keys()),
                'cache_count': len(self._cache),
                'config_dir': str(self._config_dir),
                'auto_reload': self._auto_reload
            }

    def __contains__(self, filename: str) -> bool:
        """Check if a config is currently cached (does not touch the disk)."""
        with self._lock:
            return filename in self._cache


# ============================================================================
# Module-level cached instance (created lazily on first access)
# ============================================================================

# Shared ConfigLoader for this module. Stays None until
# _get_config_loader() creates it on first use.
_config_loader_instance: Optional[ConfigLoader] = None
# Serializes creation of the shared instance.
_config_loader_lock = threading.Lock()


def _get_config_loader() -> ConfigLoader:
    """
    Return the shared ConfigLoader, creating it on the first call.

    The instance is checked once without the lock (fast path) and once
    more with the lock held, so only one ConfigLoader is ever created.
    """
    global _config_loader_instance

    # Fast path: already created, no locking needed.
    if _config_loader_instance is not None:
        return _config_loader_instance

    with _config_loader_lock:
        # Re-check: another thread may have created it while we waited.
        if _config_loader_instance is None:
            _config_loader_instance = ConfigLoader()

    return _config_loader_instance


class _ConfigLoaderProxy:
    """
    Stateless facade over the shared ConfigLoader.

    Every method looks up the shared loader via ``_get_config_loader()``
    and forwards the call, so the loader itself is only created when one
    of these methods is first used.

    Usage:
        from src.cache import config_loader
        patterns = config_loader.get_config('regex_patterns.json')
    """

    def get_config(
        self,
        filename: str,
        default: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Forward to the shared loader's ``get_config``."""
        loader = _get_config_loader()
        return loader.get_config(filename, default)

    def reload(self, filename: Optional[str] = None) -> None:
        """Forward to the shared loader's ``reload``."""
        loader = _get_config_loader()
        return loader.reload(filename)

    def preload(self, filenames: list) -> None:
        """Forward to the shared loader's ``preload``."""
        loader = _get_config_loader()
        return loader.preload(filenames)

    def get_stats(self) -> dict:
        """Forward to the shared loader's ``get_stats``."""
        loader = _get_config_loader()
        return loader.get_stats()

    def __contains__(self, filename: str) -> bool:
        """Report whether the shared loader currently has ``filename`` cached."""
        loader = _get_config_loader()
        return filename in loader


# Module-level proxy object. Creating it does not create a ConfigLoader;
# that happens when one of the proxy's methods is first called.
config_loader = _ConfigLoaderProxy()


# ============================================================================
# Named config accessors (each config is loaded lazily on first call, then cached)
# ============================================================================

# Module-level mapping from a short config key to an already-loaded config
# dict. It starts empty.
# NOTE: ConfigLoader.reload() does not clear this mapping.
_cached_configs: Dict[str, Dict[str, Any]] = {}


def get_regex_patterns() -> Dict[str, str]:
    """
    Get regex patterns config from the shared config loader.

    The loader caches the parsed file after the first successful read, so
    repeated calls do no disk I/O. The result is deliberately not memoized
    a second time here: delegating on every call means
    ``config_loader.reload()`` takes effect for this accessor, and an empty
    fallback from a failed first load is not pinned for the rest of the
    process.

    Returns:
        Dictionary of regex pattern names to pattern strings, or an empty
        dict if 'regex_patterns.json' cannot be loaded
    """
    return config_loader.get_config('regex_patterns.json')


def get_data_structure_config() -> Dict[str, Any]:
    """
    Get data structure config from the shared config loader.

    The loader caches the parsed file after the first successful read, so
    repeated calls do no disk I/O. The result is deliberately not memoized
    a second time here: delegating on every call means
    ``config_loader.reload()`` takes effect for this accessor, and an empty
    fallback from a failed first load is not pinned for the rest of the
    process.

    Returns:
        Data structure configuration dictionary, or an empty dict if
        'data_structure_config.json' cannot be loaded
    """
    return config_loader.get_config('data_structure_config.json')


def get_settings_config() -> Dict[str, Any]:
    """
    Get settings config from the shared config loader.

    The loader caches the parsed file after the first successful read, so
    repeated calls do no disk I/O. The result is deliberately not memoized
    a second time here: delegating on every call means
    ``config_loader.reload()`` takes effect for this accessor, and an empty
    fallback from a failed first load is not pinned for the rest of the
    process.

    Returns:
        Settings configuration dictionary, or an empty dict if
        'settings_config.json' cannot be loaded
    """
    return config_loader.get_config('settings_config.json')


def get_scrapfly_configs() -> Dict[str, Any]:
    """
    Get scrapfly configs from the shared config loader.

    The loader caches the parsed file after the first successful read, so
    repeated calls do no disk I/O. The result is deliberately not memoized
    a second time here: delegating on every call means
    ``config_loader.reload()`` takes effect for this accessor, and an empty
    fallback from a failed first load is not pinned for the rest of the
    process.

    Returns:
        Scrapfly configuration dictionary, or an empty dict if
        'scrapfly_configs.json' cannot be loaded
    """
    return config_loader.get_config('scrapfly_configs.json')


def get_views_config() -> Dict[str, Any]:
    """
    Get views config from the shared config loader.

    The loader caches the parsed file after the first successful read, so
    repeated calls do no disk I/O. The result is deliberately not memoized
    a second time here: delegating on every call means
    ``config_loader.reload()`` takes effect for this accessor, and an empty
    fallback from a failed first load is not pinned for the rest of the
    process.

    Returns:
        Views configuration dictionary, or an empty dict if
        'views_config.json' cannot be loaded
    """
    return config_loader.get_config('views_config.json')
