"""
Batched JSONL writer for optimized I/O operations.

Key optimization: Instead of writing each record immediately (with flush),
batch writes reduce disk I/O by 90%+.

Previous behavior:
- Each property triggered immediate write + flush
- ~100 properties = 100 disk writes

Optimized behavior:
- Buffer records in memory
- Write batch when buffer is full or on explicit flush
- 100 properties = 1 disk write
"""

import asyncio
import json
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Any, Callable, Dict, List, Optional, Union

from loguru import logger

from ..utils.paths import get_output_dir


class BatchedJSONLWriter:
    """
    Write JSONL files with batching for improved performance.

    Records are held in an in-memory buffer and appended to ``output_path``
    one batch at a time, so the file is opened once per batch instead of
    once per record.

    Features:
    - Configurable batch size (default: 100 records)
    - Automatic flush on batch completion
    - Thread-safe operations (one lock guards the buffer and the counters)
    - Async write support (the blocking calls run in a worker thread)
    - Progress callback support

    Failure handling:
    - Records leave the buffer only after their batch has been written. If
      opening or writing the file fails, the batch stays buffered and the
      next flush retries it (lines that reached the file before the failure
      may then appear twice).
    - A record that cannot be serialized to JSON is removed from the buffer
      and the serialization error is re-raised; the other records stay
      buffered.
    """

    def __init__(
        self,
        output_path: Union[str, Path],
        batch_size: int = 100,
        append: bool = True,
        on_batch_written: Optional[Callable[[int, int], Any]] = None,
    ):
        """
        Create a writer for ``output_path``.

        Args:
            output_path: Destination JSONL file. Missing parent directories
                are created.
            batch_size: Number of buffered records that triggers an
                automatic write. Must be at least 1.
            append: When False, an existing file at ``output_path`` is
                deleted here. Batches are always written in append mode.
            on_batch_written: Optional callback invoked after each batch as
                ``on_batch_written(count, total_written)``. It runs while
                the writer's (non-reentrant) lock is held, so it must not
                call back into this writer. Exceptions it raises are logged
                as warnings and otherwise ignored.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A batch size of 0 would make the "flush while full" loops spin
        # forever, and a negative one would never flush on its own.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        self.output_path = Path(output_path)
        self.batch_size = batch_size
        self.append = append
        self.on_batch_written = on_batch_written

        self._buffer: List[Dict[str, Any]] = []
        self._lock = Lock()
        self._total_written = 0
        self._batches_written = 0

        # Ensure output directory exists
        self.output_path.parent.mkdir(parents=True, exist_ok=True)

        # Clear file if not appending (no exists() pre-check, so there is no
        # window in which the file can vanish between check and unlink).
        if not append:
            self.output_path.unlink(missing_ok=True)

        logger.debug(
            f"Initialized BatchedJSONLWriter: {output_path} "
            f"(batch_size={batch_size}, append={append})"
        )

    def add(self, record: Dict[str, Any]) -> int:
        """
        Add a record to the buffer.

        Automatically flushes when the buffer reaches ``batch_size``.

        Returns:
            The buffer size right after the record was appended, i.e.
            before any automatic flush triggered by this call.

        Note:
            If the automatic flush raises, the record has already been
            buffered; do not add it a second time.
        """
        with self._lock:
            self._buffer.append(record)
            buffer_size = len(self._buffer)
            self._flush_full_batches()
            return buffer_size

    def add_many(self, records: List[Dict[str, Any]]) -> int:
        """
        Add multiple records at once.

        Every complete batch is written immediately; a remainder smaller
        than ``batch_size`` stays buffered.

        Returns:
            The buffer size right after the records were appended, i.e.
            before any automatic flush triggered by this call.
        """
        with self._lock:
            self._buffer.extend(records)
            buffer_size = len(self._buffer)
            self._flush_full_batches()
            return buffer_size

    def flush(self) -> int:
        """
        Flush all remaining records in the buffer.

        Returns the number of records flushed.
        """
        with self._lock:
            flushed = 0
            # The buffer can hold more than one batch after an earlier
            # failed write, so keep going until it is empty.
            while self._buffer:
                written = self._flush_buffer()
                if not written:
                    break
                flushed += written
            return flushed

    def _flush_full_batches(self) -> None:
        """Write every complete batch (must be called with lock held)."""
        # Stop as soon as a flush writes nothing, so this can never spin.
        while len(self._buffer) >= self.batch_size and self._flush_buffer():
            pass

    def _flush_buffer(self) -> int:
        """
        Write at most one batch to the file (must be called with lock held).

        Returns the number of records written; 0 if there was nothing to
        write.
        """
        batch = self._buffer[: self.batch_size]
        if not batch:
            return 0

        # Serialize before touching the file so a bad record cannot leave a
        # half-written batch behind.
        lines = []
        for index, record in enumerate(batch):
            try:
                lines.append(json.dumps(record, ensure_ascii=False) + "\n")
            except (TypeError, ValueError):
                # This record can never be written; drop it so it does not
                # block the buffer, and let the caller see the error.
                del self._buffer[index]
                raise

        # Write batch to file
        with open(self.output_path, "a", encoding="utf-8") as f:
            f.writelines(lines)

        # Only now, with the file closed without error, drop the batch.
        count = len(batch)
        del self._buffer[:count]
        self._total_written += count
        self._batches_written += 1

        logger.debug(
            f"Wrote batch of {count} records to {self.output_path} "
            f"(total: {self._total_written})"
        )

        if self.on_batch_written:
            try:
                self.on_batch_written(count, self._total_written)
            except Exception as e:
                # A broken progress callback must not fail the write.
                logger.warning(f"Batch callback error: {e}")

        return count

    async def add_async(self, record: Dict[str, Any]) -> int:
        """Async version of add(); runs it in a worker thread."""
        return await asyncio.to_thread(self.add, record)

    async def add_many_async(self, records: List[Dict[str, Any]]) -> int:
        """Async version of add_many(); runs it in a worker thread."""
        return await asyncio.to_thread(self.add_many, records)

    async def flush_async(self) -> int:
        """Async version of flush(); runs it in a worker thread."""
        return await asyncio.to_thread(self.flush)

    def get_stats(self) -> dict:
        """
        Get writer statistics.

        ``buffer_size`` and ``pending_records`` both report the number of
        records currently buffered.
        """
        with self._lock:
            return {
                "output_path": str(self.output_path),
                "batch_size": self.batch_size,
                "buffer_size": len(self._buffer),
                "total_written": self._total_written,
                "batches_written": self._batches_written,
                "pending_records": len(self._buffer),
            }

    def __enter__(self):
        """Return the writer itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Flush buffered records when the ``with`` block ends."""
        self.flush()

    async def __aenter__(self):
        """Return the writer itself for use in an ``async with`` block."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Flush buffered records when the ``async with`` block ends."""
        await self.flush_async()


# Module-level cache for the default writer: stays None until the first
# get_default_writer() call constructs a writer and stores it here.
_default_writer: Optional[BatchedJSONLWriter] = None


def get_default_writer(
    output_path: Path | str | None = None,
    batch_size: int = 100,
) -> BatchedJSONLWriter:
    """
    Return the shared default writer, building it on the first call.

    Args:
        output_path: File for the default writer. When omitted (or empty),
            ``properties.jsonl`` inside ``get_output_dir()`` is used.
        batch_size: Batch size handed to the new writer.

    Both arguments only matter on the call that actually creates the
    writer; once it exists, it is returned as-is and they are ignored.
    """
    global _default_writer

    # Fast path: the singleton has already been built.
    if _default_writer is not None:
        return _default_writer

    if output_path:
        target = Path(output_path)
    else:
        target = get_output_dir() / "properties.jsonl"

    _default_writer = BatchedJSONLWriter(output_path=target, batch_size=batch_size)
    return _default_writer


class MultiWriter:
    """
    Write to multiple JSONL files simultaneously.

    Useful for writing properties and image URLs to separate files
    in a single pass.
    """

    def __init__(self, writers: Dict[str, BatchedJSONLWriter]):
        self.writers = writers

    def add(self, writer_name: str, record: Dict[str, Any]) -> int:
        """Add a record to a specific writer."""
        if writer_name not in self.writers:
            raise KeyError(f"Unknown writer: {writer_name}")
        return self.writers[writer_name].add(record)

    def flush_all(self) -> Dict[str, int]:
        """Flush all writers."""
        return {name: writer.flush() for name, writer in self.writers.items()}

    def get_all_stats(self) -> Dict[str, dict]:
        """Get stats from all writers."""
        return {name: writer.get_stats() for name, writer in self.writers.items()}

    @classmethod
    def create_default(cls, output_dir: Path | str | None = None) -> "MultiWriter":
        """Create a MultiWriter with default configuration."""
        output_path = Path(output_dir) if output_dir else get_output_dir()
        return cls(
            {
                "properties": BatchedJSONLWriter(output_path / "properties.jsonl"),
                "images": BatchedJSONLWriter(output_path / "image_urls.jsonl"),
                "agents": BatchedJSONLWriter(output_path / "agents.jsonl"),
                "errors": BatchedJSONLWriter(output_path / "errors.jsonl"),
            }
        )
