import asyncio
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from urllib.parse import urlparse

import aiohttp

from mineru_flow.internal.storage.base import (
    StorageError,
    StorageFile,
    StorageNotFoundError,
    StorageOperator,
    StoragePermissionError,
    StorageUnsupportedError,
)


class HTTPStorageOperator(StorageOperator):
    """Storage operator for HTTP/HTTPS accessible resources.

    Every operation opens a short-lived ``aiohttp.ClientSession`` configured
    with the operator's timeout and sends the configured default headers.
    HTTP 404 is reported as ``StorageNotFoundError``, 401/403 as
    ``StoragePermissionError``; other HTTP errors, transport failures and
    timeouts are reported as ``StorageError``.
    """

    def __init__(
        self,
        *,
        default_headers: Optional[Dict[str, str]] = None,
        timeout: Optional[aiohttp.ClientTimeout] = None,
        **kwargs: Any,
    ):
        """Create the operator.

        Args:
            default_headers: Headers sent with every request. The mapping is
                copied, so later changes made by the caller do not leak into
                the operator.
            timeout: Client timeout applied to every session; defaults to a
                total timeout of 60 seconds.
            **kwargs: Forwarded unchanged to ``StorageOperator.__init__``.
        """
        super().__init__(**kwargs)
        self._headers = dict(default_headers) if default_headers else {}
        self._timeout = timeout or aiohttp.ClientTimeout(total=60)

    @staticmethod
    def _check_status(response: aiohttp.ClientResponse, url: str) -> None:
        """Translate an error status of *response* into a storage exception.

        Raises:
            StorageNotFoundError: The status is 404.
            StoragePermissionError: The status is 401 or 403.
            aiohttp.ClientResponseError: Any other status rejected by
                ``response.raise_for_status()``; callers convert it into
                ``StorageError``.
        """
        if response.status == 404:
            raise StorageNotFoundError(f"HTTP resource not found: {url}")
        if response.status in {401, 403}:
            raise StoragePermissionError(f"Permission denied: {url}")
        response.raise_for_status()

    @staticmethod
    async def _read_up_to(response: aiohttp.ClientResponse, limit: int) -> bytes:
        """Read at most *limit* bytes from the body of *response*.

        ``StreamReader.read(n)`` returns as soon as *some* data is buffered,
        so a single call may yield fewer than ``n`` bytes even though more
        are on the way. Keep reading until the limit is reached or the
        stream signals EOF with an empty chunk.
        """
        parts = []
        remaining = limit
        while remaining > 0:
            chunk = await response.content.read(remaining)
            if not chunk:
                break
            parts.append(chunk)
            remaining -= len(chunk)
        return b"".join(parts)

    async def _read(self, path: str, *, max_bytes: Optional[int]) -> bytes:
        """Fetch the resource at *path* and return its body.

        Args:
            path: URL of the resource.
            max_bytes: Upper bound on the number of bytes returned. ``None``
                or a negative value reads the whole body; ``0`` returns
                ``b""`` after the response status has been checked.

        Returns:
            The body, truncated to ``max_bytes`` when a limit is given.

        Raises:
            StorageNotFoundError: The server answered 404.
            StoragePermissionError: The server answered 401 or 403.
            StorageError: Any other HTTP error, transport failure or timeout.
        """
        headers = dict(self._headers)
        # A negative limit means "no limit", matching StreamReader.read(-1).
        limit = max_bytes if max_bytes is not None and max_bytes >= 0 else None
        if limit:
            # Only a positive limit yields a well-formed byte range.
            headers["Range"] = f"bytes=0-{limit - 1}"

        try:
            async with aiohttp.ClientSession(timeout=self._timeout) as session:
                async with session.get(path, headers=headers) as response:
                    self._check_status(response, path)
                    if limit is None:
                        return await response.read()
                    # The server may ignore Range and send the full body, so
                    # the limit is enforced on the client side as well.
                    return await self._read_up_to(response, limit)
        except aiohttp.ClientResponseError as exc:
            raise StorageError(f"HTTP error {exc.status} while reading {path}") from exc
        except aiohttp.ClientError as exc:
            raise StorageError(f"Failed to fetch {path}: {exc}") from exc
        except asyncio.TimeoutError as exc:
            raise StorageError(f"Request timed out for {path}") from exc

    async def _stream(self, path: str, *, chunk_size: int):
        """Yield the body of the resource at *path* in chunks.

        This is an async generator. Chunks are produced by
        ``response.content.iter_chunked(chunk_size)``; empty chunks are
        skipped.

        Raises:
            StorageNotFoundError: The server answered 404.
            StoragePermissionError: The server answered 401 or 403.
            StorageError: Any other HTTP error, transport failure or timeout.
        """
        headers = dict(self._headers)

        try:
            async with aiohttp.ClientSession(timeout=self._timeout) as session:
                async with session.get(path, headers=headers) as response:
                    self._check_status(response, path)
                    async for chunk in response.content.iter_chunked(chunk_size):
                        if chunk:
                            yield chunk
        except aiohttp.ClientResponseError as exc:
            raise StorageError(f"HTTP error {exc.status} while reading {path}") from exc
        except aiohttp.ClientError as exc:
            raise StorageError(f"Failed to fetch {path}: {exc}") from exc
        except asyncio.TimeoutError as exc:
            raise StorageError(f"Request timed out for {path}") from exc

    async def _list(self, location: str, *, recursive: bool) -> Sequence[StorageFile]:
        """Return a one-element list describing the resource at *location*.

        A URL is treated as a single file, so ``recursive`` is ignored.
        """
        return [await self._fetch_metadata(location)]

    async def _get_mime(self, path: str) -> Optional[str]:
        """Return the MIME type reported by the server for *path*, if any."""
        info = await self._fetch_metadata(path)
        return info.mime_type

    async def _fetch_metadata(self, url: str) -> StorageFile:
        """Describe the resource at *url* without downloading its body.

        A ``HEAD`` request is tried first. When the server answers 405 or
        501 to it, the metadata is obtained from a one-byte ranged ``GET``
        instead.

        Raises:
            StorageNotFoundError: The server answered 404.
            StoragePermissionError: The server answered 401 or 403.
            StorageError: Any other HTTP error, transport failure or timeout.
        """
        headers = dict(self._headers)
        async with aiohttp.ClientSession(timeout=self._timeout) as session:
            try:
                async with session.head(
                    url, headers=headers, allow_redirects=True
                ) as response:
                    if response.status in {405, 501}:
                        # HEAD is not supported here; probe with GET instead.
                        return await self._probe_with_range(session, url)
                    self._check_status(response, url)
                    return self._build_http_file(url, response.headers)
            except aiohttp.ClientResponseError as exc:
                raise StorageError(
                    f"HTTP error {exc.status} while probing {url}"
                ) from exc
            except aiohttp.ClientError as exc:
                raise StorageError(f"Failed to probe {url}: {exc}") from exc
            except asyncio.TimeoutError as exc:
                raise StorageError(f"Request timed out for {url}") from exc

    async def _probe_with_range(
        self, session: aiohttp.ClientSession, url: str
    ) -> StorageFile:
        """Obtain metadata for *url* through a ``GET`` for its first byte.

        Transport errors are not translated here; they propagate as aiohttp
        exceptions to the caller.
        """
        headers = dict(self._headers)
        headers["Range"] = "bytes=0-0"
        async with session.get(url, headers=headers, allow_redirects=True) as response:
            self._check_status(response, url)
            # Read a single byte at most; the rest of the body is discarded
            # when the response context exits.
            await response.content.read(1)
            return self._build_http_file(url, response.headers)

    @staticmethod
    def _parse_size(headers: aiohttp.typedefs.LooseHeaders) -> Optional[int]:
        """Extract the full resource size from response *headers*.

        When ``Content-Range`` is present the response is partial and
        ``Content-Length`` only describes the returned range, so the total
        after the ``/`` (e.g. ``bytes 0-0/12345``) is used instead. Returns
        ``None`` when the size is missing, unknown (``*``), malformed or
        negative.
        """
        content_range = headers.get("Content-Range")
        if content_range is not None:
            raw = content_range.rpartition("/")[2].strip()
        else:
            raw = headers.get("Content-Length")
        if raw is None:
            return None
        try:
            size = int(raw)
        except ValueError:
            return None
        return size if size >= 0 else None

    def _build_http_file(
        self, url: str, headers: aiohttp.typedefs.LooseHeaders
    ) -> StorageFile:
        """Build a ``StorageFile`` for *url* from HTTP response *headers*.

        The size comes from ``Content-Range``/``Content-Length``, the MIME
        type from ``Content-Type`` with its parameters stripped, and the raw
        headers are kept under ``metadata["headers"]``.
        """
        mime = headers.get("Content-Type")
        return StorageFile(
            path=url,
            name=self._derive_name(url),
            size=self._parse_size(headers),
            mime_type=self._sanitize_mime(mime),
            metadata={"headers": dict(headers)},
        )

    @staticmethod
    def _derive_name(url: str) -> str:
        """Return a display name for *url*.

        Uses the last component of the URL path, falling back to the host
        and finally to the URL itself when the path has no final component.
        """
        parsed = urlparse(url)
        name = Path(parsed.path).name
        return name or parsed.netloc or url

    @staticmethod
    def _sanitize_mime(mime: Optional[str]) -> Optional[str]:
        """Strip parameters such as ``; charset=utf-8`` from a Content-Type.

        Returns ``None`` when the header is missing or holds no media type.
        """
        if mime is None:
            return None
        return mime.split(";", 1)[0].strip() or None
