from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional


class StorageType(Enum):
    """Storage type enumeration.

    Each member's value is its lowercase string identifier. Used as the
    type of ``StorageConfig.type``.
    """

    LOCAL = "local"
    S3 = "s3"
    URL = "url"


class MinerUType(Enum):
    """MinerU parser type.

    Each member's value is its lowercase string identifier. Used as the
    type of ``MinerUConfig.type``.
    """

    SAAS = "saas"
    SELFHOST = "selfhost"


class KnowledgeBaseType(Enum):
    """Knowledge base type.

    Currently has a single member. Used as the type of
    ``KnowledgeBaseConfig.type``.
    """

    RAGFLOW = "ragflow"


@dataclass
class FileInfo:
    """File information.

    ``filename`` and ``path`` are required. ``size`` and ``mime_type`` are
    optional metadata and default to ``None`` when not supplied.
    """

    filename: str
    path: str
    # NOTE(review): the unit is not stated here; presumably bytes -- confirm.
    size: Optional[int] = None
    mime_type: Optional[str] = None


@dataclass
class MineruParsedResult:
    """MinerU parse result.

    ``filename``, ``content``, ``content_list`` and ``images`` are required.
    ``html`` and ``latex`` are optional and default to ``None``.
    """

    filename: str
    # NOTE(review): presumably the main text output of the parse (Markdown?)
    # -- confirm against the parser implementation.
    content: str
    # Element type is deliberately open (``Any``).
    # NOTE(review): presumably MinerU's structured content blocks -- confirm.
    content_list: List[Any]
    # String-to-string mapping.
    # NOTE(review): presumably image name -> image data or path -- confirm.
    images: Dict[str, str]
    html: Optional[str] = None
    latex: Optional[str] = None


@dataclass
class StorageConfig:
    """Storage configuration.

    ``type`` and ``storage_path`` are required. ``endpoint`` and the two
    credential fields are optional and default to ``None``.

    ``secret_key_id`` is excluded from the generated ``__repr__`` so the
    secret is not exposed when a config object is logged or printed. It is
    still accepted by ``__init__`` in the same position and still takes part
    in equality comparison.
    """

    type: StorageType
    storage_path: str
    endpoint: Optional[str] = None
    access_key_id: Optional[str] = None
    # repr=False keeps the secret out of logs and tracebacks.
    secret_key_id: Optional[str] = field(default=None, repr=False)


@dataclass
class MinerUConfig:
    """Base class for MinerU configuration.

    Declares the two required fields shared by ``SaasMinerUConfig`` and
    ``SelfhostMinerUConfig``, both of which inherit from this class.
    """

    type: MinerUType
    base_url: str


@dataclass
class SaasMinerUConfig(MinerUConfig):
    """SaaS MinerU configuration.

    Extends ``MinerUConfig`` with a required ``api_key`` and a set of
    optional parsing options that all have defaults.

    ``api_key`` is excluded from the generated ``__repr__`` so the
    credential is not exposed when a config object is logged or printed. It
    remains a required ``__init__`` argument in the same position and still
    takes part in equality comparison.
    """

    # repr=False keeps the credential out of logs and tracebacks; no default
    # is given, so the field stays required.
    api_key: str = field(repr=False)
    enable_formula: bool = True
    enable_table: bool = True
    language: str = "ch"
    model_version: str = "pipeline"
    extra_formats: Optional[List[str]] = None
    enable_ocr: bool = False


@dataclass
class SelfhostMinerUConfig(MinerUConfig):
    """Self-hosted MinerU configuration.

    Extends ``MinerUConfig``; every field declared here has a default, so
    only the inherited ``type`` and ``base_url`` are required.
    """

    # NOTE(review): how this relates to the inherited ``base_url`` is not
    # visible in this file -- confirm which one the client actually uses.
    server_url: Optional[str] = None
    backend_type: str = "pipeline"
    parse_method: str = "auto"
    return_images: bool = True
    language: str = "ch"
    enable_formula: bool = True
    enable_table: bool = True
    return_content_list: bool = True
    return_md: bool = True
    # NOTE(review): presumably ``None`` means no page-range limit, and the
    # indexing base (0 or 1) is not stated here -- confirm.
    start_page: Optional[int] = None
    end_page: Optional[int] = None


@dataclass
class KnowledgeBaseConfig:
    """Base class for knowledge base configuration.

    Declares the two required fields inherited by ``RagFlowConfig``.
    """

    type: KnowledgeBaseType
    base_url: str


@dataclass
class RagFlowConfig(KnowledgeBaseConfig):
    """RagFlow knowledge base configuration.

    Extends ``KnowledgeBaseConfig`` with a required ``dataset_id`` and
    ``api_key``, plus an optional ``document_id`` (default ``None``) and a
    ``should_chunk`` flag (default ``False``).

    ``api_key`` is excluded from the generated ``__repr__`` so the
    credential is not exposed when a config object is logged or printed. It
    remains a required ``__init__`` argument in the same position and still
    takes part in equality comparison.
    """

    dataset_id: str
    # repr=False keeps the credential out of logs and tracebacks; no default
    # is given, so the field stays required.
    api_key: str = field(repr=False)
    document_id: Optional[str] = None
    should_chunk: bool = False
