"""
数据库管理器

协调 SQLite 和 ChromaDB 两个数据库的操作，实现数据一致性保证。
提供统一的数据库操作接口，处理两阶段提交和失败补偿。
"""
import sys
import os

# Put the project root (two directory levels above this file's directory) on
# sys.path so that the absolute 'src.*' imports below can be resolved.
# The entry is inserted at the front, and only when it is not already present.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from typing import Dict, List, Optional, Any, Tuple
import numpy as np

from src.core.logger import get_logger
from src.database.models import QuestionModel, QuestionCreateDTO, QuestionUpdateDTO, QuestionSearchFilter
from src.database.sqlite_dao import SQLiteDAO
from src.database.chroma_dao import ChromaDAO
from src.utils.helpers import generate_uuid, get_current_timestamp


class DatabaseManager:
    """
    数据库管理器
    
    负责协调 SQLite 和 ChromaDB 两个数据库的操作。
    实现两阶段提交策略，确保数据一致性。
    """
    
    def __init__(
        self,
        sqlite_dao: SQLiteDAO,
        chroma_dao: ChromaDAO,
        embedding_service=None,  # 阶段3实现后传入
        logger=None
    ):
        """
        初始化数据库管理器
        
        Args:
            sqlite_dao: SQLite 数据访问对象
            chroma_dao: ChromaDB 数据访问对象
            embedding_service: Embedding 服务（用于文本向量化）
            logger: 日志记录器实例
        """
        self.sqlite_dao = sqlite_dao
        self.chroma_dao = chroma_dao
        self.embedding_service = embedding_service
        self.logger = logger or get_logger()
        
    def initialize_databases(self):
        """
        初始化数据库
        
        确保数据库表结构和 Collection 已创建。
        """
        try:
            # 初始化 SQLite
            if not self.sqlite_dao.check_schema_initialized():
                self.sqlite_dao.initialize_schema()
                self.logger.info("SQLite 数据库表结构初始化完成")
            else:
                self.logger.info("SQLite 数据库表结构已存在")
            
            # 初始化 ChromaDB
            self.chroma_dao.initialize_collection()
            self.logger.info("ChromaDB Collection 初始化完成")
            
            # 检查数据一致性
            self.check_data_consistency()
            
        except Exception as e:
            self.logger.error(f"数据库初始化失败: {e}")
            raise
    
    # -------------------------------------------------------------------------
    # 题目管理操作（协调两个数据库）
    # -------------------------------------------------------------------------
    
    def create_question(
        self,
        question_data: QuestionCreateDTO,
        embedding: Optional[List[float]] = None
    ) -> str:
        """
        创建题目（两阶段提交）
        
        Args:
            question_data: 题目创建数据
            embedding: 题目内容的向量表示（如果为None则需要embedding_service）
            
        Returns:
            str: 创建的题目ID
        """
        question_id = None
        
        try:
            # 生成题目ID和时间戳
            question_id = generate_uuid()
            current_time = get_current_timestamp()
            
            # 构建完整的题目模型
            question = QuestionModel(
                question_id=question_id,
                title=question_data.title,
                content=question_data.content,
                question_type=question_data.question_type,
                category=question_data.category,
                difficulty=question_data.difficulty,
                status=question_data.status,
                tags=question_data.tags,
                answer=question_data.answer,
                explanation=question_data.explanation,
                source=question_data.source,
                source_url=question_data.source_url,
                points=question_data.points,
                created_at=current_time,
                updated_at=current_time,
                created_by=question_data.created_by
            )
            
            # 如果没有提供向量且有embedding服务，则生成向量
            if embedding is None and self.embedding_service:
                embedding = self.embedding_service.embed_text(question.content)
            
            # 第一阶段：写入 SQLite（主数据库）
            self.sqlite_dao.create_question(question)
            self.logger.debug(f"SQLite 写入成功: {question_id}")
            
            # 第二阶段：写入 ChromaDB（如果有向量）
            if embedding:
                # 构建元数据
                metadata = {
                    "category": question.category,
                    "difficulty": question.difficulty,
                    "question_type": question.question_type,
                    "status": question.status,
                    "tags": ",".join(question.tags) if question.tags else "",
                    "created_at": question.created_at
                }
                
                self.chroma_dao.add_document(
                    question_id=question_id,
                    content=question.content,
                    embedding=np.array(embedding),
                    metadata=metadata
                )
                self.logger.debug(f"ChromaDB 写入成功: {question_id}")
            else:
                self.logger.warning(f"题目 {question_id} 未提供向量，跳过ChromaDB写入")
            
            self.logger.info(f"成功创建题目: {question_id}")
            return question_id
            
        except Exception as e:
            # 如果ChromaDB写入失败，尝试回滚SQLite
            if question_id:
                try:
                    self.sqlite_dao.delete_question(question_id, soft_delete=False)
                    self.logger.warning(f"回滚SQLite数据: {question_id}")
                except Exception as rollback_error:
                    self.logger.error(f"回滚失败: {rollback_error}")
            
            self.logger.error(f"创建题目失败: {e}")
            raise
    
    def get_question(self, question_id: str, include_answer: bool = True) -> Optional[Dict[str, Any]]:
        """
        获取题目详情
        
        Args:
            question_id: 题目ID
            include_answer: 是否包含答案
            
        Returns:
            Optional[Dict]: 题目数据，如果不存在则返回None
        """
        try:
            # 从SQLite获取完整数据
            question = self.sqlite_dao.get_question(question_id)
            
            if question is None:
                return None
            
            # 如果不需要答案，则移除答案字段
            if not include_answer:
                question.pop('answer', None)
                question.pop('explanation', None)
            
            return question
            
        except Exception as e:
            self.logger.error(f"获取题目失败: {e}")
            raise
    
    def update_question(
        self,
        question_id: str,
        updates: QuestionUpdateDTO,
        new_embedding: Optional[List[float]] = None
    ) -> bool:
        """
        更新题目（协调两个数据库）
        
        Args:
            question_id: 题目ID
            updates: 更新数据
            new_embedding: 新的向量表示（如果更新了content）
            
        Returns:
            bool: 更新是否成功
        """
        try:
            # 检查题目是否存在
            existing = self.sqlite_dao.get_question(question_id)
            if not existing:
                raise ValueError(f"题目不存在: {question_id}")
            
            # 转换为字典并过滤None值
            update_dict = {k: v for k, v in updates.model_dump().items() if v is not None}
            
            if not update_dict:
                self.logger.warning("没有字段需要更新")
                return False
            
            # 检查是否需要重新向量化
            content_updated = 'content' in update_dict
            
            # 如果更新了content但没有提供新向量，且有embedding服务，则生成向量
            if content_updated and new_embedding is None and self.embedding_service:
                new_embedding = self.embedding_service.embed_text(update_dict['content'])
            
            # 更新 SQLite
            self.sqlite_dao.update_question(question_id, update_dict)
            self.logger.debug(f"SQLite 更新成功: {question_id}")
            
            # 更新 ChromaDB
            chroma_exists = self.chroma_dao.document_exists(question_id)
            
            if chroma_exists:
                # 更新元数据
                metadata_updates = {}
                if 'category' in update_dict:
                    metadata_updates['category'] = update_dict['category']
                if 'difficulty' in update_dict:
                    metadata_updates['difficulty'] = update_dict['difficulty']
                if 'question_type' in update_dict:
                    metadata_updates['question_type'] = update_dict['question_type']
                if 'status' in update_dict:
                    metadata_updates['status'] = update_dict['status']
                if 'tags' in update_dict:
                    metadata_updates['tags'] = ",".join(update_dict['tags']) if update_dict['tags'] else ""
                
                # 执行更新
                self.chroma_dao.update_document(
                    question_id=question_id,
                    content=update_dict.get('content'),
                    embedding=np.array(new_embedding) if new_embedding else None,
                    metadata=metadata_updates if metadata_updates else None
                )
                self.logger.debug(f"ChromaDB 更新成功: {question_id}")
            elif new_embedding and 'content' in update_dict:
                # ChromaDB中不存在但有向量，添加新文档
                metadata = {
                    "category": update_dict.get('category', existing['category']),
                    "difficulty": update_dict.get('difficulty', existing['difficulty']),
                    "question_type": update_dict.get('question_type', existing['question_type']),
                    "status": update_dict.get('status', existing['status']),
                    "tags": ",".join(update_dict.get('tags', existing.get('tags', []))),
                    "created_at": existing['created_at']
                }
                
                self.chroma_dao.add_document(
                    question_id=question_id,
                    content=update_dict['content'],
                    embedding=np.array(new_embedding),
                    metadata=metadata
                )
                self.logger.debug(f"ChromaDB 添加文档: {question_id}")
            
            self.logger.info(f"成功更新题目: {question_id}")
            return True
            
        except Exception as e:
            self.logger.error(f"更新题目失败: {e}")
            raise
    
    def delete_question(self, question_id: str, soft_delete: bool = True) -> bool:
        """
        删除题目（协调两个数据库）
        
        Args:
            question_id: 题目ID
            soft_delete: 是否软删除
            
        Returns:
            bool: 删除是否成功
        """
        try:
            # 删除 SQLite 中的数据
            self.sqlite_dao.delete_question(question_id, soft_delete=soft_delete)
            self.logger.debug(f"SQLite 删除成功: {question_id}")
            
            # 如果是硬删除，同时删除ChromaDB中的数据
            if not soft_delete:
                if self.chroma_dao.document_exists(question_id):
                    self.chroma_dao.delete_document(question_id)
                    self.logger.debug(f"ChromaDB 删除成功: {question_id}")
            else:
                # 软删除时更新ChromaDB的元数据
                if self.chroma_dao.document_exists(question_id):
                    self.chroma_dao.update_metadata(question_id, {"status": "已归档"})
                    self.logger.debug(f"ChromaDB 元数据更新成功: {question_id}")
            
            delete_type = "软删除" if soft_delete else "硬删除"
            self.logger.info(f"成功{delete_type}题目: {question_id}")
            return True
            
        except Exception as e:
            self.logger.error(f"删除题目失败: {e}")
            raise
    
    def list_questions(
        self,
        filters: Optional[QuestionSearchFilter] = None,
        page: int = 1,
        page_size: int = 20,
        sort_by: str = "created_at",
        sort_order: str = "DESC"
    ) -> Tuple[List[Dict[str, Any]], int]:
        """
        查询题目列表（分页）
        
        Args:
            filters: 过滤条件
            page: 页码
            page_size: 每页数量
            sort_by: 排序字段
            sort_order: 排序方向
            
        Returns:
            Tuple[List[Dict], int]: (题目列表, 总数量)
        """
        try:
            return self.sqlite_dao.list_questions(
                filters=filters,
                page=page,
                page_size=page_size,
                sort_by=sort_by,
                sort_order=sort_order
            )
        except Exception as e:
            self.logger.error(f"查询题目列表失败: {e}")
            raise
    
    # -------------------------------------------------------------------------
    # 向量检索操作
    # -------------------------------------------------------------------------
    
    def search_questions(
        self,
        query_embedding: List[float],
        top_k: int = 10,
        filters: Optional[QuestionSearchFilter] = None,
        min_similarity: float = 0.0
    ) -> List[Dict[str, Any]]:
        """
        语义检索题目
        
        Args:
            query_embedding: 查询向量
            top_k: 返回结果数量
            filters: 过滤条件
            min_similarity: 最低相似度阈值
            
        Returns:
            List[Dict]: 检索结果列表
        """
        try:
            # 构建元数据过滤条件
            where = None
            if filters:
                where = {}
                if filters.category:
                    where['category'] = filters.category
                if filters.difficulty:
                    where['difficulty'] = filters.difficulty
                if filters.question_type:
                    where['question_type'] = filters.question_type
                if filters.status:
                    where['status'] = filters.status
            
            # 从 ChromaDB 检索
            chroma_results = self.chroma_dao.search_similar(
                query_embedding=np.array(query_embedding),
                top_k=top_k,
                where=where,
                min_similarity=min_similarity
            )
            
            # 如果需要完整信息，从SQLite补充
            enriched_results = []
            for result in chroma_results:
                question_id = result['question_id']
                full_data = self.sqlite_dao.get_question(question_id)
                
                if full_data:
                    # 合并数据
                    full_data['similarity_score'] = result['similarity_score']
                    enriched_results.append(full_data)

            self.logger.info(f"语义检索完成，返回 {len(enriched_results)} 个结果")
            return enriched_results

        except Exception as e:
            self.logger.error(f"语义检索失败: {e}")
            raise

    def search_questions_by_keyword(
        self,
        query: str,
        top_k: int = 10,
        filters: Optional[QuestionSearchFilter] = None,
        search_fields: Optional[List[str]] = None,
        match_mode: str = 'OR'
    ) -> List[Dict[str, Any]]:
        """
        关键词检索题目 - 基于SQLite FTS5

        Args:
            query: 检索查询文本
            top_k: 返回结果数量
            filters: 过滤条件
            search_fields: 搜索字段列表，默认为['content', 'title', 'tags']
            match_mode: 匹配模式，'OR'或'AND'

        Returns:
            List[Dict]: 检索结果列表

        Raises:
            ValueError: 查询参数无效
            Exception: 检索过程出错
        """
        try:
            self.logger.info(
                f"执行关键词检索: 查询='{query}', "
                f"top_k={top_k}, 匹配模式={match_mode}"
            )

            # 调用SQLite DAO进行关键词检索
            results = self.sqlite_dao.search_questions_by_keyword(
                query=query,
                top_k=top_k,
                filters=filters,
                search_fields=search_fields,
                match_mode=match_mode
            )

            self.logger.info(f"关键词检索完成，返回 {len(results)} 个结果")
            return results

        except ValueError as e:
            self.logger.error(f"关键词检索参数验证失败: {e}")
            raise
        except Exception as e:
            self.logger.error(f"关键词检索失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 数据一致性检查与修复
    # -------------------------------------------------------------------------
    
    def check_data_consistency(self) -> Dict[str, Any]:
        """
        检查两个数据库的数据一致性
        
        Returns:
            Dict: 一致性检查报告
        """
        try:
            # 获取SQLite中的所有题目ID
            sqlite_count = self.sqlite_dao.get_statistics_overview()['total_questions']
            
            # 获取ChromaDB中的所有文档ID
            chroma_ids = set(self.chroma_dao.get_all_ids())
            chroma_count = len(chroma_ids)
            
            # 获取SQLite中的所有题目ID（需要查询）
            # 这里简化处理，只比较总数
            
            report = {
                "sqlite_count": sqlite_count,
                "chroma_count": chroma_count,
                "consistent": sqlite_count == chroma_count,
                "difference": abs(sqlite_count - chroma_count),
                "message": "数据一致" if sqlite_count == chroma_count else f"数据不一致：SQLite有{sqlite_count}条，ChromaDB有{chroma_count}条"
            }
            
            if not report["consistent"]:
                self.logger.warning(f"数据一致性检查：{report['message']}")
            else:
                self.logger.info("数据一致性检查：数据一致")
            
            return report
            
        except Exception as e:
            self.logger.error(f"数据一致性检查失败: {e}")
            raise
    
    def sync_sqlite_to_chroma(self, question_ids: Optional[List[str]] = None):
        """
        从SQLite同步数据到ChromaDB
        
        Args:
            question_ids: 要同步的题目ID列表，None表示同步所有
        """
        try:
            if question_ids is None:
                # 获取所有题目ID（需要实现分页获取）
                self.logger.warning("全量同步功能需要在后续实现")
                return
            
            for question_id in question_ids:
                question = self.sqlite_dao.get_question(question_id)
                if question and self.embedding_service:
                    # 生成向量
                    embedding = self.embedding_service.embed_text(question['content'])
                    
                    # 构建元数据
                    metadata = {
                        "category": question['category'],
                        "difficulty": question['difficulty'],
                        "question_type": question['question_type'],
                        "status": question['status'],
                        "tags": ",".join(question.get('tags', [])),
                        "created_at": question['created_at']
                    }
                    
                    # 检查是否存在
                    if self.chroma_dao.document_exists(question_id):
                        self.chroma_dao.update_document(
                            question_id=question_id,
                            content=question['content'],
                            embedding=embedding,
                            metadata=metadata
                        )
                    else:
                        self.chroma_dao.add_document(
                            question_id=question_id,
                            content=question['content'],
                            embedding=embedding,
                            metadata=metadata
                        )
            
            self.logger.info(f"同步完成：{len(question_ids)} 个题目")
            
        except Exception as e:
            self.logger.error(f"数据同步失败: {e}")
            raise
    
    # -------------------------------------------------------------------------
    # 统计与分析
    # -------------------------------------------------------------------------
    
    def get_statistics(self) -> Dict[str, Any]:
        """
        获取统计数据
        
        Returns:
            Dict: 统计数据
        """
        try:
            stats = self.sqlite_dao.get_statistics_overview()
            
            # 添加ChromaDB统计
            stats['chroma_document_count'] = self.chroma_dao.count_documents()
            
            return stats
        except Exception as e:
            self.logger.error(f"获取统计数据失败: {e}")
            raise
    
    def close(self):
        """关闭所有数据库连接"""
        try:
            self.sqlite_dao.close()
            self.chroma_dao.close()
            self.logger.info("所有数据库连接已关闭")
        except Exception as e:
            self.logger.error(f"关闭数据库连接失败: {e}")
