"""
统计分析服务

提供题目数据的多维度统计分析功能，包括：
- 整体统计概览
- 分类统计分析
- 难度分布统计
- 标签使用统计
- 题目质量评估
- 时间序列分析
- 趋势分析报告
"""

import sys
import os
from datetime import datetime, timedelta, timezone
from typing import Dict, List, Optional, Any, Tuple
from collections import defaultdict

# Put the project root (two directories above this file) on sys.path so that
# the `src.*` imports that follow can resolve.
project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.core.logger import get_logger
from src.database.database_manager import DatabaseManager
from src.database.models import (
    QualityMetrics, CategoryStatistics, DifficultyStatistics,
    TagStatistics, TimeSeriesData, OverallStatistics
)


class AnalyticsService:
    """
    统计分析服务

    提供题目数据的多维度统计分析功能。
    """

    def __init__(
        self,
        db_manager: DatabaseManager,
        logger=None
    ):
        """
        初始化统计分析服务

        Args:
            db_manager: 数据库管理器实例
            logger: 日志记录器实例
        """
        self.db_manager = db_manager
        self.logger = logger or get_logger()

    # -------------------------------------------------------------------------
    # 整体统计
    # -------------------------------------------------------------------------

    def get_overall_statistics(self) -> Dict[str, Any]:
        """
        获取整体统计概览

        Returns:
            Dict: 包含所有主要统计指标的概览

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取整体统计概览...")

            # 获取总体统计数据
            try:
                stats = self.db_manager.sqlite_dao.get_statistics_overview()
            except Exception as e:
                # 空数据库是正常情况，返回相关空值
                self.logger.debug(f"获取统计数据失败: {e}")
                stats = {
                    'total_questions': 0,
                    'published_count': 0,
                    'draft_count': 0,
                    'archived_count': 0,
                    'total_tags': 0,
                    'total_categories': 0
                }

            # 计算额外指标
            total_questions = stats.get('total_questions', 0)
            
            # 获取所有题目计算平均值
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )
            
            total_points = 0
            total_correct_rate = 0.0
            total_usage_count = 0
            question_count = 0

            for q in all_questions:
                if q.get('points'):
                    total_points += q['points']
                if q.get('correct_rate') is not None:
                    total_correct_rate += q['correct_rate']
                total_usage_count += q.get('usage_count', 0)
                question_count += 1

            # 计算平均值
            average_points = total_points / question_count if question_count > 0 else None
            average_correct_rate = total_correct_rate / question_count if question_count > 0 else None
            average_usage_count = total_usage_count / question_count if question_count > 0 else 0.0

            result = {
                'total_questions': stats.get('total_questions', 0),
                'published_questions': stats.get('published_count', 0),
                'draft_questions': stats.get('draft_count', 0),
                'archived_questions': stats.get('archived_count', 0),
                'total_tags': stats.get('total_tags', 0),
                'total_categories': stats.get('total_categories', 0),
                'average_points': average_points,
                'total_points': total_points,
                'average_correct_rate': average_correct_rate,
                'average_usage_count': average_usage_count,
                'total_usage_count': total_usage_count,
                'last_updated': stats.get('last_updated', datetime.now().isoformat()),
                'statistics_timestamp': datetime.now().isoformat()
            }

            self.logger.info(f"整体统计概览完成: 题目总数={result['total_questions']}")
            return result

        except Exception as e:
            self.logger.error(f"获取整体统计概览失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 分类统计
    # -------------------------------------------------------------------------

    def get_category_statistics(self) -> Dict[str, Any]:
        """
        获取分类统计信息

        Returns:
            Dict: 按分类划分的统计数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取分类统计...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 按分类分组
            category_stats = defaultdict(lambda: {
                'total_count': 0,
                'easy_count': 0,
                'medium_count': 0,
                'hard_count': 0,
                'published_count': 0,
                'draft_count': 0,
                'archived_count': 0,
                'total_points': 0,
                'question_ids': []
            })

            for q in all_questions:
                category = q.get('category', '未分类')
                difficulty = q.get('difficulty', '中等')
                status = q.get('status', '草稿')
                points = q.get('points', 0) or 0

                category_stats[category]['total_count'] += 1
                category_stats[category]['total_points'] += points
                category_stats[category]['question_ids'].append(q['question_id'])

                # 难度分布
                if difficulty == '简单':
                    category_stats[category]['easy_count'] += 1
                elif difficulty == '困难':
                    category_stats[category]['hard_count'] += 1
                else:
                    category_stats[category]['medium_count'] += 1

                # 状态分布
                if status == '已发布':
                    category_stats[category]['published_count'] += 1
                elif status == '已归档':
                    category_stats[category]['archived_count'] += 1
                else:
                    category_stats[category]['draft_count'] += 1

            # 构建结果
            result = {
                'total_categories': len(category_stats),
                'categories': []
            }

            for category, stats_data in sorted(category_stats.items()):
                total = stats_data['total_count']
                average_points = stats_data['total_points'] / total if total > 0 else None

                result['categories'].append({
                    'category': category,
                    'total_count': total,
                    'easy_count': stats_data['easy_count'],
                    'medium_count': stats_data['medium_count'],
                    'hard_count': stats_data['hard_count'],
                    'published_count': stats_data['published_count'],
                    'draft_count': stats_data['draft_count'],
                    'archived_count': stats_data['archived_count'],
                    'average_points': average_points,
                    'total_points': stats_data['total_points']
                })

            self.logger.info(f"分类统计完成: 共{len(category_stats)}个分类")
            return result

        except Exception as e:
            self.logger.error(f"获取分类统计失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 难度统计
    # -------------------------------------------------------------------------

    def get_difficulty_statistics(self) -> Dict[str, Any]:
        """
        获取难度分布统计

        Returns:
            Dict: 按难度等级划分的统计数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取难度统计...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 初始化难度统计
            difficulty_stats = {
                '简单': {'count': 0, 'total_correct_rate': 0.0, 'total_usage': 0},
                '中等': {'count': 0, 'total_correct_rate': 0.0, 'total_usage': 0},
                '困难': {'count': 0, 'total_correct_rate': 0.0, 'total_usage': 0}
            }

            total_questions = len(all_questions)

            for q in all_questions:
                difficulty = q.get('difficulty', '中等')
                correct_rate = q.get('correct_rate') or 0.0
                usage_count = q.get('usage_count', 0)

                if difficulty in difficulty_stats:
                    difficulty_stats[difficulty]['count'] += 1
                    difficulty_stats[difficulty]['total_correct_rate'] += correct_rate
                    difficulty_stats[difficulty]['total_usage'] += usage_count

            # 构建结果
            result = {
                'total_questions': total_questions,
                'difficulties': []
            }

            for difficulty in ['简单', '中等', '困难']:
                stats_data = difficulty_stats[difficulty]
                count = stats_data['count']
                percentage = (count / total_questions * 100) if total_questions > 0 else 0.0

                average_correct_rate = (stats_data['total_correct_rate'] / count) if count > 0 else None
                average_usage = stats_data['total_usage'] / count if count > 0 else 0.0

                result['difficulties'].append({
                    'difficulty': difficulty,
                    'count': count,
                    'percentage': round(percentage, 2),
                    'average_correct_rate': average_correct_rate,
                    'average_usage_count': average_usage
                })

            self.logger.info(f"难度统计完成: 简单={difficulty_stats['简单']['count']}, "
                           f"中等={difficulty_stats['中等']['count']}, "
                           f"困难={difficulty_stats['困难']['count']}")
            return result

        except Exception as e:
            self.logger.error(f"获取难度统计失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 标签统计
    # -------------------------------------------------------------------------

    def get_tag_statistics(self, top_n: int = 20) -> Dict[str, Any]:
        """
        获取标签使用统计

        Args:
            top_n: 返回使用次数最多的前N个标签

        Returns:
            Dict: 标签统计数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info(f"开始获取标签统计 (top_n={top_n})...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 统计标签使用
            tag_stats = defaultdict(lambda: {
                'usage_count': 0,
                'question_count': 0,
                'total_difficulty': 0.0,
                'difficulties': {'简单': 0, '中等': 0, '困难': 0}
            })

            # 难度分数映射
            difficulty_scores = {'简单': 1.0, '中等': 2.0, '困难': 3.0}

            for q in all_questions:
                tags = q.get('tags', [])
                difficulty = q.get('difficulty', '中等')
                difficulty_score = difficulty_scores.get(difficulty, 2.0)

                for tag in tags:
                    tag_stats[tag]['question_count'] += 1
                    tag_stats[tag]['total_difficulty'] += difficulty_score
                    if difficulty in tag_stats[tag]['difficulties']:
                        tag_stats[tag]['difficulties'][difficulty] += 1

            # 获取所有标签
            all_tags = self.db_manager.sqlite_dao.get_all_tags()
            tag_usage_map = {tag['tag_name']: tag.get('usage_count', 0) for tag in all_tags}

            # 整合使用次数
            for tag in tag_stats:
                tag_stats[tag]['usage_count'] = tag_usage_map.get(tag, 0)

            # 排序并截取
            sorted_tags = sorted(
                tag_stats.items(),
                key=lambda x: x[1]['question_count'],
                reverse=True
            )[:top_n]

            # 构建结果
            result = {
                'total_tags': len(tag_stats),
                'top_n': top_n,
                'tags': []
            }

            for tag_name, stats_data in sorted_tags:
                count = stats_data['question_count']
                average_difficulty = stats_data['total_difficulty'] / count if count > 0 else None

                result['tags'].append({
                    'tag_name': tag_name,
                    'usage_count': stats_data['usage_count'],
                    'question_count': count,
                    'average_difficulty': average_difficulty,
                    'easy_count': stats_data['difficulties']['简单'],
                    'medium_count': stats_data['difficulties']['中等'],
                    'hard_count': stats_data['difficulties']['困难']
                })

            self.logger.info(f"标签统计完成: 总标签数={len(tag_stats)}, 返回前{len(sorted_tags)}个")
            return result

        except Exception as e:
            self.logger.error(f"获取标签统计失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 质量评估
    # -------------------------------------------------------------------------

    def get_quality_metrics(self, question_ids: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        获取题目质量评估指标

        Args:
            question_ids: 指定的题目ID列表，为None则评估所有题目

        Returns:
            Dict: 质量评估数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取质量评估指标...")

            # 获取题目列表
            if question_ids:
                all_questions = []
                for q_id in question_ids:
                    q = self.db_manager.sqlite_dao.get_question(q_id)
                    if q:
                        all_questions.append(q)
            else:
                all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                    page=1,
                    page_size=10000
                )

            # 计算质量指标
            quality_metrics = []
            total_completeness = 0.0
            total_quality = 0.0

            for q in all_questions:
                # 计算完整度评分（0-100）
                completeness_score = 0.0
                
                # 有答案 (+25)
                if q.get('answer'):
                    completeness_score += 25.0
                
                # 有解析 (+25)
                if q.get('explanation'):
                    completeness_score += 25.0
                
                # 有标签 (+25)
                if q.get('tags') and len(q['tags']) > 0:
                    completeness_score += 25.0
                
                # 内容充分 (+25)
                content_length = len(q.get('content', ''))
                if content_length >= 100:
                    completeness_score += 25.0

                # 计算质量评分（0-100）
                quality_score = completeness_score * 0.5  # 完整度占50%

                # 其他质量因素 (+50)
                quality_bonus = 0.0
                
                # 有正确率信息 (+15)
                if q.get('correct_rate') is not None:
                    quality_bonus += 15.0
                
                # 有分值信息 (+15)
                if q.get('points'):
                    quality_bonus += 15.0
                
                # 使用次数多 (+20)
                usage_count = q.get('usage_count', 0)
                if usage_count >= 10:
                    quality_bonus += 20.0
                elif usage_count >= 5:
                    quality_bonus += 10.0

                quality_score += quality_bonus * 0.5

                # 确保评分在0-100范围内
                quality_score = min(100.0, max(0.0, quality_score))

                total_completeness += completeness_score
                total_quality += quality_score

                metrics = {
                    'question_id': q['question_id'],
                    'title': q.get('title', ''),
                    'has_answer': bool(q.get('answer')),
                    'has_explanation': bool(q.get('explanation')),
                    'has_tags': bool(q.get('tags') and len(q['tags']) > 0),
                    'title_length': len(q.get('title', '')),
                    'content_length': len(q.get('content', '')),
                    'answer_length': len(q.get('answer', '') or ''),
                    'explanation_length': len(q.get('explanation', '') or ''),
                    'tags_count': len(q.get('tags', [])),
                    'completeness_score': round(completeness_score, 2),
                    'quality_score': round(quality_score, 2)
                }

                quality_metrics.append(metrics)

            # 计算平均分数
            question_count = len(all_questions)
            average_completeness = total_completeness / question_count if question_count > 0 else 0.0
            average_quality = total_quality / question_count if question_count > 0 else 0.0

            # 统计质量分布
            quality_distribution = {
                'excellent': 0,  # 85-100
                'good': 0,       # 70-84
                'fair': 0,       # 50-69
                'poor': 0        # 0-49
            }

            for metrics in quality_metrics:
                score = metrics['quality_score']
                if score >= 85:
                    quality_distribution['excellent'] += 1
                elif score >= 70:
                    quality_distribution['good'] += 1
                elif score >= 50:
                    quality_distribution['fair'] += 1
                else:
                    quality_distribution['poor'] += 1

            result = {
                'total_questions_evaluated': question_count,
                'average_completeness_score': round(average_completeness, 2),
                'average_quality_score': round(average_quality, 2),
                'quality_distribution': quality_distribution,
                'metrics': sorted(quality_metrics, key=lambda x: x['quality_score'], reverse=True)
            }

            self.logger.info(f"质量评估完成: 评估题目数={question_count}, "
                           f"平均质量分数={result['average_quality_score']}")
            return result

        except Exception as e:
            self.logger.error(f"获取质量评估指标失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 时间序列分析
    # -------------------------------------------------------------------------

    def get_time_series_analysis(
        self,
        days: int = 30,
        period: str = 'day'
    ) -> Dict[str, Any]:
        """
        获取时间序列分析数据

        Args:
            days: 分析过去多少天的数据
            period: 时间粒度（day/week/month）

        Returns:
            Dict: 时间序列数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info(f"开始获取时间序列分析 (days={days}, period={period})...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 按时间分组
            time_series_dict = defaultdict(lambda: {
                'created': 0,
                'updated': 0,
                'deleted': 0,
                'cumulative': 0
            })

            now = datetime.now(timezone.utc)
            cutoff_date = now - timedelta(days=days)

            for q in all_questions:
                created_at_str = q.get('created_at', '')
                updated_at_str = q.get('updated_at', '')

                try:
                    created_at = datetime.fromisoformat(created_at_str.replace('Z', '+00:00'))
                    # 确保时区一致进行比较
                    if created_at.tzinfo is None:
                        created_at = created_at.replace(tzinfo=timezone.utc)
                    if created_at >= cutoff_date:
                        if period == 'day':
                            key = created_at.strftime('%Y-%m-%d')
                        elif period == 'week':
                            key = created_at.strftime('%Y-W%U')
                        else:  # month
                            key = created_at.strftime('%Y-%m')
                        
                        time_series_dict[key]['created'] += 1

                    updated_at = datetime.fromisoformat(updated_at_str.replace('Z', '+00:00'))
                    # 确保时区一致进行比较
                    if updated_at.tzinfo is None:
                        updated_at = updated_at.replace(tzinfo=timezone.utc)
                    if updated_at >= cutoff_date and updated_at != created_at:
                        if period == 'day':
                            key = updated_at.strftime('%Y-%m-%d')
                        elif period == 'week':
                            key = updated_at.strftime('%Y-W%U')
                        else:  # month
                            key = updated_at.strftime('%Y-%m')
                        
                        time_series_dict[key]['updated'] += 1

                except (ValueError, AttributeError):
                    pass

            # 计算累计值
            cumulative = 0
            time_series = []

            for timestamp in sorted(time_series_dict.keys()):
                data = time_series_dict[timestamp]
                cumulative += data['created']
                data['cumulative'] = cumulative

                time_series.append({
                    'timestamp': timestamp,
                    'date': timestamp,
                    'period': period,
                    'questions_created': data['created'],
                    'questions_updated': data['updated'],
                    'questions_deleted': data['deleted'],
                    'cumulative_total': data['cumulative']
                })

            result = {
                'period': period,
                'days': days,
                'total_records': len(time_series),
                'time_series': time_series
            }

            self.logger.info(f"时间序列分析完成: 共{len(time_series)}条记录")
            return result

        except Exception as e:
            self.logger.error(f"获取时间序列分析失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 问题类型统计
    # -------------------------------------------------------------------------

    def get_question_type_statistics(self) -> Dict[str, Any]:
        """
        获取题目类型统计

        Returns:
            Dict: 按题目类型划分的统计数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取题目类型统计...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 统计题目类型
            type_stats = defaultdict(lambda: {
                'count': 0,
                'total_usage': 0,
                'total_correct_rate': 0.0
            })

            total_questions = len(all_questions)

            for q in all_questions:
                q_type = q.get('question_type', '未知')
                usage_count = q.get('usage_count', 0)
                correct_rate = q.get('correct_rate') or 0.0

                type_stats[q_type]['count'] += 1
                type_stats[q_type]['total_usage'] += usage_count
                type_stats[q_type]['total_correct_rate'] += correct_rate

            # 构建结果
            result = {
                'total_questions': total_questions,
                'total_types': len(type_stats),
                'question_types': []
            }

            for q_type, stats_data in sorted(type_stats.items()):
                count = stats_data['count']
                percentage = (count / total_questions * 100) if total_questions > 0 else 0.0

                average_usage = stats_data['total_usage'] / count if count > 0 else 0.0
                average_correct_rate = stats_data['total_correct_rate'] / count if count > 0 else None

                result['question_types'].append({
                    'question_type': q_type,
                    'count': count,
                    'percentage': round(percentage, 2),
                    'average_usage_count': average_usage,
                    'average_correct_rate': average_correct_rate
                })

            self.logger.info(f"题目类型统计完成: 共{len(type_stats)}种类型")
            return result

        except Exception as e:
            self.logger.error(f"获取题目类型统计失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 状态分布统计
    # -------------------------------------------------------------------------

    def get_status_statistics(self) -> Dict[str, Any]:
        """
        获取题目状态分布统计

        Returns:
            Dict: 按状态划分的统计数据

        Raises:
            Exception: 统计过程出错
        """
        try:
            self.logger.info("开始获取状态分布统计...")

            # 获取所有题目
            all_questions, _ = self.db_manager.sqlite_dao.list_questions(
                page=1,
                page_size=10000
            )

            # 统计状态分布
            status_stats = defaultdict(lambda: {
                'count': 0,
                'total_points': 0,
                'total_usage': 0
            })

            total_questions = len(all_questions)

            for q in all_questions:
                status = q.get('status', '草稿')
                points = q.get('points', 0) or 0
                usage_count = q.get('usage_count', 0)

                status_stats[status]['count'] += 1
                status_stats[status]['total_points'] += points
                status_stats[status]['total_usage'] += usage_count

            # 构建结果
            result = {
                'total_questions': total_questions,
                'statuses': []
            }

            for status in ['已发布', '草稿', '已归档']:
                if status in status_stats:
                    stats_data = status_stats[status]
                    count = stats_data['count']
                    percentage = (count / total_questions * 100) if total_questions > 0 else 0.0

                    average_points = stats_data['total_points'] / count if count > 0 else None
                    average_usage = stats_data['total_usage'] / count if count > 0 else 0.0

                    result['statuses'].append({
                        'status': status,
                        'count': count,
                        'percentage': round(percentage, 2),
                        'average_points': average_points,
                        'average_usage_count': average_usage,
                        'total_points': stats_data['total_points']
                    })

            self.logger.info(f"状态分布统计完成")
            return result

        except Exception as e:
            self.logger.error(f"获取状态分布统计失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 综合分析报告
    # -------------------------------------------------------------------------

    def generate_analysis_report(self) -> Dict[str, Any]:
        """
        生成综合分析报告

        Returns:
            Dict: 包含所有分析数据的综合报告

        Raises:
            Exception: 生成报告过程出错
        """
        try:
            self.logger.info("开始生成综合分析报告...")

            report = {
                'report_timestamp': datetime.now().isoformat(),
                'report_type': 'comprehensive_analysis',
                'overall_statistics': self.get_overall_statistics(),
                'category_statistics': self.get_category_statistics(),
                'difficulty_statistics': self.get_difficulty_statistics(),
                'question_type_statistics': self.get_question_type_statistics(),
                'status_statistics': self.get_status_statistics(),
                'tag_statistics': self.get_tag_statistics(top_n=10),
                'quality_metrics': {
                    'summary': self._get_quality_summary()
                },
                'time_series_analysis': self.get_time_series_analysis(days=30, period='day'),
                'insights': self._generate_insights()
            }

            self.logger.info("综合分析报告生成完成")
            return report

        except Exception as e:
            self.logger.error(f"生成分析报告失败: {e}")
            raise

    # -------------------------------------------------------------------------
    # 辅助方法
    # -------------------------------------------------------------------------

    def _get_quality_summary(self) -> Dict[str, Any]:
        """获取质量评估摘要"""
        try:
            quality_metrics = self.get_quality_metrics()
            return {
                'total_questions_evaluated': quality_metrics['total_questions_evaluated'],
                'average_completeness_score': quality_metrics['average_completeness_score'],
                'average_quality_score': quality_metrics['average_quality_score'],
                'quality_distribution': quality_metrics['quality_distribution']
            }
        except Exception as e:
            self.logger.warning(f"获取质量评估摘要失败: {e}")
            return {}

    def _generate_insights(self) -> Dict[str, List[str]]:
        """生成洞察和建议"""
        insights = {
            'strengths': [],
            'weaknesses': [],
            'recommendations': []
        }

        try:
            # 获取统计数据进行分析
            overall = self.get_overall_statistics()
            difficulty = self.get_difficulty_statistics()
            quality = self._get_quality_summary()

            # 强项分析
            if overall.get('total_questions', 0) > 100:
                insights['strengths'].append(f"题库规模较大，共有{overall['total_questions']}道题目")

            if overall.get('published_questions', 0) / max(overall.get('total_questions', 1), 1) > 0.8:
                insights['strengths'].append("已发布题目比例高，题库质量有保障")

            # 弱项分析
            if quality.get('average_quality_score', 0) < 60:
                insights['weaknesses'].append("题目整体质量评分较低，需要提高题目完整度")

            # 建议
            if overall.get('draft_questions', 0) > overall.get('published_questions', 1) * 0.3:
                insights['recommendations'].append("草稿题目数量较多，建议及时审核和发布")

            if quality.get('average_quality_score', 0) < 70:
                insights['recommendations'].append("建议添加更多题目解析和参考答案以提高质量")

            # 难度建议
            for diff in difficulty.get('difficulties', []):
                if diff['percentage'] < 10 and diff['difficulty'] != '中等':
                    insights['recommendations'].append(f"缺乏{diff['difficulty']}难度的题目，建议补充")

        except Exception as e:
            self.logger.warning(f"生成洞察失败: {e}")

        return insights
