"""
Tests for advanced SDK features.

These tests verify advanced SDK functionality like parameter validation,
metadata extraction, and evaluator handling without requiring server infrastructure.
"""

from cat_cafe.sdk import CATCafeClient, DatasetImport, DatasetExample, Example, Experiment, ExperimentResult


class TestAdvancedSDKFeatures:
    """Exercise SDK data types together with the callback shapes used around them.

    Nothing here talks to a server: each test builds SDK objects in memory or
    defines a small local callback (metadata extractor, evaluator, task
    function) and checks what it returns.
    """

    def test_metadata_function_integration(self):
        """A metadata callback can read an ``Example`` and a plain-string output."""

        def metadata_extractor(example, output):
            """Summarise the first input message and the produced output.

            ``output`` may be a ``{"messages": [...]}`` dict or any other
            value; anything that is not a dict with messages is measured
            through ``str(output)``.
            """
            messages = example.input.get("messages", []) if isinstance(example.input, dict) else list(example.input)
            output_messages = output.get("messages", []) if isinstance(output, dict) else []
            user_content = messages[0]["content"] if messages else ""
            return {
                "input_length": len(user_content),
                "output_length": len(output_messages[0]["content"]) if output_messages else len(str(output)),
                "has_question_mark": "?" in user_content,
                "processing_info": "metadata_extracted",
            }

        example = Example(
            id="test-example",
            input={"messages": [{"role": "user", "content": "What is 2+2?"}]},
            output={"messages": [{"role": "assistant", "content": "4"}]},
            tags=["math"],
            metadata={},
        )

        # A bare string output takes the ``len(str(output))`` fallback branch.
        output = "The answer is 4"
        metadata = metadata_extractor(example, output)

        assert metadata["input_length"] == len("What is 2+2?")
        assert metadata["output_length"] == len("The answer is 4")
        assert metadata["has_question_mark"] is True
        assert metadata["processing_info"] == "metadata_extracted"

    def test_evaluator_function_scoring(self):
        """Evaluator callbacks return ``(score, reason)`` for full, partial and no match."""
        import re

        def tokenize(text):
            """Return the set of lowercase word tokens in ``text``, ignoring punctuation."""
            return set(re.findall(r"\w+", text.lower()))

        def accuracy_evaluator(actual_output, output):
            """Score how much of the expected first message appears in ``actual_output``.

            ``output`` is either a ``{"messages": [...]}`` dict or a bare list
            of messages; any other value is treated as having no messages.
            """
            if isinstance(output, dict):
                messages = output.get("messages", [])
            elif isinstance(output, list):
                messages = output
            else:
                messages = []
            expected_content = messages[0]["content"] if messages else ""
            if not expected_content:
                # "" is a substring of every string; without this guard a
                # missing reference would be reported as a perfect match.
                return 0.0, "No match"
            if expected_content.lower() in actual_output.lower():
                return 1.0, "Perfect match"
            # Compare whole words so that e.g. "is" does not match inside "this".
            if tokenize(expected_content) & tokenize(actual_output):
                return 0.7, "Partial match"
            return 0.0, "No match"

        def completeness_evaluator(actual_output, output):
            """Score the response by length alone.

            ``output`` is unused; it is accepted so both evaluators share the
            same ``(actual_output, output)`` call signature.
            """
            if len(actual_output) < 10:
                return 0.3, "Too short"
            elif len(actual_output) > 100:
                return 0.8, "Very complete"
            else:
                return 0.6, "Adequate length"

        expected = {"messages": [{"role": "assistant", "content": "The answer is 42"}]}

        # Perfect match: the expected text appears verbatim.
        actual_perfect = "The answer is 42"
        acc_score, acc_reason = accuracy_evaluator(actual_perfect, expected)
        comp_score, comp_reason = completeness_evaluator(actual_perfect, expected)

        assert acc_score == 1.0
        assert acc_reason == "Perfect match"
        assert comp_score == 0.6
        assert comp_reason == "Adequate length"

        # Partial match: some expected words are present, but not the full text.
        actual_partial = "The answer involves 42"
        acc_score_partial, acc_reason_partial = accuracy_evaluator(actual_partial, expected)

        assert acc_score_partial == 0.7
        assert acc_reason_partial == "Partial match"

        # No match: no expected word is present.
        actual_none = "Completely different response"
        acc_score_none, acc_reason_none = accuracy_evaluator(actual_none, expected)

        assert acc_score_none == 0.0
        assert acc_reason_none == "No match"

        # "is" occurs only inside "This"; a substring hit must not count as a word hit.
        assert accuracy_evaluator("This thing differs", expected) == (0.0, "No match")

        # With no reference message there is nothing to match against.
        assert accuracy_evaluator("The answer is 42", {"messages": []}) == (0.0, "No match")

    def test_parameter_inference_logic(self):
        """A dataset ID can be inferred from a string, a dict, or a ``Dataset`` object.

        ``infer_dataset_id`` is a local stand-in for that inference; the SDK's
        own convenience functions are not called here.
        """
        from cat_cafe.sdk import Dataset

        def infer_dataset_id(dataset):
            """Return the dataset ID for any of the supported input shapes."""
            if isinstance(dataset, str):
                return dataset
            if isinstance(dataset, dict):
                return dataset.get("id", "unknown")
            return dataset.id

        # Plain string: the value already is the ID.
        assert infer_dataset_id("dataset-123") == "dataset-123"

        # Dict: the ID is read from the "id" key, with a fallback when it is absent.
        assert infer_dataset_id({"id": "dataset-456", "name": "Test Dataset"}) == "dataset-456"
        assert infer_dataset_id({"name": "Test Dataset"}) == "unknown"

        # Dataset object: the ID is the ``id`` attribute.
        dataset_obj = Dataset(id="dataset-789", name="Object Dataset", example_count=0, version=1)
        assert infer_dataset_id(dataset_obj) == "dataset-789"

    def test_error_handling_scenarios(self):
        """An ``ExperimentResult`` can carry an error, and a client can still be created."""

        # A failed run: empty actual output plus an error message.
        error_result = ExperimentResult(
            example_id="error-example",
            input_data={"messages": [{"role": "user", "content": "test"}]},
            output={"messages": [{"role": "assistant", "content": "expected"}]},
            actual_output="",
            error="Connection timeout",
        )

        assert error_result.error == "Connection timeout"
        assert error_result.actual_output == ""
        # No scores were supplied, so the field is expected to be an empty dict.
        assert error_result.evaluation_scores == {}

        # Constructing a client with no arguments yields the local base URL.
        client = CATCafeClient()
        assert client.base_url == "http://localhost:8000"

    def test_async_function_compatibility(self):
        """Sync and async task functions are distinguishable and both runnable."""
        import asyncio
        import inspect

        def sync_function(example):
            """Synchronous task: echo the first input message."""
            messages = example.input.get("messages", [])
            return f"Sync: {messages[0]['content']}" if messages else "Sync:"

        async def async_function(example):
            """Asynchronous task: same echo, after awaiting once."""
            await asyncio.sleep(0.001)
            messages = example.input.get("messages", [])
            return f"Async: {messages[0]['content']}" if messages else "Async:"

        # Detection is done on the function objects, before anything is called.
        assert not inspect.iscoroutinefunction(sync_function)
        assert inspect.iscoroutinefunction(async_function)

        example = Example(
            id="test",
            input={"messages": [{"role": "user", "content": "Hello"}]},
            output={"messages": [{"role": "assistant", "content": "Hi"}]},
        )

        # The sync function is called directly.
        sync_result = sync_function(example)
        assert sync_result == "Sync: Hello"

        # The coroutine is driven to completion with ``asyncio.run``.
        async_result = asyncio.run(async_function(example))
        assert async_result == "Async: Hello"


class TestDatasetConversionLogic:
    """Test the dataset conversion logic used in the SDK, rebuilt here step by step."""

    def test_dataset_example_to_example_conversion(self):
        """Every field of a ``DatasetExample`` is preserved when converted to an ``Example``.

        The conversion is performed by hand to simulate what happens when an
        example is imported and then fetched back.
        """

        # Original DatasetExample, as a caller would prepare it for import.
        dataset_example = DatasetExample(
            input={"messages": [{"role": "user", "content": "Test input"}]},
            output={"messages": [{"role": "assistant", "content": "Test output"}]},
            metadata={"type": "test"},
        )
        dataset_example.tags = ["test"]
        dataset_example.source_trace_id = "trace-123"
        dataset_example.source_node_id = "node-456"

        # Simulated API conversion: the ID and timestamps are new, everything
        # else is carried over from the dataset example.
        api_example = Example(
            id="api-generated-id",
            input=dataset_example.input,
            output=dataset_example.output,
            metadata=dict(dataset_example.metadata),
            created_at="2024-01-01T00:00:00Z",
            updated_at="2024-01-01T00:00:00Z",
        )
        # Tags are copied explicitly, like the trace and node IDs below, so the
        # tag assertion checks the conversion rather than a default value.
        api_example.tags = list(dataset_example.tags)
        api_example.source_trace_id = dataset_example.source_trace_id
        api_example.source_node_id = dataset_example.source_node_id

        # Verify the conversion preserved all data.
        assert api_example.input == dataset_example.input
        assert api_example.output == dataset_example.output
        assert api_example.source_trace_id == dataset_example.source_trace_id
        assert api_example.source_node_id == dataset_example.source_node_id
        assert api_example.metadata == dataset_example.metadata
        assert api_example.tags == dataset_example.tags

    def test_dict_to_dataset_conversion(self):
        """A raw dataset dict converts into a ``Dataset`` holding ``Example`` objects."""
        from cat_cafe.sdk import Dataset

        # Raw dict in the shape of an API response.
        raw_dict = {
            "id": "dataset-123",
            "name": "Raw Dataset",
            "description": "From API",
            "tags": ["api", "raw"],
            "metadata": {"source": "api"},
            "example_count": 2,
            "version": 1,
            "created_at": "2024-01-01T00:00:00Z",
            "updated_at": "2024-01-01T00:00:00Z",
            "examples": [
                {
                    "id": "ex1",
                    "input": {"messages": [{"role": "user", "content": "Q1"}]},
                    "output": {"messages": [{"role": "assistant", "content": "A1"}]},
                    "tags": ["q1"],
                    "metadata": {},
                    "created_at": "2024-01-01T00:00:00Z",
                    "updated_at": "2024-01-01T00:00:00Z",
                }
            ],
        }

        # Convert each raw example, carrying its tags along with the other fields.
        examples = [
            Example(
                id=example_data.get("id", ""),
                input=example_data.get("input", {}),
                output=example_data.get("output", {}),
                tags=example_data.get("tags", []),
                metadata=example_data.get("metadata", {}),
                created_at=example_data.get("created_at"),
                updated_at=example_data.get("updated_at"),
            )
            for example_data in raw_dict.get("examples", [])
        ]

        dataset_obj = Dataset(
            id=raw_dict.get("id", ""),
            name=raw_dict.get("name", ""),
            description=raw_dict.get("description"),
            tags=raw_dict.get("tags", []),
            metadata=raw_dict.get("metadata", {}),
            # The count comes from the examples actually parsed, not from the
            # raw "example_count" field (which is 2 in this fixture).
            example_count=len(examples),
            version=raw_dict.get("version", 1),
            created_at=raw_dict.get("created_at"),
            updated_at=raw_dict.get("updated_at"),
            examples=examples,
        )

        # Verify the conversion.
        assert dataset_obj.id == "dataset-123"
        assert dataset_obj.name == "Raw Dataset"
        assert len(dataset_obj.examples) == 1
        assert dataset_obj.examples[0].id == "ex1"
        assert dataset_obj.examples[0].input == {"messages": [{"role": "user", "content": "Q1"}]}
        assert dataset_obj.examples[0].tags == ["q1"]


class TestSDKIntegrationPatterns:
    """Common SDK integration patterns, assembled purely from in-memory objects."""

    def test_complete_workflow_simulation(self):
        """Walk through import, run, evaluate and summarise without a server."""

        def respond(example):
            """Task under test: echo the last input message behind a fixed prefix."""
            conversation = example.input.get("messages", [])
            latest = conversation[-1]["content"] if conversation else ""
            return f"Response to: {latest}"

        def response_evaluator(actual_output, output):
            """Return ``(score, reason)`` depending on whether the prefix is present."""
            if "Response to:" in actual_output:
                return 0.8, "Contains response prefix"
            return 0.2, "Missing prefix"

        # Dataset to import: a single greeting example, tagged after construction.
        greeting = DatasetExample(
            input={"messages": [{"role": "user", "content": "Hello world"}]},
            output={"messages": [{"role": "assistant", "content": "Hello there!"}]},
        )
        greeting.tags = ["greeting"]
        dataset_import = DatasetImport(
            name="Workflow Test Dataset",
            description="Testing complete workflow",
            tags=["workflow", "test"],
            examples=[greeting],
        )

        # Experiment config: built to confirm construction works; not used afterwards.
        Experiment(
            name="Workflow Test",
            description="Testing workflow",
            dataset_id="workflow-dataset-123",
            tags=["automated"],
        )

        # Simulated processing: convert, run the task, evaluate, record a result.
        results = []
        for position, imported in enumerate(dataset_import.examples, start=1):
            # Stand-in for the import/fetch round trip that yields an Example.
            example = Example(
                id=f"example-{position}",
                input=imported.input,
                output=imported.output,
                metadata=dict(imported.metadata),
            )
            example.tags = imported.tags

            produced = respond(example)
            score, reason = response_evaluator(produced, example.output)

            results.append(
                ExperimentResult(
                    example_id=example.id,
                    input_data={"input": example.input},
                    output=str(example.output),
                    actual_output=produced,
                    evaluation_scores={"response_evaluator": score},
                    metadata={"reason": reason},
                )
            )

        # Exactly one result, carrying the echoed text and the evaluator verdict.
        assert len(results) == 1
        only_result = results[0]
        assert only_result.actual_output == "Response to: Hello world"
        assert only_result.evaluation_scores["response_evaluator"] == 0.8
        assert only_result.metadata["reason"] == "Contains response prefix"

        # Summary: a result counts as successful when it carries no error.
        total_examples = len(results)
        successful_examples = sum(1 for outcome in results if outcome.error is None)
        success_rate = successful_examples / total_examples

        assert total_examples == 1
        assert successful_examples == 1
        assert success_rate == 1.0
