"""
Unit tests for the transform.cleaner module.

Tests UUID conversion, string cleaning, data structure validation,
and the MongoCleaner class functionality.
"""

import json
import tempfile
from pathlib import Path

import pytest

from idealista_scraper.transform import (
    MongoCleaner,
    clean_dict,
    clean_string,
    convert_uuid_to_mongo_id,
    ensure_pricing_structure,
    ensure_proper_id_format,
    is_uuid,
)


class TestUUIDFunctions:
    """Checks for UUID detection and UUID-to-MongoDB-id conversion."""

    def test_is_uuid_valid(self):
        """Hyphenated UUID strings, including the all-zero one, are accepted."""
        accepted = (
            "550e8400-e29b-41d4-a716-446655440000",
            "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
            "00000000-0000-0000-0000-000000000000",
        )
        for candidate in accepted:
            assert is_uuid(candidate) is True

    def test_is_uuid_invalid(self):
        """Malformed strings and non-string values are rejected."""
        rejected = ("not-a-uuid", "123456", "", None, 123)
        for candidate in rejected:
            assert is_uuid(candidate) is False

    def test_convert_uuid_to_mongo_id_valid(self):
        """A UUID string is turned into a 24-character id."""
        source_uuid = "550e8400-e29b-41d4-a716-446655440000"
        mongo_id = convert_uuid_to_mongo_id(source_uuid)
        assert len(mongo_id) == 24
        # The expected id is the first 24 hex digits of the hyphen-stripped UUID.
        assert mongo_id == "550e8400e29b41d4a7164466"

    def test_convert_uuid_to_mongo_id_invalid(self):
        """A string that is not a UUID comes back unchanged."""
        passthrough = convert_uuid_to_mongo_id("not-a-uuid")
        assert passthrough == "not-a-uuid"


class TestStringCleaning:
    """Checks for clean_string on strings and non-string inputs."""

    def test_clean_string_null_bytes(self):
        """Null bytes are stripped from the middle, start, and end of a string."""
        cases = (
            ("Hello\x00World", "HelloWorld"),
            ("\x00Start", "Start"),
            ("End\x00", "End"),
        )
        for raw, expected in cases:
            assert clean_string(raw) == expected

    def test_clean_string_control_characters(self):
        """The \\x01, \\x02, \\x03 and \\x7F characters are stripped."""
        cases = (
            ("Test\x01String", "TestString"),
            ("Multi\x02ple\x03Chars", "MultipleChars"),
            ("\x7FDelete", "Delete"),
        )
        for raw, expected in cases:
            assert clean_string(raw) == expected

    def test_clean_string_preserves_valid_whitespace(self):
        """Tab, newline, and carriage return survive cleaning."""
        untouched = (
            "Hello\tWorld",  # Tab
            "Hello\nWorld",  # Newline
            "Hello\rWorld",  # Carriage return
        )
        for text in untouched:
            assert clean_string(text) == text

    def test_clean_string_normal_strings(self):
        """Plain, punctuated, and accented strings come back as given."""
        untouched = (
            "Normal String",
            "Special: @#$%",
            "Unicode: é ñ ü",
        )
        for text in untouched:
            assert clean_string(text) == text

    def test_clean_string_non_string_types(self):
        """Integers, None, and lists are passed through rather than cleaned."""
        number_result = clean_string(123)
        none_result = clean_string(None)
        list_result = clean_string([1, 2, 3])

        assert number_result == 123
        assert none_result is None
        assert list_result == [1, 2, 3]


class TestDictCleaning:
    """Checks for clean_dict on flat, nested, and list-bearing dictionaries."""

    def test_clean_dict_simple(self):
        """String values of a flat dictionary are cleaned."""
        cleaned = clean_dict({"name": "Test\x00Name", "value": "Clean\x01This"})
        assert cleaned["name"] == "TestName"
        assert cleaned["value"] == "CleanThis"

    def test_clean_dict_nested(self):
        """Cleaning reaches string values inside a nested dictionary."""
        payload = {
            "outer": "Value\x00",
            "nested": {"inner": "Test\x01String"},
        }
        cleaned = clean_dict(payload)
        inner_section = cleaned["nested"]
        assert cleaned["outer"] == "Value"
        assert inner_section["inner"] == "TestString"

    def test_clean_dict_with_lists(self):
        """Cleaning reaches strings in lists and dictionaries inside lists."""
        payload = {
            "items": ["Item\x00One", "Item\x01Two"],
            "nested_items": [
                {"name": "First\x00"},
                {"name": "Second\x01"},
            ],
        }
        cleaned = clean_dict(payload)
        assert cleaned["items"] == ["ItemOne", "ItemTwo"]

        nested = cleaned["nested_items"]
        assert nested[0]["name"] == "First"
        assert nested[1]["name"] == "Second"

    def test_clean_dict_preserves_non_strings(self):
        """Numbers, booleans, None, and lists of numbers keep their values."""
        payload = {
            "string": "Test\x00",
            "number": 123,
            "boolean": True,
            "null": None,
            "list": [1, 2, 3],
        }
        cleaned = clean_dict(payload)

        # Only the string entry is expected to change.
        assert cleaned["string"] == "Test"

        assert cleaned["number"] == 123
        assert cleaned["boolean"] is True
        assert cleaned["null"] is None
        assert cleaned["list"] == [1, 2, 3]


class TestIDFormatting:
    """Checks for ensure_proper_id_format across the supported input shapes."""

    @staticmethod
    def _assert_oid_wrapper(value):
        """Assert *value* is a dict holding a 24-character "$oid" entry."""
        assert isinstance(value, dict)
        assert "$oid" in value
        assert len(value["$oid"]) == 24

    def test_ensure_proper_id_format_uuid_string(self):
        """A bare UUID string is wrapped as a dict with a 24-character $oid."""
        formatted = ensure_proper_id_format("550e8400-e29b-41d4-a716-446655440000")
        self._assert_oid_wrapper(formatted)

    def test_ensure_proper_id_format_existing_oid(self):
        """An id already in $oid form with a 24-character value is left as is."""
        existing = {"$oid": "507f1f77bcf86cd799439011"}
        formatted = ensure_proper_id_format(existing)
        assert formatted == {"$oid": "507f1f77bcf86cd799439011"}

    def test_ensure_proper_id_format_uuid_in_oid(self):
        """A UUID stored under $oid is converted to its 24-character form."""
        wrapped_uuid = {"$oid": "550e8400-e29b-41d4-a716-446655440000"}
        formatted = ensure_proper_id_format(wrapped_uuid)
        assert formatted["$oid"] == "550e8400e29b41d4a7164466"

    def test_ensure_proper_id_format_invalid(self):
        """None still produces a dict with a 24-character $oid.

        The id is presumably freshly generated; this test only checks its shape.
        """
        formatted = ensure_proper_id_format(None)
        self._assert_oid_wrapper(formatted)


class TestPricingStructure:
    """Checks for ensure_pricing_structure on empty and pre-filled units."""

    def test_ensure_pricing_structure_empty_unit(self):
        """An empty unit gains the pricing -> sales -> price nesting."""
        completed = ensure_pricing_structure({})
        assert "pricing" in completed
        pricing = completed["pricing"]
        assert "sales" in pricing
        assert "price" in pricing["sales"]

    def test_ensure_pricing_structure_all_fields(self):
        """Every required sales field exists after completing an empty unit."""
        expected_fields = (
            "_id",
            "salesTotalCommissionPercentage",
            "salesTotalCommission",
            "salesListingAgentCommissionPercentage",
            "salesListingAgentCommission",
            "salesSellingAgentCommissionPercentage",
            "salesSellingAgentCommission",
            "communityFees",
            "IBIFees",
            "price",
        )
        sales = ensure_pricing_structure({})["pricing"]["sales"]

        missing = [name for name in expected_fields if name not in sales]
        assert not missing

    def test_ensure_pricing_structure_preserves_existing(self):
        """Values already present under pricing.sales are not overwritten."""
        existing_sales = {
            "price": 500000,
            "communityFees": 150,
        }
        completed = ensure_pricing_structure({"pricing": {"sales": existing_sales}})
        sales = completed["pricing"]["sales"]
        assert sales["price"] == 500000
        assert sales["communityFees"] == 150

    def test_ensure_pricing_structure_default_values(self):
        """Missing fields are filled with the expected defaults."""
        sales = ensure_pricing_structure({})["pricing"]["sales"]

        # NOTE(review): salesSellingAgentCommission is expected to default to 4
        # while the other commission fields checked here default to 0; confirm
        # this is intentional in the implementation.
        numeric_defaults = (
            ("salesTotalCommissionPercentage", 0),
            ("salesTotalCommission", 0),
            ("salesSellingAgentCommission", 4),
        )
        for field_name, default in numeric_defaults:
            assert sales[field_name] == default

        for field_name in ("communityFees", "IBIFees"):
            assert sales[field_name] is None


class TestMongoCleaner:
    """Test MongoCleaner record cleaning, file conversion, and statistics.

    The file-based tests use pytest's ``tmp_path`` fixture, so the input and
    output files live in a per-test directory that pytest manages. Nothing is
    left behind if a test fails part-way through, and no manual cleanup is
    needed.
    """

    @staticmethod
    def _write_jsonl(path, lines):
        """Write pre-serialised *lines* to *path*, one per line, as UTF-8.

        Args:
            path: Destination ``pathlib.Path``.
            lines: Iterable of strings; each is written followed by a newline.

        Returns:
            The same *path*, for convenient chaining.
        """
        path.write_text("".join(f"{line}\n" for line in lines), encoding="utf-8")
        return path

    def test_cleaner_initialization(self):
        """Test that a new cleaner starts with zeroed counters."""
        cleaner = MongoCleaner()
        assert cleaner.success_count == 0
        assert cleaner.error_count == 0

    def test_clean_record_uuid_conversion(self):
        """Test record cleaning with UUID conversion."""
        cleaner = MongoCleaner()
        data = {
            "_id": "550e8400-e29b-41d4-a716-446655440000",
            "name": "Test",
        }
        result = cleaner.clean_record(data)
        assert result["_id"]["$oid"] == "550e8400e29b41d4a7164466"

    def test_clean_record_string_cleaning(self):
        """Test record cleaning removes invalid characters."""
        cleaner = MongoCleaner()
        data = {
            "_id": "test-id",
            "name": "Test\x00Name",
            "description": "Clean\x01This",
        }
        result = cleaner.clean_record(data)
        assert result["name"] == "TestName"
        assert result["description"] == "CleanThis"

    def test_clean_record_owner_details(self):
        """Test that a UUID in ownerDetails._id is converted."""
        cleaner = MongoCleaner()
        data = {
            "_id": "prop-id",
            "ownerDetails": {"_id": "550e8400-e29b-41d4-a716-446655440000"},
        }
        result = cleaner.clean_record(data)
        assert result["ownerDetails"]["_id"]["$oid"] == "550e8400e29b41d4a7164466"

    def test_clean_record_units_pricing(self):
        """Test cleaning adds pricing structure to units."""
        cleaner = MongoCleaner()
        data = {
            "_id": "prop-id",
            "units": [{"name": "Unit 1"}, {"name": "Unit 2"}],
        }
        result = cleaner.clean_record(data)
        assert len(result["units"]) == 2
        assert "pricing" in result["units"][0]
        assert "pricing" in result["units"][1]

    def test_convert_file(self, tmp_path):
        """Test file conversion functionality."""
        cleaner = MongoCleaner()

        input_path = self._write_jsonl(
            tmp_path / "input.jsonl",
            [
                json.dumps(
                    {
                        "_id": "550e8400-e29b-41d4-a716-446655440000",
                        "name": "Test\x00Property",
                    }
                ),
                json.dumps({"_id": "test-id", "name": "Normal Property"}),
            ],
        )
        output_path = tmp_path / "input.out.jsonl"

        stats = cleaner.convert_file(input_path, output_path)

        # Check statistics
        assert stats["success_count"] == 2
        assert stats["error_count"] == 0
        assert stats["total_processed"] == 2

        # Verify output; explicit encoding avoids relying on the locale default.
        with open(output_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        assert len(lines) == 2

        # Check first record
        record1 = json.loads(lines[0])
        assert record1["_id"]["$oid"] == "550e8400e29b41d4a7164466"
        assert record1["name"] == "TestProperty"

        # Check second record
        record2 = json.loads(lines[1])
        assert record2["name"] == "Normal Property"

    def test_convert_file_with_errors(self, tmp_path):
        """Test file conversion handles errors gracefully."""
        cleaner = MongoCleaner()

        # The middle line is not valid JSON.
        input_path = self._write_jsonl(
            tmp_path / "input.jsonl",
            [
                '{"valid": "json"}',
                'invalid json line',
                '{"another": "valid"}',
            ],
        )
        output_path = tmp_path / "input.out.jsonl"

        # Record each callback invocation so the number of calls can be checked.
        reported_errors = []

        def on_error(line_num, content, error):
            reported_errors.append((line_num, content, error))

        stats = cleaner.convert_file(
            input_path, output_path, error_callback=on_error
        )

        # Check statistics
        assert stats["success_count"] == 2
        assert stats["error_count"] == 1
        assert len(reported_errors) == 1

    def test_convert_file_progress_callback(self, tmp_path):
        """Test file conversion with progress callback."""
        cleaner = MongoCleaner()

        input_path = self._write_jsonl(
            tmp_path / "input.jsonl",
            [json.dumps({"_id": f"id-{i}", "name": f"Item {i}"}) for i in range(10)],
        )
        output_path = tmp_path / "input.out.jsonl"

        progress_values = []

        def on_progress(percent):
            progress_values.append(percent)

        cleaner.convert_file(input_path, output_path, progress_callback=on_progress)

        # One callback per input line, ending at 100 percent.
        assert len(progress_values) == 10
        assert progress_values[-1] == 100.0

    def test_get_stats(self):
        """Test that get_stats reports the counters and their sum."""
        cleaner = MongoCleaner()
        cleaner.success_count = 10
        cleaner.error_count = 2

        stats = cleaner.get_stats()
        assert stats["success_count"] == 10
        assert stats["error_count"] == 2
        assert stats["total_processed"] == 12


# Allow running this test module directly as a script: hand this file to
# pytest with verbose output.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
