"""
Tests for LangChain Olostep document loaders.
"""

import pytest
from unittest.mock import Mock, patch, AsyncMock
from langchain_olostep.document_loaders import OlostepLoader, OlostepWebCrawler
from langchain_core.documents import Document


class TestOlostepLoader:
    """Unit tests for the ``OlostepLoader`` document loader.

    Tests that load documents patch ``OlostepAPI`` inside
    ``langchain_olostep.document_loaders`` so the loader talks to a mock
    client whose ``scrape_url`` is an ``AsyncMock`` returning canned payloads.
    """

    def test_init_with_api_key(self):
        """An explicit API key and URL list are stored; format defaults to markdown."""
        loader = OlostepLoader(
            urls=["https://example.com"],
            api_key="test-api-key",
        )
        assert loader.api_key == "test-api-key"
        assert loader.urls == ["https://example.com"]
        assert loader.format == "markdown"

    def test_init_without_api_key(self, monkeypatch):
        """Constructing the loader with no API key available raises ValueError."""
        # The expected error names the OLOSTEP_API_KEY environment variable, so
        # a key exported in the developer/CI environment would mask the failure
        # this test is looking for. Remove it for the duration of the test.
        monkeypatch.delenv("OLOSTEP_API_KEY", raising=False)
        with pytest.raises(
            ValueError,
            match="OLOSTEP_API_KEY environment variable is required",
        ):
            OlostepLoader(urls=["https://example.com"])

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_success(self, mock_api_class):
        """A successful scrape yields one document with content and metadata."""
        # Mock API client returning a single markdown payload.
        mock_api = Mock()
        mock_api.scrape_url = AsyncMock(return_value={
            "markdown": "# Test Content",
            "id": "test-id",
            "timestamp": "2023-01-01T00:00:00Z",
        })
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example.com"],
            api_key="test-api-key",
        )
        documents = loader.load()

        assert len(documents) == 1
        document = documents[0]
        assert document.page_content == "# Test Content"
        assert document.metadata["url"] == "https://example.com"
        assert document.metadata["format"] == "markdown"
        assert document.metadata["scrape_id"] == "test-id"
        assert document.metadata["source"] == "olostep"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_multiple_urls(self, mock_api_class):
        """Each URL produces its own document, in the order the URLs were given."""
        mock_api = Mock()
        # side_effect hands out one payload per scrape_url call, in order.
        mock_api.scrape_url = AsyncMock(side_effect=[
            {"markdown": "# Page 1", "id": "id1"},
            {"markdown": "# Page 2", "id": "id2"},
        ])
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example1.com", "https://example2.com"],
            api_key="test-api-key",
        )
        documents = loader.load()

        assert len(documents) == 2
        assert documents[0].page_content == "# Page 1"
        assert documents[1].page_content == "# Page 2"
        assert documents[0].metadata["url"] == "https://example1.com"
        assert documents[1].metadata["url"] == "https://example2.com"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_error_handling(self, mock_api_class):
        """A failing scrape is reported as an error document, not an exception."""
        mock_api = Mock()
        mock_api.scrape_url = AsyncMock(side_effect=Exception("Scraping failed"))
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example.com"],
            api_key="test-api-key",
        )
        documents = loader.load()

        # The failure should surface as a single placeholder document.
        assert len(documents) == 1
        assert "Error scraping" in documents[0].page_content
        assert documents[0].metadata["url"] == "https://example.com"
        assert documents[0].metadata["error"] == "Scraping failed"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_different_format(self, mock_api_class):
        """With ``format="html"`` the content comes from the payload's html key."""
        mock_api = Mock()
        mock_api.scrape_url = AsyncMock(return_value={
            "html": "<h1>Test Content</h1>",
            "id": "test-id",
        })
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example.com"],
            format="html",
            api_key="test-api-key",
        )
        documents = loader.load()

        assert documents[0].page_content == "<h1>Test Content</h1>"
        assert documents[0].metadata["format"] == "html"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_lazy_load(self, mock_api_class):
        """``lazy_load`` yields the same document a regular load would return."""
        mock_api = Mock()
        mock_api.scrape_url = AsyncMock(return_value={
            "markdown": "# Test Content",
            "id": "test-id",
        })
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example.com"],
            api_key="test-api-key",
        )

        # Exhaust the iterator so every document is produced.
        documents = list(loader.lazy_load())
        assert len(documents) == 1
        assert documents[0].page_content == "# Test Content"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_with_parser_id(self, mock_api_class):
        """A configured ``parser_id`` is forwarded to ``scrape_url``."""
        mock_api = Mock()
        mock_api.scrape_url = AsyncMock(return_value={
            "markdown": "# Parsed Content",
            "id": "test-id",
        })
        mock_api_class.return_value = mock_api

        loader = OlostepLoader(
            urls=["https://example.com"],
            parser_id="custom-parser",
            api_key="test-api-key",
        )
        # Only the outgoing call matters here; the returned documents are unused.
        loader.load()

        # Verify parser ID was passed to API as the last positional argument.
        mock_api.scrape_url.assert_called_with(
            "https://example.com", "markdown", 0, "custom-parser"
        )


class TestOlostepWebCrawler:
    """Unit tests for the ``OlostepWebCrawler`` document loader.

    Tests that crawl patch ``OlostepAPI`` inside
    ``langchain_olostep.document_loaders`` so the crawler talks to a mock
    client whose ``crawl_website`` is an ``AsyncMock`` returning canned payloads.
    """

    def test_init_with_api_key(self):
        """Explicit arguments are stored; max_pages and format use defaults."""
        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            api_key="test-api-key",
        )
        assert crawler.api_key == "test-api-key"
        assert crawler.start_url == "https://example.com"
        assert crawler.max_pages == 10
        assert crawler.format == "markdown"

    def test_init_without_api_key(self, monkeypatch):
        """Constructing the crawler with no API key available raises ValueError."""
        # The expected error names the OLOSTEP_API_KEY environment variable, so
        # a key exported in the developer/CI environment would mask the failure
        # this test is looking for. Remove it for the duration of the test.
        monkeypatch.delenv("OLOSTEP_API_KEY", raising=False)
        with pytest.raises(
            ValueError,
            match="OLOSTEP_API_KEY environment variable is required",
        ):
            OlostepWebCrawler(start_url="https://example.com")

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_success(self, mock_api_class):
        """A successful crawl yields one document per returned page."""
        # Mock API client returning a two-page crawl result.
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(return_value={
            "pages": [
                {
                    "url": "https://example.com",
                    "markdown": "# Homepage",
                    "id": "page1",
                },
                {
                    "url": "https://example.com/about",
                    "markdown": "# About",
                    "id": "page2",
                },
            ],
            "id": "crawl-id",
            "timestamp": "2023-01-01T00:00:00Z",
        })
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            max_pages=5,
            api_key="test-api-key",
        )
        documents = crawler.load()

        assert len(documents) == 2

        homepage, about = documents
        assert homepage.page_content == "# Homepage"
        assert homepage.metadata["url"] == "https://example.com"
        assert homepage.metadata["format"] == "markdown"
        assert homepage.metadata["crawl_id"] == "crawl-id"
        assert homepage.metadata["source"] == "olostep_crawl"

        assert about.page_content == "# About"
        assert about.metadata["url"] == "https://example.com/about"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_error_handling(self, mock_api_class):
        """A failing crawl is reported as an error document, not an exception."""
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(side_effect=Exception("Crawling failed"))
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            api_key="test-api-key",
        )
        documents = crawler.load()

        # The failure should surface as a single placeholder document.
        assert len(documents) == 1
        assert "Error crawling" in documents[0].page_content
        assert documents[0].metadata["url"] == "https://example.com"
        assert documents[0].metadata["error"] == "Crawling failed"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_different_format(self, mock_api_class):
        """With ``format="html"`` page content comes from each page's html key."""
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(return_value={
            "pages": [
                {
                    "url": "https://example.com",
                    "html": "<h1>Homepage</h1>",
                },
            ],
            "id": "crawl-id",
        })
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            format="html",
            api_key="test-api-key",
        )
        documents = crawler.load()

        assert documents[0].page_content == "<h1>Homepage</h1>"
        assert documents[0].metadata["format"] == "html"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_lazy_load(self, mock_api_class):
        """``lazy_load`` yields the crawled page as a document."""
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(return_value={
            "pages": [
                {
                    "url": "https://example.com",
                    "markdown": "# Homepage",
                },
            ],
            "id": "crawl-id",
        })
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            api_key="test-api-key",
        )

        # Exhaust the iterator so every document is produced.
        documents = list(crawler.lazy_load())
        assert len(documents) == 1
        assert documents[0].page_content == "# Homepage"

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_with_parser_id(self, mock_api_class):
        """A configured ``parser_id`` is forwarded to ``crawl_website``."""
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(return_value={
            "pages": [{"url": "https://example.com", "markdown": "# Content"}],
            "id": "crawl-id",
        })
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            parser_id="custom-parser",
            api_key="test-api-key",
        )
        # Only the outgoing call matters here; the returned documents are unused.
        crawler.load()

        # Verify parser ID was passed to API as the last positional argument.
        mock_api.crawl_website.assert_called_with(
            "https://example.com", 10, "markdown", 0, "custom-parser"
        )

    @patch('langchain_olostep.document_loaders.OlostepAPI')
    def test_load_empty_pages(self, mock_api_class):
        """A crawl result with no pages produces no documents."""
        mock_api = Mock()
        mock_api.crawl_website = AsyncMock(return_value={
            "pages": [],
            "id": "crawl-id",
        })
        mock_api_class.return_value = mock_api

        crawler = OlostepWebCrawler(
            start_url="https://example.com",
            api_key="test-api-key",
        )
        documents = crawler.load()

        # Should return empty list
        assert documents == []




