"""Testing code generated by nbdev in unpackai/nlp/data.py"""
# Generated automatically from notebook nbs/41_nlp_data.ipynb

from unpackai.nlp.data import *

# Test Cell
# For Test Cases (might have duplicate import because it will be in a dedicated file)
from pathlib import Path
from typing import List

import pytest
from test_common.utils_4_tests import DATA_DIR
from test_utils import GITHUB_TEST_DATA_URL, check_connection_github

# Test Cell
# Plain text sample: local copy and the same file served from GitHub.
LOCAL_TEST_TXT = DATA_DIR.joinpath("to_download.txt")
GITHUB_TEST_TXT = "{}/to_download.txt".format(GITHUB_TEST_DATA_URL)

# Sample read as UTF-8 by the tests; the space in its file name is
# percent-encoded ("%20") in the URL form.
LOCAL_TEST_TXT_UTF8 = DATA_DIR.joinpath("Deep learning.txt")
GITHUB_TEST_TXT_UTF8 = "{}/Deep%20learning.txt".format(GITHUB_TEST_DATA_URL)


@pytest.fixture(scope="session")
def test_txt_content():
    """Return the content of the local test text file (session-scoped fixture).

    The encoding is passed explicitly so that the decoded text does not depend
    on the platform's default encoding, matching the other reads in this
    module that use ``encoding="utf-8"``.
    """
    return LOCAL_TEST_TXT.read_text(encoding="utf-8")


@pytest.fixture(scope="session")
def local_textual(test_txt_content):
    """Session-scoped `Textual` built from the `test_txt_content` fixture."""
    textual = Textual(test_txt_content)
    return textual


class Test_Textual:
    """Tests for creating `Textual` objects from raw text, a local path and a URL."""

    @staticmethod
    def _flatten(text):
        """Return `text` in the form the tests expect for `Textual.text`.

        Newlines are replaced by a single space and carriage returns are dropped.
        """
        return text.replace("\n", " ").replace("\r", "")

    def test_init(self, local_textual, test_txt_content):
        """Test initialization of Textual from text"""
        assert local_textual.text == self._flatten(test_txt_content)

    def test_init_encoding(self):
        """Test initialization of Textual from text read from a UTF-8 encoded file"""
        content = LOCAL_TEST_TXT_UTF8.read_text(encoding="utf-8")
        textual = Textual(content)
        assert textual.text == self._flatten(content)

    def test_from_path(self, local_textual):
        """Test create Textual from path (existing)"""
        textual = Textual.from_path(LOCAL_TEST_TXT)
        assert textual.text == local_textual.text

    def test_from_path_error(self):
        """Test extract Textual of file that does not exist"""
        # NOTE(review): this pins FileExistsError (not FileNotFoundError) for a
        # missing path -- confirm it matches what `Textual.from_path` raises.
        with pytest.raises(FileExistsError):
            # Only the raised exception matters, so the result is not bound.
            Textual.from_path("does_not_exist.txt")

    def test_from_url(self, check_connection_github, local_textual):
        """Test extract Textual from URL"""
        # `check_connection_github` is requested but not used in the body;
        # presumably it guards against GitHub being unreachable (see test_utils).
        textual = Textual.from_url(GITHUB_TEST_TXT)
        assert textual.text == local_textual.text, f"URL text: {textual.text}"

    def test_from_url_non_ascii(self, check_connection_github):
        """Test extract Textual from URL with non-ascii characters"""
        textual = Textual.from_url(GITHUB_TEST_TXT_UTF8)
        # The local copy of the same file, read as UTF-8, is the reference.
        content = self._flatten(LOCAL_TEST_TXT_UTF8.read_text(encoding="utf-8"))
        assert textual.text == content, f"URL text: {textual.text}"
