import pytest
import os
import jsonlines
from rara_tools.elastic import KataElastic
from rara_linker.linkers.linker import Linker
from rara_linker.config import EntityType
from rara_linker.exceptions import InvalidInputError
from time import sleep
from typing import List

# JSON-lines fixture files that are uploaded into the test indices.
# The paths are relative (presumably to the repository root the tests are
# run from -- confirm against the project's pytest invocation).
ES_DATA_DIR = os.path.join("tests", "test_data", "es_data")
EMS_TEST_FILE = os.path.join(ES_DATA_DIR, "ems_es_test.jl")
PER_TEST_FILE = os.path.join(ES_DATA_DIR, "persons_es_test.jl")
ORG_TEST_FILE = os.path.join(ES_DATA_DIR, "organizations_es_test.jl")
LOC_TEST_FILE = os.path.join(ES_DATA_DIR, "locations_es_test.jl")

# Names of the Elasticsearch indices that test_index_upload creates and
# test_index_deleting removes again.
PER_TEST_INDEX = "per_test_linker"
ORG_TEST_INDEX = "org_test_linker"
LOC_TEST_INDEX = "loc_test_linker"
EMS_TEST_INDEX = "ems_test_linker"


# Elasticsearch endpoint used by the tests; falls back to a local node when
# the ELASTIC_TEST_URL environment variable is not set.
ES_URL = os.getenv("ELASTIC_TEST_URL", "http://localhost:9200")
# Client used for creating, filling and deleting the test indices.
ELASTIC = KataElastic(ES_URL)

# Passed to Linker as `vectorizer_or_dir_path`.
VECTORIZER_DATA_DIR = os.path.join("tests", "vectorizer_data")


def _linker_kwargs():
    """Build the keyword arguments shared by both module-level test linkers.

    A new set of config dicts is created on every call, so the two Linker
    instances never share a config object.
    """
    index_by_config = (
        ("per_config", PER_TEST_INDEX),
        ("org_config", ORG_TEST_INDEX),
        ("loc_config", LOC_TEST_INDEX),
        ("ems_config", EMS_TEST_INDEX),
    )
    kwargs = {"vectorizer_or_dir_path": VECTORIZER_DATA_DIR}
    for config_name, index_name in index_by_config:
        kwargs[config_name] = {"es_host": ES_URL, "es_index": index_name}
    return kwargs


# Linker pointed at the test indices, with VIAF info disabled.
LINKER = Linker(add_viaf_info=False, **_linker_kwargs())

# Same configuration, but with VIAF info enabled.
LINKER_WITH_VIAF = Linker(add_viaf_info=True, **_linker_kwargs())


def load_jl(file_path: str):
    """Read a whole JSON-lines file into memory.

    Args:
        file_path: Path of the JSON-lines file to read.

    Returns:
        A list holding every document yielded by the jsonlines reader,
        in file order.
    """
    with jsonlines.open(file_path, "r") as reader:
        return list(reader)

def jl_iterator(file_path: str):
    """Lazily yield the documents of a JSON-lines file one at a time.

    The file is opened when iteration starts and stays open until the
    generator is exhausted or closed.

    Args:
        file_path: Path of the JSON-lines file to read.

    Yields:
        Each document produced by the jsonlines reader, in file order.
    """
    with jsonlines.open(file_path, "r") as reader:
        yield from reader

def upload_test_documents(
        elastic: KataElastic,
        data_file: str,
        test_index_name: str,
        vector_field: str = ""
    ):
    """Create a test index and index every document of a fixture file into it.

    Args:
        elastic: Client used to create the index and index the documents.
        data_file: Path of the JSON-lines file holding the documents.
        test_index_name: Name of the index to create and fill.
        vector_field: If non-empty, a vector mapping is added for this field
            before any document is indexed.

    Returns:
        The response of the last `index_document` call.

    Raises:
        ValueError: If `data_file` contains no documents. This is checked
            before the index is created, so an empty fixture does not leave
            an empty index behind.
    """
    documents = load_jl(data_file)
    if not documents:
        # Without this guard the function would fail with an opaque
        # UnboundLocalError when returning the (never assigned) result.
        raise ValueError(f"No documents found in test data file: {data_file}")

    # Create test index
    elastic.create_index(test_index_name)

    # If vector field is specified, add vector mapping
    if vector_field:
        elastic.add_vector_mapping(
            index_name=test_index_name,
            field=vector_field
        )

    # NOTE(review): presumably gives Elasticsearch time to apply the index
    # and mapping changes before documents arrive -- confirm it is needed.
    sleep(1)

    indexed = None
    for document in documents:
        indexed = elastic.index_document(test_index_name, document)
    return indexed

@pytest.mark.order(1)
def test_index_upload():
    """Create and fill the EMS, LOC, PER and ORG test indices, in that order.

    For each index, the response returned by upload_test_documents must
    report the result "created".
    """
    # Keyword arguments per upload; only PER and ORG get a vector mapping.
    uploads = (
        {"data_file": EMS_TEST_FILE, "test_index_name": EMS_TEST_INDEX},
        {"data_file": LOC_TEST_FILE, "test_index_name": LOC_TEST_INDEX},
        {
            "data_file": PER_TEST_FILE,
            "test_index_name": PER_TEST_INDEX,
            "vector_field": "vector",
        },
        {
            "data_file": ORG_TEST_FILE,
            "test_index_name": ORG_TEST_INDEX,
            "vector_field": "vector",
        },
    )
    for upload_kwargs in uploads:
        response = upload_test_documents(elastic=ELASTIC, **upload_kwargs)
        assert response["result"] == "created"
    
@pytest.mark.order(2)
def test_per_linking_exact():
    """Linking "Paul Keres" yields two matches of entity type PER."""
    result = LINKER.link(entity="Paul Keres")

    assert result.n_linked == 2
    assert result.entity_type == EntityType.PER.value
    

@pytest.mark.order(3)
def test_per_linking_fuzzy():
    """Linking the misspelled "Paul Keers" yields three PER matches."""
    result = LINKER.link(entity="Paul Keers")

    assert result.n_linked == 3
    assert result.entity_type == EntityType.PER.value
    
@pytest.mark.order(4)
def test_per_linking_fuzzy_with_vector_search():
    """Adding context to the misspelled "Paul Keers" leaves a single match.

    The remaining match must be the PER record whose Elasticsearch
    description is "Eesti maletaja ja maleteoreetik".
    """
    wanted_description = "Eesti maletaja ja maleteoreetik"
    surrounding_text = "Selgusid 53. maleturniiri võitjad"

    result = LINKER.link(entity="Paul Keers", context=surrounding_text)

    assert result.n_linked == 1
    assert result.linked_info[0].elastic["description"] == wanted_description
    assert result.entity_type == EntityType.PER.value
    
@pytest.mark.order(5)
def test_org_linking_fuzzy():
    """Linking "Gustav Adolfi Gümnasium" yields exactly one ORG match."""
    result = LINKER.link(entity="Gustav Adolfi Gümnasium")

    assert result.n_linked == 1
    assert result.entity_type == EntityType.ORG.value
    
@pytest.mark.order(6)
def test_org_acronym_linking():
    """Linking the acronym "EKI" without context yields three ORG matches."""
    result = LINKER.link(entity="EKI")

    assert result.n_linked == 3
    assert result.entity_type == EntityType.ORG.value
    

@pytest.mark.order(7)
def test_org_acronym_linking_with_vector_search():
    """With context, the acronym "EKI" links only to "Eesti Keele Instituut"."""
    surrounding_text = "Tavast: keelemudeli arendajad ei soovi eesti keele korpust isegi tasuta"

    result = LINKER.link(entity="EKI", context=surrounding_text)

    assert result.n_linked == 1
    assert result.entity_type == EntityType.ORG.value
    assert result.linked_info[0].linked_entity == "Eesti Keele Instituut"
    
@pytest.mark.order(8)
def test_loc_linking():
    """Linking "Reval" yields the single LOC match "Tallinn"."""
    result = LINKER.link(entity="Reval")

    assert result.n_linked == 1
    assert result.linked_info[0].linked_entity == "Tallinn"
    assert result.entity_type == EntityType.LOC.value
    
@pytest.mark.order(9)
def test_ems_en_keyword_linking():
    """Linking "cinematography" yields the single keyword match "filmikunst"."""
    result = LINKER.link(entity="cinematography")

    assert result.n_linked == 1
    assert result.linked_info[0].linked_entity == "filmikunst"
    assert result.entity_type == EntityType.KEYWORD.value
    
@pytest.mark.order(10)
def test_ems_et_keyword_linking():
    """Linking "harimatu" yields the single keyword match "harimatus"."""
    result = LINKER.link(entity="harimatu")

    assert result.n_linked == 1
    assert result.linked_info[0].linked_entity == "harimatus"
    assert result.entity_type == EntityType.KEYWORD.value
    
@pytest.mark.order(11)
def test_entity_type_param():
    """The `entity_type` argument changes which record "feline" links to.

    Without it, "feline" links to the location "Viljandi"; with
    entity_type="EMS_KEYWORD" it links to the keyword "kaslased".
    """
    # No entity type given: the linker settles on a location.
    untyped = LINKER.link(entity="feline")
    assert untyped.n_linked == 1
    assert untyped.linked_info[0].linked_entity == "Viljandi"
    assert untyped.entity_type == EntityType.LOC.value

    # Entity type forced to EMS keyword.
    as_keyword = LINKER.link(entity="feline", entity_type="EMS_KEYWORD")
    assert as_keyword.n_linked == 1
    assert as_keyword.linked_info[0].linked_entity == "kaslased"
    assert as_keyword.entity_type == EntityType.KEYWORD.value
    
@pytest.mark.order(12)
def test_prefix_length_param():
    """The `prefix_length` argument decides whether "Raul Keres" links.

    With the linker's default settings nothing is linked and the type is
    UNK; with prefix_length=0 two PER matches are returned.
    """
    with_defaults = LINKER.link(entity="Raul Keres")
    assert with_defaults.n_linked == 0
    assert with_defaults.entity_type == EntityType.UNK.value

    # NOTE(review): presumably prefix_length=0 lets the fuzzy match differ
    # already in the first character -- confirm against the Linker docs.
    without_prefix = LINKER.link(entity="Raul Keres", prefix_length=0)
    assert without_prefix.n_linked == 2
    assert without_prefix.entity_type == EntityType.PER.value
    
@pytest.mark.order(13)
def test_fuzziness_param():
    """The `fuzziness` argument decides whether "Heino Barrik" links.

    With the linker's default settings one PER match is returned; with
    fuzziness=0 nothing is linked and the type is UNK.
    """
    with_defaults = LINKER.link(entity="Heino Barrik")
    assert with_defaults.n_linked == 1
    assert with_defaults.entity_type == EntityType.PER.value

    strict = LINKER.link(entity="Heino Barrik", fuzziness=0)
    assert strict.n_linked == 0
    assert strict.entity_type == EntityType.UNK.value


@pytest.mark.order(14)
def test_output_has_required_fields():
    """A VIAF-enabled link result exposes all expected, non-empty fields.

    Checks the top-level result for "Jarmo Kauge" and then the truthiness
    of every field on the single linked document.
    """
    name = "Jarmo Kauge"
    result = LINKER_WITH_VIAF.link(entity=name)

    # Top-level result.
    assert result.n_linked == 1
    assert result.entity_type == EntityType.PER.value
    assert result.original_entity == name
    assert result.similarity_score == 1.0
    assert result.linking_config

    # The one linked document.
    document = result.linked_info[0]
    assert document.to_dict()
    for field_name in ("elastic", "json", "marc", "linked_entity", "viaf"):
        assert getattr(document, field_name)
    
@pytest.mark.order(15)
def test_per_stage_name_linking():
    """The single-word name "Shakira" must link to exactly one PER record."""
    result = LINKER.link(entity="Shakira")

    assert result.n_linked == 1
    assert result.entity_type == EntityType.PER.value
    
    
@pytest.mark.order(16)
def test_per_single_surname_linking():
    """A bare surname must not link, while the full name must.

    "Snicket" alone yields no match (type UNK); "Lemony Snicket" yields
    one PER match.
    """
    surname_only = LINKER.link(entity="Snicket")
    assert surname_only.n_linked == 0
    assert surname_only.entity_type == EntityType.UNK.value

    full_name = LINKER.link(entity="Lemony Snicket")
    assert full_name.n_linked == 1
    assert full_name.entity_type == EntityType.PER.value
    
    
@pytest.mark.order(17)
def test_linking_empty_entity_raises_exception():
    """Linking an empty string must raise InvalidInputError."""
    with pytest.raises(InvalidInputError):
        LINKER.link(entity="")


@pytest.mark.order(18)
def test_linking_nonstring_entity_raises_exception():
    """Linking a non-string entity (here an int) must raise InvalidInputError."""
    with pytest.raises(InvalidInputError):
        LINKER.link(entity=200)
    

@pytest.mark.order(19)
def test_index_deleting():
    """
    Delete every test index and check that each deletion was acknowledged.

    All deletions are issued before any response is checked, so one
    unacknowledged deletion does not leave the remaining test indices
    behind on the cluster.
    """
    indices = [PER_TEST_INDEX, ORG_TEST_INDEX, LOC_TEST_INDEX, EMS_TEST_INDEX]

    responses = {}
    for index in indices:
        responses[index] = ELASTIC.delete_index(index)
        # NOTE(review): pause kept from the original test; presumably it
        # gives Elasticsearch time between deletions -- confirm it is needed.
        sleep(1)

    not_acknowledged = [
        index
        for index, response in responses.items()
        if response["acknowledged"] is not True
    ]
    assert not not_acknowledged, (
        f"Deletion was not acknowledged for: {not_acknowledged}"
    )

