from typing import TypedDict

from llm_guard.model import Model


class NERConfig(TypedDict):
    """Typed schema for one NER model configuration.

    Every ``*_CONF`` constant in this module is annotated with this type and
    provides all of the keys declared here.
    """

    # Presidio entity names this configuration declares as supported.
    PRESIDIO_SUPPORTED_ENTITIES: list[str]
    # Model descriptor (model path/revision, ONNX location, pipeline and
    # tokenizer kwargs) used for this configuration.
    DEFAULT_MODEL: Model
    # Labels to ignore; presumably predictions with these labels are dropped
    # by the consumer -- confirm against the recognizer code.
    LABELS_TO_IGNORE: list[str]
    # Explanation template containing a single ``{}`` placeholder
    # (presumably filled with the detected entity name -- confirm at call site).
    DEFAULT_EXPLANATION: str
    # Maps labels emitted by the model to Presidio entity names.
    MODEL_TO_PRESIDIO_MAPPING: dict[str, str]
    # Chunking parameters; presumably used to split long inputs before
    # inference. Units (characters vs. tokens) are not visible here -- TODO confirm.
    CHUNK_OVERLAP_SIZE: int
    CHUNK_SIZE: int
    # NOTE(review): presumably the score of entities named ID_ENTITY_NAME is
    # multiplied by ID_SCORE_MULTIPLIER -- confirm against the consumer.
    ID_SCORE_MULTIPLIER: float
    ID_ENTITY_NAME: str


# Configuration for the dslim/bert-base-NER model.
# The PyTorch and ONNX weights come from the same repository and pinned revision.
BERT_BASE_NER_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=["LOCATION", "PERSON", "ORGANIZATION"],
    DEFAULT_MODEL=Model(
        path="dslim/bert-base-NER",
        revision="5271995b4b596a059d6efbce30031042aed67cb6",
        onnx_path="dslim/bert-base-NER",
        onnx_revision="5271995b4b596a059d6efbce30031042aed67cb6",
        onnx_subfolder="onnx",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the dslim/bert-base-NER NER model",
    # Model label -> Presidio entity name. MISC maps to "O", which is listed
    # in LABELS_TO_IGNORE above.
    MODEL_TO_PRESIDIO_MAPPING={
        "MISC": "O",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        "PER": "PERSON",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)

# Configuration for the dslim/bert-large-NER model.
# The PyTorch and ONNX weights come from the same repository and pinned revision.
BERT_LARGE_NER_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=["LOCATION", "PERSON", "ORGANIZATION"],
    DEFAULT_MODEL=Model(
        path="dslim/bert-large-NER",
        revision="13e784dccceca07aee7a7aab4ad487c605975423",
        onnx_path="dslim/bert-large-NER",
        onnx_revision="13e784dccceca07aee7a7aab4ad487c605975423",
        onnx_subfolder="onnx",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the dslim/bert-large-NER NER model",
    # Model label -> Presidio entity name. MISC maps to "O", which is listed
    # in LABELS_TO_IGNORE above.
    MODEL_TO_PRESIDIO_MAPPING={
        "MISC": "O",
        "LOC": "LOCATION",
        "ORG": "ORGANIZATION",
        "PER": "PERSON",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)

# Configuration for the gyr66/bert-base-chinese-finetuned-ner model.
# The ONNX weights are taken from a separate repository
# (TangoBeeAkto/bert-chinese-ner-onnx); no onnx_revision or onnx_subfolder is
# set here, so whatever defaults Model defines apply.
BERT_ZH_NER_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=["LOCATION", "PERSON", "ORGANIZATION"],
    DEFAULT_MODEL=Model(
        path="gyr66/bert-base-chinese-finetuned-ner",
        revision="42abc0872240300638223d0e46b9aacdcbcd2906",
        onnx_path="TangoBeeAkto/bert-chinese-ner-onnx",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the gyr66/bert-base-chinese-finetuned-ner NER model",
    # Model label -> Presidio entity name. The model labels mapped here are
    # lower-case, apart from MISC.
    MODEL_TO_PRESIDIO_MAPPING={
        "MISC": "O",
        "address": "LOCATION",
        "company": "ORGANIZATION",
        "name": "PERSON",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)

# Configuration for the Isotonic/distilbert_finetuned_ai4privacy_v2 model.
# The PyTorch and ONNX weights come from the same repository and pinned revision.
DISTILBERT_AI4PRIVACY_v2_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=[
        "LOCATION", "PERSON", "ORGANIZATION",
        "EMAIL_ADDRESS", "PHONE_NUMBER",
        "CREDIT_CARD", "CRYPTO",
        "DATE_TIME",
        "IBAN_CODE", "IP_ADDRESS", "URL",
    ],
    DEFAULT_MODEL=Model(
        path="Isotonic/distilbert_finetuned_ai4privacy_v2",
        revision="51d7b98bad735ff1af5bda6a6a9a7fbebd102ac9",
        onnx_path="Isotonic/distilbert_finetuned_ai4privacy_v2",
        onnx_revision="51d7b98bad735ff1af5bda6a6a9a7fbebd102ac9",
        onnx_subfolder="onnx",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the Isotonic/distilbert_finetuned_ai4privacy_v2 NER model",
    # Model label -> Presidio entity name.
    # NOTE(review): the targets DATE_OF_BIRTH and AGE are not listed in
    # PRESIDIO_SUPPORTED_ENTITIES above -- confirm this is intentional.
    MODEL_TO_PRESIDIO_MAPPING={
        # "O" is listed in LABELS_TO_IGNORE.
        "MISC": "O",
        # Address components.
        "STREET": "LOCATION",
        "CITY": "LOCATION",
        "ZIPCODE": "LOCATION",
        "BUILDINGNUMBER": "LOCATION",
        "NEARBYGPSCOORDINATES": "LOCATION",
        "SECONDARYADDRESS": "LOCATION",
        "STATE": "LOCATION",
        "COUNTY": "LOCATION",
        # Contact details and organisations.
        "EMAIL": "EMAIL_ADDRESS",
        "COMPANYNAME": "ORGANIZATION",
        "PHONENUMBER": "PHONE_NUMBER",
        # Name parts.
        "FIRSTNAME": "PERSON",
        "LASTNAME": "PERSON",
        "MIDDLENAME": "PERSON",
        # Payment cards and crypto wallets.
        "CREDITCARDNUMBER": "CREDIT_CARD",
        "ETHEREUMADDRESS": "CRYPTO",
        "BITCOINADDRESS": "CRYPTO",
        "LITECOINADDRESS": "CRYPTO",
        # Dates and times.
        "DATE": "DATE_TIME",
        "TIME": "DATE_TIME",
        "DOB": "DATE_OF_BIRTH",
        # Bank and network identifiers.
        "IBAN": "IBAN_CODE",
        "IPV4": "IP_ADDRESS",
        "IPV6": "IP_ADDRESS",
        "IP": "IP_ADDRESS",
        "URL": "URL",
        "AGE": "AGE",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)

# Configuration for the Isotonic/deberta-v3-base_finetuned_ai4privacy_v2 model.
# The PyTorch and ONNX weights come from the same repository and pinned revision.
DEBERTA_AI4PRIVACY_v2_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=[
        "LOCATION", "PERSON", "ORGANIZATION",
        "EMAIL_ADDRESS", "PHONE_NUMBER",
        "CREDIT_CARD", "CRYPTO",
        "DATE_TIME",
        "IBAN_CODE", "IP_ADDRESS", "URL",
    ],
    DEFAULT_MODEL=Model(
        path="Isotonic/deberta-v3-base_finetuned_ai4privacy_v2",
        revision="9ea992753ab2686be4a8f64605ccc7be197ad794",
        onnx_path="Isotonic/deberta-v3-base_finetuned_ai4privacy_v2",
        onnx_revision="9ea992753ab2686be4a8f64605ccc7be197ad794",
        onnx_subfolder="onnx",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the Isotonic/deberta-v3-base_finetuned_ai4privacy_v2 NER model",
    # Model label -> Presidio entity name.
    # NOTE(review): the targets DATE_OF_BIRTH and AGE are not listed in
    # PRESIDIO_SUPPORTED_ENTITIES above -- confirm this is intentional.
    MODEL_TO_PRESIDIO_MAPPING={
        # "O" is listed in LABELS_TO_IGNORE.
        "MISC": "O",
        # Address components.
        "STREET": "LOCATION",
        "CITY": "LOCATION",
        "ZIPCODE": "LOCATION",
        "BUILDINGNUMBER": "LOCATION",
        "NEARBYGPSCOORDINATES": "LOCATION",
        "SECONDARYADDRESS": "LOCATION",
        "STATE": "LOCATION",
        "COUNTY": "LOCATION",
        # Contact details and organisations.
        "EMAIL": "EMAIL_ADDRESS",
        "COMPANYNAME": "ORGANIZATION",
        "PHONENUMBER": "PHONE_NUMBER",
        # Name parts.
        "FIRSTNAME": "PERSON",
        "LASTNAME": "PERSON",
        "MIDDLENAME": "PERSON",
        # Payment cards and crypto wallets.
        "CREDITCARDNUMBER": "CREDIT_CARD",
        "ETHEREUMADDRESS": "CRYPTO",
        "BITCOINADDRESS": "CRYPTO",
        "LITECOINADDRESS": "CRYPTO",
        # Dates and times.
        "DATE": "DATE_TIME",
        "TIME": "DATE_TIME",
        "DOB": "DATE_OF_BIRTH",
        # Bank and network identifiers.
        "IBAN": "IBAN_CODE",
        "IPV4": "IP_ADDRESS",
        "IPV6": "IP_ADDRESS",
        "IP": "IP_ADDRESS",
        "URL": "URL",
        "AGE": "AGE",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)

# Configuration for the Isotonic/mdeberta-v3-base_finetuned_ai4privacy_v2 model.
# The PyTorch and ONNX weights come from the same repository and pinned revision.
MDEBERTA_AI4PRIVACY_v2_CONF: NERConfig = {
    # Presidio entity names this configuration declares as supported.
    "PRESIDIO_SUPPORTED_ENTITIES": [
        "LOCATION",
        "PERSON",
        "ORGANIZATION",
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "CREDIT_CARD",
        "CRYPTO",
        "DATE_TIME",
        "IBAN_CODE",
        "IP_ADDRESS",
        "URL",
    ],
    "DEFAULT_MODEL": Model(
        path="Isotonic/mdeberta-v3-base_finetuned_ai4privacy_v2",
        revision="316240516ad48a82ae1b13567670093901e41b5e",
        onnx_path="Isotonic/mdeberta-v3-base_finetuned_ai4privacy_v2",
        onnx_revision="316240516ad48a82ae1b13567670093901e41b5e",
        onnx_subfolder="onnx",
        pipeline_kwargs={
            "aggregation_strategy": "simple",
        },
        tokenizer_kwargs={"model_input_names": ["input_ids", "attention_mask"]},
    ),
    "LABELS_TO_IGNORE": ["O", "CARDINAL"],
    # The explanation must name the model configured in DEFAULT_MODEL above
    # (mdeberta, not deberta); it previously carried the deberta-v3-base name
    # copied from DEBERTA_AI4PRIVACY_v2_CONF.
    "DEFAULT_EXPLANATION": "Identified as {} by the Isotonic/mdeberta-v3-base_finetuned_ai4privacy_v2 NER model",
    # Model label -> Presidio entity name.
    # NOTE(review): the targets DATE_OF_BIRTH and AGE are not listed in
    # PRESIDIO_SUPPORTED_ENTITIES above -- confirm this is intentional.
    "MODEL_TO_PRESIDIO_MAPPING": {
        # "O" is listed in LABELS_TO_IGNORE.
        "MISC": "O",
        "STREET": "LOCATION",
        "CITY": "LOCATION",
        "ZIPCODE": "LOCATION",
        "BUILDINGNUMBER": "LOCATION",
        "NEARBYGPSCOORDINATES": "LOCATION",
        "SECONDARYADDRESS": "LOCATION",
        "STATE": "LOCATION",
        "COUNTY": "LOCATION",
        "EMAIL": "EMAIL_ADDRESS",
        "COMPANYNAME": "ORGANIZATION",
        "PHONENUMBER": "PHONE_NUMBER",
        "FIRSTNAME": "PERSON",
        "LASTNAME": "PERSON",
        "MIDDLENAME": "PERSON",
        "CREDITCARDNUMBER": "CREDIT_CARD",
        "ETHEREUMADDRESS": "CRYPTO",
        "BITCOINADDRESS": "CRYPTO",
        "LITECOINADDRESS": "CRYPTO",
        "DATE": "DATE_TIME",
        "TIME": "DATE_TIME",
        "DOB": "DATE_OF_BIRTH",
        "IBAN": "IBAN_CODE",
        "IPV4": "IP_ADDRESS",
        "IPV6": "IP_ADDRESS",
        "IP": "IP_ADDRESS",
        "URL": "URL",
        "AGE": "AGE",
    },
    # Chunking parameters; units are not visible in this module -- TODO confirm.
    "CHUNK_OVERLAP_SIZE": 40,
    "CHUNK_SIZE": 600,
    "ID_SCORE_MULTIPLIER": 0.4,
    "ID_ENTITY_NAME": "ID",
}

# Configuration for the lakshyakh93/deberta_finetuned_pii model.
# The ONNX weights are taken from a separate repository
# (TangoBeeAkto/deberta-pii-onnx) with an empty subfolder; no onnx_revision is
# set here, so whatever default Model defines applies.
DEBERTA_LAKSHYAKH93_CONF: NERConfig = NERConfig(
    PRESIDIO_SUPPORTED_ENTITIES=[
        "LOCATION", "PERSON", "ORGANIZATION",
        "EMAIL_ADDRESS", "PHONE_NUMBER",
        "CREDIT_CARD", "CRYPTO",
        "DATE_TIME",
        "IBAN_CODE", "IP_ADDRESS", "URL",
    ],
    DEFAULT_MODEL=Model(
        path="lakshyakh93/deberta_finetuned_pii",
        revision="a038061af92047b0afbbd5ca07d7aa0521789379",
        onnx_path="TangoBeeAkto/deberta-pii-onnx",
        onnx_subfolder="",
        pipeline_kwargs={"aggregation_strategy": "simple"},
        tokenizer_kwargs={
            "model_input_names": ["input_ids", "attention_mask"],
        },
    ),
    LABELS_TO_IGNORE=["O", "CARDINAL"],
    DEFAULT_EXPLANATION="Identified as {} by the lakshyakh93/deberta_finetuned_pii NER model",
    # Model label -> Presidio entity name.
    # NOTE(review): the target US_SSN is not listed in
    # PRESIDIO_SUPPORTED_ENTITIES above -- confirm this is intentional.
    MODEL_TO_PRESIDIO_MAPPING={
        # "O" is listed in LABELS_TO_IGNORE.
        "MISC": "O",
        "BUILDINGNUMBER": "LOCATION",
        "NEARBYGPSCOORDINATE": "LOCATION",
        "STREET": "LOCATION",
        "SECONDARYADDRESS": "LOCATION",
        "PHONE_NUMBER": "PHONE_NUMBER",
        "EMAIL": "EMAIL_ADDRESS",
        "COMPANY_NAME": "ORGANIZATION",
        # Name parts.
        "FIRSTNAME": "PERSON",
        "FULLNAME": "PERSON",
        "NAME": "PERSON",
        "LASTNAME": "PERSON",
        "MIDDLENAME": "PERSON",
        "DATE": "DATE_TIME",
        "TIME": "DATE_TIME",
        "BITCOINADDRESS": "CRYPTO",
        "URL": "URL",
        "ETHEREUMADDRESS": "CRYPTO",
        "IPV4": "IP_ADDRESS",
        "IPV6": "IP_ADDRESS",
        "CITY": "LOCATION",
        "ZIPCODE": "LOCATION",
        "STREETADDRESS": "LOCATION",
        "CREDITCARDNUMBER": "CREDIT_CARD",
        "STATE": "LOCATION",
        "COUNTY": "LOCATION",
        "SSN": "US_SSN",
        "LITECOINADDRESS": "CRYPTO",
        "IP": "IP_ADDRESS",
        "IBAN": "IBAN_CODE",
    },
    CHUNK_OVERLAP_SIZE=40,
    CHUNK_SIZE=600,
    ID_SCORE_MULTIPLIER=0.4,
    ID_ENTITY_NAME="ID",
)
