# Lists of valid tags and mappings for tags canonicalization.
#
# Copyright (c) 2020-2021 Tatu Ylonen.  See file LICENSE and https://ylonen.org

# Mappings for tags in template head line ends outside parentheses.  These are
# also used to parse final tags from translations.
xlat_head_map = {
    # Keys are the raw gender/number/aspect/class strings found at the end of
    # head lines and translations; values are space-separated canonical tags.
    # NOTE(review): a leading "?" on a value and empty-string values appear to
    # mark suspicious or deliberately ignored inputs -- confirm against the
    # code that consumes this map.
    # Each key must be listed only once: in a dict literal a repeated key
    # silently replaces the earlier value.
    "m": "masculine",
    "f": "feminine",
    "m/f": "masculine feminine",
    "m./f.": "masculine feminine",
    "m or f": "masculine feminine",
    "m or n": "masculine neuter",
    "m or c": "masculine common",
    "f or m": "feminine masculine",
    "f or n": "feminine neuter",
    "f or n.": "feminine neuter",  # fimmtíu/Icelandic
    "f or c": "feminine common",  # sustainability/Tr/Norwegian
    "n or f": "neuter feminine",
    "n or m": "neuter masculine",
    "n or c": "neuter common",
    "c or m": "common masculine",
    "c or f": "common feminine",  # picture book/Tr/Norwegian
    "c or n": "common neuter",  # ethylene/Eng/Tr/Danish
    "m or f or n": "masculine feminine neuter",
    "f or m or n": "feminine masculine neuter",
    "m or f or c": "masculine feminine common",
    "f or m or c": "feminine masculine common",
    "m or c or n": "masculine common neuter",
    "f or c or n": "feminine common neuter",
    "m or n or c": "masculine neuter common",
    "f or n or c": "feminine neuter common",
    "c or f or n": "common feminine neuter",
    "c or m or n": "common masculine neuter",
    "n or f or c": "neuter feminine common",
    "n or m or c": "neuter masculine common",
    "n or m or f": "neuter masculine feminine",
    "n or f or m": "neuter feminine masculine",
    "c or m or f": "common masculine feminine",
    "c or f or m": "common masculine feminine",
    "f or n or m": "feminine neuter masculine",
    "m or n or f": "masculine neuter feminine",  # cataract/Tr/Dutch
    "f or c or m": "feminine common masculine",
    "m or c or f": "masculine common feminine",
    "m or f or m": "?masculine feminine",  # fantasma,soldado/Portuguese
    "pl or f": "feminine singular plural",  # XXX grit/Eng/Tr(husked...)/German
    "pl or m": "masculine singular plural",
    "pl or n": "neuter singular plural",  # ordnance/Tr/German
    "pl or c": "common singular plural",  # "you don't say"/Tr/Romanian
    "sg or f": "singular feminine",
    "sg or m": "singular masculine",
    "sg or n": "singular neuter",
    "sg or c": "singular common",
    "m or sg": "masculine singular",
    "f or sg": "feminine singular",
    # This key used to be a second "m or sg", which overrode the masculine
    # entry above and left "n or sg" unmapped.
    "n or sg": "neuter singular",
    "c or sg": "common singular",
    # NOTE(review): "m or pl", "f or pl", "n or pl" and "c or pl" used to be
    # listed twice; an earlier set mapped them to "<gender> singular plural"
    # (information/Eng/Tr/Latgalian, table scrap/Tr/Greek) but was silently
    # overridden by these entries.  The effective values are kept here;
    # confirm which reading is intended.
    "m or pl": "masculine plural",
    "f or pl": "feminine plural",
    "n or pl": "neuter plural",
    "c or pl": "common plural",
    "m or f pl": "masculine feminine plural",
    "c or n or n pl": "common neuter singular plural",  # XXX augmentation/Tr
    "pl or m or f": "masculine feminine singular plural",  # XXX suc* my co*/Tr
    "m or f or sg or pl": "masculine feminine singular plural",  # Ainu/Russian
    "m or f or pl": "masculine feminine plural",  # that/Tr/Dutch
    "m or f sg": "masculine feminine singular",
    "pl or f or m or n": "",  # Sindhi/Tr(language)/Spanish
    "pl or f or n": "masculine feminine neuter plural singular singular",  # XXX
    # crush/Portuguese head
    "m or m f": "?masculine feminine",
    # beginner/Eng/Tr/Polish
    "m or m pl": "masculine singular plural",
    "f or f pl": "feminine singular plural",
    "n or n pl": "neuter singular plural",
    "c or c pl": "common singular plural",
    "f pl or n pl": "feminine neuter plural",  # diurnal/Eng/Tr/Polish
    "f pl or n or n pl": "feminine neuter singular plural",  # veneral/Tr/Polish
    "m or m pl or f or f pl or n or n pl": "",  # "a lot"/Tr/Latin
    "pl or n or n pl": "neuter singular plural",  # salt/Tr/Greek
    "f or f": "feminine",
    "topo.": "toponymic",  # E.g., p/Egyptian
    "n": "neuter",
    "c": "common",  # common gender in at least West Frisian
    "sg": "singular",
    "pl": "plural",
    "pl or sg": "plural singular",
    "sg or pl": "singular plural",
    "m sg or m pl": "masculine singular plural",  # valenki/Tr/German
    "f sg or f pl": "feminine singular plural",
    "n sg or n pl": "neuter singular plural",
    "c sg or c pl": "common singular plural",
    "m pl or f pl": "masculine feminine plural",  # comedian/English/Tr/Welsh
    "m pl or n pl": "masculine neuter plural",  # whose/Tr/Latin
    "m pl or n": "?masculine neuter plural singular",  # pimpernel/Tr/Bulgarian
    "m sg or f sg": "masculine singular feminine",  # your/Eng/Tr/Walloon
    "f sg or m sg": "masculine singular feminine",  # your/Eng/Tr/Walloon
    # NOTE(review): the value below looks copy-pasted from the two entries
    # above; a neuter-only key mapping to masculine/feminine is suspicious.
    # Left unchanged until the intended tags are confirmed.
    "n sg or n sg": "masculine singular feminine",  # your/Eng/Tr/Walloon
    # copacetic/English/Tr/Hebrew:
    "m or m pl or f or f pl": "masculine feminine singular plural",
    # your/Eng/Tr/Norwegian:
    "m pl or f pl or n pl": "masculine feminine neuter plural",
    "m sg or f sg or n sg": "masculine feminine neuter singular",
    "m pl or f or f pl": "masculine feminine singular plural",
    "c pl or n pl": "common neuter plural",  # which/Tr/Danish
    "inan": "inanimate",
    "Inanimate": "inanimate",  # e.g., "James Bay"/English/Tr/Northern East Cree
    "inan or anim": "inanimate animate",
    "anim or inan": "animate inanimate",
    "anim": "animate",
    "f anim": "feminine animate",
    "m anim": "masculine animate",
    "n anim": "neuter animate",
    "f inan": "feminine inanimate",
    "m inan": "masculine inanimate",
    "n inan": "neuter inanimate",
    "f anim sg": "feminine animate singular",
    "m anim sg": "masculine animate singular",
    "n anim sg": "neuter animate singular",
    "f inan sg": "feminine inanimate singular",
    "m inan sg": "masculine inanimate singular",
    "n inan sg": "neuter inanimate singular",
    "f anim pl": "feminine animate plural",
    "m anim pl": "masculine animate plural",
    "n anim pl": "neuter animate plural",
    "f inan pl": "feminine inanimate plural",
    "m inan pl": "masculine inanimate plural",
    "n inan pl": "neuter inanimate plural",
    "f anim or f inan": "feminine animate inanimate",
    "f inan or f anim": "feminine inanimate animate",
    "m anim or m inan": "masculine animate inanimate",
    "m inan or m anim": "masculine inanimate animate",
    "m anim or f anim": "masculine animate feminine",
    # The key below used to be misspelled "f anim or m anum".
    "f anim or m anim": "feminine animate masculine",
    "f inan or f inan pl": "feminine inanimate singular plural",
    "m inan or m inan pl": "masculine inanimate singular plural",
    "n inan or n inan pl": "neuter inanimate singular plural",
    "f anim or f anim pl": "feminine animate singular plural",
    "m anim or m anim pl": "masculine animate singular plural",
    "n anim or n anim pl": "neuter animate singular plural",
    "f inan or n inan": "feminine inanimate neuter",
    "m inan pl or m anim pl": "masculine inanimate animate plural",
    "f inan or m inan": "feminine masculine inanimate",
    "f inan or m inan or f inan pl":
    "feminine masculine inanimate singular plural",
    "f inan or m inan or f inan pl or m inan pl":
    "feminine masculine inanimate singular plural",
    "m inan pl or m anim pl or f anim pl":
    "masculine feminine inanimate animate plural",
    "f anim or f inan or f anim pl":
    "feminine animate inanimate singular plural",
    "f anim or f inan or f anim pl or f inan pl":
    "feminine animate inanimate singular plural",
    "f anim pl or f inan or f inan pl":
    "feminine animate inanimate singular plural",  # XXX
    "f inan pl or f anim or f anim pl":
    "feminine inanimate animate singular plural",  # XXX
    "m anim pl or f anim pl": "masculine feminine animate plural",
    "m anim pl or f anim pl or f inan or f inan pl":
    "masculine animate plural feminine inanimate",
    "m anim pl or f anim pl or f inan":
    "masculine animate feminine plural inanimate singular",  # XXX
    "f anim pl or f inan pl": "feminine animate inanimate plural",
    "f anim pl or f inan pl or m anim pl":
    "feminine masculine animate inanimate plural",
    "m anim pl or f anim pl or f inan pl":
    "masculine animate feminine plural inanimate",  # XXX
    "f inan pl or m anim pl": "feminine masculine animate inanimate plural",
    "f inan pl or m anim pl or f anim pl":
    "masculine animate feminine plural inanimate",  # XXX
    "m anim or f anim or m anim pl":
    "masculine animate feminine singular plural",
    "m anim or f anim or m anim pl or f anim pl":
    "masculine animate feminine singular plural",
    "n inan or n anim or m inan or m anim":
    "neuter inanimate animate masculine",
    "m anim pl or f anim pl or m anim or f anim":
    "masculine animate plural feminine singular",
    "m anim pl or f inan or f inan pl":
    "masculine animate plural feminine inanimate singular",  # XXX
    "m anim or n inan": "masculine animate neuter inanimate",  # XXX
    "n inan pl or m inan or m inan pl":
    "neuter inanimate plural masculine singular plural",  # XXX
    "n inan pl or f inan or f inan pl":
    "neuter inanimate plural feminine singular",  # XXX
    "f inan pl or m anim or m anim pl":
    "feminine inanimate plural masculine animate singular",  # XXX
    "f inan pl or m inan or m inan pl":
    "feminine inanimate plural masculine singular",  # XXX
    "n anim pl or n inan or n inan pl":
    "neuter animate plural inanimate singular",  # XXX
    "n inan or n inan pl or f inan or f inan pl":
    "neuter inanimate singular plural feminine",
    "n inan pl or n anim or n anim pl":
    "neuter inanimate plural animate singular",  # XXX
    "n anim or n inan": "neuter animate inanimate",
    "pers": "person",  # XXX check what this really is used for? personal?
    "npers": "impersonal",
    "f pers": "feminine person",
    "m pers": "masculine person",
    "f pers or f pers pl": "feminine person singular plural",
    "m pers or m pers pl": "masculine person singular plural",
    "m pers or f pers": "masculine person feminine",
    "f pers or m pers": "feminine person masculine",
    "m pers or n pers": "masculine person neuter",
    "f pers or n pers": "feminine person neuter",
    "m pers or m anim": "masculine person animate",
    "m pers or m inan": "masculine person inanimate",
    "f pers or f anim": "feminine person animate",
    "f pers or f inan": "feminine person inanimate",
    "m inan or m pers": "masculine inanimate person",
    "m or m pers or f": "masculine inanimate animate person feminine",  # XXX
    "m anim or m pers": "masculine animate person",
    "f anim or f pers": "feminine animate person",
    "n anim or n pers": "neuter animate person",
    "m pers or n": "masculine person neuter animate inanimate",  # XXX
    # NOTE(review): "m pers or f" was also listed earlier with the value
    # "masculine person feminine"; this later entry always won, so only it
    # is kept.
    "m pers or f": "masculine person feminine animate inanimate",  # XXX
    "vir": "virile",
    "nvir": "nonvirile",
    "anml": "animal-not-person",
    "f anml": "feminine animal-not-person",
    "m anml": "masculine animal-not-person",
    "f animal": "feminine animal-not-person",
    "m animal": "masculine animal-not-person",
    "m animal or f animal": "masculine animal-not-person feminine",
    "f animal or m animal": "feminine animal-not-person masculine",
    "m anim or f": "masculine animate feminine inanimate",
    "impf": "imperfective",
    "impf.": "imperfective",
    "pf": "perfective",
    "pf.": "perfective",
    "impf or pf": "imperfective perfective",  # ought/Eng/Tr/Serbo-Croatian
    "pf or impf": "perfective imperfective",  # start/Tr(of an activity)/Russian
    "invariable": "invariable",
    "n.": "noun",
    "v.": "verb",
    "adj.": "adjective",
    "adv.": "adverb",
    "?": "",
    "1.": "first-person",
    "2.": "second-person",
    "3.": "third-person",
    "1": "class-1",
    "1a": "class-1a",
    "2": "class-2",
    "2a": "class-2a",
    "3": "class-3",
    "4": "class-4",
    "5": "class-5",
    "6": "class-6",
    "7": "class-7",
    "8": "class-8",
    "9": "class-9",
    "9a": "class-9a",
    "10": "class-10",
    "10a": "class-10a",
    "11": "class-11",
    "12": "class-12",
    "13": "class-13",
    "14": "class-14",
    "15": "class-15",
    "16": "class-16",
    "17": "class-17",
    "18": "class-18",
    "1/2": "class-1 class-2",
    "3/4": "class-3 class-4",
    "5/6": "class-5 class-6",
    "7/8": "class-7 class-8",
    "9/10": "class-9 class-10",
    "15/17": "class-15 class-17",
    "1 or 2": "class-1 class-2",
    "1a or 2a": "class-1a class-2a",
    "1a or 2": "class-1a class-2",
    "3 or 4": "class-3 class-4",
    "5 or 6": "class-5 class-6",
    "7 or 8": "class-7 class-8",
    "9 or 10": "class-9 class-10",
    "9a or 10a": "class-9a class-10a",
    "15 or 17": "class-15 class-17",
    "9/10 or 1/2": "class-9 class-10 class-1 class-2",
    # two/Tr/Kikuyu
    "2 or 4 or 6 or 13": "class-2 class-4 class-6 class-13",
    "8 or 10": "class-8 class-10",  # two/Tr/Kikuyu
    "11 or 10": "class-11 class-10",  # sea/Eng/Tr/Zulu
    "11 or 10a": "class-11 class-10a",  # half/Ngazidja Comorian
    "10 or 11": "class-10 class-11",  # mushroom/Tr/Swahili
    "11 or 14": "class-11 class-14",  # country/Tr/Swahili
    "11 or 12": "class-11 class-12",  # theater/Tr/Swahili
    "11 or 6": "class-11 class-6",   # leaf/'Maore Comorian
    "9 or 6": "class-9 class-6",  # birthday,carrot/Tr/Rwanda-Rundi
    # Previously mapped to "class-2 class-6", which did not match the key.
    "1 or 6": "class-1 class-6",  # Zulu/Tr/Zulu
    "6 or 7": "class-6 class-7",  # spider/Eng/Tr/Lingala ???
    "15 or 6": "class-15 class-6",  # leg/Tr/Rwanda-Rundi
    "14 or 6": "class-14 class-6",  # rainbow/Tr/Chichewa
    "9 or 9": "?class-9",  # XXX bedsheet/Tr/Sotho
    "m1": "masculine first-declension",
    "f2": "feminine second-declension",
    "m2": "masculine second-declension",
    "f3": "feminine third-declension",
    "m3": "masculine third-declension",
    "f4": "feminine fourth-declension",
    "m4": "masculine fourth-declension",
    "f5": "feminine fifth-declension",
    "m5": "masculine fifth-declension",
    "[uncountable]": "uncountable",
    "is more colloquial": "colloquial",
    "(plural f)": "singular plural feminine",  # XXX chromicas/Galician
    "(plural m)": "singular plural masculine",  # XXX Genseric/Galician
    "2 or 3": "?class-2 class-3",  # XXX branch/Tr/Swahili
    "m or impf": "masculine imperfective",  # pour/Tr/Ukrainian
    "f or impf": "feminine imperfective",  # fuc* around/Tr/(s with many)/Czech
    "n or impf": "neuter imperfective",  # glom/Russian
    "f or pf": "feminine perfective",
    "m or pf": "masculine perfective",
    "n or pf": "neuter perfective",
    "m or m": "?masculine",  # Paul/Tr(male given name)/Urdu
    "f or c pl": "?feminine common singular plural",  # mulberry/Tr/Zazaki
    "c pl or n": "?common neuter singular plural",  # mouthpiece/Tr/Swedish
    "impf or impf": "?imperfective",
    "pf or pf": "?perfective",
    "sg or sg": "?singular",
    "pl or pl": "?plural",
    "c or impf": "?common imperfective",
    "m inan or n": "masculine inanimate neuter",
    "m inan or f": "masculine inanimate feminine",
    "pl or pf": "?plural perfective",
    "m pl or pf": "masculine plural perfective",
    "f pl or pf": "feminine plural perfective",
    "n pl or pf": "neuter plural perfective",
    "f pl or impf": "feminine plural imperfective",
    "m pl or impf": "masculine plural imperfective",
    "n pl or impf": "neuter plural imperfective",
    "m or f or impf": "?masculine feminine imperfective",
    "pl or m or f or n": "?plural masculine feminine neuter",
}

# Languages that can have head-final numeric class indicators.  They are mostly
# used in Bantu languages.  We do not want to interpret them at the ends of
# words like "Number 11"/English.  Also, some languages have something like
# "stress pattern 1" at the end of word head, which we also do not want to
# interpret as class-1.
head_final_numeric_langs = {
    # Language names for which a trailing number in a word head is read as a
    # noun-class indicator rather than as part of the word.
    "Bende", "Chichewa", "Chimwiini",
    "Dyirbal",  # Australian aboriginal, uses class-4 etc
    "Kamba", "Kikuyu",
    "Lingala", "Luganda",
    "Maore Comorian", "Masaba", "Mwali Comorian", "Mwani",
    "Ngazidja Comorian", "Northern Ndebele", "Nyankole",
    "Phuthi",
    "Rwanda-Rundi",
    "Shona", "Sotho", "Southern Ndebele", "Swahili", "Swazi",
    "Tsonga", "Tswana", "Tumbuka",
    "Xhosa",
    "Zulu",
    "ǃXóõ",
}

# Languages for which to consider head_final_bantu_map
# XXX should other Bantu languages be included here?  Which ones use
# suffixes such as "m or wa"?
head_final_bantu_langs = {"Swahili"}

# These are only handled in parse_head_final_tags and will generally be
# ignored elsewhere.  Keys may contain spaces.
head_final_bantu_map = dict((
    # Swahili class indications.
    ("m or wa", "class-1 class-2"),
    ("m or mi", "class-3 class-4"),
    ("ma", "class-5 class-6"),
    ("ki or vi", "class-7 class-8"),
    ("n", "class-9 class-10"),
    ("u", "class-11 class-12 class-14"),
    ("ku", "class-15"),
    ("pa", "class-16"),
    ("mu", "class-18"),

    # XXX these are probably errors in Wiktionary, currently ignored
    ("n or n", "?"),  # Andromeda/Eng/Tr/Swahili etc
    ("m or ma", "?"),  # environment/Eng/Tr/Swahili etc
    ("u or u", "?"),  # wife/Eng/Tr/Swahili
))

# Language names that are looked up together with head_final_semitic_map.
head_final_semitic_langs = {
    "Akkadian", "Amharic", "Arabic", "Aramaic",
    "Eblaite",
    "Hebrew", "Hijazi Arabic",
    "Maltese", "Moroccan Arabic",
    "Phoenician",
    "South Levantine Arabic",
    "Tigre", "Tigrinya",
    "Ugaritic",
}

# Maps a head-final verb form label (a Roman numeral, optionally followed by
# "q") to its tag, which is "form-" plus the lowercased label.
head_final_semitic_map = {
    label: "form-" + label.lower()
    for label in (
        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
        "XI", "XII", "XIII",
        "Iq", "IIq", "IIIq", "IVq",
    )
}

# Language names that are looked up together with head_final_other_map.
head_final_other_langs = {
    "Finnish", "French", "Lithuanian",
    "Arabic", "Armenian", "Zazaki", "Hebrew",
    "Hijazi Arabic", "Moroccan Arabic",
    "Nama", "Old Church Slavonic", "Gothic", "Old Irish", "Latin",
    "Scottish Gaelic", "Slovene", "Sorbian",
    "South Levantine Arabic",
    # Kajkavian and Chakavian are forms of Croatian
    "Kajkavian", "Chakavian", "Croatian",
    "Sanskrit", "Ancient Greek",
    # XXX For dual??? see e.g. route/Tr(course or way)/Polish
    "Dyirbal", "Egyptian", "Maltese", "Maori", "Polish", "Portuguese",
    "Romanian",  # cache,acquaintance/Tr/Romanian
    "Ukrainian", "Ugaritic",
}

head_final_other_map = {
    # Keys are raw head-final strings; values are space-separated tags.
    # Each key must be listed only once: in a dict literal a repeated key
    # silently replaces the earlier value.

    # This is used in Finnish at the end of some word forms (in translations?)
    "in indicative or conditional mood": "in-indicative in-conditional",

    # marine/French Derived terms
    "f colloquial form of a feminine marin": "feminine colloquial",

    # These stress pattern indicators occur in Lithuanian
    "stress pattern 1": "stress-pattern-1",
    "stress pattern 2": "stress-pattern-2",
    "stress pattern 3": "stress-pattern-3",
    "stress pattern 3a": "stress-pattern-3a",
    "stress pattern 3b": "stress-pattern-3b",
    "stress pattern 4": "stress-pattern-4",
    "stress pattern: 1": "stress-pattern-1",
    "stress pattern: 2": "stress-pattern-2",
    "stress pattern: 3": "stress-pattern-3",
    "stress pattern: 3a": "stress-pattern-3a",
    "stress pattern: 3b": "stress-pattern-3b",
    "stress pattern: 4": "stress-pattern-4",

    # These are specific to Arabic, Armenian, Zazaki, Hebrew, Nama,
    # Old Church Slavonic, Gothic, Old Irish, Latin, Scottish Gaelic,
    # Slovene, Sorbian, Kajkavian, Chakavian, (Croatian), Sanskrit,
    # Ancient Greek
    # (generally languages with a dual number)
    "du": "dual",
    "du or pl": "dual plural",  # aka duoplural
    "m du": "masculine dual",
    "f du": "feminine dual",
    "n du": "neuter dual",
    "m du or f du": "masculine feminine dual",  # yellow/Tr/Zazaki
    "f du or m du": "feminine masculine dual",
    "n du or n pl": "neuter dual plural",
    "f du or f pl": "feminine dual plural",
    "m du or m pl": "masculine dual plural",
    # This key used to be listed twice ("?" and then ""); the later ""
    # always won, so only that effective value is kept.
    "du or f du or n": "",  # XXX guest/Tr(patron)/Zazaki
    "du or n or pf": "?",  # XXX how would this be expressed
    "du or n du": "neuter dual",  # bilberry/Tr/Zazaki
    "pl or pf": "?plural perfective",  # walk/Tr(to steal)/Russian
    "m or pf": "?masculine perfective",  # boom/Tr(make book)/Russian
    "n or n du": "neuter singular dual",
    # XXX clump/Tr/Portuguese
    "sg or m du": "singular feminine neuter masculine dual",
    "m du or f du or n du": "masculine dual feminine neuter",
    "du or m": "?dual masculine",
}

# Accepted uppercase tag values.  As tags these are represented with words
# connected by hyphens.
uppercase_tags = set([
    "A Estierna",
    "AF",  # ??? what does this mean
    "ALUPEC",
    "ASL gloss",  # Used with sign language heads
    "Aargau",
    "Abagatan",
    "Absheron",
    "Abung/Kotabumi",
    "Abung/Sukadana",
    "Abzakh",
    "Acadian",
    "Achaemenid",
    "Achterhooks",
    "Adana",
    "Adlam",  # Script
    "Adyghe",
    "Aeolic",
    "Affectation",
    "Afi-Amanda",
    "Africa",
    "African-American Vernacular English",
    "Afrikaans",
    "Afyonkarahisar",
    "Agdam",
    "Ağrı",
    "Akhmimic",
    "Aknada",
    "Al-Andalus",
    "Ala-Laukaa",
    "Alak",
    "Alemannic",  # Variant of German
    "Alemannic German",  # Variant of German
    "Algherese",
    "Alles",
    "Alliancelles",
    "Alsace",
    "Alsatian",
    "Alviri",  # Variant of Alviri-Vidari
    "Amecameca",
    "American continent",
    "Americanization",
    "Amerindish",
    "Amharic",  # Script (at least for numberals)
    "Amianan",
    "Amira",
    "Amrum",
    "Amur",
    "Anbarani",  # Variant of Talysh
    "Ancient",
    "Ancient China",
    "Ancient Egyptian",
    "Ancient Greek",
    "Ancient Rome",
    "Ancient Roman",
    "Andalucia",
    "Andalusia",
    "Andalusian",
    "Anglian",
    "Anglicised",
    "Anglicism",
    "Anglism",
    "Anglo-Latin",
    "Anglo-Norman",
    "Angola",
    "Aniwa",
    "Anpezan",  # Variant of Ladin
    "Antalya",
    "Antanosy",
    "Antilles",
    "Appalachia",
    "Appalachian",
    "Arabic",  # Also script
    "Arabic-Indic",  # Also script
    "Aragon",
    "Aragón",
    "Aramaic",
    "Aran",
    "Aranese",
    "Arango",
    "Arawak",
    "Arbëresh",
    "Ardennes",
    "Argentina",
    "Arkhangelsk",
    "Armenia",
    "Armenian",  # Also script
    "Aromanian",
    "Aruba",
    "Asalem",
    "Asalemi",  # Variant of Talysh
    "Asante",
    "Ashkenazi Hebrew",
    "Assamese",  # Also script (India)
    "Asturias",
    "Atlantic Canada",
    "Atlapexco",
    "Attic",  # Ancient greek
    "Aukštaitian",
    "Australia",
    "Australian",
    "Austria",
    "Austrian",
    "Auve",
    "Auvernhàs",  # Dialect of Occitan
    "Avignon",
    "Ayer",
    "Ayt Ndhir",
    "Azerbaijani",
    "Azores",
    "Baan Nong Duu",
    "Babia",
    "Bacheve",
    "Badiot",  # Variant of Ladin
    "Badiu",
    "Baghdad",
    "Bahamas",
    "Bahasa Baku",
    "Baku",
    "Balearic",
    "Balkar",
    "Balinese",  # Also script
    "Baltic-Finnic",
    "Bamu",
    "Banatiski Gurbet",
    "Banawá",
    "Bangkok",
    "Barbados",
    "Barda",
    "Bardez Catholic",
    "Barlavento",
    "Basel",
    "Bashkir",
    "Basque",
    "Batang",
    "Batangas",
    "Bavaria",
    "Bavarian",
    "Baybayin",
    "Beijing",
    "Belalau",
    "Belarusian",
    "Belgium",
    "Belize",
    "Bengali",  # Also script (India)
    "Bentheim",
    "Bering Straits",  # Inupiaq language
    "Berlin",
    "Berlin-Brandenburg",
    "Bern",
    "Beru",
    "Bezhta",
    "Bharati braille",
    "Biblical Hebrew",
    "Biblical",
    "Bikol Legazpi",
    "Bikol Naga",
    "Bikol Tabaco",
    "Bilasuvar",
    "Bimenes",
    "Biscayan",
    "Bla-Brang",
    "Bo Sa-ngae",
    "Bodega",
    "Bogota",
    "Bohairic",
    "Bohemia",
    "Boholano",
    "Bokmål",  # Variant of Norwegian
    "Bolivia",
    "Bologna",
    "Bolognese",
    "Bombay",
    "Borneo",
    "Boro",
    "Bosnia Croatia",
    "Bosnia",
    "Bosnian",
    "Bosnian Croatian",
    "Bosnian Serbian",
    "Boston",
    "Botswana",
    "Brabant",
    "Brabantian",
    "Brahmi",  # Script (India, historic)
    "Brazil",
    "Brazilian",
    "Bressan",
    "Brest",
    "Britain",
    "British",
    "British airforce",
    "British Army",
    "British Columbia",
    "British Isles",
    "British Royal Navy",
    "Brunei",
    "Bugey",
    "Bugurdži",
    "Bukovina",
    "Bulgaria",
    "Bulgarian",
    "Busan",
    "Bushehr",
    "Burdur",
    "Burgenland",
    "Burmese",  # Script
    "Bygdeå",
    "Byzantine",
    "Bzyb",
    "Béarn",
    "cabo Verde",
    "CJK tally marks",
    "Cabrales",
    "Caipira",
    "Caithness",
    "California",
    "Campello Monti",
    "Campidanese",  # Variant of Sardinian
    "Canada",
    "Canadian",
    "Canadian English",
    "Canadian French",
    "Canadian Prairies",
    "Canado-American",
    "Canary Islands",
    "Cangas del Narcea",
    "Cantonese",  # Chinese dialect/language
    "Cape Afrikaans",
    "Carakan",
    "Carcoforo",
    "Caribbean",
    "Carioca",
    "Carpi",
    "Castilian Spanish",
    "Castilian",
    "Catalan",
    "Catalan-speaking bilingual areas mostly",
    "Catalonia",
    "Catholic",
    "Cebu",
    "Cebuano",
    "Central America",
    "Central Apulia",
    "Central Asia",
    "Central Scots",
    "Central Sweden",
    "Central and Southern Italy",
    "Central",
    "Chakavian",
    "Chakma",  # Script (India/Burma?)
    "Cham",  # Script (Austronesian - Vietnam/Cambodia)
    "Changuena",
    "Chanthaburi",
    "Chazal",  # Jewish historical sages
    "Chengdu",
    "Chiconamel",
    "Chicontepec",
    "Child US",
    "Chile",
    "China",
    "Chinese",  # Also script
    "Chinese Character classification",
    "Cholula",
    "Chongqing",
    "Christian",
    "Chugoku",
    "Chūgoku",
    "Chumulu",
    "Church of England",
    "Cieszyn Silesia",
    "Cincinnati",
    "Classical",  # Variant of several languages, e.g., Greek, Nahuatl
    "Classical Attic",
    "Classical Chinese",
    "Classical Edessan",
    "Classical Indonesian",
    "Classical K'Iche",
    "Classical Latin",
    "Classical Persian",
    "Classical Sanskrit",
    "Classical Syriac",
    "Classical studies",
    "Clay",
    "Closed ultima",
    "Coastal Min",
    "Cockney",
    "Cois Fharraige",
    "Cois Fharraige",
    "Colombia",
    "Colunga",
    "Common accent",
    "Commonwealth",
    "Congo",
    "Congo-Kinshasa",
    "Connacht",
    "Connemara",
    "Contentin",
    "Continent",
    "Copenhagen",
    "Cork",
    "Cornish",
    "Cornwall",
    "Counting rod",
    "Costa Rica",
    "Cotentin",
    "Crimea",
    "Croatia",
    "Croatian",
    "Cu'up",  # Region in Indonesia (Rejang language)
    "Cuarto de los Valles",
    "Cuba",
    "Cuisnahuat",
    "Cumbria",
    "Cuoq",
    "Cusco",
    "Cypriot",
    "Cyprus",
    "Cyrillic",  # Script
    "Czech",
    "Czech Republic",
    "Čakavian",
    "DR Congo",
    "Dalmatia",
    "Dananshan Miao",
    "Dankyira",
    "Dari",
    "Dashtestan",
    "Dauphinois",
    "Daya",
    "Daytshmerish",
    "De'kwana",
    "Debri",
    "Deh Sarv",
    "Deirate",
    "Delhi",
    "Delhi Hindi",
    "Demotic",  # Greek/Ancient Greek
    "Denizli",
    "Derbyshire",
    "Devanagari",  # Script
    "Devon",
    "Déné syllabary",  # Script for Canadian Indian languages?
    "Digor",  # Variant of Ossetian
    "Dingzhou",
    "Dissenter",
    "Dithmarsisch",
    "Diyarbakır",
    "Dominican Republic",
    "Dominican Republic",
    "Dongmen",
    "Doric",  # Ancient Greek
    "Drasi",  # Region in India
    "Draweno-Polabian",
    "Drents",
    "Dundee",
    "Dungan",
    "Durham",
    "Dutch",
    "Dêgê",
    "Džáva",
    "EU",
    "Early Middle English",
    "Early Modern Greek",
    "Early",
    "East Anglia",
    "East Armenian",
    "East Bengal",
    "East Coast",
    "East Frisian",
    "East Midland",
    "East Slovakia",
    "East",
    "Eastern Armenian",
    "Eastern New England",
    "Eastern Syriac",
    "Eastern",
    "Ecclesiastical",
    "Ectasian",
    "Ecuador",
    "Ecuadorian Kichwa",
    "Edirne",
    "Egypt",
    "Egyptian",  # Also script (hieroglyph)
    "Egyptian Arabic",  # Variant of Arabic
    "Egyptiot",
    "Ekagongo",
    "Ekavian",
    "El Salvador",
    "Elazığ",
    "Elberfelder Bibel",
    "England",
    "English Midlands",
    "English",
    "Eonavian",
    "Epic",
    "Epigraphic Gandhari",
    "Erzgebirgisch",
    "Esham",
    "Esperantized",
    "Esperanto",
    "Estonian",
    "Estuary English",
    "Ethiopic",  # Script
    "Europe",
    "European",
    "European Union",
    "European ortography",
    "Eurozone",
    "Fante",
    "Faroese",
    "Fars",
    "Fascian",  # Variant of Ladin
    "Fayyumic",
    "Fengkai",
    "Finglish",  # Finnish word taken from English
    "Finland",
    "Fjolde",
    "Flanders",
    "Flemish",
    "Florida",
    "Fluminense",
    "Fodom",  # Variant of Ladin
    "For transcription only",
    "Formazza",
    "Fountain",
    "Fragoria vesca",
    "France Quebec",
    "France",
    "Fredrikstad",
    "French",
    "Frenchified",
    "Fribourg",
    "Friulian",
    "From Old Northern French",
    "Föhr",
    "Föhr-Amrum",
    "Gadabay",
    "Gaellic",
    "Galgolitic",
    "Galicia",
    "Galician",
    "Galitzish",
    "Galway",
    "Gan",  # Variant of Chinese
    "Gangwon",  # Dialect/region for Korean
    "Gascon",  # DIalect of Occitan
    "Gascony",
    "Gaspésie",
    "Gaúcho",
    "Gelders",
    "General American",
    "General Australian",
    "General Brazilian",
    "General Cebuano",
    "General New Zealand",
    "General South African",
    "Genoese",
    "Genovese",
    "Geordie",
    "Georgia",
    "German",
    "German Low German",
    "Germanic",
    "Germany",
    "Gheg",
    "Gherdëina",  # Variant of Ladin
    "Gipuzkoan",
    "Glagolitic",  # Script
    "Glarus",
    "Goan Konkani",
    "Goerdie",
    "Goeree-Overflakkee",
    "Gope",
    "Gorj",
    "Goth",
    "Gothenburg",
    "Gothic",  # Script
    "Gotland",
    "Goud Saraswat",
    "Grecian",
    "Greco-Bohairic",
    "Greek",  # Also script
    "Greek Catholic",
    "Greek-type",  # Used to characterize some Latin words e.g. nematodes/Latin)
    "Gressoney",
    "Grischun",
    "Grisons",
    "Groningen",
    "Gronings",
    "Guadeloupean",
    "Gualaca",
    "Guatemala",
    "Guernsey",
    "Gufin",
    "Guichicovi",
    "Guinea-Bissau",
    "Guinée Conakry",
    "Gujarati",  # Script (Indo-Arabic)
    "Gulf Arabic",  # Variant of Arabic Language
    "Gurbet",
    "Gurmukhi",  # Script (Indo-Arabic)
    "Gurvari",
    "Guyana",
    "Gwichya",
    "Gyeongsang",
    "H-system",
    "Ha",
    "Hachijō",
    "Hainanese",
    "Haketia",
    "Hakka",  # Chinese dialect/language
    "Halchighol",
    "Hallig",
    "Halligen",
    "Hamburg",
    "Hangaza",
    "Hanifi Rohingya",  # Script (Perso-Arabic)
    "Hanoi",
    "Hanyuan",
    "Harak",
    "Harat",
    "Harry Potter",
    "Hawaii",
    "Hawick",
    "Hán tự",
    "Hebei",  # China
    "Hebrew",  # also Script (for Aramaic)
    "Hejazi Arabic",  # Variant of Arabic Language
    "Hejazi",
    "Helgoland",  # Variant of North Frisian
    "Heligoland",
    "Heligolandic",
    "Hellenizing School",
    "Hevaha",
    "Hianacoto",
    "Hiberno-English",
    "Hijazi",  # Variant of Arabic
    "Hijazi Arabic",  # Variant of Arabic
    "Hindi",  # Script (at least for numerals, e.g. 80)
    "Hinduism",
    "Hokkien",  # Chinese dialect/language
    "Honduras",
    "Hong Kong",
    "Hong'an",
    "Hoanya",
    "Hometwoli",
    "Hongfeng",
    "Hosso",
    "Hsinchu Hokkien",  # Chinese dialect/language
    "Hua",
    "Hungarian Vend",
    "Huế",
    "Hyōgai",  # Uncommon type of Kanji character
    "Hà Nội",  # Vietnamese dialect
    "Hà Tĩnh",  # Vietnamese dialect
    "Hán Nôm",  # Vietnamese latin spelling with diacritics?
    "Hössjö",
    "Hồ Chí Minh City",
    "I Ching hexagram",
    "I-I",  # Used in some Dungan nouns; I have no idea what this means
    "Ionic",  # Ancient Greek
    "IPA",
    "IUPAC name",
    "Iberian",
    "Ibero-Romance",
    "Iceland",
    "İçel",
    "Ikavian",
    "Ijekavian",
    "Ijekavian/Ekavian",
    "Ilir",
    "In conjunct consonants",
    "Inari",  # Variant of Sami
    "India",
    "Indian English",
    "Indo-Aryan linguistics",
    "Indo-European studies",
    "Indonesia",
    "Inkhokwari",
    "Inland Min",
    "Inland Northern American",
    "Inner Mongolia",
    "Insular Scots",
    "Insular",
    "Interlingua",
    "Internet",
    "Inuvialuktun",
    "Iran",
    "Iranian Persian",
    "Iraq",
    "Iraqi Hebrew",
    "Ireland",
    "Irish",
    "Iron",  # Variant of Ossetian
    "Isfahan",
    "Isparta",
    "Israel",
    "Issime",
    "Istanbul",
    "Italian Hebrew",
    "Italy",
    "Iyaric",
    "Izalco",
    "İzmit",
    "Jabung",
    "Jainism",
    "Jakarta",
    "Jalalabad",
    "Jalilabad",
    "Jalnguy",
    "Jamaica",
    "Jamaican",
    "Jamaican creole",
    "Japan",
    "Japurá",
    "Jarawara",
    "Javanese",  # Also script (Indonesia)
    "Jazan",
    "Jáva",
    "Jawi",
    "Jehovah's Witnesses",
    "Jèrriais",
    "Jersey",
    "Jewish Aramaic",
    "Jewish Babylonian Aramaic",
    "Jewish Palestinian Aramaic",
    "Jewish",
    "Jianghuai Mandarin",  # Chinese dialect/language
    "Jicalapa",
    "Jicarilla",  # Variant of the Apache Language?
    "Jilu Mandarin",  # Dialect/Language in Chinese
    "Jin",
    "Jin Mandarin",  # Chinese dialect/language
    "Jinjiang Hokkien",  # Chinese dialect/language
    "Johannesburg",
    "Johor-Selangor",
    "Johore",
    "Judaism",
    "Judeo-French",
    "Jurchen",  # Script?
    "Jyutping",
    "Kabul",
    "Kabuli",
    "Kadaru",
    "Kagoshima",
    "Kaipi",
    "Kaiwaligau Ya",
    "Kajkavian",
    "Kalaw Kawaw Ya",
    "Kalaw Lagaw Ya",
    "Kalbajar",
    "Kalderaš",
    "Kalianda",
    "Kaliarda",
    "Kalix",
    "Kaluga",
    "Kamino",
    "Kampong Ayer",
    "Kamrupi",
    "Kamviri",
    "Kanchanaburi",
    "Kandahar",
    "Kannada",  # Script (at least for numerals, Hindu-Arabic?)
    "Kansai",
    "Kanto",
    "Kaohsiung Hokkien",  # Chinese dialect/language
    "Karabakh",
    "Karachay",
    "Karanga",
    "Karwari",
    "Kasuweri",
    "Katharevousa",
    "Kautokeino",
    "Kayah Li",  # Script (Sino-Tibetan)
    "Kayseri",
    "Kayu Agung",
    "Kayu Agung Asli",
    "Kayu Agung Pendatang",
    "Kaw Kyaik",
    "Kazakh",
    "Kazerun",
    "Kazym",
    "Kedayan",
    "Kent",
    "Kentish",
    "Kenya",
    "Kernewek Kemmyn",
    "Kernowek Standard",
    "Kerry",
    "Kfar Kama",  # Region in Israel
    "Khesht",
    "Khmer",  # Script
    "Khojavend",
    "Khorasan",
    "Khoshar-Khota",
    "Khudawadi",  # Script (Sindhi language, India)
    "Khun villages",
    "Kiambu",
    "Kidero",
    "Kinmen Hokkien",
    "Kinshasa",
    "Kinyarwanda",
    "Kirundi",
    "Kobuk",  # Inupiaq
    "Koine",  # Ancient Greek
    "Konartakhteh",
    "Kong Loi village",
    "Kong Loi villages",
    "Konya",
    "Koryo-mar",
    "Kosovo",
    "Kosovo Arli",
    "Kota Agung",
    "Krui",
    "Kulkalgau Ya",
    "Kurdamir",
    "Kuritiba",
    "Kursk",
    "Kuwait",
    "Kuwaiti Gulf Arabic",  # Variant of Arabic Language
    "Kuzarg",
    "Kyoto",
    "Kyrgyz",
    "Kyūshū",
    "Kwantlada",
    "Kölsch",
    "LÚ",
    "La Up village",
    "Lamphun Province",
    "Lanna",  # Script (Thailand)
    "Languedoc",
    "Lao",  # Script (Lao language in Laos)
    "Late Bohairic",
    "Late Egyptian",
    "Late Latin",
    "Late Middle English",
    "Late Old French",
    "Late Old Frisian",
    "Late West Saxon",
    "Late",
    "Latin America",
    "Latin",  # Script
    "Latinate",
    "Latinism",
    "Latvian",
    "Laval",
    "Lavarone",
    "Lebanese Arabic",  # Variant of Arabic language
    "Lebong",  # Region in Indonesia/Sumatra?  (Rejang language)
    "Leet",  # Leetspeak, an internet "slang"
    "Legazpi",
    "Leizhou Min",  # Chinese dialect/language
    "Lemosin",  # Dialect of Occitan
    "Lengadocian",  # Dialect of Occitan
    "Lepcha",  # Script (Himalayas?)
    "Lesotho",
    "Levantine Arabic",  # Variant of Arabic language
    "Lewis",
    "Leyte",
    "Lhasa",
    "Liechtenstein",
    "Limba Sarda Comuna",
    "Limbu",  # Script (Limbu language in Central Himalayas)
    "Limburg",
    "Limburgish",
    "Limousin",
    "Limuru",
    "Linnaeus",
    "Lippisch",
    "Lisan ud-Dawat",
    "Listuguj",
    "Literary affectation",
    "Lithuania",
    "Lithuanian",
    "Litvish",
    "Liverpudlian",
    "Llanos",
    "Logudorese",  # Variant of Sardinian
    "Lojban",
    "Loli",
    "Lombardy",
    "London",
    "Lorraine",
    "Louisiana",
    "Lovara",
    "Low Prussian",
    "Low Sorbian",
    "Lower Sorbian",
    "Lubunyaca",
    "Lukang Hokkien",
    "Luleå",
    "Lunfardo",
    "Luserna",
    "Luxembourg",
    "Luxembourgish",
    "Lycopolitan",
    "Lyon",
    "Lyons",
    "Lviv",
    "Lövånger",
    "Ḷḷena",
    "Łowicz",
    "M.O.D.",  # Used as head form in Marshallese
    "Maastrichtian",
    "Macau",
    "Macedonia",
    "Macedonian",
    "Macedonian Arli",
    "Macedonian Džambazi",
    "Mackem",
    "Madeira",
    "Maharashtra",
    "Mahuizalco",
    "Maiak",
    "Maine",
    "Mainland China",
    "Malacatepec",
    "Malak",
    "Malayalam",
    "Malaysia",
    "Malaysian English",
    "Mallorca",
    "Malta",
    "Malyangapa",
    "Mamluk-Kipchak",
    "Mandarin",  # Dialect/Language in Chinese
    "Mandi",
    "Manglish",
    "Manichaean",
    "Manicoré",
    "Manitoba Saulteux",
    "Mantua",
    "Manyika",
    "Marathi",
    "Martinican",
    "Martinican Creole",
    "Marwari",
    "Mary-marry-merry distinction",
    "Mary-marry-merry merger",
    "Marxism",
    "Masarm",
    "Maharastri Prakrit",
    "Mauritania",
    "Mawakwa",
    "Mayo",
    "McCune-Reischauer",
    "Mecayapan",  # Variant of Nahuatl
    "Mecklenburg-Vorpommern",
    "Mecklenburgisch",
    "Mecklenburgisch-Vorpommersch",
    "Medan",
    "Mediaeval",
    "Medieval",
    "Medieval Greek",
    "Medieval Latin",
    "Medio-Late Egyptian",
    "Mehedinți",
    "Meitei",  # Script (used with Meitei language in India)
    "Meixian",
    "Melanesian",
    "Melinting",
    "Menggala/Tulang Bawang",
    "Mercian",
    "Merseyside",
    "Mescaleiro",
    "Mexica",
    "Mexico",
    "Mfom",
    "Microsoft Azure",
    "Mid Northern Scots",
    "Mid Northern",
    "Mid",
    "Mid-Atlantic",
    "Middle Ages",
    "Middle Chinese",  # Historical variant of Chinese
    "Middle Cornish",
    "Middle Egyptian",
    "Middle",
    "Midland American English",
    "Midlands",
    "Midlandsnormalen",
    "Midwestern US",
    "Milan",
    "Milanese",
    "Milpa Alta",
    "Min",
    "Min Bei",
    "Min Dong",  # Chinese dialect/language
    "Min Nan",  # Chinese dialect/language
    "Minas Gerais",
    "Mineiro",
    "Mirandola",
    "Mirandolese",
    "Mistralian",
    "Mizrahi Hebrew",
    "Modena",
    "Modern",
    "Modern Armenian",
    "Modern Israeli Hebrew",
    "Modern Israeli",
    "Modern Latin",
    "Modern Polabian",
    "Modern Turkish",
    "Modi",  # Variant/language based on Sanskrit
    "Moghamo",
    "Moldavia",
    "Molet Kasu",
    "Molet Mur",
    "Monegasque",
    "Mongo-Turkic",
    "Mongolian",  # Also script
    "Montenegro",
    "Montreal",
    "Mooring",  # Variant of North Frisian
    "Moravia",
    "Mormonism",
    "Moroccan",  # Variant of Arabic
    "Moroccan Arabic",  # Variant of Arabic
    "Morocco",
    "Moscow",
    "Moselle Franconian",
    "Mosetén",
    "Mount Currie",
    "Mozambique",
    "Moçambique",
    "Mpakwithi",
    "Muğla",
    "Multicultural London English",
    "Munster",
    "Murang'a",
    "Mushuau Innu",
    "Muslim",
    "Münsterland",
    "Münsterländisch",
    "Myanmar",  # Also script
    "Mycenaean",  # Variant of Greek
    "N'Ko",  # Script
    "Nahua",
    "Nahuatl",
    "Nakhchivan",
    "Namibia",
    "Nanchuan",
    "Nao Klao",  # dialect
    "Naples",
    "Navajo",
    "Navarre",
    "Navarrese",
    "Navarro-Lapurdian",
    "Navy",
    "Nazism",
    "Ndia",
    "Neo-Latin",
    "Nepal",
    "Netherlands",
    "Nevada",
    "New Age",
    "New England",
    "New Jersey",
    "New Latin",
    "New Sanskrit",
    "New York City",
    "New York",
    "New Zealand",
    "Newa",  # Script (Newa Spelling) ??? निर्वाचन/Newar/Noun
    "Newfoundland",
    "Nicaragua",
    "Niçard",
    "Nidwalden",
    "Nigeria",
    "Niğde",
    "Ningbo",
    "Nizhegorod",
    "Nomen sacrum",  # Used in Gothic form names
    "Non-Oxford",
    "Nordestino",
    "Nordic",
    "Norfolk",
    "Normandy",
    "Norse",
    "North Afar",
    "North America",
    "North American",
    "North Brazil",
    "North East England",
    "North Eastern US",
    "North German",
    "North Korea",
    "North Levantine",
    "North Levantine Arabic",  # Variant of Arabic
    "North Northern Scots",
    "North Northern",
    "North Northern",
    "North Wales",
    "North and East of the Netherlands",
    "North",
    "Northeast Brazil",
    "Northeastern Brazil",
    "Northeastern",
    "Northern California",
    "Northern Catalan",
    "Northern Crimea",
    "Northern England",
    "Northern English",
    "Northern Germany",
    "Northern Ireland",
    "Northern Italy",
    "Northern Mandarin",  # Chinese dialect/language
    "Northern Manx",
    "Northern Middle English",
    "Northern Puebla",
    "Northern Scots",
    "Northern UK",
    "Northern US",
    "Northern Yiddish",
    "Northern Zazaki",
    "Northern",
    "Northamptonshire",
    "Northumbria",
    "Northwestern",
    "Novgorod",
    "Nde",
    "Nembe",
    "Nfom",
    "Ngan'gimerri",
    "Ngan'gikurunggurr",
    "Ngie",
    "Ngoko",
    "Nghệ An",  # Vietnamese dialect
    "Nkim",
    "Nkojo",
    "Nkum",
    "Nselle",
    "Nsimbwa",
    "Nta",
    "Ntuzu",
    "Nuorese",
    "Nyeri",
    "Nynorak",
    "Nynorsk",  # Variant of Norwegian
    "Nyungkal",
    "Nürnbergisch",
    "Occitania",
    "Odia",  # Script (at least for numerals)
    "Ol Chiki",  # Script (Austroasiatic language in India)
    "Old Bohairic",
    "Old Chamorro",
    "Old Chinese",  # Historical variant of Chinese
    "Old Coptic",
    "Old East Church Slavonic",
    "Old Egyptian",
    "Old English",
    "Old Latin",
    "Old Lithuanian",
    "Old Norse",
    "Old Northern French",
    "Old Persian",  # Script
    "Old Polabian",
    "Old Tagalog",
    "Oliti",
    "Olles",
    "Ombos",
    "Ontario",
    "Ooldea",
    "Orcadian",
    "Ordubad",
    "Oriya",  # Script (Hindu-Arabic?)
    "Orkney",
    "Ormulum",
    "Oryol",
    "Oslo",
    "Osmanya",  # Script (Somalia)
    "Ottomans",
    "Oxford",  # Variant of British English
    "POJ",  # Latin alphabet based orthography for Min Nan (Chinese)
    "Pa Pae village",
    "Paderbornish",
    "Paderbornisch",
    "Pahang",
    "Pak Kret District",
    "Pakistan",
    "Palacios de Sil",
    "Palatine",
    "Palestinian",
    "Pali",  # Sanskrit
    "Panama",
    "Pangin",
    "Papua New Guinea",
    "Paraguay",
    "Paris",
    "Parisian",
    "Parres",
    "Parts of south Jeolla",
    "Paulistano",
    "Payang",  # Region in Indonesia (Rejang language)
    "Pays de Bray",
    "Pays de Caux",
    "Paḷḷuezu",
    "Peking",
    "Pembrokeshire",
    "Penang Hokkien",
    "Peng'im",
    "Penghu Hokkien",
    "Pennsylvania",
    "Periphrastic conjugations",
    "Perm",
    "Persian",  # Also script
    "Persian Gulf",
    "Persianized",
    "Perso-Arabic",
    "Peru",
    "Peshawar",
    "Phnom Penh",
    "Philadelphia",
    "Philippine",
    "Philippines",
    "Piacenza",
    "Picardy",
    "Pinghua",  # Chinese dialect/language
    "Pinyin",
    "Pirupiru",
    "Pite",  # Variant of Sami
    "Piteå",
    "Plautdietsch",
    "Polari",
    "Polish",
    "Portugal",
    "Portugal",
    "Possesse",
    "Poylish",
    "Poznań",
    "Praenominal",  # Type of abbreviation
    "Pre-Hebrew",
    "Prokem",
    "Protestant",
    "Proto-Slavic",
    "Provençal",
    "Provençau",  # Dialect of Occitan
    "Pskov",
    "Pu No",  # dialect
    "Pubian",
    "Puebla",
    "Puerto Rico",
    "Pulaar",
    "Pular",
    "Puter",
    "Puxian Min",  # Chinese language/dialect
    "Valdés",
    "Vallander",
    "Varendra",
    "Vegliot",
    "Vest Recklinghausen",
    "Villacidayo",
    "Qazakh",
    "Quakerism",
    "Quanzhou",
    "Quebec",
    "Quebec City",
    "Quetta",
    "Quirós",
    "Quốc ngữ",
    "Radical",  # Used to mark Japanese Kanji that are radical forms
    "Raguileo Alphabet",
    "Ragusan",
    "Rai Kaili",
    "Ranau",
    "Rastafari",
    "Rastafarian",
    "Ratak",
    "Received Pronunciation",
    "Recueil scientifique ou littéraire",
    "Reggio Emilia",
    "Reina-Valera version",
    "Renshou",
    "Revived Late Cornish",
    "Revived Middle Cornish",
    "Revived",
    "Rhine Franconian",  # Variant of German
    "Rhineland",
    "Rhodesia",
    "Riau",
    "Riau-Lingga",
    "Rigveda",
    "Riksmål",
    "Rimella",
    "Ring",
    "Rio Grande De Sul",
    "Rio de Janeiro",
    "Rioplatense",
    "Ripuarian",
    "Ritsu",
    "Rogaland",
    "Roman",  # Script
    "Roman Catholic",
    "Roman Empire",
    "Romanian",
    "Romungro",
    "Rouen",
    "Rubī-Safaia",
    "Ruhrgebiet",
    "Rumantsch Grischun",
    "Rumi",
    "Rumy",
    "Rundi",
    "Rungu",
    "Russia",
    "Russian",
    "Russianism",
    "Rwanda",
    "Rwanda-Rundi",
    "Rālik",
    "Rāṛha",
    "Rōmaji",
    "SK Standard",
    "SW England",
    "São-Paulo",
    "Saarve",
    "Sagada",
    "Sahidic",
    "Saint Ouën",
    "Saint Petersburg",
    "Sakayamuni",
    "Sakhalin",
    "Salaca",
    "Salas",
    "Sallans",
    "Salyan",
    "Sami",
    "San Juan Quiahije",
    "Sanskrit",
    "Sanskritized",
    "Santiago",
    "Sanxia Hokkien",
    "São Vicente",
    "Sappada",
    "Sapper-Ricke",
    "Sark",
    "Sauerland",
    "Sauerländisch",
    "Saurashtra",  # Script (Surashtra language in Tamil Nadu)
    "Sauris",
    "Savoie",
    "Savoyard",
    "Sawndip",
    "Sayisi",  # Variant of Chipewyan language?
    "Schleswig-Holstein",
    "Schwyz",
    "Scientific Latin",
    "Scotland",
    "Scottish",
    "Scouse",
    "Seoul",
    "Sepečides",
    "Sepoe",
    "Serbia",
    "Serbian",
    "Serbo-Croatian",
    "Servia",
    "Sesivi",
    "Sette Comuni",
    "Seville",
    "Shahmukhi",
    "Shandong",
    "Shanghai",
    "Shanghainese Wu",
    "Shapsug",
    "Sharada",  # Script (India for Sanskrit and Kashmiri; historic)
    "Shavian",
    "Sheffield",
    "Sheng",
    "Shephardi Hebrew",
    "Sheshatshiu Innu",
    "Shetland",
    "Shetlandic",
    "Shia",
    "Shidong",
    "Shikoku",
    "Shin",
    "Shiraz",
    "Shropshire",
    "Shubi",
    "Shuri-Naha",
    "Shuryshkar",
    "Siba",
    "Sibe",
    "Sichuanese",
    "Sikh",
    "Sikhism",
    "Silesian",
    "Simplified",
    "Singapore English",
    "Singapore",
    "Singlish",
    "Sinhalese",  # Script (Sri Lanka)
    "Sino-Korean",
    "Sino-Japanese",
    "Sisiame",
    "Sistani",
    "Skellefteå",
    "Skiri",
    "Skolt",  # Variant of Sami
    "Slovak",
    "Slovene",
    "Slovincian",
    "Smolensk",
    "Sobrescobiu",
    "Sofia Erli",
    "Soikkola",
    "Solothurn",
    "Somiedu",
    "Sori",
    "Sotavento",
    "Souletin",
    "South Afar",
    "South Africa",
    "South African",
    "South America",
    "South American English",
    "South Asia",
    "South Azerbaijani",
    "South Brazil",
    "South German",
    "South Korea",
    "South Levantine",
    "South Levantine Arabic",
    "South Northern Scots",
    "South Scots",
    "South Wales",
    "South",
    "Southeastern",
    "Southern Africa",
    "Southern American English",
    "Southern Brazil",
    "Southern England",
    "Southern Italy",
    "Southern Manx",
    "Southern Middle English",
    "Southern Quechua",
    "Southern Scotland",
    "Southern Scots",
    "Southern Spain",
    "Southern US",
    "Southern Yiddish",
    "Southern Zazaki",
    "Southern",
    "Southwestern",
    "Southwestern Mandarin",  # Chinese dialect/language
    "Space Force",
    "Spain",
    "Spanish",
    "Sremski Gurbet",
    "Sri Lanka",
    "St. Gallen",
    "Standard Cornish",
    "Standard East Norwegian",
    "Standard German of Switzerland",
    "Standard German",
    "Standard Hlai",
    "Standard Sicilian",
    "Standard Tagalog",
    "Standard Zhuang",
    "Stavanger",
    "Stellingwerfs",
    "Stokoe",  # Used in sign language letter entries to indicate Latin letter
    "Suizhou",
    "Sukai",
    "Sukau",
    "Sundanese",
    "Sungkai",
    "Sunni",
    "Surgut",
    "Surigaonon",
    "Surinam",
    "Suriname",
    "Surmiran",
    "Sursilvan",
    "Suðuroy",
    "Sutsilvan",
    "Suzhou",
    "Sweden",
    "Swiss German",
    "Swiss",
    "Switzerland",
    "Syllabics",  # Used in word head with Plains Cree, e.g. tânisi/Plains Cree
    "Sylt",  # Variant of North Frisian
    "Syriac",  # Also script (for Aramaic)
    "Syrian Hebrew",
    "São Paulo",
    "São Vicente",
    "TV",
    "Taberga",
    "Tabriz",
    "Tai Tham",  # Script (Northern Thai?)
    "Tai Xuan Jing",
    "Taichung Hokkien",
    "Tainan",
    "Taipei",
    "Taishanese",
    "Taiwan",
    "Taiwanese Hokkien",
    "Taiwanese Mandarin",  # Chinese dialect/language
    "Taixuanjing tetragram",
    "Tajik",
    "Takri",  # Script (mostly historic, used in Himachal Pradesh)
    "Talang Padang",
    "Tally-marks",
    "Talur",
    "Tamil",  # Also script
    "Tang-e Eram",
    "Tankarana",
    "Tantoyuca",
    "Tao",
    "Taraškievica",
    "Tashelhit",  # Variant of Berber
    "Tasmania",
    "Tasmanian",
    "Tavastia",
    "Tebera",
    "Teesside",
    "Tehran",
    "Tehrani",
    "Telugu",  # Also script (India)
    "Telugu-Kui",
    "Temapache",
    "Tenerife",
    "Teochew",
    "Teotepeque",
    "Tepetzintla",
    "Terre-Neuve-et-Labrador",
    "Tessin",
    "Texas",
    "Texcoco",
    "Textbibel",
    "Tgdaya",
    "Thai",  # Script
    "Thailand",
    "Thanh Chương",
    "The Hague",
    "Thung Luang village",
    "Thung Luang",
    "Thurgau",
    "Thuringian-Upper Saxon",
    "Tibetan",  # Script
    "Tiberian Hebrew",
    "Timau",
    "Timor-Leste",
    "Tirhuta",  # Script (historical: Maithili, Sanskrit)
    "Tlaxcala",
    "Tlyadal",
    "Toaripi",
    "Tokat",
    "Tokyo",
    "Tongzi",
    "Torlakian",
    "Tosk",
    "Toulouse",
    "Traditional",
    "Trakai-Vilnius",
    "Translingual",
    "Transoxianan",
    "Transylvania",
    "Trat",
    "Tredici Comuni",
    "Trentino",
    "Trinidad and Tobago",
    "Truku",
    "Tsimihety",
    "Tulamni",
    "Turkmen",
    "Tuscany",
    "Twente",
    "Twents",
    "Twi",  # Dialect of the Akan language
    "Tyneside",
    "Uganda",
    "UK with /ʊ/",
    "UK",
    "Ulu",
    "UPA",
    "Upper Silesia",
    "Upper Sorbian",
    "Urama",
    "Urdu",
    "US with /u/",
    "US",
    "US-Inland North",
    "US-merged",
    "Ukraine",
    "Ukrainish",
    "Ukraynish",
    "Ulaanbaatar",
    "Ulster Scots",
    "Ulster",
    "Umeå",
    "Unified",
    "Unix",
    "Unquachog",  # Dialect of Quiripi
    "Upper RP Triphthong Smoothing",
    "Uri",
    "Urkers",
    "Ursari",
    "Urtijëi",
    "Uruguay",
    "Utara",  # Region in Indonesia (Rejang language)
    "Uutände",
    "Uyghurjin",
    "Vaiśeṣika",
    "Valais",
    "Valencia",
    "Valencian",
    "Vallander",
    "Vancouver",
    "Vancouver Island",
    "Vaṅga",
    "Vedic",
    "Veluws",
    "Venezuela",
    "Verona",
    "Vidari",  # Variant of Alviri-Vidari
    "Vietnam",
    "Vinh",
    "Vinza",
    "Virginia",
    "Vivaro-Alpin",
    "Vivaro-Alpine",
    "Volapük Nulik",
    "Volapük Rigik",
    "Vosges",
    "Vulgata",
    "Västergötland",
    "WW2 air pilots' usage",
    "Wade-Giles",
    "Wadikali",
    "Walapai",
    "Wales",
    "Wallonia",
    "Wamwan",
    "Warang Citi",  # Script (Ho language, East India)
    "Wardak",
    "Waterford",
    "Way Lima",
    "Wazirwola",
    "Wearside",
    "Weirate",
    "Welche",
    "Welsh English",
    "Wenzhou",  # Chinese dialect/language
    "Wenzhou Wu",  # Chinese dialect/language
    "West Armenian",
    "West Bengal",
    "West Cork",
    "West Country",
    "West Kerry",
    "West Midlands",
    "West Muskerry",
    "West Pomeranian",
    "West",
    "Western Armenian",
    "Western Quebec",
    "Western Rumelia",
    "Western Syriac",
    "Western",
    "Westminster system",
    "Westmünsterland",
    "Westphalia",
    "Westphalian",
    "Westpfälzisch",
    "Westwestphalian",
    "Wiedingharde",
    "Windesi",
    "Witzapan",
    "Wood",
    "World War I",
    "Wrangelsholm",
    "Written Form",
    "Wu",  # Chinese dialect/language
    "Wuhan",
    "Wuvulu",
    "X-system",
    "Xiamen",
    "Xiang",
    "Xilitla",
    "YIVO",
    "Yagaria",
    "Yahualica",
    "Yajurveda chanting",
    "Yale",
    "Yaman",
    "Yanbian",
    "Yanhe",
    "Yao'an",
    "Yardliyawara",
    "Yardymli",
    "Yaut",
    "Yawelmani",
    "Yañalif",
    "Ye'kwana",
    "Yemen",
    "Yemenite Hebrew",
    "Yichang",
    "Yiddish-influenced",
    "Yilan Hokkien",
    "Yindjilandji",
    "Yintyingka",
    "Ylä-Laukaa",
    "Yongshan",
    "Yorkshire",
    "Yozgat",
    "Yukjin",
    "Yukon",
    "Yulparija",
    "Yunnan",
    "Zacatianguis",
    "Zamboanga",
    "Zangilan",
    "Zaqatala",
    "Zezuru",
    "Zhangzhou",
    "Zhangzhou Hokkien",
    "Zhuyin",  # Apparently a phonetic script used with Chinese/Mandarin
    "Zimbabwe",
    "Zinacantán",
    "Zurich",
    "Zêkog",
    "Överkalix",
    "al-Andalus",  # historically Muslim-ruled area of the Iberian Peninsula
    "bureaucratese",
    "central and northeastern Switzerland",
    "continental Normandy",
    "feudal Britain",
    "parts of South Africa",
    "outside Northumbria",
    "post-Augustan",
    "post-Classical",
    "post-Homeric",
    "pre-1989 IPA",
    "pre-Classical",
    "regionally African American Vernacular",
    "southern Moselle Franconian",
    "northernmost Moselle Franconian",
    "west Sweden",
    "most of Moselle Franconian",
])


# General mapping for linguistic tags.  Each value is either a string of
# space-separated tags or a list of such strings, one per alternative set of
# tags.  Alternative forms in the same category can all be listed in the same
# string (e.g., multiple genders).
# XXX should analyze imperfect vs. imperfective - are they just used in
# different languages, or is there an actual difference in meaning?
xlat_tags_map = {
    "sg": "singular",
    "pl": "plural",
    "sg.": "singular",
    "pl.": "plural",
    "sg. and pl.": "singular plural",
    "sg and pl": "singular plural",
    "m/f": "masculine feminine",
    "no pl": "no-plural",
    "pl. only": "plural-only",
    "pl ordinaux": "usually plural",
    "m.": "masculine",
    "male": "masculine",
    "f.": "feminine",
    "fem.": "feminine",
    "female": "feminine",
    "indef.": "indefinite",
    "gen.": "genitive",
    "pres.": "present",
    "subj.": "subjunctive",
    "impf.": "imperfective",
    "pf.": "perfective",
    "trans.": "transitive",
    "unc": "uncountable",
    "abbreviated": "abbreviation",
    "diminutives": "diminutive",
    "Diminutive": "diminutive",
    "Diminutives": "diminutive",
    "†-tari": "-tari",
    "†-nari": "-nari",
    "♂♀": "masculine feminine",
    "♂": "masculine",
    "♀": "feminine",
    "cangjie input": "cangjie-input",
    "RP": "Received-Pronunciation",
    "BR": "Brazil",
    "Brasil": "Brazil",
    "Brazilian Portuguese": "Brazil",
    "FR": "France",
    "IT": "Italy",
    "CAN": "Canada",
    "AU": "Australia",
    "AUS": "Australia",
    "Austr.": "Australian",
    "AusE": "Australia",
    "Aus": "Australia",
    "LKA": "Sri-Lanka",
    "RU": "Russia",
    "SA": "South-Africa",
    "[AU]": "Australia",
    "NYC": "New-York-City",
    "CA": "Canada",
    "AT": "Austria",
    "GA": "General-American",
    "NV": "Navajo",
    "UK male": "UK",
    "UK female": "UK",
    "GB": "UK",
    "EN": "UK",
    "IN": "India",
    "PRC": "China",
    "BG": "Bulgaria",
    "DE": "Germany",
    "IE": "Ireland",
    "NL": "Netherlands",
    "NZ": "New-Zealand",
    "PT": "Portugal",
    "BOL": "Bolivia",
    "U.S.A.": "US",
    "U.S.": "US",
    "[US]": "US",
    "Americanisation": "Americanization",
    "Saint Ouen": "Saint-Ouën",
    "UK & Aus": "UK Australia",
    "Britian": "Britain",
    "coastal Min": "Coastal-Min",
    "Telugu-Kui language": "Telugu-Kui",
    "SK Standard/Seoul": "SK-Standard Seoul",
    "Devanagri": "Devanagari error-misspelling",
    "Standard Seoul": "SK-Standard Seoul",
    "Association canadienne de normalisation": "Canada",
    "esp.": "especially",
    "northwestern": "Northwestern",
    "northeastern": "Northeastern",
    "southwestern": "Southwestern",
    "southeastern": "Southeastern",
    "northern": "Northern",
    "southern": "Southern",
    "western": "Western",
    "eastern": "Eastern",
    "westernmost": "Western",
    "west": "West",
    "Mecayapán": "Mecayapan",
    "Mooring and Föhr-Amrum": "Mooring Föhr-Amrum",
    "Föhr-Amrum & Mooring": "Föhr-Amrum Mooring",
    "Nazi slur against Churchill": "Nazism slur",
    "religious slur": "slur",
    "euphemistic Nazi term": "Nazism euphemistic",
    "United States": "US",
    "Québec": "Quebec",
    "Classic Persian": "Classical-Persian",
    "Sette Communi": "Sette-Comuni",
    "Vivaro-alpine": "Vivaro-Alpine",
    "Mooring and Hallig": "Mooring Hallig",
    "Zürich": "Zurich",
    "Somiedo": "Somiedu",
    "Uk": "UK",
    "US/UK": "US UK",  # XXX leave separate
    "USA": "US",
    "México": "Mexico",
    "Latinamerica": "Latin-America",
    "Lat. Amer.": "Latin-America",
    "LAm": "Latin-America",
    "Monégasque": "Monegasque",
    "Audio": "",
    "orig. US": "",
    "poetical": "poetic",
    "Noun": "noun",
    "Adjective": "adjective",
    "Verb": "verb",
    "Poetic": "poetic",
    "Poetic.": "poetic",
    "Informal.": "informal",
    "Colloquial.": "colloquial",
    "Antiquated.": "dated",
    "Archaic": "archaic",
    "Causative": "causative",
    "Passive": "passive",
    "Stative": "stative",
    "Applicative": "applicative",
    "Colloquial": "colloquial",
    "Epic verse": "poetic",
    "Nominative plural - rare": "nominative plural rare",
    "Nonstandard but common": "nonstandard common",
    "Slang": "slang",
    "Slang-Latin America": "slang Latin-America",
    "slangy": "slang",
    "backslang": "slang",
    "butcher's slang": "slang jargon",
    "archiac": "archaic error-misspelling",
    "nonstandard form": "nonstandard",
    "nonstandard form of": "nonstandard alt-of",
    "standard form of": "standard alt-of",
    "nonstandard stylistic suffix": "nonstandard dialectal suffix",
    "honorific form": "honorific",
    "possessed form": "possessed",
    "obligatorily possessed": "possessed",
    "obligatory possessive": "possessed",
    "obligatory possession": "possessed",
    "indicated possession by preceding noun": "possessed",
    "unpossessed form": "unpossessed",
    "Dialectal": "dialectal",
    "Dialect": "dialectal",
    "dialectal form": "dialectal",
    "dialectal term": "dialectal",
    "dialectal Mandarin": "dialectal Mandarin",
    "Dialect: Oslo": "dialectal Oslo",
    "regiolectal": "dialectal",
    "archaic or regiolectal": "archaic dialectal",
    "Canada: Ontario": "Ontario",
    "Canada: British Columbia": "British-Columbia",
    "GenAm": "General-American",
    "Greco-Bohairic Pronunciation": "Greco-Bohairic",
    "Greco-Bohairic pronunciation": "Greco-Bohairic",
    "Vallader": "Vallander",
    "Conservative RP": "Received-Pronunciation",
    "Received Prononunciation": "Received-Pronunciation",
    "North American also": "North-American",
    "Cois Fharraige also": "Cois-Fharraige",
    "Sawndip forms": "Sawndip",
    "Sawndip form": "Sawndip",
    "old orthography": "archaic",
    "Maine accent": "Maine",
    "Bosnia Serbia": "Bosnian-Serbian",
    "MLE": "Multicultural-London-English",
    "AAVE": "African-American-Vernacular-English",
    "Early ME": "Early-Middle-English",
    "Northern ME": "Northern-Middle-English",
    "Southern ME": "Southern-Middle-English",
    "Late ME": "Late-Middle-English",
    "Spanish given name": "Spanish proper-noun",
    "St. Petersburg or dated": "Saint-Petersburg dated",
    "Irregular reading": "irregular-pronunciation",
    "irreg. adv.": "irregular adverbial",
    "Argentina and Uruguay": "Argentina Uruguay",
    "Argentina Uruguay": "Argentina Uruguay",
    "Southern US folk speech": "Southern-US dialectal",
    "dialect": "dialectal",
    "Main dialectal variations": "dialectal",
    "Many eastern and northern dialects": "dialectal",
    "many dialects": "dialectal",
    "some dialects of": "dialectal",
    "now sometimes by conflation with etymology 1 under standard German influence":
    "sometimes",
    "unstressed form": "unstressed",
    "mute of": "unstressed form-of",
    "for some speakers": "uncommon",
    'when "do" is unstressed and the next word starts with /j/':
    "unstressed-before-j",
    "before a vowel": "before-vowel",
    "before vowel": "before-vowel",
    "before vowels": "before-vowel",
    "used before vowels and lenited fh-": "before-vowel before-lenited-fh",
    "used before vowels": "before-vowel",
    "used before the past tense": "before-past",
    "used a verb in imperfect subjunctive": "with-imperfect with-subjunctive",
    "the Eurozone": "Eurozone",
    "Phoneme": "phoneme",
    "Vowel": "phoneme",
    "Consonant": "phoneme",
    "Name of letter": "name",
    "nation's name": "name",
    "proprietary name": "name",
    "Vulgar": "vulgar",
    "strong language": "vulgar",
    "Very Strong Swear word": "vulgar",
    "Spoken": "colloquial",
    "spoken": "colloquial",
    "written": "literary",
    "Syllable initial": "syllable-initial",
    "Syllable final": "syllable-final",
    "internet": "Internet",
    "online": "Internet",
    "instant messaging": "Internet",
    "text messaging": "Internet",
    "cot-caught merged": "cot-caught-merger",
    "cot–caught merged": "cot-caught-merger",
    "cot-caught merger": "cot-caught-merger",
    "cot–caught merger": "cot-caught-merger",
    "pin-pen merger": "pin-pen-merger",
    "pin–pen merger": "pin-pen-merger",
    "prefix before comparative forms": "prefix with-comparative",
    "countable and uncountable": "countable uncountable",
    "masculine and feminine plural": "masculine feminine plural",
    "definite singular and plural": "definite singular plural",
    "plural and definite singular attributive":
    ["plural attributive", "definite singular attributive"],
    "oblique and nominative feminine singular":
    "oblique nominative feminine singular",
    "feminine and neuter plural": "feminine neuter plural",
    "feminine and neuter": "feminine neuter",
    "feminine and neuter plural": "feminine neuter plural",
    "masculine and feminine": "masculine feminine",
    "masculine and neuter": "masculine neuter",
    "masculine and plural": "masculine plural",
    "female and neuter": "feminine neuter",
    "the third person": "third-person",
    "(at least) nominative/objective/reflexive cases":
    "nominative objective",
    "singular and plural": "singular plural",
    "plural and weak singular": ["plural", "weak singular"],
    "dative-directional": "dative directional",
    "preterite and supine": "preterite supine",
    "genitive and dative": "genitive dative",
    "genitive and plural": "genitive plural",
    "dative and accusative": "dative accusative",
    "accusative/illative": "accusative illative",
    "dative and accusative singular": "dative accusative singular",
    "simple past and past participle": ["simple past", "past participle"],
    "simple past tense and past participle": ["simple past", "past participle"],
    "taking a past participle": "with-past-participle",
    "literary or in compounds": "literary in-compounds",
    "certain compounds": "in-compounds idiomatic",
    "participial adjective": "participle adjective error-misspelling",
    "literary or archaic": "literary archaic",
    "literaly or archaic": "literary archaic error-misspelling",
    "literary or dialectal": "literary dialectal",
    "dated or dialectal": "dated dialectal",
    "dialectal or colloquial": "dialectal colloquial",
    "dialectal or obsolete": "dialectal obsolete",
    "simple past": "simple past",
    "simple present": "simple present",
    "with verb in simple tense": "with-simple",
    "in simple past tense": "simple past",
    "for most verbs": "usually",
    "in general": "usually",
    "in variation": "in-variation",
    "genitive/dative": "genitive dative",
    "dative/locative": "dative locative",
    "dative/instrumental": "dative instrumental",
    "genitive/dative/locative": "genitive dative locative",
    "genitive/dative/ablative": "genitive dative ablative",
    "dative/ablative/locative": "dative ablative locative",
    "ablative/vocative": "ablative vocative",
    "ablative/locative": "ablative locative",
    "ablative/instrumental": "ablative instrumental",
    "dative/ablative": "dative ablative",
    "genitive/instrumental/locative": "genitive instrumental locative",
    "genitive/dative/locative/vocative": "genitive dative locative vocative",
    "genitive/dative/instrumental/prepositional":
    "genitive dative instrumental prepositional",
    "+ prepositional case": "with-prepositional",
    "+prepositional": "with-prepositional",
    "+ por": "with-por",
    "accusative/instrumental": "accusative instrumental",
    "dative/adverbial case": "dative adverbial",
    "dative/genitive": "dative genitive",
    "dative/genitive/instrumental": "dative genitive instrumental",
    "dative/accusative": "dative accusative",
    "dative/accusative/locative": "dative accusative locative",
    "genitive/accusative/prepositional":
    "genitive accusative prepositional",
    "genitive/dative/accusative": "genitive dative accusative",
    "genitive/animate accusative": ["genitive", "animate accusative"],
    "accusative plural and genitive plural": "accusative genitive plural",
    "hidden-n declension": "hidden-n",
    "declension pattern of": "declension-pattern-of",
    "first/second-declension adjective":
    "first-declension second-declension adjective",
    "first/second-declension participle":
    "first-declension second-declension participle",
    "class 9/10": "class-9 class-10",
    "class 5/6": "class-5 class-6",
    "class 3/4": "class-3 class-4",
    "class 7/8": "class-7 class-8",
    "class 1/2": "class-1 class-2",
    "class 11/10": "class-11 class-10",
    "class 11/12": "class-11 class-12",
    "nc 1/2": "class-1 class-2",
    "nc 3/4": "class-3 class-4",
    "nc 5/6": "class-5 class-6",
    "nc 7/8": "class-7 class-8",
    "nc 9/10": "class-9 class-10",
    "nc 1": "class-1",
    "nc 2": "class-2",
    "nc 3": "class-3",
    "nc 4": "class-4",
    "nc 5": "class-5",
    "nc 6": "class-6",
    "nc 7": "class-7",
    "nc 8": "class-8",
    "nc 9": "class-9",
    "nc 10": "class-10",
    "nc 11": "class-11",
    "nc 12": "class-12",
    "nc 13": "class-13",
    "nc 14": "class-14",
    "nc 15": "class-15",
    "nc 16": "class-16",
    "nc 17": "class-17",
    "nc 18": "class-18",
    "cl. 2 to cl. 11 and cl. 16 to cl. 18":
    "class-2 class-3 class-4 class-5 class-6 class-7 class-8 class-9 class-10 class-11 class-16 class-17 class-18",
    "refl": "reflexive",
    "coll.": "colloquial",
    "colloq.": "colloquial",
    "colloq": "colloquial",
    "collo.": "colloquial",
    "collective when uncountable": "countable uncountable collective",
    "coloquial": "colloquial",
    "more colloquial": "colloquial",
    "used colloquially and jokingly": "colloquial humorous",
    "used adverbially": "adverbial",
    "adverbially": "adverbial",
    "intr.": "intransitive",
    "tr.": "transitive",
    "trans": "transitive",
    "intransitive use": "intransitive",
    "intransitive senses": "intransitive",
    "intr. impers.": "intransitive impersonal",
    "abbrev.": "abbreviation",
    "Abbreviation": "abbreviation",
    "Hiragana": "hiragana",
    "Katakana": "katakana",
    "synon. but common": "synonym common",
    "common hyperhyms": "common hypernym",
    "much more common": "common",
    "incorrectly": "proscribed",
    "incorrect": "proscribed",
    "a hyponymic term": "hyponym",
    "a hypernymic term": "hypernym",
    "transitively": "transitive",
    "intransitively": "intransitive",
    "transitiv": "transitive",
    "intransitiv": "intransitive",
    "nominalized adjective": "noun nominalization",
    "adjectivized noun": "adjectival",
    "adv.": "adverb",
    "infomal": "informal error-misspelling",
    "informally": "informal",
    "formally": "formal",
    "very formal": "formal",
    "unmarked form": "unstressed",
    "marked form": "stressed",
    "inifnitive": "infinitive error-misspelling",
    "inf.": "informal",
    "unformal": "informal",
    "unpolite": "impolite",
    "fairly polite": "polite",
    "postnominal": "postpositional",
    "first/second declension": "first-declension second-declension",
    "first/second-declension suffix":
    "first-declension second-declension suffix",
    "first/second-declension numeral plural only":
    "first-declension second-declension numeral plural-only",
    "with gendered nouns": "with-gendered-noun",
    "possessive (with noun)": "possessive with-noun",
    "possessive (without noun)": "possessive without-noun",
    "without a main word": "without-noun",
    "informal 1st possessive": "informal first-person possessive",
    "informal augmentations": "informal augmented",
    "formal or literary": ["formal", "literary"],
    "formal or plural": ["formal", "plural"],
    "formal and written": "formal literary",
    "addressing kings and queens": "formal deferential",
    "adressing kings and queens": "formal deferential",
    "impolite 2nd possessive": "informal second-person possessive",
    "casual": "informal",
    "strong personal": "strong personal pronoun",
    "weak personal": "weak personal pronoun",
    "persent participle": "present participle",
    "with adjective or adjective-phrase complement": "with-adjective",
    "with accusative or dative": "with-accusative with-dative",
    "with accusative or genitive": "with-accusative with-genitive",
    "with accusative or ablative": "with-accusative with-ablative",
    "genitive or accusative": ["genitive accusative"],
    "genitive of personal pronoun": "genitive personal pronoun",
    "nominative and accusative definite singular":
    "nominative accusative definite singular",
    "+ genitive": "with-genitive",
    "+ genitive possessive suffix or elative":
    "with-genitive with-possessive-suffix with-elative",
    "+ genitive-accusative": "with-genitive",
    "genitive + ~": "with-genitive postpositional",
    "+ partitive or (less common) possessive suffix":
    "with-partitive with-possessive-suffix",
    "+ allative": "with-allative",
    "[an (about) + accusative]": "with-an with-accusative",
    "less common": "uncommon",
    "less frequently": "uncommon",
    "no perfect or supine stem": "no-perfect no-supine",
    "no present participle": "no-present-participle",
    "no past participle": "no-past-participle",
    "past participle (obsolete except in adjectival use)":
    "obsolete past participle",
    "adverbial locative noun in the pa, ku, or mu locative classes":
    "adverbial locative",
    "comparative -": "no-comparative",
    "superlative -": "no-superlative",
    "1 declension": "first-declension",
    "4 declension": "fourth-declension",
    "5th declension": "fifth-declension",
    "feminine ? declension": "feminine",
    "masculine ? declension": "masculine",
    "1st declension": "first-declension",
    "2nd declension": "second-declension",
    "3rd declension": "third-declension",
    "4th declension": "fourth-declension",
    "2nd-person": "second-person",
    "1st-person": "first-person",
    "3rd-person": "third-person",
    "1st person": "first-person",
    "2nd person": "second-person",
    "3rd person": "third-person",
    "1st actor trigger": "actor-i",
    "2nd actor trigger": "actor-ii",
    "3rd actor trigger": "actor-iii",
    "4th actor trigger": "actor-iv",
    "object trigger": "objective",
    "1st object trigger": "objective actor-i",
    "2nd object trigger": "objective actor-ii",
    "3rd object trigger": "objective actor-iii",
    "4th object trigger": "objective actor-iv",
    "potential mood": "potential",
    "causative mood": "causative",
    "comitative trigger": "comitative",
    "1st comitative trigger": "comitative actor-i",
    "2nd comitative trigger": "comitative actor-ii",
    "3rd comitative trigger": "comitative actor-iii",
    "4th comitative trigger": "comitative actor-iv",
    "locative trigger": "locative",
    "thematic trigger": "thematic",
    "benefactive trigger": "benefactive",
    "instrument trigger": "instrumental",
    "1st instrument trigger": "instrumental actor-i",
    "2nd instrument trigger": "instrumental actor-ii",
    "3rd instrument trigger": "instrumental actor-iii",
    "4th instrument trigger": "instrumental actor-iv",
    "1st": "first-person",
    "2nd": "second-person",
    "3rd": "third-person",
    "plural inv": "plural invariable",
    "plural not attested": "no-plural",
    "no plural forms": "no-plural",
    "not translated": "not-translated",
    "not mutable": "not-mutable",
    "used only predicatively": "not-attributive predicative",
    "only in predicative position": "not-attributive predicative",
    "only predicative": "not-attributive predicative",
    "predicate-only":
    "not-attributive predicative error-misspelling",  # eleng/Luxembourgish
    "predicative only": "not-attributive predicative",
    "predicatively": "predicative",
    "in attributive use": "attributive",
    "(attributive)": "attributive",
    "(predicative)": "predicative",
    "(uncountable)": "uncountable",
    "only in attributive use": "attributive not-predicative",
    "present tense": "present",
    "past tense": "past",
    "feminine counterpart": "feminine",
    "masculine counterpart": "masculine",
    "passive counterpart": "passive",
    "active counterpart": "active",
    "basic stem form": "stem",
    "no supine stem": "no-supine",
    "no perfect stem": "no-perfect",
    "construct state": "construct",
    "construct form": "construct",
    "phonemic reduplicative": "reduplication",
    "reduplicated": "reduplication",
    "neutrally formal": "somewhat formal",
    "objective case": "objective",
    "first person": "first-person",
    "second person": "second-person",
    "third person": "third-person",
    "nominative case": "nominative",
    "genitive case": "genitive",
    "genitive 1": "genitive",
    "genitive 2": "genitive",
    "genitive 3": "genitive",
    "dative case": "dative",
    "dative 1": "dative",
    "dative 2": "dative",
    "dative 3": "dative",
    "accusative 1": "accusative",
    "accusative 2": "accusative",
    "accusative 3": "accusative",
    "accusative case": "accusative",
    "ergative cases": "ergative",
    "absolutive case": "absolutive",
    "ablative case": "ablative",
    "genitive unattested": "no-genitive",
    "genitive -": "no-genitive",
    "nominative plural -": "no-nominative-plural",
    "colloquially also feminine": "colloquial feminine",
    "colloquial or pejorative": "colloquial pejorative",
    "colloquial or dialectal": "colloquial dialectal",
    "pejorative or racial slur": "pejorative slur",
    "pejoratively": "pejorative",
    "racial slur": "slur",
    "in some dialects": "dialectal",
    "in other dialects": "dialectal",
    "dialects": "dialectal",
    "pejorativ": "pejorative error-misspelling",
    "idionomic": "idiomatic error-misspelling",
    "idiom": "idiomatic",
    "humorously self-deprecating": "humorous",
    "rare/awkward": "rare",
    "extremely rare": "rare",
    "now quite rare": "rare",
    "rarefied": "rare",
    "rarely": "rare",
    "rarer form": "rare",
    "relatively rare": "rare",
    "personified": "person",
    "person or animal": "person animal-not-person",
    "found only in the imperfective tenses": "no-perfect",
    "imperfekt": "imperfect error-misspelling",
    "imperf. aspect": "imperfect",
    "perfective 1": "perfect",
    "perfective 2": "perfect",
    "in counterfactual conditionals": "conditional counterfactual",
    "improbable of counterfactual": "usually counterfactual",
    "third plural indicative": "third-person plural indicative",
    "defective verb": "defective",
    "+ active 3rd infinitive in elative": "with-infinitive-iii-elative",
    "+ active 3rd infinitive in illative": "with-infinitive-iii-illative",
    "+ third infinitive in illative": "with-infinitive-iii-illative",
    "+ verb in 3rd infinitive abessive": "with-infinitive-iii-abessive",
    "+ verb in third infinitive illative or adverb":
    "with-infinitive-iii with-illative with-adverb",
    "+ partitive + 3rd person singular": "with-partitive",
    "3rd possessive": "third-person possessive",
    "active voice": "active",
    "+ infinitive": "with-infinitive",
    "+ first infinitive": "with-infinitive-i",
    "transitive + first infinitive": "transitive with-infinitive-i",
    "transitive + kV": "transitive with-kV",  # gǀkxʻâã/ǃXóõ
    "+ a + infinitive": "with-a with-infinitive",
    "+ indicative mood": "with-indicative",
    "+ conditional mood": "with-conditional",
    "+nominative": "with-nominative",
    "+ nominative": "with-nominative",
    "plus genitive": "with-genitive",
    "+ genitive": "with-genitive",
    "+ genetive": "with-genitive error-misspelling",
    "+genitive": "with-genitive",
    "+ genitive case": "with-genitive",
    "genitive +": "with-genitive",
    "nominative +": "with-nominative",
    "genitive or possessive suffix +": "with-genitive with-possessive-suffix",
    "with genitive case": "with-genitive",
    "with genitive": "with-genitive",
    "+dative": "with-dative",
    "+ dative case": "with-dative",
    "dative case +": "with-dative",
    "+ dative": "with-dative",
    "+ historic dative": "with-dative historic",
    "only with adjectives": "with-adjective",
    "plus dative": "with-dative",
    "plus dative case": "with-dative",
    "with dative": "with-dative",
    "with the dative": "with-dative",
    "with dative case": "with-dative",
    "+ accusative": "with-accusative",
    "+ accusative case": "with-accusative",
    "+accusative": "with-accusative",
    "with accusative case": "with-accusative",
    "with the accusative": "with-accusative",
    "with accusative": "with-accusative",
    "plus accusative": "with-accusative",
    "takes accusative": "with-accusative",
    "takes accusative object": "with-accusative",
    "governs the accusative": "with-accusative",
    "governs the genitive": "with-genitive",
    "governs the dative": "with-dative",
    "takes dative": "with-dative",
    "takes dative case": "with-dative",
    "+ partitive": "with-partitive",
    "+ partitive + vastaan": "with-partitive",
    "+partitive": "with-partitive",
    "with partitive case": "with-partitive",
    "plus partitive": "with-partitive",
    "with partitive": "with-partitive",
    "+ablative": "with-ablative",
    "+ ablative": "with-ablative",
    "with ablative case": "with-ablative",
    "plus ablative": "with-ablative",
    "with ablative": "with-ablative",
    "+ subjunctive": "with-subjunctive",
    "+subjunctive": "with-subjunctive",
    "plus subjunctive": "with-subjunctive",
    "with subjunctive": "with-subjunctive",
    "with subjunctives": "with-subjunctive",
    "+ subordinate clause": "with-subordinate-clause",
    "+ instrumental": "with-instrumental",
    "+instrumental": "with-instrumental",
    "+ instrumental case": "with-instrumental",
    "with instrumental case": "with-instrumental",
    "with instrumental": "with-instrumental",
    "plus instrumental": "with-instrumental",
    "with instrumental or genitive case": "with-instrumental with-genitive",
    "with instrumental or dative case": "with-instrumental with-dative",
    "+ locative": "with-locative",
    "+ locative case": "with-locative",
    "with locative": "with-locative",
    "+ illative": "with-illative",
    "intransitive + illative": "intransitive with-illative",
    "intransitive + elative": "intransitive with-elative",
    "intransitive + inessive or adessive":
    "intransitive with-inessive with-adessive",
    "intransitive + inessive": "intransitive with-inessive",
    "intransitive + adessive": "intransitive with-adessive",
    "intransitive + translative": "intransitive with-translative",
    "intransitive + partitive or transitive + accusative":
    "intransitive with-partitive transitive with-accusative",
    "transitive + partitive": "transitive with-partitive",
    "transitive + partitive + essive":
    "transitive with-partitive with-essive",
    "transitive + elative + kiinni":
    "transitive with-elative",
    "transitive (+ yllään) + partitive":
    "transitive with-partitive",
    "transitive + accusative": "transitive with-accusative",
    "transitive + elative": "transitive with-elative",
    "transitive or reflexive": "transitive reflexive",
    "illative + 3rd-person singular":
    "with-illative with-third-person-singular",
    "partitive + 3rd-person singular":
    "with-partitive with-third-person-singular",
    "+ translative": "with-translative",
    "+ negative adjective in translative": "with-translative with-negative-adj",
    "with negation": "with-negation",
    "with negated verb": "with-negation",
    "when negated": "with-negation",
    "usu. in negative": "usually with-negation",
    "predicate of copula": "copulative",
    "copular verb": "copulative",
    "copula": "copulative", # náina/Phalura
    "+ adessive": "with-adessive",
    "+ adessive or illative": "with-adessive with-illative",
    "+absolutive": "with-absolutive",
    "+ absolutive": "with-absolutive",
    "with absolutive case": "with-absolutive",
    "with absolutive": "with-absolutive",
    "+ absolutive case": "with-absolutive",
    "plus absolutive": "with-absolutive",
    "take nouns in absolute case": "with-absolute",
    "takes nouns in absolute case": "with-absolute",
    "takes absolute case": "with-absolute",
    "+elative": "with-elative",
    "+ elative": "with-elative",
    "elative +": "with-elative",
    "elative case": "elative",
    "+ [elative]": "with-elative",
    "with elative case": "with-elative",
    "with elative": "with-elative",
    "plus elative": "with-elative",
    "+ essive": "with-essive",
    "+ comparative": "with-comparative",
    "+objective": "with-objective",
    "+ objective": "with-objective",
    "with objective case": "with-objective",
    "with objective": "with-objective",
    "plus objective": "with-objective",
    "sublative case": "sublative",
    "terminative case": "terminative",
    "+ present form": "with-present",
    "+ noun phrase] + subjunctive (verb)":
    "with-noun-phrase with-subjunctive",
    "with noun phrase": "with-noun-phrase",
    "+ [nounphrase] + subjunctive":
    "with-noun-phrase with-subjunctive",
    "+ number": "with-number",
    "with number": "with-number",
    "optative mood +": "with-optative",
    "p-past": "passive past",
    "ppp": "passive perfect participle",
    "not used in plural form": "no-plural",
    "indecl": "indeclinable",
    "all forms unconjugated": "indeclinable",
    "not declined": "indeclinable",
    "not declinable": "indeclinable",
    "undeclinable": "indeclinable",
    "inconjugable": "indeclinable error-misspelling",
    "indeclinable?": "indeclinable",
    "no inflections": "indeclinable",
    "not often used": "rare",
    "interrogative adverb": "interrogative adverb",
    "perfect tense": "perfect",
    "intensive": "emphatic",
    "intensifier": "emphatic",
    "changed conjunct form": "conjunct",
    "biblical hebrew pausal form": "pausal Biblical",
    "bible": "Biblical",
    "Bibilical": "Biblical",
    "emphatic form": "emphatic",
    "emphatic form of": "emphatic form-of",
    "emphatically": "emphatic",
    "emphatical": "emphatic",
    "standard form": "standard",
    "augmented form": "augmented",
    "active form": "active",
    "passive form": "passive",
    "mutated form": "mutated",
    "auxiliary verb": "auxiliary",
    "modal auxiliary verb": "auxiliary modal",
    "transitive verb": "transitive",
    "tr and intr": "transitive intransitive",
    "intransitive verb": "intransitive",
    "transitive or intransitive": "transitive intransitive",
    "male equivalent": "masculine",
    "in compounds": "in-compounds",
    "in combination": "in-compounds",
    "attribute": "attributive",
    "in the past subjunctive": "with-past with-subjunctive",
    "in conditional": "with-conditional",
    "use the subjunctive tense of the verb that follows": "with-subjunctive",
    "kyūjitai form": "kyūjitai",
    "kyūjitai kanji": "kyūjitai",
    "shinjitai form": "shinjitai",
    "shinjitai kanji": "shinjitai",
    "grade 1 “Kyōiku” kanji": "grade-1-kanji",
    "grade 2 “Kyōiku” kanji": "grade-2-kanji",
    "grade 3 “Kyōiku” kanji": "grade-3-kanji",
    "grade 4 “Kyōiku” kanji": "grade-4-kanji",
    "grade 5 “Kyōiku” kanji": "grade-5-kanji",
    "grade 6 “Kyōiku” kanji": "grade-6-kanji",
    "uncommon “Hyōgai” kanji": "uncommon Hyōgai",
    "dialectical": "dialectal",
    "dialectal or archaic": "dialectal archaic",
    "dialectal or poetic": "dialectal poetic",
    "dialect": "dialectal",
    "obsolescent": "possibly obsolete",
    "antiquated": "dated",
    "19th century": "archaic",
    "dated or regional": "dated regional",
    "dated or archaic": "archaic",
    "common and polite term": "polite",
    "most common but potentially demeaning term": "possibly derogatory",
    "highly academic": "literary",
    "highly irregular": "irregular",
    "academic": "literary",
    "learned": "literary",
    "archaic ortography": "archaic",
    "archaic elsewhere": "dialectal",
    "in the plural": "plural-only",
    "derog.": "derogatory",
    "derogative": "derogatory",
    "derogatively": "derogatory",
    "disparaging": "derogatory",
    "deprecative": "derogatory",
    "collective sense": "collective",
    "relatively rare": "rare",
    "very rare": "rare",
    "very informal": "informal",
    "less formal": "somewhat formal",
    "very archaic": "archaic",
    "outdated": "archaic",
    "historiographic": "historical",
    "with a + inf.": "with-a with-infinitive",
    "with di + inf.": "with-di with-infinitive",
    "with che + subj.": "with-che with-subjunctive",
    "with inf.": "with-infinitive",
    "with infinitive": "with-infinitive",
    "with following infinitive": "with-infinitive",
    "followed by an infinitive": "with-infinitive",
    "zu-infinitive": "infinitive infinitive-zu",
    "zu infinitive": "infinitive infinitive-zu",
    "da-infinitive": "infinitive infinitive-da",
    "Use the future tense": "with-future",
    # XXX: the following entry is currently disabled; re-enable: "~ се": "with-ce",
    "strong/mixed": "strong mixed",
    "strong/weak/mixed": "strong weak mixed",
    "weak/mixed": "weak mixed",
    "weak verb": "weak-verb",
    "auxiliary sein": "aux-sein",
    "auxiliary haben": "aux-haben",
    "no auxiliary": "no-auxiliary",
    "nominative/accusative": "nominative accusative",
    "masculine/feminine": "masculine feminine",
    "masculine/neuter": "masculine neuter",
    "present/future": "present future",
    "future/present": "present future",
    "present/aoriest": "present aorist error-misspelling",
    "superlative degree": "superlative",
    "sup.": "superlative",
    "comparative degree": "comparative",
    "comp.": "comparative",
    "positive degree": "positive",
    "pos.": "positive",
    "positive outcome": "positive",
    "negative outcome": "negative",
    "equative degree": "equative",
    "indicative and subjunctive": "indicative subjunctive",
    "indicative/subjunctive": "indicative subjunctive",
    "second/third-person": "second-person third-person",
    "singular/plural": "singular plural",
    "in the singular": "singular",
    "in the plural": "plural",
    "in singular": "singular",
    "in plural": "plural",
    "dual/plural": "dual plural",
    "collective or in the plural": "collective in-plural",
    "in the plural": "in-plural",
    "(with savrtsobi)": "with-savrtsobi",
    "plural and definite singular": ["plural", "definite singular"],
    "feminine singular & neuter plural": ["feminine singular", "neuter plural"],
    "partitive/illative": "partitive illative",
    "oblique/nominative": "oblique nominative",
    "nominative/vocative/dative/strong genitive":
    ["nominative vocative dative", "strong genitive"],
    "non-attributive": "not-attributive predicative",
    "not predicative": "not-predicative attributive",
    "attributive use": "attributive",
    "nominative/vocative/instrumental":
    "nominative vocative instrumental",
    "nominative/vocative/strong genitive/dative":
    ["nominative vocative dative", "strong genitive"],
    "nominative/vocative/dative": "nominative vocative dative",
    "accusative/genitive/partitive/illative":
    "accusative genitive partitive illative",
    "nominative/vocative/accusative/genitive":
    "nominative vocative accusative genitive",
    "accusative/genitive/locative": "accusative locative genitive",
    "accusative/genitive/dative/instrumental":
    "accusative genitive dative instrumental",
    "accusative/genitive/dative": "accusative genitive dative",
    "accusative/genitive": "accusative genitive",
    "masculine/feminine/neuter": "masculine feminine neuter",
    "feminine/neuter/masculine": "masculine feminine neuter",
    "feminine/neuter": "feminine neuter",
    "present participle and present tense": ["present participle", "present"],
    "present participle and gerund": ["present participle", "gerund"],
    "past indicative and past participle": "past indicative participle",
    "all-gender": "",
    "gender unknown": "",
    "all-case": "",
    "accusative/dative": "accusative dative",
    "accusative-singular": "accusative singular",
    "accusative-genitive": "accusative genitive",
    "dative/locative/instrumental": "dative locative instrumental",
    "dative/vocative/locative": "dative vocative locative",
    "dative/prepositional": "dative prepositional",
    "dative and ablative": "dative ablative",
    "nominative/vocative/dative and strong genitive":
    ["nominative vocative dative", "strong genitive"],
    "nominative/vocative/accusative":
    "nominative vocative accusative",
    "nominative/vocative": "nominative vocative",
    "nominative/oblique": "nominative oblique",
    "nominative/locative": "nominative locative",
    "nominative/instrumental": "nominative instrumental",
    "nominative/genitive/dative/accusative":
    "nominative genitive dative accusative",
    "nominative/genitive/dative": "nominative genitive dative",
    "nominative/genitive/accusative/vocative":
    "nominative genitive accusative vocative",
    "nominative/genitive/accusative":
    "nominative genitive accusative",
    "nominative/dative": "nominative dative",
    "nominative/accusative/vocative/instrumental":
    "nominative accusative vocative instrumental",
    "nominative/accusative/vocative": "nominative accusative vocative",
    "nominative/accusative/nominative/accusative":
    "nominative accusative",
    "nominative/accusative/nominative": "nominative accusative",
    "nominative/accusative/locative": "nominative accusative locative",
    "nominative/accusative/genitive/dative":
    "nominative accusative genitive dative",
    "nominative/accusative/genitive": "nominative accusative genitive",
    "nominative/accusative/genitive": "nominative accusative genitive",
    "nominative/accusative/dative": "nominative accusative dative",
    "nominative/accusative": "nominative accusative",
    "perfective/imperfective": "perfective imperfective",
    "neg. perfective": "perfective negative",
    "neg. continuous": "continuative negative",
    "negative form": "negative",
    "negating particle": "negative particle",
    "negation": "negative",
    "continuous": "continuative",
    "continuously": "continuative",
    "animate/inanimate": "animate inanimate",
    "animate or inanimate": "animate inanimate",
    "locative/vocative": "locative vocative",
    "prospective/agentive": "prospective agentive",
    "genitive/accusative": "genitive accusative",
    "singular/duoplural": "singular dual plural",
    "duoplural": "dual plural",
    "1st/3rd": "first-person third-person",
    "first/second/third-person":
    "first-person second-person third-person",
    "first/third/third-person": "first-person third-person",
    "first-/third-person": "first-person third-person",
    "first/second/second-person": "first-person second-person",
    "first/third-person": "first-person third-person",
    "first-person/second-person": "first-person second-person",
    "first-person/third-person": "first-person third-person",
    "first-person singular/third-person singular":
    "first-person third-person singular",
    "first-person singular/third-person plural":
    ["first-person singular", "third-person plural"],
    "affirmative/negative": "affirmative negative",
    "first-, second-, third-person singular subjunctive present":
    "first-person second-person third-person singular subjunctive present",
    "first-, second- and third-person singular present indicative":
    "first-person second-person third-person singular present indicative",
    "first- and third-person": "first-person third-person",
    "female equivalent": "feminine",
    "male equivalent": "masculine",
    "direct/oblique/vocative": "direct oblique vocative",
    "definite/plural": "definite plural",
    "singular definite and plural": ["singular definite", "plural"],
    "agent noun": "agent",
    "agent noun of": "agent form-of",
    "Principle verb suffix": "agent suffix nominal-from-verb nominalization",
    "third active infinitive": "infinitive-iii active",
    "third passive infinitive": "infinitive-iii passive",
    "British spelling": "UK",
    "Roman spelling": "Roman",
    "Perso-Arabic spelling": "Perso-Arabic",
    "Arabic/Persian": "Arabic Persian",
    "Urdu spelling": "Urdu",
    "Urdu spelling of": "Urdu alt-of",
    "eye dialect": "pronunciation-spelling",
    "feminist or eye dialect": "pronunciation-spelling",
    "enclitic and proclitic": "enclitic proclitic",
    "Enclitic contractions": "enclitic contraction",
    "Proclitic contractions": "proclitic contraction",
    "enclitic form": "enclitic",
    "Devanagari script form of": "alt-of Devanagari",
    "Hebrew script": "Hebrew",
    "Mongolian script": "Mongolian",
    "Bengali script": "Bengali",
    "script": "character",
    "letters": "letter",
    "digits": "digit",
    "characters": "character",
    "symbols": "symbol",
    "tetragrams": "symbol",
    "letter names": "letter-name",
    "Cyrillic-script": "Cyrillic",
    "Latin-script": "Latin",
    "obsolete form of": "alt-of obsolete",
    "former word": "obsolete",
    "obs.": "obsolete",
    "etymological spelling": "nonstandard",
    "(Dialectological)": "dialectal",
    "(hence past tense)": "past",
    "(ablative case)": "ablative",
    "(genitive case)": "genitive",
    "(suffix conjugation)": "suffix",
    "(suffix conjugation)": "prefix",
    "(nós)": "with-nos",
    "(eu)": "with-eu",
    "(vós)": "with-vós",
    "(vos)": "with-vos",
    "(voseo)": "with-voseo",
    "(tu)": "with-tu",
    "(tú)": "with-tú",
    "(eles)": "with-eles",
    "(elas)": "with-elas",
    "(vocês)": "with-vocês",
    "(usted)": "with-usted",
    "(ustedes)": "with-ustedes",
    "(yo)": "with-yo",
    "(ele, ela, also used with tu and você?)":
    "with-ele with-ela with-tu with-você",
    "(eles and elas, also used with vocês and others)":
    "with-eles with-elas with-vocês with-others",
    "(você)": "with-você",
    "(hiri)": "with-hiri",
    "(hura)": "with-hura",
    "(zuek)": "with-zuek",
    "(vós, sometimes used with vocês)": "with-vós with-vocês",
    "(gij)": "with-gij",
    "(tu, sometimes used with você)": "with-tu with-você",
    "(\u00e9l, ella, also used with usted)":
    "with-él with-ella with-usted",
    "(ellos, ellas, also used with ustedes)":
    "with-ellos with-ellas with-ustedes",
    "(nosotros, nosotras)": "with-nosotros with-nosotras",
    "(vosotros, vosotras)": "with-vosotros with-vosotras",
    "(vosotros or vosotras)": "with-vosotros with-vosotras",
    "(ele and ela, also used with você and others)":
    "with-ele with-ela with-você with-others",
    "(ele, ela, also used with tu and você)":
    "with-ele with-ela with-tu with-você",
    "former reform[s] only": "",
    "no conj.": "",  # XXX conjunctive/conjugation/indeclinable? dot/Latvian
    "no construct forms": "no-construct-forms",
    "no nominative plural": "no-nominative-plural",
    "no supine": "no-supine",
    "no perfect": "no-perfect",
    "no perfective": "no-perfect",
    "no genitive": "no-genitive",
    "no superlative": "no-superlative",
    "no sup.": "no-superlative",
    "no comparative": "no-comparative",
    "no comp.": "no-comparative",
    "no singulative": "no-singulative",
    "no plural": "no-plural",
    "no singular": "plural-only",
    "not comparable": "not-comparable",
    "incomparable": "not-comparable",
    "not generally comparable": "usually not-comparable",
    "plurale tantum": "plural-only",
    "plurare tantum": "plural-only",
    "pluralia tantum": "plural-only",
    "singulare tantum": "singular-only",
    "normally plural": "plural-normally",
    "used mostly in plural form": "plural-normally",
    "used mostly in the plural form": "plural-normally",
    "most often in the plural": "plural-normally",
    "used especially in the plural form": "plural-normally",
    "suffixed pronoun": "suffix pronoun",
    "possessive suffix": "possessive suffix",
    "possessive determiner": "possessive determiner",
    "pronominal state": "pronominal-state",
    "nominal state": "nominal-state",
    "form i": "form-i",
    "form ii": "form-ii",
    "form iii": "form-iii",
    "form iv": "form-iv",
    "form v": "form-v",
    "form vi": "form-vi",
    "form vii": "form-vii",
    "form viii": "form-viii",
    "form ix": "form-ix",
    "form x": "form-x",
    "form xi": "form-xi",
    "form xii": "form-xii",
    "form xiii": "form-xiii",
    "form iq": "form-iq",
    "form iiq": "form-iiq",
    "form iiiq": "form-iiiq",
    "form ivq": "form-ivq",
    "form I": "form-i",
    "form-I": "form-i",
    "form II": "form-ii",
    "form-II": "form-ii",
    "form III": "form-iii",
    "form-III": "form-iii",
    "form IV": "form-iv",
    "form-IV": "form-iv",
    "form V": "form-v",
    "form-V": "form-v",
    "form VI": "form-vi",
    "form-VI": "form-vi",
    "form VII": "form-vii",
    "form-VII": "form-vii",
    "form VIII": "form-viii",
    "form-VIII": "form-viii",
    "form IX": "form-ix",
    "form-IX": "form-ix",
    "form X": "form-x",
    "form-X": "form-x",
    "form XI": "form-xi",
    "form-XI": "form-xi",
    "form XII": "form-xii",
    "form-XII": "form-xii",
    "form XIII": "form-xiii",
    "form-XIII": "form-xiii",
    "form Iq": "form-iq",
    "form IIq": "form-iiq",
    "form IIIq": "form-iiiq",
    "form IVq": "form-ivq",
    "class 1": "class-1",
    "class 1a": "class-1a",
    "class 2": "class-2",
    "class 2a": "class-2a",
    "class 3": "class-3",
    "class 4": "class-4",
    "class 5": "class-5",
    "class 6": "class-6",
    "class 7": "class-7",
    "class 8": "class-8",
    "class 9": "class-9",
    "class 9a": "class-9a",
    "class 10": "class-10",
    "class 10a": "class-10",
    "class 11": "class-11",
    "class 12": "class-12",
    "class 13": "class-13",
    "class 14": "class-14",
    "class 15": "class-15",
    "class 16": "class-16",
    "class 17": "class-17",
    "class 18": "class-18",
    "m-wa class": "class-1 class-2",
    "m-mi class": "class-3 class-4",
    "ma class": "class-5 class-6",
    "ki-vi class": "class-7 class-8",
    "n class": "class-9 class-10",
    "u class": "class-11 class-12 class-14",
    "ku class": "class-15",
    "pa class": "class-16",
    # "ku class": "class-17",  # XXX how to distinguish from class-15?
    "mu class": "class-18",
    "first declension": "first-declension",
    "second declension": "second-declension",
    "third declension": "third-declension",
    "fourth declension": "fourth-declension",
    "fifth declension": "fifth-declension",
    "first conjugation": "first-conjugation",
    "second conjugation": "second-conjugation",
    "third conjugation": "third-conjugation",
    "fourth conjugation": "fourth-conjugation",
    "fifth conjugation": "fifth-conjugation",
    "sixth conjugation": "sixth-conjugation",
    "seventh conjugation": "seventh-conjugation",
    "stress pattern 1": "stress-pattern-1",
    "stress pattern 2": "stress-pattern-2",
    "stress pattern 3": "stress-pattern-3",
    "stress pattern 3a": "stress-pattern-3a",
    "stress pattern 3b": "stress-pattern-3b",
    "stress pattern 4": "stress-pattern-4",
    "preposition stressed": "stressed-preposition",
    "tone I": "tone-1",
    "tone II": "tone-2",
    "type p": "type-p",
    "type P": "type-p",
    "type u": "type-u",
    "type U": "type-u",
    "type up": "type-up",
    "type UP": "type-up",
    "type a": "type-a",
    "type A": "type-a",
    "type ua": "type-ua",
    "type UA": "type-ua",
    "form of": "form-of",
    "ordinal form of": "ordinal form-of",
    "ordinal form of the number": "ordinal form-of",
    "ordinal form of": "ordinal form-of",
    "ordinal of": "ordinal form-of",
    "ordinal number corresponding to the cardinal number":
    "ordinal form-of",
    "ordinal form of the cardinal number": "ordinal form-of",
    "the ordinal number": "ordinal alt-of",
    "used in the form": "used-in-the-form",
    "upper case": "uppercase",
    "upper-case": "uppercase",
    "lower case": "lowercase",
    "lower-case": "lowercase",
    "mixed case": "mixedcase",
    "mixed-case": "mixedcase",
    "capital": "uppercase",
    "verb form i": "verb-form-i",
    "verb form ii": "verb-form-ii",
    "pi'el construction": "construction-pi'el",
    "pa'el construction": "construction-pa'el",
    "pa'al construction": "construction-pa'al",
    "hif'il construction": "construction-hif'il",
    "hitpa'el construction": "construction-hitpa'el",
    "hitpu'al construction": "construction-hitpu'al",
    "pu'al construction": "construction-pu'al",
    "nif'al construction": "construction-nif'al",
    "huf'al construction": "construction-huf'al",
    "peal construction": "construction-peal",
    "verbal noun": "nominal-from-verb nominalization",
    "Verbal derivations": "verb",
    "abstract noun": "abstract-noun",
    "concrete verb": "concrete",
    "concrete verbs": "concrete",
    "genitive singular as substantive": "genitive singular substantive",
    "female names": "feminine proper-noun",
    "proper name": "proper-noun",
    "proper noun": "proper-noun",
    "proper nouns": "proper-noun",
    "usually in the": "usually",
    "usually in the negative": "usually with-negation",
    "non-scientific usage": "non-scientific",
    "krama inggil": "honorific",
    "krama andhap": "humble",
    "krama-ngoko": "informal",
    "ngoko": "informal",
    "McCune–Reischauer": "McCune-Reischauer",  # Dash type differs
    "gender indeterminate": "gender-neutral",
    "singular only": "singular singular-only",
    "not used in plural": "singular-only singular",
    "singularonly": "singular-only",
    "plural only": "plural plural-only",
    "imperative only": "imperative-only",
    "in general sense": "broadly",
    "by extension": "broadly",
    "by metonymy": "metonymically",
    "by synecdoche": "synecdoche",
    "by semantic narrowing": "narrowly",
    "by semantic widening": "broadly",
    "strict sense": "strict-sense",
    "baby talk": "baby-talk",
    "middle infinitive": "middle-infinitive",
    "first infinitive": "infinitive-i",
    "third-person form of the long first infinitive of":
    "third-person infinitive-i-long form-of",
    "second infinitive": "infinitive-ii",
    "second active infinitive": "infinitive-ii active",
    "second passive infinitive": "infinitive-ii passive",
    "third infinitive": "infinitive-iii",
    "third active infinitive": "infinitive-iii active",
    "third passive infinitive": "infinitive-iii passive",
    "fourth infinitive": "infinitive-iv",
    "fifth infinitive": "infinitive-v",
    "subjunctive I": "subjunctive-i",
    "subjunctive II": "subjunctive-ii",
    "morse code": "morse-code",
    "with odd-syllable stems": "with-odd-syllable-stems",
    "old orthography": "archaic",
    "Brazilian ortography": "Brazilian",
    "European ortography": "European",
    "with noun phrase": "with-noun-phrase",
    "contracted dem-form": "contracted-dem-form",
    "contractions": "contraction",
    "Yale cen": "Yale",
    "subjective pronoun": "subjective pronoun",
    "subject": "subjective",
    "subject form": "subjective",
    "‘subject form’": "subjective",  # tw.t/Egyptian
    # "object": "objective",  # XXX problems with "An object of ... form_of
    "possessive pronoun": "possessive pronoun without-noun",
    "demostrative": "demonstrative",  # eeteeṇú/Phalura
    "revised jeon": "revised-jeon",
    "form used before": "archaic",
    "front vowel harmony variant": "front-vowel",
    "romanization of": "alt-of romanization",
    "romanisation of": "alt-of romanization",
    "archaic spelling of": "alt-of archaic",
    "obsolete typography of": "alt-of obsolete",
    "obsolete spelling of": "alt-of obsolete",
    "rare spelling of": "alt-of rare",
    "superseded spelling of": "alt-of archaic",
    "pronunciation spelling of": "alt-of pronunciation-spelling",
    "pronunciation spelling": "pronunciation-spelling",
    "eye dialect spelling of": "alt-of pronunciation-spelling",
    "alternative or obsolete spelling of":
    "alt-of obsolete alternative",
    "obsolete and rare": "obsolete rare",
    "American spelling": "US",
    "Canadian spelling": "Canada",
    "name of the": "alt-of name",  # E.g. .. letter | Latin-script letter
    "alternative name of": "alt-of alternative name",
    "alternative name for": "alt-of alternative name",
    "nonstandard spelling of": "alt-of nonstandard",
    "US standard spelling of": "alt-of US standard",
    "US spelling of": "alt-of US",
    "alternative typography of": "alt-of alternative",
    "polytonic spelling of": "alt-of polytonic",
    "variant of": "alt-of alternative",
    "uncommon spelling of": "alt-of uncommon",
    "alternative typographic spelling of": "alt-of alternative",
    "especially in typeface names": "typography",
    "alternative spelling": "alternative",
    "alternative spelling of": "alt-of alternative",
    "alternative form": "alternative",
    "alternative form of": "alt-of alternative",
    "alternative term for": "alt-of alternative",
    "alternative stem of": "alt-of stem alternative",
    "alternative letter-case form of": "alt-of",
    "medieval spelling of": "alt-of obsolete",
    "post-1930s Cyrillic spelling of": "alt-of standard Cyrillic",
    "pre-1918 spelling of": "alt-of dated",
    "pre-1945 period": "dated",
    "Plural pre-1990": "dated plural",
    "Plural pre-1990 reformed spelling": "plural",
    "unreformed spelling": "nonstandard",
    "Switzerland and Liechtenstein standard spelling of":
    "alt-of Switzerland Liechtenstein standard",
    "form removed with the spelling reform of 2012; superseded by":
    "alt-of dated",
    "excessive spelling of": "alt-of excessive",
    "exaggerated degree of": "alt-of exaggerated",
    "defective spelling of": "alt-of misspelling",
    "verbal noun of": "nominal-from-verb nominalization form-of",
    "alternative verbal noun of":
    "form-of alternative nominal-from-verb nominalization",
    "alternative conjugation of": "alt-of alternative",
    "abbreviation of": "alt-of abbreviation",
    "short for": "alt-of abbreviation",
    "short form": "short-form",
    "eclipsed form of": "alt-of abbreviation eclipsis",
    "apocopic form of": "alt-of abbreviation apocope",
    "apocopic form": "apocope abbreviation",
    "apocopated": "apocope abbreviation",
    "apocopate": "apocope abbreviation",
    "h-prothesized form of": "alt-of prothesis",
    "acronym of": "alt-of abbreviation",
    "acronym": "abbreviation",
    "initialism of": "alt-of abbreviation initialism",
    "contraction of": "alt-of abbreviation contraction",
    "IUPAC 3-letter abbreviation for": "alt-of abbreviation",
    "IUPAC 3-letter abbreviation of": "alt-of abbreviation",
    "IUPAC 2-letter abbreviation of": "alt-of abbreviation",
    "IUPAC 2-letter abbreviation for": "alt-of abbreviation",
    "IUPAC 1-letter abbreviation of": "alt-of abbreviation",
    "IUPAC 1-letter abbreviation for": "alt-of abbreviation",
    "symbol for": "alt-of symbol",
    "praenominal abbreviation of": "alt-of abbreviation praenominal",
    "ellipsis of": "alt-of ellipsis abbreviation",
    "clipping of": "alt-of clipping abbreviation",
    "X-system spelling of": "alt-of X-system",
    "H-system spelling of": "alt-of H-system",
    "Pinyin transcription of": "alt-of Pinyin",
    "Rōmaji transcription of": "alt-of Rōmaji",
    "romaji": "Rōmaji",
    "rōmaji": "Rōmaji",
    "visual rendering of Morse code for":
    "alt-of visual-rendering morse-code",
    "soft mutation of": "form-of soft-mutation",
    "causes soft mutation": "triggers-soft-mutation",
    "non-Oxford British English standard spelling of":
    "alt-of nonstandard UK",
    "Nil standard spelling of": "alt-of UK standard",
    "nasal mutation of": "form-of nasal-mutation",
    "nasal mutation": "nasal-mutation",
    "triggers nasalization": "triggers-nasal-mutation",
    "triggers nasal mutation": "triggers-nasal-mutation",
    "mixed mutation of": "form-of mixed-mutation",
    "mixed mutation": "mixed-mutation",
    "aspirate mutation of": "form-of aspirate-mutation",
    "aspirate mutation": "aspirate-mutation",
    "British misspelling": "misspelling British",
    "misspelling of": "alt-of misspelling",
    "deliberate misspelling of": "alt-of misspelling deliberate",
    "common misspelling of": "alt-of misspelling",
    "misconstruction of": "alt-of misconstruction",
    "misconstructed": "misconstruction",
    "ungrammatical": "misconstruction",
    "Latin spelling of": "alt-of Latin",
    "Latn": "Latin",
    "Late Anglo-Norman spelling of": "alt-of Anglo-Norman",
    "Jawi spelling of": "alt-of Jawi",
    "Hanja form of": "alt-of hanja",
    "Hanja form? of": "alt-of hanja",
    "Hanja": "hanja",
    "Hán tự form of": "alt-of Hán-tự",
    "Newa Spelling": "Newa",
    "Glagolitic spelling of": "alt-of Glagolitic",
    "front vowel variant of": "alt-of front-vowel",
    "front-vowel variant of": "alt-of front-vowel",
    "euphemistic spelling of": "alt-of euphemistic",
    "euphemistic reading of": "alt-of euphemistic",
    "euphemism": "euphemistic",
    "transliterated Russian pet forms": "transliteration Russian",
    "Transliteration": "transliteration",
    "Cyrillic spelling of": "alt-of Cyrillic",
    "Cyrillic spelling": "Cyrillic",
    "British standard spellingh of": "alt-of UK standard",
    "British and Canada standard spelling of":
    "alt-of UK Canada standard",
    "Britain and Ireland standard spelling of":
    "alt-of Britain Ireland standard",
    "Britain and New Zealand standard spelling of":
    "alt-of Britain New-Zealand standard",
    "Britain and Canada spelling of": "alt-of Britain Canada",
    "Baybayin spelling of": "alt-of Baybayin",
    "Arabic spelling of": "alt-of Arabic",
    "Arabic (Eastern)": "Arabic-Indic",
    "Eastern Arabic": "Arabic-Indic",
    "Arabic (Western)": "Arabic",
    "Formerly standard spelling of": "alt-of archaic",
    "informal spelling of": "alt-of informal",
    "Yañalif spelling of": "alt-of Yañalif",
    "traditional orthography spelling of": "alt-of traditional",
    "traditional and simplified": "traditional simplified",
    "Taraškievica spelling of": "alt-of Taraškievica",
    "Post-1930s Cyrillic spelling of": "alt-of Cyrillic",
    "Britain spelling of": "alt-of Britain",
    "linguistically informed spelling of": "alt-of literary",
    "Chinese spelling of": "alt-of China",
    "Mongolian spelling of": "alt-of Mongolian",
    "Leet spelling of": "alt-of Leet Internet",
    "leetspeak": "Leet Internet",
    "bulletin board system slang": "slang Internet",
    "combining form of": "in-compounds form-of",
    "combining form": "in-compounds",
    "compound of": "compound-of",
    "compound of gerund of": "compound-of",
    "compound of imperative (noi form) of": "compound-of",
    "compound of imperative (tu form) of": "compound-of",
    "compound of imperative (vo form) of": "compound-of",
    "compound of imperative (voi form) of": "compound-of",
    "compound of imperative of": "compound-of",
    "compound of indicative present of": "compound-of",
    "compound of masculine plural past participle of": "compound-of",
    "compound of past participle of": "compound-of",
    "compound of present indicative of": "compound-of",
    "compound of plural past participle of": "compound-of",
    "compound of second-person singular imperative of": "compound-of",
    "compound of the gerund of": "compound-of",
    "compound of the imperfect": "compound-of",
    "compound of the infinitive": "compound-of",
    "synonym of": "synonym synonym-of",
    "same as": "synonym synonym-of",
    "topicalized form of": "topicalized form-of",
    "form of": "form-of",
    "inflected form of": "form-of",
    "lenited form of": "lenition form-of",
    "triggers lenition": "triggers-lenition",
    "triggers lenition of a following consonant-initial noun":
    "triggers-lenition",
    "triggers eclipsis": "triggers-eclipsis",
    "triggers h-prothesis": "triggers-h-prothesis",
    "causes aspirate mutation": "triggers-aspirate-mutation",
    "triggers aspiration": "triggers-aspirate-mutation",
    "triggers mixed mutation": "triggers-mixed-mutation",
    # XXX Could be more accurate
    "triggers mixed mutation except of forms of bod": "triggers-mixed-mutation",
    "humurous": "humorous error-misspelling",
    "humourous": "humorous",
    "sarcasm": "sarcastic",
    "ecclesiastic or ironic": "Ecclesiastical ironic",
    "figuratively or literally": "figuratively literally",
    "figuratively and literary": "figuratively literary",
    "figuative": "figuratively",
    "humorously": "humorous",
    "jocular": "humorous",
    "humorous or euphemistic": "humorous euphemistic",
    "may sound impolite": "possibly impolite",
    "northern dialects": "dialectal",
    "dialectism": "dialectal",
    "archaic or loosely": "archaic broadly",
    "archaic or poetic": "archaic poetic",
    "archeic or poetic": "archaic poetic",
    "archaic or phrasal": "archaic idiomatic",
    "archaic or dialectal": "archaic dialectal",
    "archaic or literary": "archaic literary",
    "archaic or Britain": "archaic Britain",
    "archaic or nonstandard": "archaic nonstandard",
    "most dialects": "dialectal",
    "most dialects of Ripuarian": "dialectal",
    "some dialects": "dialectal",
    "some compounds": "idiomatic in-compounds",
    "as a modifier in compound words": "in-compounds",
    "used in compound adjectives": "in-compounds adjective",
    "used attributively": "attributive",
    "used predicatively": "predicative",
    "used substatively": "substantive",
    "unofficial spelling": "nonstandard",
    "rare nonstandard spellings": "rare nonstandard",
    "as rare alternative form": "rare",
    "nonstandard spellings": "nonstandard",
    "capitalised": "capitalized",
    "always capitalized": "capitalized",
    "sometimes not capitalized": "usually capitalized",
    "sometimes capitalized": "sometimes capitalized",
    "Sometimes capitalized": "sometimes capitalized",
    "rhetorical question": "rhetoric",
    "old-fashioned": "dated",
    "rarely used": "rare",
    "rarely": "rare",
    "present tense seldom used": "present-rare",
    "often in place of present tense": "present often",
    "conjugated non-suppletively in the present tense": "irregular",
    "now rare": "archaic",
    "in the past tense": "past",
    "fixed expressions": "idiomatic",
    "formulaic": "idiomatic",
    "several set phrases": "idiomatic",
    "now colloquial": "colloquial",
    "now colloquial and nonstandard": "colloquial nonstandard",
    "colloquial or Min Nan": "colloquial Min-Nan",
    "colloquial or jargon": "colloquial jargon",
    "Wiktionary and WMF jargon": "jargon Internet",
    "colloquially": "colloquial",
    "fossil word": "archaic",
    "brusque": "impolite",
    "verbs": "verb",
    "prepositions": "prepositional",
    "postpositions": "postpositional",
    "interjections": "interjection",
    "Abbreviations": "abbreviation",
    "abbreviations": "abbreviation",
    "variants": "variant",
    "Ordinal": "ordinal",
    "ordinals": "ordinal",
    "local use": "regional",
    "more generally": "broadly",
    "loosely": "broadly",
    "broad sense": "broadly",
    "hypocoristic": "familiar",
    "familiar or childish": "familiar childish",
    "to a male": "addressee-masculine",
    "to a man": "addressee-masculine",
    "to a female": "addressee-masculine",
    "to a woman": "addressee-feminine",
    "hyperbolic": "excessive",
    "18th century": "obsolete",
    "9th century": "obsolete",
    "17th century": "obsolete",
    "10th century": "obsolete",
    "16th century": "obsolete",
    "14th century": "obsolete",
    "12th century": "obsolete",
    "post-classical": "obsolete",
    "early 20th century": "archaic",
    "20th century": "dated",
    "mid-20th century": "dated",
    "mid-19th century": "obsolete",
    "before 20th century": "obsolete",
    "19th to 20th century": "archaic",
    "15th century": "obsolete",
    "11th century": "obsolete",
    "until early 20th century": "obsolete",
    "since the 16th century": "dated",
    "late 16th century": "obsolete",
    "late 14th century": "obsolete",
    "in usage until 20th century": "obsolete",
    "in the 17th century": "obsolete",
    "in the 16 th century": "obsolete",
    "in Scots until the seventeenth century": "obsolete",
    "in 10th century": "obsolete",
    "early 17th century": "obsolete",
    "chiefly 18th century": "obsolete",
    "chiefly 12th century": "obsolete",
    "before 16th century": "obsolete",
    "attested in the 16th century": "obsolete",
    "5th century": "obsolete",
    "19th to early 20th century": "obsolete",
    "19th-mid 20th century": "obsolete",
    "19 the century": "obsolete",
    "19th-early 20th century": "obsolete",
    "19th century": "obsolete",
    "1776-19th century": "obsolete",
    "15th-16th century": "obsolete",
    "Medieval and Early Modern Greek regional":
    "Medieval-Greek Early-Modern-Greek dialectal",
    "collectively": "collective",
    "collective or singulative": "collective singulative",
    "used formally in Spain": "Spain",
    "nouns": "noun",
    "phrases": "phrase",
    "with the particle lai": "with-lai",
    "adjectives": "adjective",
    "related adjective": "adjective",
    "adj": "adjective",
    "adj.": "adjective",
    "adv": "adverb",
    "adverbs": "adverb",
    "augmentatives": "augmentative",
    "pejoratives": "pejorative",
    "perjorative": "pejorative error-misspelling",
    "pejorative or colloquial": "pejorative colloquial",
    "non-standard since 2012": "nonstandard",
    "colloquialism": "colloquial",
    "non-standard since 1917": "nonstandard",
    "conditional mood": "conditional",
    "figurative": "figuratively",
    "compound words": "compound",
    "form of address": "term-of-address",
    "term of address": "term-of-address",
    "as a term of address": "term-of-address",
    "direct address": "term-of-address",
    "face-to-face address term": "term-of-address",
    "address": "term-of-address",
    "endearingly": "endearing",
    "elliptically": "ellipsis",
    "elegant": "formal",  # Elegant or Formal Thai
    "nonce word": "nonce-word",
    "neologism or slang": "neologism slang",
    "attributively": "attributive",
    "poetic term": "poetic",
    "poetic meter": "poetic",
    "in certain phrases": "in-certain-phrases",
    "deprecated template usage": "",
    "deprecated": "proscribed",
    "diacritical mark": "diacritic",
    "inflection of": "form-of",
    "mainland China": "Mainland-China",
    "spelling in China": "China",
    "rhyming slang": "slang",
    "prison slang": "slang",
    "criminal slang": "slang",
    "fandom slang": "slang lifestyle",
    "furry fandom": "slang lifestyle",
    "manga fandom slang": "slang manga",
    "real estate slang": "slang real-estate",
    "gay slang": "slang LGBT",
    "urban slang": "slang urbanism",
    "lolspeak": "humorous Internet",
    "Usenet": "Internet",
    "one-termination adjective": "one-termination",
    "two-termination adjective": "two-termination",
    "three-termination adjective": "three-termination",
    "semelefactive": "semelfactive error-misspelling",
    "invariant": "invariable",
    "followed by to": "with-to",
    "taking a to-infinitive": "with-to with-infinitive",
    "with bare infinitive": "with-infinitive",
    "direct object": "direct-object",
    "indirect object": "indirect-object",
    "transitive with of": "transitive-with-of",
    "with of": "with-of",
    "with on": "with-on",
    "with down": "with-down",
    "with up": "with-up",
    "with a personal pronoun": "with-personal-pronoun",
    "with an indirect object": "with-indirect-object",
    "with comparatives": "with-comparative",
    "with definite article": "with-definite-article",
    "etc.": "usually",
    "regardless of gender": "gender-neutral",
    "gender-neutral (or multigendered)": "gender-neutral",
    "ditransitive for the second object": "ditransitive",
    "double transitive": "ditransitive",
    "transitive or ditransitive": "transitive ditransitive",
    "number": "numeral",
    "numerals": "numeral",
    "Tally marks": "Tally-marks numeral",
    "+ 3rd-pers.": "with-third-person",
    "Historical": "historical",
    "hist.": "historical",
    "antiquity": "historical",
    "ideophone": "ideophonic",
    "Alsatian (Low Alemannic German)": "Alsatian Alemannic",
    "all sects": "",
    "adessive + 3rd person singular + ~":
    "with-adessive with-third-person-singular postpositional",
    "inessive + 3rd person singular + ~":
    "with-inessive with-third-person-singular postpositional",
    "~ (olemassa)": "with-olemassa",
    "3rd person singular": "third-person singular",
    "+ genitive + 3rd person singular + passive present participle":
    "with-genitive with-third-person-singular with-passive-present-participle",
    "genitive + 3rd-pers. singular + 1st infinitive":
    "with-genitive with-third-person-singular with-infinitive-i",
    "+ direct object in accusative + 3rd infinitive in illative":
    "transitive with-accusative with-infinitive-iii-illative",
    "+ direct object in accusative + past participle in translative or partitive":
    "transitive with-accusative with-past-participle-translative with-past-participle-partitive",
    "+ past participle in translative or partitive":
    "with-past-participle-translative with-past-participle-partitive",
    "active past part. taitanut": "",
    "+ passive past participle in translative":
    "with-passive-past-participle-translative",
    "+ passive past participle in partitive":
    "with-passive-past-participle-partitive",
    "+ active past participle in translative":
    "with-past-participle-translative",
    "+ adjective in ablative or allative":
    "with-adjective with-ablative with-allative",
    "in indicative or conditional mood": "in-indicative in-conditional",
    "in negative sentences": "with-negation",
    "in negative clauses": "with-negation",
    "using Raguileo Alphabet": "Raguileo-Alphabet",
    "using Raguileo alphabet": "Raguileo-Alphabet",
    "using Raguileo and Unified Alphabet": "Raguileo-Alphabet Unified",
    "transliterated": "transliteration",
    "though not derogative": "",
    "women generally don't accept to be called this way": "offensive",
    "transitive sense": "transitive",
    "in intransitive meaning": "intransitive",
    "initial change reduplication": "reduplication",
    "initial change reduplication with syncope": "reduplication syncope",
    "initial change with syncope": "syncope",
    "syncopated": "syncope",
    "reduplication with syncope": "reduplication syncope",
    "introducing subjunctive hortative": "subjunctive hortative",
    "nominative and vocative plural animate": "nominative vocative",
    "with diaeresis to indicate disyllabilicity": "",
    "aphaeretic variant": "variant",
    "mediopassive voice": "mediopassive",
    "ALL": "",
    "archaic or hypercorrect": "archaic hypercorrect",
    "as a diacritic": "diacritic",
    "as a gerund": "gerund",
    "as a calque": "calque",
    "pseudoarchaic": "dated",
    "surnames": "surname",
    "all countable senses": "countable",
    "attributive form of pyjamas": "attributive",
    "ordinal form": "ordinal",
    "ordinal form of twelve": "ordinal",
    "conjugative of": "conjugative-of",
    "correlative of": "correlative-of",
    "modern nonstandard spellings": "modern nonstandard",
    "non-standard": "nonstandard",
    "non-standard form of": "nonstandard alt-of",
    "nonanimate": "inanimate",
    "nominalized verb": "nominalization",
    "nominalized": "nominalization",
    "n-v": "verb-from-nominal",
    "v-n": "nominal-from-verb nominalization",
    "n-n": "nominal-from-nominal nominalization",
    "v-v": "verb-from-verb",
    "uses -j- as interfix": "interfix-j",
    "eulogistic": "poetic",  # XXX not really, implies praise
    "prev": "previous",
    "normal usage": "",  # In some Russian words with two heads
    "professional usage": "",  # In some Russian words with two heads
    "?? missing information.": "",
    "unknown comparative": "",
    "unknown accent pattern": "",
    "?? conj.": "",
    "pres. ??": "",
    "past ??": "",
    "see usage notes": "",
    "no known Cyrillic variant": "",
    "no first-person singular present": "no-first-person-singular-present",
    "no first-person singular preterite": "no-first-person-singular-preterite",
    "no third-person singular past historic":
    "no-third-person-singular-past-historic",
    "‘dependent’": "dependent",  # sn/Egyptian
    "‘independent’": "independent",  # ntf/Egyptian
    "eum": "hangeul",  # Apparently synonym for the Korean alphabet
    "classifiers": "classifier",
    "discourse particle": "discourse particle",
    "discourse": "discourse",  # hum/Phalura
    "numeral tones": "numeral-tones",
    "alphabetic tones": "alphabetic-tones",
    "class A infixed pronoun": "infix pronoun class-A",
    "class B infixed pronoun": "infix pronoun class-B",
    "class C infixed pronoun": "infix pronoun class-C",
    "class B & C infixed pronoun": "infix pronoun class-B class-C",
    "class I": "class-i",
    "class II": "class-ii",
    "class III": "class-iii",
    "class N": "class-n",
    "class a-i": "class-a-i",
    "to multiple people": "addressee-plural",
    "to one person": "addressee-singular",
    "actor focus": "actor-focus",
    "indirect actor trigger": "actor-indirect",
    "usually feminine": "feminine-usually",
    "but usually feminine": "feminine-usually",
    "usually masculine": "masculine-usually",
    "but usually masculine": "masculine-usually",
    "but rarely feminine": "masculine-usually",
    "but rarely masculine": "feminine-usually",
    "requires negation": "with-negation",
    "inalienable–class I agreement": "inalienable class-i",
    "inalienable–class II agreement": "inalienable class-ii",
    "inalienable–class III agreement": "inalienable class-iii",
    "no first-person singular past historic":
    "no-first-person-singular-past-historic",
    "no definite forms": "no-definite",
    "no definite form": "no-definite",
    "no diminutive": "no-diminutive",
    "no second-person singular imperative":
    "no-second-person-singular-imperative",
    "no simple past": "no-simple-past",
    "no feminine form": "no-feminine",
    "no infinitive": "no-infinitive",
    "no longer productive": "idiomatic",
    "no past tense": "no-simple-past",
    "no third-person singular present": "no-third-person-singular-present",
    "nominalized adjective following adjective declension":
    "nominalization adjective-declension",
    # XXX this could be more accurate
    "truncative except after q and r": "truncative",  # Greenlandic
    "of masculine singular": "masculine singular nominative",
    "of masculine plural": "masculine plural nominative",
    "of feminine singular": "feminine singular nominative",
    "of feminine plural": "feminine plural nominative",
    "officialese": "bureaucratese",
    "+ optionally: adjective in accusative case + neuter noun in accusative case":
    "definite neuter with-accusative",
    "non-emphatic": "unemphatic",
    "not productive": "idiomatic",
    "passive with different sense": "passive",
    "active with different sense": "active",
    "+ von": "with-von",  # außerhalb/German
    "Symbol:": "symbol",
    "a reflexive": "reflexive",
    "active/stative": "active stative",
    "always postpostive": "postpositional",
    "postpositive": "postpositional",
    "defininte plural": "definite plural",  # aigg/Westrobothnian
    "determinative of": "determinative-of",
    "lenites": "lenition",
    "followed by indirect relative": "with-indirect-relative",
    "inflected like": "inflected-like",
    "locational noun": "locative",
    "mass noun": "uncountable",
    "negated": "past participle negative",  # fera/Westrobothnian
    "neutral": "gender-neutral",  # countryman/English
    "never clause-initial": "not-clause-initial",
    "primarily": "mostly",
    "definite articulation": "definite",  # boatsi/Aromanian
    "p-past": "passive simple past",
    "ppp": "passive past participle",
    "plural:": "plural",
    "synonyms:": "synonym",
    "quantified:": "quantified",
    "sentence case": "sentence-case",
    "set phrase from Classical Chinese": "idiomatic Classical-Chinese",
    "the plural of": "plural-of",
    "the reflexive case of": "reflexive-of",
    "the reflexive form of": "reflexive-of",
    "unipersonal": "",  # Too rare to track
    "used only after prepositions": "after-preposition",
    "appended after imperfective form": "in-compounds with-imperfect",
    "universal or indefinite": "universal indefinite",
    "el/ea": "third-person singular",  # o/Romanian/Verb
    "ele/ei": "third-person plural",  # vor/Romanian/Verb
    "vestre": "slang",  # type of backslang in Argentine and Uruguayan Spanish
    "onomatopoeia": "onomatopoeic",
    "ITERATIVE": "iterative",
    "OPTATIVE": "optative",
    "IMPERFECTIVE": "imperfective",
    "PERFECTIVE": "perfective",
    "(FIXME)": "error-fixme",
    "Conversive": "conversive",
    "Cholula and Milpa Alta": "Cholula Milpa-Alta",
    "Surnames": "surname",
    "metaphorically": "metaphoric",
    "hypothetic": "hypothetical",
    "Kinmen and Penghu Hokkien": "Kinmen-Hokkien Penghu-Hokkien",
}

# Whole-description translations.  A description is looked up here in its
# entirety, before it is split by comma.  Because the keys themselves may
# contain commas and semicolons, an entry can never match only a fragment of
# a description (not even a comma- or semicolon-separated fragment).
#
# NOTE(review): an empty value presumably means "produce no tags for this
# description" - confirm against the code that consumes this table.
xlat_descs_map = {
    # --- Descriptions of required complements / co-occurring forms ---
    "with there, or dialectally it, as dummy subject":
        "with-dummy-subject",
    "+ location in inessive, adessive + vehicle in elative, often with pois":
        "with-inessive with-adessive with-elative",
    "+ accusative +, Genitive":
        "with-accusative with-genitive",
    "with genitive, instrumental or dative case":
        "with-genitive with-instrumental with-dative",
    "+ illative, allative, (verbs) 3rd infinitive in illative":
        "with-illative with-allative with-infinitive-iii-illative",
    "(inessive or adessive) + 3rd-pers. sg. + an adverb":
        "with-inessive with-adessive with-third-person-singular with-adverb",
    "+ partitive for agent, + allative for target":
        "with-partitive with-allative",
    "+ infinitive; in indicative or conditional mood":
        "with-infinitive with-indicative with-conditional",
    # The value below intentionally keeps its commas byte-for-byte; this
    # table is applied before the comma split.
    (
        "transitive, auxiliary + first infinitive, "
        "active past part. taitanut or tainnut"
    ):
        "transitive, auxiliary, with-infinitive-i",
    # XXX very incomplete
    (
        "elative + 3rd person singular + noun/adjective in nominative "
        "or partitive or personal + translative"
    ):
        "with-elative with-third-person-singular",
    "group theory, of a group, semigroup, etc.":
        "group theory",

    # --- Mutation triggers ---
    "Triggers lenition of b, c, f, g, m, p, s. Triggers eclipsis of d, t.":
        "triggers-lenition triggers-eclipsis",
    # XXX this could be more precise
    (
        "‘his’ and ‘its’ trigger lenition; ‘her’ triggers /h/-prothesis; "
        "‘their’ triggers eclipsis"
    ):
        "triggers-lenition triggers-h-prothesis triggers-eclipsis",
    "for = elative; for verbs action noun in elative":
        "with-action-noun-in-elative",

    # --- Descriptions mapped to the empty string ---
    # de/Danish
    (
        "as a personal pronoun, it has the forms dem in the oblique case "
        "and deres in the genitive; as a determiner, it is uninflected"
    ):
        "",
    # spinifer/Latin
    "nominative masculine singular in -er; two different stems":
        "",
    "^(???) please indicate transitivity!":
        "",
    "^(???) please provide spelling!":
        "",
    "please provide plural":
        "",
    "please provide feminine":
        "",
    "please provide feminine plural":
        "",
    "the passive, with different sense":
        "",
    "the active, with different sense":
        "",

    # --- Bare gender abbreviations and leftovers ---
    "m":
        "masculine",
    "f":
        "feminine",
    "classic":
        "",
}

# Words that are interpreted as tags at the beginning of a linkage.
# Maps the word as written to the space-separated tag(s) it stands for.
# Every key appears exactly once: a repeated key in a dict literal is
# silently overwritten by the later entry, which hides editing mistakes.
linkage_beginning_tags = {
    # Slash-separated pairs expand to both tags.
    "factitive/causative": "factitive causative",
    "factive/causative": "factive causative",
    # Verb forms / moods / voices.
    "factive": "factive",
    "factitive": "factive",  # Not sure if same or different as factive
    "causative": "causative",
    "reflexive": "reflexive",
    "frequentative": "frequentative",
    "optative": "optative",
    "affirmative": "affirmative",
    "cohortative": "cohortative",
    "applicative": "applicative",
    "stative": "stative",
    "passive": "passive",
    # Part-of-speech words.
    "adjective": "adjective",
    "verb": "verb",
    "noun": "noun",
    "adverb": "adverb",
}

# Tags that allow parse_alt_or_inflection_of() to treat a gloss as a form_of:
# the form must contain at least one tag from this set.  Only the implicit
# form-of (tags followed by "of") consults this set.
# Entries are kept in alphabetical order, grouped by initial letter.
form_of_tags = {
    # a
    "abessive", "ablative", "absolutive", "accusative", "adessive",
    "adjectival", "adverbial", "affirmative", "agentive", "allative",
    "aorist", "applicative", "aspirate-mutation", "attributive",
    "augmentative", "augmented",
    # b-c
    "benefactive", "causal-final", "causative", "collective", "comitative",
    "comparative", "conditional", "conditional-i", "conditional-ii",
    "connegative", "construct", "contemplative", "counterfactual",
    # d-e
    "dative", "debitive", "definite", "delative", "demonstrative",
    "desiderative", "diminutive", "distal", "dual", "durative",
    "elative", "endearing", "equative", "ergative", "essive",
    # f
    "feminine", "first-declension", "first-person",
    "form-i", "form-ii", "form-iii", "form-iiiq", "form-iiq", "form-iq",
    "form-iv", "form-ivq", "form-ix", "form-v", "form-vi", "form-vii",
    "form-viii", "form-x", "form-xi", "form-xii", "form-xiii",
    "fourth-person", "frequentative", "future",
    # g-h
    "gender-neutral", "genitive", "gerund", "hortative",
    # i-j
    "illative", "imperative", "imperfect", "imperfective", "impersonal",
    "inclusive", "indefinite", "indicative", "inessive",
    "infinitive", "infinitive-i", "infinitive-ii", "infinitive-iii",
    "infinitive-iv", "infinitive-v",
    "instructive", "instrumental", "interrogative", "iterative", "jussive",
    # l-m
    "lative", "locative", "masculine", "mediopassive", "middle-infinitive",
    "mixed-mutation",
    # n-o
    "nasal-mutation", "negative", "neuter", "nominal", "nominative",
    "non-past", "oblique", "offensive", "optative", "ordinal",
    # p
    "participle", "partitive", "passive", "past", "paucal", "perfect",
    "perfective", "pluperfect", "plural", "polite", "possessed",
    "possessive", "potential", "predicative", "prepositional", "present",
    "preterite", "prolative", "pronominal", "prospective", "proximal",
    # q-r
    "quotative", "reflexive", "root",
    # s
    "second-declension", "second-person", "singular", "singulative",
    "soft-mutation", "stative", "stressed", "subjective", "subjunctive",
    "subjunctive-i", "subjunctive-ii", "sublative", "superessive",
    "superlative", "supine",
    # t-v
    "terminative", "third-declension", "third-person", "transgressive",
    "translative", "unstressed", "vocative",
    # Not listed: "objective" (original note: "2084 objective") - beware of
    # "An object of ..." (e.g., song/English)
}

# Tags that allow parse_alt_or_inflection_of() to treat a gloss as an alt_of:
# the form must contain at least one tag from this set.  Only the implicit
# alt-of (tags followed by "of") consults this set.
alt_of_tags = {
    "abbreviation", "capitalized", "colloquial", "contracted",
    "dialectal", "historic", "hypercorrect", "initialism",
    "literary", "lowercase", "misconstruction", "nonstandard",
    "obsolete", "proscribed", "standard", "uppercase",
}

# Valid tag categories / attributes.  The comment above each entry describes
# the category or lists example tags belonging to it.
tag_categories = {
    # Grammatical case (also direct-object, indirect-object)
    "case",
    # Semantic gender (often also implies class)
    "gender",
    # Inflection class (Bantu languages, Japanese, etc)
    "class",
    # Singular, plural, dual, paucal, ...
    "number",
    # Something related to addressee
    "addressee",
    # possessive, possessed, alienable, inalienable
    "possession",
    # distal, proximal, inclusive, exclusive
    "deictic",
    # active, passive, middle
    "voice",
    # Aspect of verbs (perfective, imperfective, habitual, ...)
    "aspect",
    # cohortative, commissive, conditional, conjunctive, declarative,
    # hortative, imperative, indicative, infinitive, interrogative, jussive,
    # optative, potential, prohibitive, subjunctive.
    # Note that interrogative is also used for, e.g., pronouns.
    "mood",
    # present, past, imperfect, perfect, future, pluperfect
    "tense",
    # intransitive, transitive, ditransitive, ambitransitive
    "transitivity",
    # reflexive, reciprocal
    "participants",
    # positive, comparative, superlative
    "degree",
    # Triggers something (e.g., mutation) in some context
    "trigger",
    # Indicates related term / word / classifier / counter / aux
    "related",
    # Provides some detail
    "detail",
    # Provides a modified form (e.g., abbreviation, mutation)
    "mod",
    # Specifies part-of-speech
    "pos",
    # Specifies derivation (nominalization, agent, nominal-from-verb, ...)
    "derivation",
    # Specifies pragmatics (e.g., stressed/unstressed)
    "pragmatic",
    # Describes some phonetic aspect
    "phonetic",
    # Describes some lexical/typographic aspect
    "lexical",
    # person, personal, impersonal, animate, inanimate, (virile, nonvirile?)
    "category",
    # dialectal, formal, informal, slang, vulgar
    "register",
    # simple, compound
    "misc",
    # gradation or qualifier
    "gradation",
    # Co-occurs with something
    "with",
    # Word position or order
    "order",
    # TBD, currently not clear
    "XXX",
}

# Set of all valid tags
valid_tags = set([
    "-i",   # Japanese inflection type
    "-na",  # Japanese inflection type
    "-nari",  # Japanese inflection type
    "-tari",  # Japanese inflection type
    "abbreviation",
    "abessive",   # Case
    "ablative",   # Case
    "absolute",   # Case, Bashkir, Swedish [absolute reflexive]
    "absolutive",  # Case (patient or experience of action)
    "abstract",
    "abstract-noun",
    "accent/glottal",
    "accusative",
    "active",
    "actor-focus",  # Tagalog
    "actor-indirect",  # Tagalog
    "actor-i",  # Ilocano verbs
    "actor-ii",
    "actor-iii",
    "actor-iv",
    "additive",  # Greenlandic: adds suffix after last letter of stem
    "addressee-feminine",
    "addressee-masculine",
    "addressee-plural",
    "addressee-singular",
    "adessive",  # Case
    "adjectival",
    "adjective",
    "adjective-declension",
    "admirative",  # Verb form in Albanian
    "adnominal",
    "adverb",
    "adverbial",
    "adversative",
    "affirmative",
    "affix",
    "after-preposition",  # Word used only after preposition nich/Lower Sorbian
    "agent",
    "agentive",
    "alienable",  # Alienable possession; Choctaw, Ojibwe, Navajo, Tokelauan etc
    "allative",  # Case
    "allative-i",
    "allative-ii",
    "alphabetic-tones",
    "also",
    "alt-of",
    "alternative",
    "ambitransitive",
    "analytic",
    "anaphorically",
    "animate",
    "animal-not-person",  # Refers to animal (e.g., Russian anml suffix)
    "anterior",  # French seems to have "past anterior" tense
    "aorist",  # Verb form (perfective past)  E.g., Latin, Macedonian
    "aorist-ii",  # Albanian
    "apocope",
    "applicative",  # Verb form
    "approximative",  # Noun form (case?), e.g., марксизм/Komi-Zyrian
    "archaic",
    "article",
    "aspirate-mutation",
    "assertive",  # Verb form (e.g., Korean)
    "associative",  # Case (e.g., Quechua)
    "ateji",
    "attributive",
    "augmentative",
    "augmented",
    "autonomous",
    "aux-haben",
    "aux-sein",
    "auxiliary",
    "baby-talk",
    "before-lenited-fh",  # Next word starts with lenited fh (Irish)
    "before-past",  # Used before the past tense (Irish)
    "before-vowel",  # next words starts with vowel (in pronunciation)
    "benefactive",  # Case (beneficiary of an action)
    "broadly",
    "būdinys",
    "calque",
    "cangjie-input",
    "canonical",  # Used to mark the canonical word form from the head tag
    "capitalized",
    "capitalized",
    "cardinal",
    "caritive",  # Case (lack or absense of something), марксизм/Komi-Zyrian
    "catenative",
    "causal-final",
    "causative",  # Verb aspect (e.g., Japanese); Cause/Reason (Korean)
    "character",
    "chiefly",
    "childish",
    "circumstantial",  # Verb form, e.g., patjaṉi
    "class",  # Used as a head prefix in San Juan Quajihe Chatino (class 68 etc)
    "class-1",    # Inflectional classes (e.g., Bantu languages), cf. gender
    "class-10",
    "class-10a",
    "class-11",
    "class-12",
    "class-13",
    "class-14",
    "class-15",
    "class-16",
    "class-17",
    "class-18",
    "class-1a",
    "class-2",
    "class-2a",
    "class-3",
    "class-4",
    "class-5",
    "class-6",
    "class-7",
    "class-8",
    "class-9",
    "class-9a",
    "class-A",  # e.g., Old Irish affixed pronoun classes
    "class-B",
    "class-C",
    "class-i",  # Choctaw
    "class-ii",
    "class-iii",
    "class-n",  # Chickasaw
    "class-a-i",  # Akkadian
    "classifier",
    "clipping",
    "clitic",
    "coactive",  # Verbs in Guaraní
    "cohortative",  # Verb form: plea, imploring, wish, intent, command, purpose
    "collective",  # plural interpreted collectively
    "colloquial",
    "comitative",  # Case
    "common",   # XXX gender (Swedish, Danish), also meaning commonly occurring
    "comparable",
    "comparative",
    "completive",
    "composition",
    "compound",  # Can indicate verb forms formed with auxiliary
    "compound-of",
    "concessive",  # Verb form
    "conclusive",  # Verb form (e.g., Bulgarian)
    "concrete",  # Slavic verbs
    "conditional",  # Verb mood
    "conditional-i",  # Verb mood (German)
    "conditional-ii",  # Verb mood (German)
    "conjugation-type",  # Used to indicate form really is conjugation class
    "conjugative",  # Verb form, e.g., উঘাল/Assamese
    "conjugative-of",  # Korean
    "conjunct",  # Verb form, e.g., gikaa/Ojibwe
    "conjunct-incorporating",
    "conjunct-non-incorporating",
    "conjunctive",  # Verb mood (doubt: wish, emotion, possibility, obligation)
    "conjunction",  # Used in Phalura conjunctions, relative pronouns
    "connective",  # Group of verb forms in Korean
    "connegative",
    "consecutive",  # Verb form, e.g., થૂંકવું/Gujarati, noun form марксизм
    "construct",  # Apparently like definite/indefinite (e.g., Arabic)
    "construction-hif'il",
    "construction-hitpa'el",
    "construction-hitpu'al",
    "construction-huf'al",
    "construction-nif'al",
    "construction-pa'al",
    "construction-pa'el",
    "construction-peal",  # Aramaic, Classical Syriac
    "construction-pi'el",
    "construction-pu'al",
    "contemplative",
    "contemporary",
    "contingent",  # Verb form, উঘাল/Assamese
    "continuative",  # Verb aspect (actions still happening; e.g., Japanese)
    "contracted",
    "contracted-dem-form",
    "contraction",
    "contrastive",  # Apparently connective verb form in Korean
    "converb",  # Verb form or special converb word
    "converb-i",  # e.g., խածնել/Armenian
    "converb-ii",
    "conversive",  # Verb form/type, at least in Swahili, reverse meaning?
    "coordinating",
    "copulative",
    "correlative-of",
    "cot-caught-merger",
    "count-form",  # Nominal form in Belarusian
    "countable",
    "counter",
    "counterfactual",
    "dated",
    "dative",
    "debitive",  # need or obligation (XXX is this same as "obligational" ???)
    "declension-pattern-of",
    "declinable",
    "defective",
    "deferential",
    "definite",
    "definition",
    "definitive",  # XXX is this used same as "definite", opposite indefinite?
    "deictically",
    "delative",  # Case
    "deliberate",
    "demonstrative",
    "demonym",
    "dependent",
    "derogatory",
    "desiderative",  # Verb mood
    "destinative",  # Case, marks destination/something destined (e.g. Hindi)
    "determinate",  # Polish verbs (similar to "concrete" in Russian?)
    "determinative-of",  # Korean
    "determiner",  # Indicates determiner; Korean determiner verb forms?
    "deuterotonic",  # e.g., dofuissim/Old Irish
    "diacritic",
    "dialectal",
    "digit",
    "diminutive",
    "diptote",  # Noun having two cases (e.g., Arabic)
    "direct",  # Apparently like case form (e.g., Hindi, Punjabi)
    "direct-object",
    "directional",  # Case?, e.g., тэр/Mongolian
    "directive",  # Case (locative/nearness), e.g. Basque, Sumerian, Turkic
    "disapproving",
    "discourse",  # At lest some Ancient Greek particles
    "disjunctive",
    "distal",  # Demonstrative referent is far, cf. proximal, obviative
    "distributive",  # Case in Quechua? (is this case or e.g. determiner?)
    "ditransitive",
    "dual",       # two in number, cf. singular, trial, plural
    "dubitative",  # Verb form (e.g., Bulgarian)
    "durative",  # Verb form
    "eclipsis",
    "egressive",  # Case?  e.g., дворец/Komi-Zyrian
    "elative",  # Case
    "ellipsis",
    "emphatic",
    "empty-gloss",
    "enclitic",
    "endearing",
    "epic",
    "epicene",
    "equative",  # Case (indicates something is like something else)
    "ergative",
    "error-fixme",
    "error-lua-exec",
    "error-lua-timeout",
    "error-unknown-tag",
    "error-misspelling",
    "error-unrecognized-form",
    "especially",
    "essive",  # Case
    "essive-formal",
    "essive-instructive",
    "essive-modal",
    "ethnic",
    "eumhun",
    "euphemistic",
    "evidential",  # Verb form (e.g., Azerbaijani)
    "exaggerated",
    "excessive",
    "exclusive",  # inclusive vs. exclusive first-person; case in Quechua
    "exessive",  # Case (transition away from state)
    "expectative",  # Verb form, e.g., ϯϩⲉ/Coptic
    "expletive",
    "expressively",
    "extended",  # At least in some Bulgarian forms, e.g. -лив
    "extinct",  # Uses for taxonomic entries, indicates species is extinct
    "factitive",  # Not sure if same or different as factive
    "factive",
    "familiar",  # Formality/politeness degree of verbs etc
    "feminine",  # Grammatical gender, masculine, neuter, common, class-* etc.
    "feminine-usually",  # m/f, but usually feminine
    "fifth-conjugation",
    "fifth-declension",
    "figuratively",
    "finite-form",  # General category for finite verb forms
    "first-conjugation",
    "first-declension",
    "first-person",
    "focalising",  # Verb form, e.g., ϯϩⲉ/Coptic
    "form-i",
    "form-ii",
    "form-iii",
    "form-iiiq",
    "form-iiq",
    "form-iq",
    "form-iv",
    "form-ivq",
    "form-ix",
    "form-of",
    "form-v",
    "form-vi",
    "form-vii",
    "form-viii",
    "form-x",
    "form-xi",
    "form-xii",
    "form-xiii",
    "formal",  # Formality/politeness degree of verbs etc
    "four-corner",
    "fourth-conjugation",
    "fourth-declension",
    "fourth-person",
    "frequentative",
    "front-vowel",
    "fusioning",  # Greenlandic suffixes
    "future",  # Verb tense
    "future-i",  # Verb tense (German, e.g., vertippen)
    "future-ii",  # Verb tense (German)
    "gender-neutral",
    "general",  # In general temporal participle, e.g., talamaq/Azerbaijani
    "genitive",
    "gerund",
    "goal",  # Verb form, e.g., উঘাল/Assamese
    "grade-1-kanji",
    "grade-2-kanji",
    "grade-3-kanji",
    "grade-4-kanji",
    "grade-5-kanji",
    "grade-6-kanji",
    "habitual",  # Verb aspect
    "hangeul",
    "hanja",  # Han character script (Chinese characters) to write Korean
    "hard",  # sladek/Slovene
    "hellenism",
    "hidden-n",   # Mongolian declension
    "hiragana",  # Japanese syllabic spelling for native words
    "historic",  # Grammatical tense/mood for retelling past events
    "historical",  # Relating to history
    "honorific",  # Formality/politeness degree of verbs etc
    "hortative",  # Verb mood
    "humble",
    "humorous",
    "hypernym",
    "hypercorrect",
    "hyponym",
    "hypothetical",  # Verb mood (e.g., Japanese)
    "ideophonic",
    "idiomatic",
    "illative",  # Case
    "imperative",
    "imperative-only",
    "imperfect",  # Past tense in various languages
    "imperfective",  # Verb aspect (action not completed)
    "impersonal",  # Verb form, e.g., Portuguese impersonal infinitive
    "impolite",  # Politeness degree of verbs etc
    "in-certain-phrases",
    "in-compounds",
    "in-plural",
    "in-indicative",
    "in-conditional",
    "in-variation",  # E.g. crush,WiFi,lhama,tsunami/Portuguese,
    "inalienable",  # Inablienable possession: body parts etc; Choctaw, Ojibwe..
    "inanimate",
    "including",
    "includes-article",  # Word form includes article
    "inclusive",  # inclusive vs. exclusive first-person
    "indeclinable",
    "indefinite",
    "independent",  # Verb form, e.g., gikaa/Ojibwe
    "indeterminate",  # Polish verbs (similar to "abstract" in Russian)
    "indicative",
    "indirect",  # Verb form, e.g., بونا/
    "indirect-object",
    "inessive",  # Case
    "inferential",  # Verb form (w/ aorist), e.g. -ekalmak/Turkish
    "infinitive",  # Verb form
    "infinitive-da",  # Estonian
    "infinitive-i",  # Finnish
    "infinitive-i-long",  # Finnish
    "infinitive-ii",  # Finnish
    "infinitive-iii",  # Finnish
    "infinitive-iv",  # Finnish
    "infinitive-ma",  # Estonian
    "infinitive-v",  # Finnish
    "infinitive-zu",  # German
    "infix",
    "inflected",  # Marks inflected form, constrast to uninflected (e.g., Dutch)
    "inflected-like",  # seleen/Limburgish
    "informal",  # Formality/politeness degree of verbs etc
    "initialism",
    "injunctive",  # Verb form, e.g., पुस्नु/Nepali
    "instructive",
    "instrumental",  # Case
    "iterative",
    "intensifier",  # In participle of intensification, e.g., talamaq
    "intentive",  # Verb form, e.g., patjaṉi
    "interfix-j",  # Greenlandic: adds -j- after long vowel
    "interjection",
    "interrogative",
    "intransitive",
    "invariable",
    "invertive",  # Case? (e.g., Сотрэш/Adyghe)
    "involuntary",  # Verb form, e.g., khitan/Indonesian
    "ionic",  # XXX ???
    "ironic",
    "irrealis",  # Verb form, e.g., たたかう/Japanese
    "irregular",  # Generally of written word forms
    "irregular-pronunciation",  # Kanji or similar pronunciation irregular
    "italics",  # Used in head form to indicate italic character variant
    "jargon",
    "jussive",  # Verb mood for orders, commanding, exhorting (subjunctively)
    "kanji",  # Used in word head for some Japanese symbols
    "katakana",  # Japanese syllabic spelling for foreign words
    "krama",
    "krama-ngoko",
    "kyūjitai",  # Traditional Japanese Kanji (before 1947)
    "lative",  # Case, e.g., тіл/Khakas
    "lenition",
    "letter",
    "letter-name",
    "limitative",  # Verb form, e.g., ϯϩⲉ/Coptic
    "literally",
    "literary",
    "locative",
    "long-form",  # Verb forms, отъпоустити/Old Church Slavonic
    "lowercase",
    "mainly",
    "majestic",
    "masculine",  # Grammatial gender see feminine, neuter, common, class-* etc.
    "masculine-usually",  # m/f, but usually masculine
    "material",
    "matronymic",
    "medial",
    "mediopassive",
    "metaphoric",
    "metonymically",
    "metrically",  # Used in Sanskrit word heads
    "mi-form",  # Malagasy verbs
    "middle",  # At least middle voice (cf. active, passive)
    "middle-infinitive",
    "mildly",
    "misconstruction",  # Used for e.g. incorrect Latin plurals
    "misspelling",
    "mixed",
    "mixed-mutation",
    "mixedcase",
    "mnemonic",
    "modal",
    "modern",
    "modified",  # Noun form, e.g., dikko/Sidamo (similar to person?)
    "monopersonal",
    "morpheme",
    "morse-code",
    "mostly",
    "motive-form",  # Verb form for Korean (e.g., 조사하다)
    "multiple-possession",
    "mutated",
    "mutation",
    "name",
    "narrowly",
    "nasal-mutation",
    "natural",
    "necessitative",  # Verb form in some languages
    "negated-with",  # Indicates how word is negated, e.g., ϣⲗⲏⲗ/Coptic
    "negative",  # Indicates negation of meaning (nominal or verbal)
    "neologism",
    "neuter",  # Gender, cf. masculine, feminine, common, class-* etc.
    "next",  # Next value in sequence (number, letter, etc.)
    "no-absolute",           # No aboslute form; femri/Icelandic
    "no-auxiliary",	     # No auxiliary needed for verb (?); lavarsi/Italian
    "no-comparative",        # The word has no comparative form
    "no-construct-forms",    # The word has no construct forms
    "no-definite",	     # Danish "no definite forms"
    "no-diminutive",         # No diminutive form (goeste/West Flemish)
    "no-feminine",	     # No feminine form (ácimo/Spanish)
    "no-first-person-singular-past-historic",  # Italian
    "no-first-person-singular-present",  # Spanish (only third person?)
    "no-first-person-singular-preterite",  # Spanish (only third person?)
    "no-genitive",           # The word has no genitive form
    "no-imperfective",       # No imperfective form (исходить/Russian)
    "no-infinitive",	     # No infinitive form (måste/Swedish)
    "no-nominative-plural",  # The word has no nominative plural
    "no-perfect",            # The word has no perfect/perfective aspect/form
    "no-plural",             # The word has no plural form (= singular only)
    "no-past-participle",    # The word has no past participle
    "no-present-participle",  # The word has no present participle
    "no-second-person-singular-imperative",  # No imperative
    "no-senses",             # Added synthesized sense when no senses extracted
    "no-simple-past",        # No simple past form
    "no-singulative",	     # no singulative form
    "no-superlative",        # The word has no superlative form
    "no-supine",             # The word has no supine form
    "no-third-person-singular-past-historic",  # Italian
    "no-third-person-singular-present",  # mittagessen/German
    "nominal",
    "nominal-from-nominal",  # Greenlandic: suffix derives nominal from nominal
    "nominal-from-verb",  # Greenlandic: suffix derives nominal from verb
    "nominal-state",
    "nominalization",
    "nominative",
    "nomino-accusative",  # 𒀀𒄿𒅖/Hittite XXX same as nominate/accusative???
    "non-aspectual",  # E.g., भूलना/Hindi
    "non-durative",  # non-durative sentence, e.g., ϣⲗⲏⲗ/Coptic
    "non-finite",  # General category of non-finite verb forms
    "non-numeral",  # Assamese noun forms
    "non-past",  # Verb tense (e.g., Korean)
    "non-scientific",
    "non-subject",    # ishno'/Chickasaw
    "nonce-word",
    "nondeferential",
    "nonstandard",
    "nonvirile",
    "not-attributive",
    "not-clause-initial",
    "not-comparable",
    "not-mutable",
    "not-predicative",
    "not-translated",
    "noun",
    "now",
    "numeral",  # Numeral part-of-speech; also Assamese noun forms
    "numeral-tones",
    "obligational",  # Verb form (e.g., Azerbaijani)
    "objective",  # Case, used as an object
    "oblique",  # Apparently like case form (e.g., Hindi)
    "obsolete",
    "obviative",  # Referent is not the most salient one, cf. proximal, distal
    "offensive",
    "often",
    "one-termination",
    "only",
    "onomatopoeic",
    "optative",
    "ordinal",
    "parasynonym",
    "parenthetic",
    "participle",
    "particle",
    "partitive",  # Case
    "passive",
    "past",
    "patronymic",
    "paucal",
    "pausal",
    "pejorative",
    "perfect",  # Tense/verb form, e.g., in Finnish
    "perfect-i",  # E.g., talamaq/Azerbaijani
    "perfect-ii",  # E.g., talamaq/Azerbaijani
    "perfective",  # Verb aspect
    "person",
    "personal",  # Verb form (e.g., Portuguese personal infinitive)
    "phoneme",
    "phrasal",
    "phrase",
    "physical",
    "pin-pen-merger",
    "place",
    "pluperfect",  # Tense/verb form
    "pluperfect-i",  # воштыны'/Udmurt
    "pluperfect-ii",
    "plural",     # Number, cf. sigular, dual, trial
    "plural-of",  # Plural form of something
    "plural-of-variety",  # Plural indicating different kinds of things (Arabic)
    "plural-only",  # Word only manifested in plural in this sense
    "plural-normally",  # Usually plural, but singular may be possible
    "poetic",
    "polite",  # Politeness degree of verbs etc
    "polytonic",
    "positive",  # degree of comparison; opposite of negation for verb forms
    "possessed",  # Marks object that is possessed, cf. possessive
    "possessive",  # Possession (marks who possesses)
    "possessive-sg",  # Possessive with single object possessed
    "possessive-pl",  # Possessive with multiple objects possessed
    "possibly",
    "postpositional",
    "potential",  # Verb mood
    "praenominal",
    "precursive",  # Verb form, e.g. ϯϩⲉ/Coptic
    "predicative",
    "prefix",
    "preparative",  # Verb form, e.g., ᠵᡠᠸᡝᡩᡝᠮᠪᡳ/Manchu
    "prepositional",
    "present",
    "present-rare",  # Present tense is rare
    "presumptive",  # Verb mood, e.g., गरजना/Hindi
    "preterite",  # Verb tense (action in the past, similar to simple past)
    "preterite-present",  # word where present&preterite forms look opposite
    "preterite-i",  # воштыны/Udmurt
    "preterite-ii",
    "pretonic",  # Precedes stressed syllable
    "previous",  # Previous value in sequence (number, letter, etc.)
    "proclitic",
    "progressive",  # Verb form, e.g., પચવું/Gurajati
    "prohibitive",  # Verb form (negative imperative), e.g., Old Armenian
    "prolative",
    "pronominal",
    "pronominal-state",
    "pronoun",
    "pronoun-included",
    "pronunciation-spelling",
    "proper-noun",
    "proscribed",
    "prosecutive",  # Case (move along a surface or way); Greenlandic -nnguaq
    "prospective",
    "prothesis",
    "prototonic",  # E.g., dofuissim/Old Irish
    "proximal",  # Demonstrative referent is far, cf. distal, obviative
    "purposive",  # Verb form, e.g., patjaṉi
    "quadral",
    "quantified",  # bat/Jamaican Creole (head form)
    "quotative",  # Verb mood (marks quoted speech keeping orig person/tense)
    "radical",
    "radical+strokes",
    "rare",
    "realis",  # Verb form, e.g., たたかう/Japanese
    "reason",  # Verb form, e.g., উঘাল/Assamese
    "recently",  # Used in Recently complete, e.g., {ligpit,magbukid}/Tagalog
    "reciprocal",  # Mutual action (board sense reflexive)
    "reconstruction",
    "reduced",  # de/Central Franconian (XXX merge with e.g. clipping?)
    "reduplication",
    "reflexive",
    "reflexive-of",  # Reflexive form of something
    "regional",
    "relational",
    "relative",
    "renarrative",  # Verb form (e.g. Bulgarian)
    "replacive",  # Greenlandic suffixes
    "reported",  # Verb forms for reported speech
    "resultative",  # partciple in Armenian (state resulting from action)
    "retronym",
    "revised",  # Used in many Korean words, is this same as revised-jeon?
    "revised-jeon",
    "rhetoric",
    "romanization",
    "root",
    "sarcastic",
    "second-conjugation",
    "second-declension",
    "second-person",
    "secular",  # Contrast with Ecclesiastical, Tham, etc
    "semelfactive",
    "sentence-case",  # дь/Yakut
    "sentence-final",  # Korean verb forms (broad category)
    "sequence",
    "sequential",
    "seventh-conjugation",
    "shinjitai",  # Simplified Japanese Kanji (after 1947)
    "short-form",  # Verb forms, отъпоустити/Old Church Slavonic
    "si-perfective",
    "simple",
    "simplified",
    "simultaneous",  # simultaneous converb, e.g. խածնել/Armenian
    "single-possession",
    "singular",  # Number, cf. plural, dual, trial
    "singular-only",
    "singulative",  # Individuation of a collective or mass noun, like number
    "sixth-conjugation",
    "slang",
    "slur",
    "sociative",  # Case?, e.g., மரம்/Tamil
    "soft",  # najslajši/slovene
    "soft-mutation",  # At least Welsh
    "sometimes",
    "somewhat",
    "special",  # Adverbial verb form in Lithuanian
    "specific",  # In specific temporal participle, e.g., talamaq
    "specifically",
    "standalone",  #  Without a main word (e.g., pronoun/determiner senses)
    "standard",
    "stative",
    "stem",  # Stem rather than full forms
    "stem-primary",  # Primary stem, e.g., दुनु/Nepali
    "stem-secondary",  # Secondary stem, e.g., दुनु/Nepali
    "stress-pattern-1",
    "stress-pattern-2",
    "stress-pattern-3",
    "stress-pattern-3a",
    "stress-pattern-3b",
    "stress-pattern-4",
    "stressed",  # Marked/full form, cf. unstressed
    "stressed-preposition",
    "strict-sense",
    "strokes",
    "strong",
    "subjective",  # Case, used as a subject; subject form
    "subjunctive",
    "subjunctive-i",
    "subjunctive-ii",
    "sublative",
    "subordinate-clause",  # e.g., ϣⲗⲏⲗ/Coptic
    "subordinating",
    "subscript",  # Variant of certain characters
    "substantive",
    "subsuntive",  # Verbs in Guaraní
    "suffix",
    "superessive",  # Case, e.g., Hungarian
    "superlative",
    "superscript",  # Variant of certain characters
    "supine",
    "suppletive",
    "surname",
    "suru",  # Japanese verb inflection type
    "syllable-final",
    "syllable-initial",
    "symbol",
    "syncope",
    "synecdoche",
    "synonym",
    "synonym-of",
    "taboo",
    "tafa-form",  # Malagasy verbs
    "temporal",  # Used in generic/specific temporal participle, e.g., talamaq
    "term-of-address",
    "terminative",  # Verb mood (e.g., Japanese); also case in Quechua?
    "thematic",
    "third-conjugation",
    "third-declension",
    "third-person",
    "three-termination",
    "tone-1",
    "tone-2",
    "topicalized",
    "toponymic",
    "traditional",
    "transcription",
    "transgressive",  # Verb form
    "transitive",
    "transitive-with-of",
    "translation-hub",  # Predictable compound term with translations, no gloss
    "translative",
    "translingual",
    "transliteration",
    "trial",  # Number, cf. singular, dual, plural
    "trigger-actor",  # Actor trigger, e.g., magtinda/Tagalog
    "trigger-benefactive",  # Benefactive trigger
    "trigger-causative",  # Causative trigger
    "trigger-instrument",  # Instrument trigger
    "trigger-locative",  # Locative trigger
    "trigger-measurement",  # Measurement trigger, e.g., rumupok/Tagalog
    "trigger-object",  # Object trigger
    "trigger-referential",   # Referential trigger
    "triggers-aspirate-mutation",  # Welsh
    "triggers-eclipsis",      # Irish
    "triggers-h-prothesis",   # Irish
    "triggers-lenition",      # Irish
    "triggers-mixed-mutation",  # Welsh
    "triggers-nasal-mutation",  # Old Irish
    "triggers-soft-mutation",  # Welsh
    "triptote",  # Noun having three cases (e.g., Arabic)
    "truncative",  # Greenlandic: suffix attaches to last vowel, removing stuff
    "two-termination",
    "type-a",
    "type-p",
    "type-u",
    "type-ua",
    "type-up",
    "unabbreviated",
    "unaugmented",
    "uncommon",
    "uncountable",
    "unemphatic",
    "uninflected",  # uninflected form (e.g., Dutch), cf. inflected
    "universal",  # universally known (καθεμία/Greek)
    "unknown",  # Apparently verb form, e.g., जाँच्नु/Nepali
    "unmodified",  # Noun form, e.g., dikko/Sidamo (similar to person?)
    "unpossessed",  # Not possessed (often omitted); cf. possessed
    "unspecified",  # Used in some conjugation/declension tables
    "unstressed",  # Unstressed (unmarked, weaker) form
    "unstressed-before-j",  # unstressed when next word starts with /j/
    "uppercase",
    "used-in-the-form",
    "usually",
    "utterance-medial",
    "variant",
    "vav-consecutive",
    "vernacular",
    "verb",
    "verb-completement",  # Used in some Chinese words (merged verb+complement?)
    "verb-form-da",  # Estonian da-form
    "verb-form-des",  # Estonian des-form
    "verb-form-i",
    "verb-form-ii",
    "verb-from-nominal",  # Forms verbs from nominals
    "verb-object",  # Used in some Chinese words (verb+object in same entry?)
    "verb-from-verb",  # Suffix modifies verbs producing verbs
    "vigesimal",
    "virile",
    "visual-rendering",
    "voa-form",  # Malagasy verbs
    "vocative",  # Case? used for addressee
    "volitive",
    "volitional",  # Verb mood (e.g., Japanese: suggests, urges, initates act)
    "vulgar",
    "weak",
    "weak-verb",
    "with-a",
    "with-ablative",
    "with-absolute",
    "with-absolutive",
    "with-accusative",
    "with-action-noun-in-elative",
    "with-adessive",
    "with-adjective",
    "with-adverb",
    "with-allative",
    "with-an",
    "with-avec",
    "with-ce",
    "with-che",
    "with-comparative",
    "with-con",
    "with-conditional",
    "with-da",
    "with-dative",
    "with-de",
    "with-definite-article",
    "with-di",
    "with-down",
    "with-ela",
    "with-elas",
    "with-elative",
    "with-ele",
    "with-eles",
    "with-ella",
    "with-ellas",
    "with-ellos",
    "with-en",
    "with-essive",
    "with-eu",
    "with-infinitive-i",
    "with-future",
    "with-for",
    "with-gendered-noun",
    "with-genitive",
    "with-gij",
    "with-hiri",
    "with-hura",
    "with-illative",
    "with-imperfect",
    "with-in",
    "with-indicative",
    "with-indirect-object",
    "with-indirect-relative",
    "with-inessive",
    "with-infinitive",
    "with-instrumental",
    "with-it-dialectally",
    "with-järgi",
    "with-kala",
    "with-kV",  # gǀkxʻâã/ǃXóõ
    "with-lai",
    "with-locative",
    "with-meel",
    "with-negation",
    "with-negative-adj",
    "with-nominative",
    "with-nos",
    "with-nosotras",
    "with-nosotros",
    "with-noun",
    "with-noun-phrase",
    "with-number",
    "with-objective",
    "with-odd-syllable-stems",
    "with-of",
    "with-olemassa",  # Finnish
    "with-on",
    "with-optative",
    "with-others",
    "with-partitive",
    "with-passive-present-participle",
    "with-passive-past-participle-partitive",
    "with-passive-past-participle-translative",
    "with-past",
    "with-past-participle",
    "with-past-participle-translative",
    "with-past-participle-partitive",
    "with-per",
    "with-personal-pronoun",
    "with-por",
    "with-possessive-suffix",
    "with-pour",
    "with-prepositional",
    "with-present",
    "with-savrtsobi",
    "with-simple",
    "with-su",
    "with-subjunctive",
    "with-subordinate-clause",
    "with-sur",
    "with-dummy-subject",
    "with-there",
    "with-third-person",
    "with-third-person-singular",
    "with-infinitive-iii",
    "with-infinitive-iii-abessive",
    "with-infinitive-iii-elative",
    "with-infinitive-iii-illative",
    "with-to",
    "with-translative",
    "with-tu",
    "with-tú",
    "with-up",
    "with-usted",
    "with-ustedes",
    "with-você",
    "with-vocês",
    "with-von",
    "with-vos",
    "with-voseo",
    "with-vosotras",
    "with-vosotros",
    "with-välja",
    "with-vós",
    "with-yo",
    "with-zuek",
    "with-à",
    "with-él",
    "without-article",  # E.g., grüun/Cimbrian
    "without-noun",
    "zhuyin",
    "æ-tensing",
    "има",  # Distinguishes certain verb forms in Macedonian
])

# Consistency check executed when this module is loaded: report every entry
# of form_of_tags that does not also appear in valid_tags.
for tag in form_of_tags - valid_tags:
    print("tags.py:form_of_tags contains invalid tag {}".format(tag))
