import regex
from get_the_hell_out_of_here import remove_escaped_characters


# Maps literal lower-case ``\xhh`` escape text (backslash, "x", two hex digits)
# to the character that byte hh denotes in Windows-1252.  The bytes cp1252
# leaves unassigned (0x81, 0x8d, 0x8f, 0x90, 0x9d) and 0xa0 get no entry, and
# the soft-hyphen byte 0xad maps to the empty string.
x_3_lower_case_escaped = tuple(
    (
        "\\x{:02x}".format(byte),
        "" if byte == 0xAD else bytes([byte]).decode("cp1252"),
    )
    for byte in range(0x80, 0x100)
    if byte not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
)

# Maps literal upper-case ``\xHH`` escape text (backslash, "x", two upper-case
# hex digits) to the character that byte HH denotes in Windows-1252.  The
# bytes cp1252 leaves unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D) and 0xA0 get no
# entry, and the soft-hyphen byte 0xAD maps to a single space.
x_3_upper_case_escaped = tuple(
    (
        "\\x{:02X}".format(byte),
        " " if byte == 0xAD else bytes([byte]).decode("cp1252"),
    )
    for byte in range(0x80, 0x100)
    if byte not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
)
# Maps literal ``\uXXXX`` escape text (upper-case hex digits) to the character
# with that code point.  Covers the characters Windows-1252 assigns to bytes
# 0x80-0xFF, except 0xA0; the soft hyphen (``\u00AD``) maps to a single space.
u_4_upper_case_escaped = tuple(
    ("\\u{:04X}".format(ord(glyph)), " " if glyph == "\xad" else glyph)
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
    )
)
# Maps literal ``\uxxxx`` escape text (lower-case hex digits) to the character
# with that code point.  Covers the characters Windows-1252 assigns to bytes
# 0x80-0xff, except 0xa0; the soft hyphen (``\u00ad``) maps to a single space.
u_4_lower_case_escaped = tuple(
    ("\\u{:04x}".format(ord(glyph)), " " if glyph == "\xad" else glyph)
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
    )
)

# Maps the escaped UTF-8 byte sequence of a character, written as consecutive
# upper-case ``\xHH`` escapes, to that character.  Covers the characters
# Windows-1252 assigns to bytes 0x80-0xFF, except 0xA0; the soft hyphen
# (``\xC2\xAD``) maps to a single space.
x_69_upper_case_escaped = tuple(
    (
        "".join("\\x{:02X}".format(octet) for octet in glyph.encode("utf-8")),
        " " if glyph == "\xad" else glyph,
    )
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
    )
)

# Maps the escaped UTF-8 byte sequence of a character, written as consecutive
# lower-case ``\xhh`` escapes, to that character.  Covers the characters
# Windows-1252 assigns to bytes 0x80-0xff, except 0xa0; the soft hyphen
# (``\xc2\xad``) maps to the empty string.
x_69_lower_case_escaped = tuple(
    (
        "".join("\\x{:02x}".format(octet) for octet in glyph.encode("utf-8")),
        "" if glyph == "\xad" else glyph,
    )
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
    )
)
# Maps the escaped UTF-8 byte sequence of a character, written as consecutive
# backslash + decimal byte value escapes (e.g. ``\195\169``), to that
# character.  Covers the characters Windows-1252 assigns to bytes 0x80-0xff,
# except 0xa0; the soft hyphen (``\194\173``) maps to a single space.
n_escaped = tuple(
    (
        "".join("\\{:d}".format(octet) for octet in glyph.encode("utf-8")),
        " " if glyph == "\xad" else glyph,
    )
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
    )
)
# Maps UTF-8 text that was mis-decoded as Windows-1252 (mojibake) back to the
# intended character.  Every key is derived from the character's UTF-8 bytes,
# so continuation bytes that are invisible when pasted as literals (NBSP 0xa0,
# soft hyphen 0xad, and the bytes cp1252 leaves unassigned, which are kept as
# the C1 control characters with the same code) are always present and each
# key is unique.  Without them, keys such as a bare "Ã" or "â€" collide and a
# legitimate "Ã"/"Å" in the text is replaced by an unrelated letter.
# The leading ("Â", " ") pair stays the first entry; ``char1`` is an alias for
# the same tuple.
wrong_chars = char1 = (("Â", " "),) + tuple(
    (
        "".join(
            chr(octet)
            if octet in (0x81, 0x8D, 0x8F, 0x90, 0x9D)
            else bytes([octet]).decode("cp1252")
            for octet in glyph.encode("utf-8")
        ),
        glyph,
    )
    for glyph in (
        bytes([code]).decode("cp1252")
        for code in range(0x80, 0x100)
        if code not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0, 0xAD)
    )
)

# Maps literal ``0xHH`` text ("0x" followed by two upper-case hex digits) to
# the character that byte HH denotes in Windows-1252.  The bytes cp1252 leaves
# unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D) and 0xA0 get no entry, and the
# soft-hyphen byte 0xAD maps to a single space.
zerox_unescaped_upper = tuple(
    (
        "0x{:02X}".format(byte),
        " " if byte == 0xAD else bytes([byte]).decode("cp1252"),
    )
    for byte in range(0x80, 0x100)
    if byte not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
)

# Maps literal ``0xhh`` text ("0x" followed by two lower-case hex digits) to
# the character that byte hh denotes in Windows-1252.  The bytes cp1252 leaves
# unassigned (0x81, 0x8d, 0x8f, 0x90, 0x9d) and 0xa0 get no entry, and the
# soft-hyphen byte 0xad maps to a single space.
zerox_unescaped_lower = tuple(
    (
        "0x{:02x}".format(byte),
        " " if byte == 0xAD else bytes([byte]).decode("cp1252"),
    )
    for byte in range(0x80, 0x100)
    if byte not in (0x81, 0x8D, 0x8F, 0x90, 0x9D, 0xA0)
)

# Maps zero-padded three-digit decimal character references (``&#032;`` ..
# ``&#126;`` and ``&#161;`` .. ``&#255;``) to the character with that code
# point.  Code points 127-160 have no entry, and ``&#173;`` (soft hyphen) maps
# to a single space.
html_reference = tuple(
    ("&#{:03d};".format(code_point), " " if code_point == 173 else chr(code_point))
    for code_point in range(32, 256)
    if not 127 <= code_point <= 160
)

# Maps named HTML character entities (including a few legacy aliases such as
# ``&brkbar;`` and ``&hibar;``) to their replacement text.
# Every key ends in exactly one ";" so that the entity as it appears in real
# markup (e.g. ``&raquo;`` or ``&Ugrave;``) is matched.
html_entity = (
    ("&sp;", " "),
    ("&excl;", "!"),
    ("&quot;", '"'),
    ("&num;", "#"),
    ("&dollar;", "$"),
    ("&percnt;", "%"),
    ("&amp;", "&"),
    ("&apos;", "'"),
    ("&lpar;", "("),
    ("&rpar;", ")"),
    ("&ast;", "*"),
    ("&plus;", "+"),
    ("&comma;", ","),
    ("&minus;", "-"),
    ("&hyphen;", "-"),
    ("&period;", "."),
    ("&sol;", "/"),
    ("&colon;", ":"),
    ("&semi;", ";"),
    ("&lt;", "<"),
    ("&equals;", "="),
    ("&gt;", ">"),
    ("&quest;", "?"),
    ("&commat;", "@"),
    ("&lsqb;", "["),
    ("&bsol;", "\\"),
    ("&rsqb;", "]"),
    ("&circ;", "^"),
    ("&lowbar;", "_"),
    ("&horbar;", "_"),
    ("&grave;", "`"),
    ("&lcub;", "{"),
    ("&verbar;", "|"),
    ("&rcub;", "}"),
    ("&tilde;", "~"),
    ("&nbsp;", " "),
    ("&iexcl;", "¡"),
    ("&cent;", "¢"),
    ("&pound;", "£"),
    ("&curren;", "¤"),
    ("&yen;", "¥"),
    ("&brkbar;", "¦"),
    ("&brvbar;", "¦"),
    ("&sect;", "§"),
    ("&uml;", "¨"),
    ("&die;", "¨"),
    ("&copy;", "©"),
    ("&ordf;", "ª"),
    ("&laquo;", "«"),
    ("&not;", "¬"),
    # Soft hyphen (U+00AD), written as an escape because it is invisible.
    ("&shy;", "\xad"),
    ("&reg;", "®"),
    ("&macr;", "¯"),
    ("&hibar;", "¯"),
    ("&deg;", "°"),
    ("&plusmn;", "±"),
    ("&sup2;", "²"),
    ("&sup3;", "³"),
    ("&acute;", "´"),
    ("&micro;", "µ"),
    ("&para;", "¶"),
    ("&middot;", "·"),
    ("&cedil;", "¸"),
    ("&sup1;", "¹"),
    ("&ordm;", "º"),
    ("&raquo;", "»"),
    ("&frac14;", "¼"),
    ("&frac12;", "½"),
    ("&half;", "½"),
    ("&frac34;", "¾"),
    ("&iquest;", "¿"),
    ("&Agrave;", "À"),
    ("&Aacute;", "Á"),
    ("&Acirc;", "Â"),
    ("&Atilde;", "Ã"),
    ("&Auml;", "Ä"),
    ("&Aring;", "Å"),
    ("&AElig;", "Æ"),
    ("&Ccedil;", "Ç"),
    ("&Egrave;", "È"),
    ("&Eacute;", "É"),
    ("&Ecirc;", "Ê"),
    ("&Euml;", "Ë"),
    ("&Igrave;", "Ì"),
    ("&Iacute;", "Í"),
    ("&Icirc;", "Î"),
    ("&Iuml;", "Ï"),
    ("&ETH;", "Ð"),
    ("&Ntilde;", "Ñ"),
    ("&Ograve;", "Ò"),
    ("&Oacute;", "Ó"),
    ("&Ocirc;", "Ô"),
    ("&Otilde;", "Õ"),
    ("&Ouml;", "Ö"),
    ("&times;", "×"),
    ("&Oslash;", "Ø"),
    ("&Ugrave;", "Ù"),
    ("&Uacute;", "Ú"),
    ("&Ucirc;", "Û"),
    ("&Uuml;", "Ü"),
    ("&Yacute;", "Ý"),
    ("&THORN;", "Þ"),
    ("&szlig;", "ß"),
    ("&agrave;", "à"),
    ("&aacute;", "á"),
    ("&acirc;", "â"),
    ("&atilde;", "ã"),
    ("&auml;", "ä"),
    ("&aring;", "å"),
    ("&aelig;", "æ"),
    ("&ccedil;", "ç"),
    ("&egrave;", "è"),
    ("&eacute;", "é"),
    ("&ecirc;", "ê"),
    ("&euml;", "ë"),
    ("&igrave;", "ì"),
    ("&iacute;", "í"),
    ("&icirc;", "î"),
    ("&iuml;", "ï"),
    ("&eth;", "ð"),
    ("&ntilde;", "ñ"),
    ("&ograve;", "ò"),
    ("&oacute;", "ó"),
    ("&ocirc;", "ô"),
    ("&otilde;", "õ"),
    ("&ouml;", "ö"),
    ("&divide;", "÷"),
    ("&oslash;", "ø"),
    ("&ugrave;", "ù"),
    ("&uacute;", "ú"),
    ("&ucirc;", "û"),
    ("&uuml;", "ü"),
    ("&yacute;", "ý"),
    ("&thorn;", "þ"),
    ("&yuml;", "ÿ"),
)
# Each *_sorted table holds the same pairs as its source table, ordered by
# escape text descending (ascending stable sort, then reversed).  In that
# order a longer escape always comes before any shorter escape that is a
# prefix of it, so the longer one is tried first.
x_3_lower_case_escaped_sorted = tuple(
    sorted(x_3_lower_case_escaped, key=lambda pair: pair[0])[::-1]
)
x_3_upper_case_escaped_sorted = tuple(
    sorted(x_3_upper_case_escaped, key=lambda pair: pair[0])[::-1]
)
u_4_upper_case_escaped_sorted = tuple(
    sorted(u_4_upper_case_escaped, key=lambda pair: pair[0])[::-1]
)
u_4_lower_case_escaped_sorted = tuple(
    sorted(u_4_lower_case_escaped, key=lambda pair: pair[0])[::-1]
)
x_69_upper_case_escaped_sorted = tuple(
    sorted(x_69_upper_case_escaped, key=lambda pair: pair[0])[::-1]
)
x_69_lower_case_escaped_sorted = tuple(
    sorted(x_69_lower_case_escaped, key=lambda pair: pair[0])[::-1]
)
n_escaped_sorted = tuple(sorted(n_escaped, key=lambda pair: pair[0])[::-1])
wrong_chars_sorted = tuple(sorted(wrong_chars, key=lambda pair: pair[0])[::-1])
zerox_unescaped_lower_sorted = tuple(
    sorted(zerox_unescaped_lower, key=lambda pair: pair[0])[::-1]
)
zerox_unescaped_upper_sorted = tuple(
    sorted(zerox_unescaped_upper, key=lambda pair: pair[0])[::-1]
)

html_entity_sorted = tuple(sorted(html_entity, key=lambda pair: pair[0])[::-1])

# Compiled pattern matching any single character that is neither whitespace
# nor one of the characters listed in the escaped literal below.  Every
# character of that literal becomes a member of the negated class, including
# the individual characters of its leading "&shy;" text.
alllatinchars = regex.compile(
    r"[^\s\n\t"
    + regex.escape(
        r"""&shy;-!"#$%&'()*+,./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"""
    )
    + "]"
)


class LatinFix:
    r"""Chainable repair steps for text with escaped or mis-decoded characters.

    Every public step rewrites ``self.text`` and returns ``self``, so steps
    can be chained in whatever order the caller needs, e.g.
    ``LatinFix(raw).apply_wrong_chars().apply_html_entity_reference().text``.
    The ``apply_*`` steps are plain substring replacements driven by
    module-level tables of ``(broken, repaired)`` string pairs.

    Attributes:
        text: The string being repaired; every step replaces it.
        debug: When true, steps print their intermediate results to stdout.
    """

    def __init__(self, text: str, debug: bool = False) -> None:
        self.text = text
        self.debug = debug

    def __repr__(self) -> str:
        # Same output as str(): the current text, not a constructor-style repr.
        return self.__str__()

    def __str__(self) -> str:
        return self.text

    def _apply_table(self, table, label: str) -> "LatinFix":
        """Apply every ``(broken, repaired)`` pair of *table* to ``self.text``.

        Pairs are applied one after another with ``str.replace``, so a later
        pair sees the output of the earlier ones.  In debug mode each pair and
        the text it produced are printed, prefixed with *label*.

        Args:
            table: Iterable of ``(broken, repaired)`` string pairs.
            label: Step name used as the first field of each debug line.

        Returns:
            ``self``, to allow chaining.
        """
        for broken, repaired in table:
            self.text = self.text.replace(broken, repaired)
            if self.debug:
                print(label, broken, repaired, self.text)
        return self

    def _map_lines(self, transform, label: str) -> "LatinFix":
        r"""Run *transform* over each line of ``self.text`` and re-join the lines.

        The text is split with ``str.splitlines()`` and joined back with
        ``"\n"``, so the original line terminators (and a trailing newline)
        are not preserved.  ``self.text`` is only replaced once every line has
        been transformed.

        Args:
            transform: Callable taking one line and returning the fixed line.
            label: Step name printed before each fixed line in debug mode.

        Returns:
            ``self``, to allow chaining.
        """
        fixed_lines = []
        for line in self.text.splitlines():
            line = transform(line)
            if self.debug:
                print(label, line)
            fixed_lines.append(line)
        self.text = "\n".join(fixed_lines)
        return self

    def apply_x_3_lower_case_escaped(self) -> "LatinFix":
        r"""Replace literal lower-case hex escapes using ``x_3_lower_case_escaped_sorted``.

        Sample pairs: the four-character text ``\xff`` -> ``ÿ``,
        ``\xfe`` -> ``þ``, ``\xfd`` -> ``ý``, ``\xfc`` -> ``ü``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            x_3_lower_case_escaped_sorted, "apply_x_3_lower_case_escaped"
        )

    def apply_x_3_upper_case_escaped(self) -> "LatinFix":
        r"""Replace literal upper-case hex escapes using ``x_3_upper_case_escaped_sorted``.

        Sample pairs: ``\xFF`` -> ``ÿ``, ``\xFE`` -> ``þ``, ``\xFD`` -> ``ý``,
        ``\xFC`` -> ``ü``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            x_3_upper_case_escaped_sorted, "apply_x_3_upper_case_escaped"
        )

    def apply_u_4_upper_case_escaped(self) -> "LatinFix":
        r"""Replace literal ``\uXXXX`` escapes (upper-case hex digits).

        Uses ``u_4_upper_case_escaped_sorted``.  Sample pairs:
        ``\u2122`` -> ``™``, ``\u20AC`` -> ``€``, ``\u203A`` -> ``›``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            u_4_upper_case_escaped_sorted, "apply_u_4_upper_case_escaped"
        )

    def apply_u_4_lower_case_escaped(self) -> "LatinFix":
        r"""Replace literal ``\uxxxx`` escapes (lower-case hex digits).

        Uses ``u_4_lower_case_escaped_sorted``.  Sample pairs:
        ``\u2122`` -> ``™``, ``\u20ac`` -> ``€``, ``\u203a`` -> ``›``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            u_4_lower_case_escaped_sorted, "apply_u_4_lower_case_escaped"
        )

    def apply_x_69_upper_case_escaped(self) -> "LatinFix":
        r"""Replace multi-escape sequences written with upper-case hex digits.

        Uses ``x_69_upper_case_escaped_sorted``.  Sample pairs:
        ``\xE2\x84\xA2`` -> ``™``, ``\xE2\x82\xAC`` -> ``€``,
        ``\xE2\x80\xA6`` -> ``…``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            x_69_upper_case_escaped_sorted, "apply_x_69_upper_case_escaped"
        )

    def apply_x_69_lower_case_escaped(self) -> "LatinFix":
        r"""Replace multi-escape sequences written with lower-case hex digits.

        Uses ``x_69_lower_case_escaped_sorted``.  Sample pairs:
        ``\xe2\x84\xa2`` -> ``™``, ``\xe2\x82\xac`` -> ``€``,
        ``\xe2\x80\xa6`` -> ``…``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            x_69_lower_case_escaped_sorted, "apply_x_69_lower_case_escaped"
        )

    def apply_n_escaped(self) -> "LatinFix":
        r"""Replace backslash-plus-decimal-number sequences using ``n_escaped_sorted``.

        Sample pairs: ``\226\132\162`` -> ``™``, ``\226\130\172`` -> ``€``,
        ``\226\128\166`` -> ``…``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(n_escaped_sorted, "apply_n_escaped")

    def apply_zerox_unescaped_lower(self) -> "LatinFix":
        r"""Replace ``0x..`` codes (lower-case hex) using ``zerox_unescaped_lower_sorted``.

        Sample pairs: ``0xff`` -> ``ÿ``, ``0xfe`` -> ``þ``, ``0xfd`` -> ``ý``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            zerox_unescaped_lower_sorted, "apply_zerox_unescaped_lower"
        )

    def apply_zerox_unescaped_upper(self) -> "LatinFix":
        r"""Replace ``0x..`` codes (upper-case hex) using ``zerox_unescaped_upper_sorted``.

        Sample pairs: ``0xFF`` -> ``ÿ``, ``0xFE`` -> ``þ``, ``0xFD`` -> ``ý``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(
            zerox_unescaped_upper_sorted, "apply_zerox_unescaped_upper"
        )

    def apply_wrong_chars(self) -> "LatinFix":
        r"""Repair mis-decoded character sequences, then drop no-break spaces.

        First applies ``wrong_chars_sorted`` (sample pairs: ``â€™`` -> ``’``,
        ``â€”`` -> ``—``, ``â€œ`` -> ``“``).  Afterwards every remaining U+00A0
        character and every literal ``\xa0`` / ``\xA0`` text is deleted, i.e.
        replaced by the empty string rather than by a space.

        Returns ``self`` for chaining.
        """
        self._apply_table(wrong_chars_sorted, "apply_wrong_chars")

        # "\xa0" and "\xA0" spell the same one-character string, so a single
        # entry covers both; the other two entries are the escaped spellings,
        # which really do differ in case.
        for leftover in ("\xa0", "\\xa0", "\\xA0"):
            self.text = self.text.replace(leftover, "")
        if self.debug:
            print("\\xA0", "", self.text)

        return self

    def remove_non_printable_chars(self) -> "LatinFix":
        """Pass every line of the text through ``remove_escaped_characters``.

        NOTE(review): ``remove_escaped_characters`` is imported from another
        module; presumably it strips control / non-printable characters --
        confirm against that module.  Lines are re-joined with a plain
        newline.

        Returns ``self`` for chaining.
        """
        return self._map_lines(remove_escaped_characters, "remove_non_printable_chars")

    def apply_html_character_reference(self) -> "LatinFix":
        r"""Replace numeric HTML character references using ``html_reference``.

        Sample pairs: ``&#032;`` -> a space, ``&#033;`` -> ``!``,
        ``&#038;`` -> ``&``.  The table is iterated in its defined order
        (``html_reference`` itself, not a ``*_sorted`` variant).

        Returns ``self`` for chaining.
        """
        return self._apply_table(html_reference, "apply_html_character_reference")

    def apply_html_entity_reference(self) -> "LatinFix":
        r"""Replace named HTML entities using ``html_entity_sorted``.

        Sample pairs: ``&yuml;`` -> ``ÿ``, ``&yen;`` -> ``¥``,
        ``&uuml;`` -> ``ü``.

        Returns ``self`` for chaining.
        """
        return self._apply_table(html_entity_sorted, "apply_html_entity_reference")

    def replace_multispaces(self) -> "LatinFix":
        r"""Collapse each run of whitespace inside a line to one space.

        Works line by line, so line breaks found by ``str.splitlines()`` are
        kept (as ``"\n"``) while runs matching ``\s+`` within a line become a
        single ``" "``.

        Returns ``self`` for chaining.
        """
        return self._map_lines(
            lambda line: regex.sub(r"\s+", " ", line), "replace_multispaces"
        )

    def delete_all_non_latin_chars(self) -> "LatinFix":
        """Replace every character matched by ``alllatinchars`` with a space.

        Works line by line; each match of the module-level ``alllatinchars``
        pattern is substituted with ``" "`` (not removed), so a later
        ``replace_multispaces()`` can tidy up the gaps.

        Returns ``self`` for chaining.
        """
        return self._map_lines(
            lambda line: alllatinchars.sub(" ", line), "delete_all_non_latin_chars"
        )


if __name__ == "__main__":
    text = r"""
    Suzy &amp; John &quot; 
    &pound;682m
    \u00FF\u00FF\u00F0\u00f0\x95\xFF 
    SmÃ¶rgÃ¥s 
    Non ti suscita niente la parola pietÃ\xa0 
    RosÅ½ 
    RUF MICH ZURÃœCK. 
    aqu\195\173 
    09. BÃ¡t NhÃ£ TÃ¢m Kinh 
    crianÃ§a
    KoÃ§ University
    Technische UniversitÃ¤t Dresden
    UniversitÃ¤t fÃ¼r Musik und darstellende Kunst Wien
    Technische UniversitÃ¤t Wien
    Ã\x89cole Nationale SupÃ©rieure des Beaux-Arts Paris
    Universidad SimÃ³n BolÃ\xadvar (USB)
    PontifÃ\xadcia Universidade CatÃ³lica do Rio Grande do Sul (PUCRS)
    BogaziÃ§i Ã\x9cniversitesi
    UniversitÃ\xa0 degli Studi di Udine
    Universitat AutÃ²noma de Barcelona
    UniversitÃ© de Rennes 1
    Ã\x89cole Normale SupÃ©rieure de Lyon
    Ã\x89cole Nationale SupÃ©rieure de CrÃ©ation Industrielle
    ENSCI Les Ateliers
    UniversitÃ¤t Bremen
    Institut National des Sciences AppliquÃ©es de Lyon (INSA)
    UniversitÃ© Laval
    UniversitÃ¤t des Saarlandes
    UniversitÃ¤t Konstanz
    Philipps-UniversitÃ¤t Marburg
    El Colegio de MÃ©xico A.C.
    Humboldt-UniversitÃ¤t zu Berlin
    PontifÃ\xadcia Universidade CatÃ³lica do Rio de Janeiro
    Universidade Federal do ParanÃ¡ - UFPR
    UniversitÃ¤t Potsdam
    USI - UniversitÃ  della Svizzera italiana
    PalackÃ½ University Olomouc
    CentraleSupÃ©lec
    Arts et MÃ©tiers ParisTech
    UniversitÃ© de Sherbrooke
    UniversitÃ\xa0 degli studi Roma Tre
    WestfÃ¤lische Wilhelms-UniversitÃ¤t MÃ¼nster
    Universidad PolitÃ©cnica de Madrid (UPM)
    Universidad Adolfo IbÃ\xa0Ã±ez
    Ã\x89cole Centrale de Lille
    UniversitÃ© Paris 13 Nord
    UniversitÃ  degli Studi di Udine
    Universidade Federal de SÃ£o Paulo
    Instituto Nacional de MatemÃ¡tica Pura e Aplicada (IMPA)
    UniversitÃ¤t Mannheim
    UniversitÃ© Toulouse 1 Capitole
    Technische UniversitÃ¤t Braunschweig
    Eberhard Karls UniversitÃ¤t TÃ¼bingen
    UniversitÃ¤t Rostock
    UniversitÃ© Grenoble Alpes
    UniversitÃ© de Fribourg
    UniversitÃ¤t Innsbruck
    Universidad Adolfo IbÃ Ã±ez
    UniversitÃ© du QuÃ©bec
    Universidad de la RepÃºblica (Udelar)
    Universitat PolitÃ¨cnica de Catalunya Â· BarcelonaTech (UPC)
    UniversitÃ¤t Regensburg
    UniversitÃ© de Paris
    UniversitÃ© Paris 1 PanthÃ©on-Sorbonne
    Universidad TÃ©cnica Federico Santa MarÃ\xada (USM)
    Ruprecht-Karls-UniversitÃ¤t Heidelberg
    Pontificia Universidad CatÃ³lica Argentina
    UniversitÃ\xa0Â\xa0di Padova
    Technische UniversitÃ¤t Berlin (TU Berlin)
    UniversitÃ¤t Stuttgart
    FundaÃ§Ã£o Getulio Vargas (FGV)
    Universidade de SÃ£o Paulo
    Universidad Nacional AutÃ³noma de MÃ©xico  (UNAM)
    Universidade Federal de SÃ£o Carlos (UFSCar)
    Ã\x89cole Centrale de Nantes
    Technische UniversitÃ¤t Kaiserslautern
    UniversitÃ  degli studi Roma Tre
    Pontificia Universidad CatÃ³lica del PerÃº
    UniversitÃ\xa0 degli Studi di Pavia
    UniversitÃ© PSL
    UniversitÃ© de MontrÃ©al
    Pontificia Universidad CatÃ³lica de ValparaÃ\xadso
    University Paris 2 PanthÃ©on-Assas
    UniversitÃ© Paris-Nanterre
    Universidad AutÃ³noma de San Luis de PotosÃ\xad
    UniversitÃ¤t  Leipzig
    Ruhr-UniversitÃ¤t Bochum
    UniversitÃ© LumiÃ¨re Lyon 2
    UniversitÃ© de Lille
    UniversitÃ© Claude Bernard Lyon 1
    UniversitÃ© catholique de Louvain (UCLouvain)
    UniversitÃ©  de Technologie Troyes (UTT)
    Universidad de San AndrÃ©s - UdeSA
    Martin-Luther-UniversitÃ¤t Halle-Wittenberg
    University of TromsÃ¸ The Arctic University of Norway
    Rheinische Friedrich-Wilhelms-UniversitÃ¤t Bonn
    Universidad de AlcalÃ¡
    USI - UniversitÃ\xa0 della Svizzera italiana
    LinkÃ¶ping University
    Universidad Nacional de CÃ³rdoba - UNC
    UniversitÃ\xa0 degli Studi di Perugia
    UniversitÃ  degli Studi di Pavia
    Johannes Gutenberg UniversitÃ¤t Mainz
    UniversitÃ  Iuav di Venezia
    Friedrich-Alexander-UniversitÃ¤t Erlangen-NÃ¼rnberg
    UniversitÃ© de Nantes
    Universidad de CÃ³rdoba
    Universidade de BrasÃ\xadlia
    UniversitÃ© de Strasbourg
    Universidad AutÃ³noma de Nuevo LeÃ³n
    Pontificia Universidad CatÃ³lica de Chile (UC)
    UniversitÃ© Paris-Est CrÃ©teil Val de Marne
    Universidad AutÃ³noma del Estado de MÃ©xico (UAEMex)
    UniversitÃ© de Montpellier
    UniversitÃ¤t der KÃ¼nste Berlin
    UniversitÃ Â di Padova
    UniversitÃ© Paris-Saclay
    EÃ¶tvÃ¶s LorÃ¡nd University
    Technische UniversitÃ¤t Bergakademie Freiberg
    Technische UniversitÃ¤t Hamburg
    Universidade CatÃ³lica Portuguesa - UCP
    Ã\x89cole Nationale SupÃ©rieure des Industries Chimiques (ENSIC) Nancy
    Instituto TecnolÃ³gico AutÃ³nomo de MÃ©xico (ITAM)
    UniversitÃ© de Limoges
    UniversitÃ© Sorbonne Nouvelle Paris 3
    UniversitÃ© Paul Sabatier Toulouse III
    Julius-Maximilians-UniversitÃ¤t WÃ¼rzburg
    UniversitÃ© de Poitiers
    Universitat PolitÃ¨cnica de ValÃ¨ncia
    UniversitÃ\xa0Â\xa0Cattolica del Sacro Cuore
    UniversitÃ© Nice Sophia Antipolis
    University of JyvÃ¤skylÃ¤
    Bauhaus-UniversitÃ¤t Weimar
    UniversitÃ© de LiÃ¨ge
    UniversitÃ¤t Jena
    University of GÃ¶ttingen
    Technische UniversitÃ¤t Ilmenau
    Ã\x89cole Centrale de Lyon
    Ludwig-Maximilians-UniversitÃ¤t MÃ¼nchen
    UniversitÃ© de Lorraine
    UniversitÃ© de Technologie de CompiÃ¨gne (UTC)
    UniversitÃ¤t Siegen
    UniversitÃ¤t Duisburg-Essen
    UniversitÃ© de Savoie
    Universidad AutÃ³noma de Madrid
    UniversitÃ Â Cattolica del Sacro Cuore
    Ankara Ã\x9cniversitesi
    Universidade da CoruÃ±a
    UniversitÃ degli Studi di Perugia
    Hochschule fÃ¼r Gestaltung und Kunst ZÃ¼rich
    UniversitÃ¤t Hamburg
    """
    # Run the sample text through every repair step in a fixed order.  Each
    # step mutates lfix and returns it, so the last return value kept in nw is
    # lfix itself.
    lfix = LatinFix(text, debug=False)
    repair_steps = (
        lfix.apply_n_escaped,  # e.g. \226\132\162 -> ™
        lfix.remove_non_printable_chars,  # per-line remove_escaped_characters
        lfix.apply_wrong_chars,  # e.g. â€™ -> ’
        lfix.apply_x_69_lower_case_escaped,  # e.g. \xe2\x84\xa2 -> ™
        lfix.apply_x_69_upper_case_escaped,  # e.g. \xE2\x84\xA2 -> ™
        lfix.apply_x_3_lower_case_escaped,  # e.g. \xff -> ÿ
        lfix.apply_x_3_upper_case_escaped,  # e.g. \xFF -> ÿ
        lfix.apply_u_4_upper_case_escaped,  # e.g. \u20AC -> €
        lfix.apply_u_4_lower_case_escaped,  # e.g. \u20ac -> €
        lfix.apply_zerox_unescaped_lower,  # e.g. 0xff -> ÿ
        lfix.apply_zerox_unescaped_upper,  # e.g. 0xFF -> ÿ
        lfix.apply_html_character_reference,  # e.g. &#033; -> !
        lfix.apply_html_entity_reference,  # e.g. &yuml; -> ÿ
        lfix.delete_all_non_latin_chars,  # alllatinchars matches -> space
        lfix.replace_multispaces,  # whitespace runs -> one space
    )
    for run_step in repair_steps:
        nw = run_step()

    # Show the repaired text, then each original line next to its repaired
    # counterpart (pairing stops at the shorter of the two line lists).
    print(lfix.text)
    repaired_lines = lfix.text.splitlines()
    original_lines = text.splitlines()
    for original, repaired in zip(original_lines, repaired_lines):
        print(f"Original: {original}")
        print(f"Repaired: {repaired}")
        print("")
