import re
import sys

# Lookup table: domain name -> list of normalised tokens associated with that
# domain.  The same token may be listed under several domains.  Tokens that
# contain "_" look like multi-word phrases joined with underscores
# (presumably produced by the tokeniser -- confirm against the calling code).
DOMAIN_TOKEN_MAP = {
    "negative": [
        "aggression", "aggressive", "agitated", "anger", "angry",
        "anxiety", "anxiety_disorder", "anxious", "appetite", "ashamed",
        "assaultive", "attack", "avoidance", "avoidant", "blue", "calm",
        "chest", "chest_pain", "chill", "constricted", "depressed", "depressed_mood",
        "depressive", "depressive_disorder", "diaphoretic", "diarrhea",
        "difficulty", "down", "dysphoric", "dysthymic", "emotional",
        "energy", "fatigue", "fearful", "feelings", "fever", "frighten",
        "gain", "guilt", "guilty", "head", "helpless", "hopeful", "hopelessness",
        "hypervigilant", "irritable", "lesion", "living", "major", "meaningless",
        "nausea", "nervous", "optimistic", "outburst", "palpitation",
        "panic", "panic_attack", "panic_disorder", "pessimistic", "phobia",
        "phobic", "posttraumatic", "posttraumatic_stress", "refuse",
        "relaxed", "respond", "sad", "scare", "sensory", "sensory_deficit",
        "situation", "startle", "stress", "stress_disorder", "stressor",
        "suicidal", "sweat", "tachycardic", "tearful", "tense", "threaten",
        "trauma", "tremulous", "violence", "violent", "weight_gain",
        "worried", "worth", "worthless",
    ],
    "positive": [
        "abuse", "alcohol", "alcohol_abuse",
        "alcohol_dependance", "alcohol_withdrawal", "appetite", "back",
        "barbiturate", "blackout", "bowel", "compulsive", "count", "couple",
        "craving", "cut", "dependance", "disorder_nos", "drink", "drug",
        "effect", "etoh", "financial", "gamble", "heavy", "heroin", "high",
        "high_school", "hoard", "indication", "induce", "induce_mood",
        "ingestion", "intake", "interest", "intoxicate", "intoxication",
        "liver", "lose", "money", "monitoring", "motivate", "motor",
        "movement", "ordering", "partial", "planning", "psychotic_disorder",
        "related", "remission", "repetitive", "report_that", "resource",
        "routine", "sexual_abuse", "stereotypy", "stop", "strategy",
        "therapy", "with_alcohol", "withdrawal", "withdrawal_symptom",
    ],
    "cognitive": [
        "ability", "answer", "aphasic", "attention", "attentive",
        "auditory", "auditory_hallucination", "behavior", "cognitive",
        "compliance", "content", "conversation", "conviction", "current_episode",
        "decrease_need", "delusion", "disinhibit", "disorganisation",
        "disorganise", "disorganise_behavior", "distracted", "distractible",
        "employ", "engage", "executive", "exercise", "factor_include",
        "fast", "flight", "forget", "forgetful", "grandiosity", "hallucinate",
        "hallucination", "idea", "illusion", "impairment", "impulse",
        "impulsive", "impulsive_behavior", "inappropriate", "incoherent",
        "information", "injurious_behavior", "insertion", "intact", "irritable",
        "memory", "mildly", "name", "ongoing", "perception", "planning",
        "possibly", "problem", "psychotic", "psychotic_disorder", "psychotic_feature",
        "question", "race", "recall", "regularly", "related", "schizophrenia",
        "sensation", "sexual_abuse", "sweat", "talk", "term", "testing",
        "thought", "thought_insertion", "type", "ultimately", "understand",
        "understanding", "visual", "visual_hallucination", "voice", "vomiting",
        "with_psychotic", "word",
    ],
    "social": [
        "aggressive", "auditory", "auditory_hallucination",
        "behavior", "belief", "compliance", "concern_that", "content",
        "control", "current_episode", "delusion", "delusional", "disorder_nos",
        "disorganise", "elicit", "empathic", "engaged", "expressive",
        "fast", "flat", "grandiose", "hallucinate", "hallucination",
        "impulse", "impulse_control", "inappropriate", "interview", "isolated",
        "language", "lonely", "misinterpret", "misunderstand", "paranoid",
        "paranoid_ideation", "partial", "prominent", "psychotic", "psychotic_disorder",
        "psychotic_feature", "regular", "related", "theme", "think",
        "thought_broadcasting", "threaten", "type", "vague", "visual",
        "voice", "wall", "with_psychotic", "withdrawn", "worthless",
    ],
    "arousal_regulatory": [
        "abnormality", "agitated", "agitation", "alertness", "alfa",
        "amount", "apnea", "appetite", "architecture", "attention",
        "bipap", "blink", "cardiovascular", "chest_pain", "chill",
        "cigaret", "circadian", "compare", "concentration", "continuity",
        "cortisol", "cpap", "deficit", "deprivation", "difficulty",
        "dizziness", "dream", "duration", "eeg", "emotional", "energy",
        "executive_function", "express", "fatigue", "faze", "feelings",
        "ferritin", "fragmentation", "have_been", "headache", "hormone",
        "hour", "hypersomnia", "identify", "information", "insomnia",
        "irritability", "language", "melatonin", "memory", "minutes",
        "motivate", "motor", "motor_activity", "neck", "night", "nightmare",
        "notice", "offer", "oral", "osa", "posttraumatic", "posttraumatic_stress",
        "psychomotor", "psychomotor_agitation", "quantity", "range",
        "reactivity", "rem_sleep", "respiratory", "restless", "restoration",
        "rhythm", "sensation", "sensitivity", "sensory", "sensory_deficit",
        "sexual_abuse", "sleep_latency", "sleep_study", "sleepiness",
        "someone_else", "spindle", "stability", "stable", "startle",
        "stress", "stress_disorder", "symptom_include", "these_problem",
        "theta", "timing", "tingle", "tired", "trauma", "traumatic",
        "user", "variability", "vigilance", "voluntary", "wakefulness",
        "waking", "weight_gain", "with_general", "worsen",
    ],
}


# Alphabetically sorted list of every distinct token found in any domain of
# DOMAIN_TOKEN_MAP; a token shared by several domains appears only once.
# A set comprehension flattens the per-domain lists in a single pass, instead
# of sum(lists, []), which re-copies the accumulated list for every domain.
TOKENS = sorted(
    {token for domain_tokens in DOMAIN_TOKEN_MAP.values() for token in domain_tokens}
)


# Normalisation table: surface word form -> normalised token.  Inflections and
# spelling variants collapse onto one form (e.g. "alpha" -> "alfa",
# "phases" -> "faze", "slept" -> "sleep").  The normalised forms are the
# vocabulary used, alone or inside "_"-joined phrases, by DOMAIN_TOKEN_MAP.
# NOTE(review): the name suggests the table was generated with a lexical
# variant generator (LVG) -- confirm before editing entries by hand.
TOKEN_LVG_MAP = {
    "abilities": "ability",
    "ability": "ability",
    "abnormalities": "abnormality",
    "abnormality": "abnormality",
    "abuse": "abuse",
    "abused": "abuse",
    "abuses": "abuse",
    "abusing": "abuse",
    "activities": "activity",
    "activity": "activity",
    "aggression": "aggression",
    "aggressive": "aggressive",
    "agitated": "agitated",
    "agitation": "agitation",
    "alcohol": "alcohol",
    "alcohols": "alcohol",
    "alertness": "alertness",
    "alfa": "alfa",
    "alpha": "alfa",
    "alphas": "alfa",
    "amount": "amount",
    "amounted": "amount",
    "amounting": "amount",
    "amounts": "amount",
    "anger": "anger",
    "angered": "anger",
    "angers": "anger",
    "angrier": "angry",
    "angry": "angry",
    "answer": "answer",
    "answered": "answer",
    "answering": "answer",
    "answers": "answer",
    "anxieties": "anxiety",
    "anxiety": "anxiety",
    "anxious": "anxious",
    "aphasic": "aphasic",
    "apnea": "apnea",
    "appetite": "appetite",
    "appetites": "appetite",
    "architecture": "architecture",
    "ashamed": "ashamed",
    "assaultive": "assaultive",
    "attack": "attack",
    "attacked": "attack",
    "attacking": "attack",
    "attacks": "attack",
    "attention": "attention",
    "attentive": "attentive",
    "auditory": "auditory",
    "avoidance": "avoidance",
    "avoidant": "avoidant",
    "back": "back",
    "backed": "back",
    "backs": "back",
    "barbiturate": "barbiturate",
    "barbiturates": "barbiturate",
    "behavior": "behavior",
    "behaviors": "behavior",
    "behaviour": "behavior",
    "behaviours": "behavior",
    "belief": "belief",
    "beliefs": "belief",
    "bipap": "bipap",
    "blackout": "blackout",
    "blackouts": "blackout",
    "blink": "blink",
    "blinks": "blink",
    "blue": "blue",
    "bowel": "bowel",
    "bowels": "bowel",
    "broadcasting": "broadcasting",
    "calm": "calm",
    "calmed": "calm",
    "calmer": "calm",
    "calming": "calm",
    "calms": "calm",
    "cardiovascular": "cardiovascular",
    "chest": "chest",
    "chill": "chill",
    "chilled": "chill",
    "chills": "chill",
    "cigarette": "cigaret",
    "cigarettes": "cigaret",
    "circadian": "circadian",
    "cognitive": "cognitive",
    "compare": "compare",
    "compared": "compare",
    "compares": "compare",
    "comparing": "compare",
    "compliance": "compliance",
    "compulsive": "compulsive",
    "concentration": "concentration",
    "concentrations": "concentration",
    "concern": "concern",
    "concerns": "concern",
    "constricted": "constricted",
    "content": "content",
    "continuity": "continuity",
    "control": "control",
    "controled": "control",
    "controling": "control",
    "controls": "control",
    "conversation": "conversation",
    "conversations": "conversation",
    "conviction": "conviction",
    "convictions": "conviction",
    "cortisol": "cortisol",
    "count": "count",
    "counted": "count",
    "counting": "count",
    "counts": "count",
    "couple": "couple",
    "coupled": "couple",
    "couples": "couple",
    "cpap": "cpap",
    "craving": "craving",
    "cravings": "craving",
    "current": "current",
    "cut": "cut",
    "cuts": "cut",
    "decrease": "decrease",
    "decreased": "decrease",
    "decreases": "decrease",
    "decreasing": "decrease",
    "deficit": "deficit",
    "deficits": "deficit",
    "delusion": "delusion",
    "delusional": "delusional",
    "delusions": "delusion",
    "dependance": "dependance",
    "dependence": "dependance",
    "dependences": "dependance",
    "depressed": "depressed",
    "depressive": "depressive",
    "deprivation": "deprivation",
    "diaphoretic": "diaphoretic",
    "diarrhea": "diarrhea",
    "difficulties": "difficulty",
    "difficulty": "difficulty",
    "disinhibit": "disinhibit",
    "disinhibited": "disinhibit",
    "disorder": "disorder",
    "disorders": "disorder",
    "disorganised": "disorganise",
    "disorganization": "disorganisation",
    "disorganize": "disorganise",
    "disorganized": "disorganise",
    "distracted": "distracted",
    "distractible": "distractible",
    "dizziness": "dizziness",
    "dizzyness": "dizziness",
    "downing": "down",
    "downs": "down",
    "drank": "drink",
    "dream": "dream",
    "dreamed": "dream",
    "dreaming": "dream",
    "dreams": "dream",
    "drink": "drink",
    "drinking": "drink",
    "drinks": "drink",
    "drug": "drug",
    "drugging": "drug",
    "drugs": "drug",
    "duration": "duration",
    "dysphoric": "dysphoric",
    "dysthymic": "dysthymic",
    "eeg": "eeg",
    "eegs": "eeg",
    "effect": "effect",
    "effected": "effect",
    "effecting": "effect",
    "effects": "effect",
    "elicit": "elicit",
    "elicited": "elicit",
    "eliciting": "elicit",
    "elicits": "elicit",
    "emotional": "emotional",
    "empathic": "empathic",
    "employ": "employ",
    "employed": "employ",
    "employing": "employ",
    "employs": "employ",
    "energies": "energy",
    "energy": "energy",
    "engage": "engage",
    "engaged": "engaged",
    "engages": "engage",
    "engaging": "engage",
    "episode": "episode",
    "episodes": "episode",
    "etoh": "etoh",
    "executive": "executive",
    "exercise": "exercise",
    "exercised": "exercise",
    "exercises": "exercise",
    "exercising": "exercise",
    "exercizes": "exercise",
    "express": "express",
    "expressed": "express",
    "expresses": "express",
    "expressing": "express",
    "expressive": "expressive",
    "factor": "factor",
    "factored": "factor",
    "factors": "factor",
    "fast": "fast",
    "fasted": "fast",
    "faster": "fast",
    "fastest": "fast",
    "fasting": "fast",
    "fatigue": "fatigue",
    "fatigued": "fatigue",
    "fearful": "fearful",
    "feature": "feature",
    "featured": "feature",
    "features": "feature",
    "featuring": "feature",
    "feelings": "feelings",
    "ferritin": "ferritin",
    "fever": "fever",
    "fevers": "fever",
    "financial": "financial",
    "flat": "flat",
    "flight": "flight",
    "flights": "flight",
    "forget": "forget",
    "forgetful": "forgetful",
    "forgets": "forget",
    "forgetting": "forget",
    "forgot": "forget",
    "forgotten": "forget",
    "fragmentation": "fragmentation",
    "frighten": "frighten",
    "frightened": "frighten",
    "frightens": "frighten",
    "function": "function",
    "functioned": "function",
    "functioning": "function",
    "functions": "function",
    "gain": "gain",
    "gained": "gain",
    "gaining": "gain",
    "gamble": "gamble",
    "gambled": "gamble",
    "gambles": "gamble",
    "gambling": "gamble",
    "general": "general",
    "grandiose": "grandiose",
    "grandiosity": "grandiosity",
    "guilt": "guilt",
    "guilty": "guilty",
    "hallucinate": "hallucinate",
    "hallucinated": "hallucinate",
    "hallucinating": "hallucinate",
    "hallucination": "hallucination",
    "hallucinations": "hallucination",
    "head": "head",
    "headache": "headache",
    "headaches": "headache",
    "heads": "head",
    "heavier": "heavy",
    "heaviest": "heavy",
    "heavy": "heavy",
    "helpless": "helpless",
    "heroin": "heroin",
    "high": "high",
    "higher": "high",
    "highest": "high",
    "highs": "high",
    "hoard": "hoard",
    "hoarded": "hoard",
    "hoarding": "hoard",
    "hoards": "hoard",
    "hopeful": "hopeful",
    "hopelessness": "hopelessness",
    "hormone": "hormone",
    "hormones": "hormone",
    "hour": "hour",
    "hours": "hour",
    "hypersomnia": "hypersomnia",
    "hypervigilant": "hypervigilant",
    "idea": "idea",
    "ideas": "idea",
    "ideation": "ideation",
    "ideations": "ideation",
    "identified": "identify",
    "identifies": "identify",
    "identify": "identify",
    "identifying": "identify",
    "illusion": "illusion",
    "illusions": "illusion",
    "impairment": "impairment",
    "impairments": "impairment",
    "impulse": "impulse",
    "impulses": "impulse",
    "impulsive": "impulsive",
    "inappropriate": "inappropriate",
    "include": "include",
    "includes": "include",
    "incoherent": "incoherent",
    "indication": "indication",
    "indications": "indication",
    "induce": "induce",
    "induced": "induce",
    "induces": "induce",
    "inducing": "induce",
    "information": "information",
    "ingestion": "ingestion",
    "ingestions": "ingestion",
    "injurious": "injurious",
    "insertion": "insertion",
    "insertions": "insertion",
    "insomnia": "insomnia",
    "intact": "intact",
    "intake": "intake",
    "intakes": "intake",
    "interest": "interest",
    "interests": "interest",
    "interview": "interview",
    "interviewed": "interview",
    "interviewing": "interview",
    "interviews": "interview",
    "intoxicated": "intoxicate",
    "intoxication": "intoxication",
    "irritability": "irritability",
    "irritable": "irritable",
    "isolated": "isolated",
    "language": "language",
    "languaged": "language",
    "languages": "language",
    "latencies": "latency",
    "latency": "latency",
    "lesion": "lesion",
    "lesioning": "lesion",
    "lesions": "lesion",
    "liver": "liver",
    "living": "living",
    "lonelier": "lonely",
    "lonely": "lonely",
    "lose": "lose",
    "loses": "lose",
    "losing": "lose",
    "lost": "lose",
    "major": "major",
    "majored": "major",
    "majoring": "major",
    "majors": "major",
    "meaningless": "meaningless",
    "melatonin": "melatonin",
    "memories": "memory",
    "memory": "memory",
    "mildly": "mildly",
    "minutes": "minutes",
    "misinterpret": "misinterpret",
    "misinterpreted": "misinterpret",
    "misinterpreting": "misinterpret",
    "misinterprets": "misinterpret",
    "misunderstand": "misunderstand",
    "misunderstands": "misunderstand",
    "misunderstood": "misunderstand",
    "money": "money",
    "monitoring": "monitoring",
    "mood": "mood",
    "moods": "mood",
    "motivate": "motivate",
    "motivated": "motivate",
    "motivating": "motivate",
    "motor": "motor",
    "movement": "movement",
    "movements": "movement",
    "named": "name",
    "names": "name",
    "naming": "name",
    "nausea": "nausea",
    "nauseas": "nausea",
    "neck": "neck",
    "necks": "neck",
    "needed": "need",
    "needing": "need",
    "nervous": "nervous",
    "night": "night",
    "nightmare": "nightmare",
    "nightmares": "nightmare",
    "nights": "night",
    "notice": "notice",
    "noticed": "notice",
    "notices": "notice",
    "noticing": "notice",
    "offer": "offer",
    "offered": "offer",
    "offers": "offer",
    "ongoing": "ongoing",
    "optimistic": "optimistic",
    "oral": "oral",
    "ordering": "ordering",
    "osa": "osa",
    "outburst": "outburst",
    "outbursts": "outburst",
    "pain": "pain",
    "pained": "pain",
    "pains": "pain",
    "palpitation": "palpitation",
    "palpitations": "palpitation",
    "panic": "panic",
    "panicked": "panic",
    "panicking": "panic",
    "panics": "panic",
    "paranoid": "paranoid",
    "paranoids": "paranoid",
    "partial": "partial",
    "perception": "perception",
    "perceptions": "perception",
    "pessimistic": "pessimistic",
    "phase": "faze",
    "phases": "faze",
    "phasing": "faze",
    "phobia": "phobia",
    "phobias": "phobia",
    "phobic": "phobic",
    "planning": "planning",
    "possibly": "possibly",
    "posttraumatic": "posttraumatic",
    "problem": "problem",
    "problems": "problem",
    "prominent": "prominent",
    "psychomotor": "psychomotor",
    "psychotic": "psychotic",
    "psychotics": "psychotic",
    "quantities": "quantity",
    "quantity": "quantity",
    "question": "question",
    "questioned": "question",
    "questions": "question",
    "race": "race",
    "raced": "race",
    "races": "race",
    "racing": "race",
    "range": "range",
    "ranged": "range",
    "ranges": "range",
    "ranging": "range",
    "reactivity": "reactivity",
    "recall": "recall",
    "recalled": "recall",
    "recalling": "recall",
    "recalls": "recall",
    "refuse": "refuse",
    "refused": "refuse",
    "refuses": "refuse",
    "refusing": "refuse",
    "regular": "regular",
    "regularly": "regularly",
    "related": "related",
    "relaxed": "relaxed",
    "rem": "rem",
    "remission": "remission",
    "remissions": "remission",
    "repetitive": "repetitive",
    "report": "report",
    "reported": "report",
    "reports": "report",
    "resource": "resource",
    "resources": "resource",
    "respiratory": "respiratory",
    "respond": "respond",
    "responded": "respond",
    "responding": "respond",
    "responds": "respond",
    "restless": "restless",
    "restoration": "restoration",
    "rhythm": "rhythm",
    "rhythms": "rhythm",
    "routine": "routine",
    "routines": "routine",
    "sad": "sad",
    "scare": "scare",
    "scared": "scare",
    "scares": "scare",
    "scaring": "scare",
    "schizophrenia": "schizophrenia",
    "school": "school",
    "schooled": "school",
    "schooling": "school",
    "schools": "school",
    "sensation": "sensation",
    "sensations": "sensation",
    "sensitivities": "sensitivity",
    "sensitivity": "sensitivity",
    "sensory": "sensory",
    "sexual": "sexual",
    "situation": "situation",
    "situations": "situation",
    "sleep": "sleep",
    "sleepiness": "sleepiness",
    "sleeping": "sleep",
    "sleeps": "sleep",
    "slept": "sleep",
    "spindle": "spindle",
    "spindles": "spindle",
    "stability": "stability",
    "stable": "stable",
    "stables": "stable",
    "startle": "startle",
    "startled": "startle",
    "startles": "startle",
    "stereotypies": "stereotypy",
    "stereotypy": "stereotypy",
    "stop": "stop",
    "stopped": "stop",
    "stopping": "stop",
    "stops": "stop",
    "strategies": "strategy",
    "strategy": "strategy",
    "stress": "stress",
    "stressed": "stress",
    "stresses": "stress",
    "stressing": "stress",
    "stressor": "stressor",
    "stressors": "stressor",
    "studies": "study",
    "study": "study",
    "studying": "study",
    "suicidal": "suicidal",
    "sweat": "sweat",
    "sweated": "sweat",
    "sweating": "sweat",
    "sweats": "sweat",
    "symptom": "symptom",
    "symptoms": "symptom",
    "tachycardic": "tachycardic",
    "talk": "talk",
    "talked": "talk",
    "talking": "talk",
    "talks": "talk",
    "tearful": "tearful",
    "tense": "tense",
    "tensing": "tense",
    "term": "term",
    "termed": "term",
    "terms": "term",
    "testing": "testing",
    "testings": "testing",
    "theme": "theme",
    "themes": "theme",
    "therapies": "therapy",
    "therapy": "therapy",
    "theta": "theta",
    "thinking": "think",
    "thinks": "think",
    "thought": "thought",
    "thoughts": "thought",
    "threaten": "threaten",
    "threatened": "threaten",
    "threatening": "threaten",
    "threatens": "threaten",
    "timing": "timing",
    "timings": "timing",
    "tingle": "tingle",
    "tingling": "tingle",
    "tired": "tired",
    "trauma": "trauma",
    "traumas": "trauma",
    "traumata": "trauma",
    "traumatic": "traumatic",
    "tremulous": "tremulous",
    "type": "type",
    "types": "type",
    "ultimately": "ultimately",
    "understand": "understand",
    "understanding": "understanding",
    "understandings": "understanding",
    "understands": "understand",
    "understood": "understand",
    "user": "user",
    "users": "user",
    "vague": "vague",
    "variability": "variability",
    "vigilance": "vigilance",
    "violence": "violence",
    "violent": "violent",
    "visual": "visual",
    "voice": "voice",
    "voices": "voice",
    "voicing": "voice",
    "voluntary": "voluntary",
    "vomiting": "vomiting",
    "wakefulness": "wakefulness",
    "waking": "waking",
    "wakings": "waking",
    "wall": "wall",
    "walls": "wall",
    "weight": "weight",
    "weighted": "weight",
    "weights": "weight",
    "withdrawal": "withdrawal",
    "withdrawals": "withdrawal",
    "withdrawn": "withdrawn",
    "word": "word",
    "worded": "word",
    "wording": "word",
    "words": "word",
    "worried": "worried",
    "worsen": "worsen",
    "worsened": "worsen",
    "worsening": "worsen",
    "worsens": "worsen",
    "worth": "worth",
    "worthless": "worthless",
}


# English stop words.  How the list is consumed is not visible in this part of
# the file; presumably these words are dropped from text before tokens are
# normalised and matched -- confirm against the calling code.
# NOTE(review): some very common words ("with", "that", "have", "been",
# "these", "someone", "else") are absent.  They occur inside multi-word tokens
# of DOMAIN_TOKEN_MAP such as "with_alcohol" or "have_been", so the omission
# looks deliberate -- verify before "completing" the list.
# Kept as a list with one entry per word and no duplicates, so existing
# callers that index, slice or concatenate it keep working.
STOP_WORDS = [
    "a's",
    'able',
    'about',
    'above',
    'according',
    'accordingly',
    'across',
    'actually',
    'after',
    'afterwards',
    'again',
    'against',
    "ain't",
    'all',
    'allow',
    'allows',
    'almost',
    'alone',
    'along',
    'already',
    'also',
    'although',
    'always',
    'am',
    'among',
    'amongst',
    'an',
    'and',
    'another',
    'any',
    'anybody',
    'anyhow',
    'anyone',
    'anything',
    'anyway',
    'anyways',
    'anywhere',
    'apart',
    'appear',
    'appreciate',
    'appropriate',
    'are',
    "aren't",
    'around',
    'as',
    'aside',
    'ask',
    'asking',
    'associated',
    'at',
    'available',
    'away',
    'awfully',
    'be',
    'became',
    'because',
    'become',
    'becomes',
    'becoming',
    'before',
    'beforehand',
    'behind',
    'being',
    'believe',
    'below',
    'beside',
    'besides',
    'best',
    'better',
    'between',
    'beyond',
    'both',
    'brief',
    'but',
    'by',
    "c'mon",
    "c's",
    'came',
    'can',
    "can't",
    'cannot',
    'cant',
    'cause',
    'causes',
    'certain',
    'certainly',
    'changes',
    'clearly',
    'co',
    'com',
    'come',
    'comes',
    'concerning',
    'consequently',
    'consider',
    'considering',
    'contain',
    'containing',
    'contains',
    'corresponding',
    'could',
    "couldn't",
    'course',
    'currently',
    'definitely',
    'described',
    'despite',
    'did',
    "didn't",
    'different',
    'do',
    'does',
    "doesn't",
    'doing',
    "don't",
    'done',
    'down',
    'downwards',
    'during',
    'each',
    'edu',
    'eg',
    'eight',
    'either',
    'elsewhere',
    'enough',
    'entirely',
    'especially',
    'et',
    'etc',
    'even',
    'ever',
    'every',
    'everybody',
    'everyone',
    'everything',
    'everywhere',
    'ex',
    'exactly',
    'example',
    'except',
    'far',
    'few',
    'fifth',
    'first',
    'five',
    'followed',
    'following',
    'follows',
    'for',
    'former',
    'formerly',
    'forth',
    'four',
    'from',
    'further',
    'furthermore',
    'get',
    'gets',
    'getting',
    'given',
    'gives',
    'go',
    'goes',
    'going',
    'gone',
    'got',
    'gotten',
    'greetings',
    'had',
    "hadn't",
    'happens',
    'hardly',
    'has',
    "hasn't",
    "haven't",
    'having',
    'he',
    "he's",
    'hello',
    'help',
    'hence',
    'her',
    'here',
    "here's",
    'hereafter',
    'hereby',
    'herein',
    'hereupon',
    'hers',
    'herself',
    'hi',
    'him',
    'himself',
    'his',
    'hither',
    'hopefully',
    'how',
    'howbeit',
    'however',
    "i'd",
    "i'll",
    "i'm",
    "i've",
    'ie',
    'if',
    'ignored',
    'immediate',
    'in',
    'inasmuch',
    'inc',
    'indeed',
    'indicate',
    'indicated',
    'indicates',
    'inner',
    'insofar',
    'instead',
    'into',
    'inward',
    'is',
    "isn't",
    'it',
    "it'd",
    "it'll",
    "it's",
    'its',
    'itself',
    'just',
    'keep',
    'keeps',
    'kept',
    'know',
    'knows',
    'known',
    'last',
    'lately',
    'later',
    'latter',
    'latterly',
    'least',
    'less',
    'lest',
    'let',
    "let's",
    'like',
    'liked',
    'likely',
    'little',
    'look',
    'looking',
    'looks',
    'ltd',
    'mainly',
    'many',
    'may',
    'maybe',
    'me',
    'mean',
    'meanwhile',
    'merely',
    'might',
    'more',
    'moreover',
    'most',
    'mostly',
    'much',
    'must',
    'my',
    'myself',
    'name',
    'namely',
    'nd',
    'near',
    'nearly',
    'necessary',
    'needs',
    'neither',
    'never',
    'nevertheless',
    'new',
    'next',
    'nine',
    'no',
    'nobody',
    'non',
    'none',
    'noone',
    'nor',
    'normally',
    'not',
    'nothing',
    'novel',
    'now',
    'nowhere',
    'obviously',
    'of',
    'off',
    'often',
    'oh',
    'ok',
    'okay',
    'old',
    'on',
    'once',
    'one',
    'ones',
    'only',
    'onto',
    'or',
    'other',
    'others',
    'otherwise',
    'ought',
    'our',
    'ours',
    'ourselves',
    'out',
    'outside',
    'over',
    'overall',
    'own',
    'particular',
    'particularly',
    'per',
    'perhaps',
    'placed',
    'please',
    'plus',
    'possible',
    'presumably',
    'probably',
    'provides',
    'que',
    'quite',
    'qv',
    'rather',
    'rd',
    're',
    'really',
    'reasonably',
    'regarding',
    'regardless',
    'regards',
    'relatively',
    'respectively',
    'right',
    'said',
    'same',
    'saw',
    'say',
    'saying',
    'says',
    'second',
    'secondly',
    'see',
    'seeing',
    'seem',
    'seemed',
    'seeming',
    'seems',
    'seen',
    'self',
    'selves',
    'sensible',
    'sent',
    'serious',
    'seriously',
    'seven',
    'several',
    'shall',
    'she',
    'should',
    "shouldn't",
    'since',
    'six',
    'so',
    'some',
    'somebody',
    'somehow',
    'something',
    'sometime',
    'sometimes',
    'somewhat',
    'somewhere',
    'soon',
    'sorry',
    'specified',
    'specify',
    'specifying',
    'still',
    'sub',
    'such',
    'sup',
    'sure',
    "t's",
    'take',
    'taken',
    'tell',
    'tends',
    'th',
    'than',
    'thank',
    'thanks',
    'thanx',
    "that's",
    'thats',
    'the',
    'their',
    'theirs',
    'them',
    'themselves',
    'then',
    'thence',
    'there',
    "there's",
    'thereafter',
    'thereby',
    'therefore',
    'therein',
    'theres',
    'thereupon',
    'they',
    "they'd",
    "they'll",
    "they're",
    "they've",
    'think',
    'third',
    'this',
    'thorough',
    'thoroughly',
    'those',
    'though',
    'three',
    'through',
    'throughout',
    'thru',
    'thus',
    'to',
    'together',
    'too',
    'took',
    'toward',
    'towards',
    'tried',
    'tries',
    'truly',
    'try',
    'trying',
    'twice',
    'two',
    'un',
    'under',
    'unfortunately',
    'unless',
    'unlikely',
    'until',
    'unto',
    'up',
    'upon',
    'us',
    'use',
    'used',
    'useful',
    'uses',
    'using',
    'usually',
    'value',
    'various',
    'very',
    'via',
    'viz',
    'vs',
    'want',
    'wants',
    'was',
    "wasn't",
    'way',
    'we',
    "we'd",
    "we'll",
    "we're",
    "we've",
    'welcome',
    'well',
    'went',
    'were',
    "weren't",
    'what',
    "what's",
    'whatever',
    'when',
    'whence',
    'whenever',
    'where',
    "where's",
    'whereafter',
    'whereas',
    'whereby',
    'wherein',
    'whereupon',
    'wherever',
    'whether',
    'which',
    'while',
    'whither',
    'who',
    "who's",
    'whoever',
    'whole',
    'whom',
    'whose',
    'why',
    'will',
    'willing',
    'wish',
    'within',
    'without',
    "won't",
    'wonder',
    'would',
    "wouldn't",
    'yes',
    'yet',
    'you',
    "you'd",
    "you'll",
    "you're",
    "you've",
    'your',
    'yours',
    'yourself',
    'yourselves',
    'zero'
]


# Run of whitespace at the start of a line.  With MULTILINE, "^" anchors after
# every newline as well as at the start of the string; because "\s" also
# matches newlines, a match can swallow following blank lines too.
WHITE_SPACE = re.compile(r"^\s+", flags=re.MULTILINE)
# Run of one or more characters that are not ASCII letters or digits.
# The pattern has no "^"/"$" anchor, so MULTILINE does not affect what it
# matches; the flag is retained so the compiled object stays the same.
SPECIAL = re.compile(r"[^A-Za-z0-9]+", flags=re.MULTILINE)