# Lead-in phrase placed before the weakness text: an optional comma, a space, then
# either a verb phrase ("has", "contains", "introduces", "suffers from", ...) with an
# optional article, or "was/were found/discovered to contain/have a/an ".
WEAKNESS_BEFORE_STOP_WORDS = (
    r'(,|) ('
    r'(has|contain[s]?|introduc(es|ed|ing)|suffers from) (an |a |)'
    r'|(was|were) (found|discovered) to (contain|have) (a|an) '
    r')'
)

# Lazy capture of the weakness text itself (may be empty), exposed as group `weakness`.
WEAKNESS_PATTERN = (
    r'(?P<weakness>'
    r'.*?'
    r')'
)

# Optional trailer after the weakness text: " vulnerability"/" vulnerabilities"/
# " issue(s)" (optionally followed by " was/were discovered/found"), " exists",
# or nothing at all.
WEAKNESS_AFTER_STOP_WORDS = (
    r'('
    r' ((V|v)ulnerabilit(y|ies)|issue[s]?)( (was|were) (discovered|found)|)'
    r'| exists'
    r'|'
    r')'
)

# Lead-in before a component name: " in ", " in the ", " in its ", " at " or
# " starting at " (leading space included).
COMPONENT_BEFORE_STOP_WORDS = (
    r' ('
    r'in (the |its |)'
    r'|(starting |)at '
    r')'
)

# Lazy, non-empty capture of the component text, exposed as group `component`.
COMPONENT_PATTERN = (
    r'(?P<component>'
    r'.+?'
    r')'
)

# Noun that may follow a component name, with an optional plural "s".
COMPONENT_AFTER_STOP_WORDS = '(' + '|'.join((
    'file', 'component', 'call', 'feature', 'function', 'method', 'process', 'implementation',
)) + ')[s]?'

# One quoting character: backtick, ASCII single/double quote, or the typographic
# quotes U+2018, U+2019, U+201C, U+201D.
QUOTES_PATTERN = r'[`\'"\u2018\u2019\u201C\u201D]'

# "before YYYY-MM-DD" / "through YYYY-MM-DD".
# The capturing group is kept on purpose: patterns that embed this constant use
# numbered back-references, and changing the group count would shift them.
DATE_VERSION_PATTERN = r'(before|through) \d{4}-\d{2}-\d{2}'

# A four-digit year from 2000 to 2029.
YEAR_PATTERN = r'(?:200\d|201\d|202\d)'

# Dotted version number such as "1.2", "v1.2.3", "2.x" or "1.0.0beta2":
#   * any number of "v.", "v", "V" or "R" prefixes,
#   * digits followed by one or more ".<digits>" / ".x" parts,
#   * an optional alphanumeric suffix that starts with a letter.
# The suffix was previously written as `(?:[a-zA-Z]+\d*)*`. That nested quantifier
# accepts exactly the same strings, but backtracks exponentially when a long run
# of letters is followed by another word character (so the closing `\b` fails).
# The form below matches the same language in linear time.
VERSION_PATTERN = r'\b(?:v\.|v|V|R)*(?:\d+(?:\.(?:\d+|x))+)(?:[a-zA-Z][a-zA-Z0-9]*)?\b'

# 5 to 40 lowercase hexadecimal characters delimited by word boundaries.
COMMIT_VERSION_PATTERN = r'\b[0-9a-f]{5,40}\b'

# Lead-in before a configuration: " in|of|on " plus an optional "some "/"the "/"a ".
CONFIGURATION_BEFORE_STOP_WORDS = (
    r' (in|of|on)'
    r' (some |the |a |)'
)

# Captures, as group `configuration`, a token of at least two characters from
# [a-zA-Z0-9-/(),], then lazily any text up to a version-like marker (dotted version,
# commit hash, "before/through <date>" or year), optionally followed by a range
# qualifier (" and before", " and earlier", ...), " plugin", or "...version(s)".
# The capturing groups inside are counted by numbered back-references in patterns
# that embed this constant, so none of them may be added or removed.
CONFIGURATION_PATTERN = (
    r'(?P<configuration>[a-zA-Z0-9-\/\(\),]{2,}.+?(?:'
    + '|'.join((VERSION_PATTERN, COMMIT_VERSION_PATTERN, DATE_VERSION_PATTERN, YEAR_PATTERN))
    + r')(?: and (?:before|prior|(possibly |)earlier|below|up to and including)| plugin|.+?version[s]?|))'
)

# Optional trailer " for <one to four words>", where a word consists of letters,
# digits, "!", "-" and ".".
CONFIGURATION_AFTER_STOP_WORDS = (
    r'('
    r' for(?: [a-zA-Z0-9\!\-\.]+){1,4}'
    r'|)'
)

# Lead-in before the attacker description: optional comma, space, optional modal
# ("could ", "which ", "which could ", "may ", "might ", "that ", "that can "),
# a verb stem ("allow", "enabl(e)", "exploitable by", "used by") with an optional
# "s"/"ed"/"ing" ending, a space and an optional article ("a ", "an ", "any ").
ATTACKER_BEFORE_STOP_WORDS = (
    r'(,|) '
    r'(could |which (could |)|may |might |that (can |)|)'
    r'(allow|enabl[e]?|(exploitable|used) by)'
    r'(s|ed|ing|)'
    r' (a |an |any |)'
)

# Lazily captures text up to and including an actor noun (optionally plural),
# exposed as group `attacker`.
ATTACKER_PATTERN = (
    r'(?P<attacker>.*?'
    r'(attacker|user|server|administrator|admin|subscriber|adversary)[s]?'
    r')'
)

# Optional qualifier after the attacker: " with ... permission/privilege/role/
# access/right(s)" (optionally " and above" / " as low ..."), an "able to ..." /
# "with the ability to ..." clause, " such as ...", " permitted", or nothing.
ATTACKER_AFTER_STOP_WORDS = (
    r'(,|)('
    r' with.+?(permission|privilege|role|access|right)[s]?( and above| as low.+?|)'
    r'| (((who are |)able|with the ability) to).+?'
    r'| such as.+?'
    r'| permitted'
    r'|)'
)

# Lead-in " to perform " / " to execute " (leading and trailing space included).
ATTACK_BEFORE_STOP_WORDS = (
    r' ('
    r'to (perform|execute) '
    r')'
)

# Lazily captures text up to and including "attack" or "attacks", exposed as
# group `attack`.
ATTACK_PATTERN = (
    r'(?P<attack>'
    r'.*?(attack)[s]?'
    r')'
)

# Lead-in before a condition clause: optional comma, space, then "if ", "where ",
# "when " or "even when ".
CONDITION_BEFORE_STOP_WORDS = (
    r'(,|) '
    r'(if |where |(even |)when )'
)

# Lazy, non-empty capture of the condition text, exposed as group `condition`.
CONDITION_PATTERN = (
    r'(?P<condition>'
    r'.+?'
    r')'
)

# Words accepted right after "to " / "which could " when locating the start of an
# impact clause. Each entry becomes one branch of a regex alternation, so entries
# must be unique (a repeated word is a dead branch) and free of regex
# metacharacters. "replace" used to be listed twice; the duplicate was removed.
IMPACT_ACTIONS = [
    "cause", "bypass", "gain", "hijack", "escalate", "inject", "trigger", "upload", "update", "connect", "send",
    "redirect", "replace", "change", "create", "read", "enumerate", "impersonate", "call", "request", "delete", "steal",
    "define", "lead", "check", "access", "execute", "run", "force", "crash", "overwrite", "corrupt", "write", "take",
    "make", "conduct", "perform", "obtain", "achieve", "remotely", "arbitrarily", "trick", "copy", "accept",
    "disable", "initiate", "rebuild", "include", "modify", "add", "edit", "configure", "have", "guess", "view", "list",
    "retrieve", "insert", "disclose", "reset", "craft", "open", "discover", "determine", "visit", "introduce",
    "harvest", "exfiltrate", "clear", "supply", "leak", "use", "implement"
]

# The action words joined into a bare alternation body (no surrounding parentheses).
IMPACT_ACTIONS_PATTERN = '|'.join(IMPACT_ACTIONS)

# Lead-in before the impact text: optional comma, space, then either
#   "to <action>" / "which could <action>" (or "potentially" / "potentially enable"
#   in place of the action), an optional plural "s", optional comma, a space and an
#   optional "a ",
# or one of the fixed phrases "resulting in ", "leading to ", "causing a ",
# "could cause ", "and can lead to ".
IMPACT_BEFORE_STOP_WORDS = (
    r'(,|) ((to|which( could)) (' + IMPACT_ACTIONS_PATTERN + r'|potentially( enable|))[s]?(,|) (a |)'
    r'|resulting in |leading to |causing a |could cause |and can lead to )'
)

# Lazy, non-empty capture of the impact text, exposed as group `impact`.
IMPACT_PATTERN = r'(?P<impact>.+?)'

# Lead-in before an attack vector: optional comma, space, "via" / "using" / "by" /
# "as demonstrated by", a space and an optional "an " / "a " / "vectors ".
VECTOR_BEFORE_STOP_WORDS = (
    r'(,|) '
    r'(via|using|(as demonstrated |)by)'
    r' (an |a |vectors |)'
)

# Lazy, non-empty capture of the vector text, exposed as group `vector`.
VECTOR_PATTERN = (
    r'(?P<vector>'
    r'.+?'
    r')'
)

# Lead-in before a root-cause clause: optional comma, space, one of the phrases
# listed below, then a trailing space.
ROOT_CAUSE_BEFORE_STOP_WORD = (
    r'(,|) ('
    r'(which (is|are)|does) not'
    r'|because( of| a| the|)'
    r'|(due|fail) to'
    r'|insecurely'
    r'|uses'
    r') '
)

# Lazy, non-empty capture of the root-cause text, exposed as group `root_cause`.
ROOT_CAUSE_PATTERN = (
    r'(?P<root_cause>'
    r'.+?'
    r')'
)


# Identifier-like token: a letter or underscore followed by one or more of
# letters, digits, "_", ":", "(" and ")" (so at least two characters in total).
FUNCTION_NAME_PATTERN = (
    r'[a-zA-Z_]'
    r'[a-zA-Z0-9_:\(\)]+'
)

# A dot followed by one to four alphanumeric characters.
EXTENSION_PATTERN = r'\.[a-zA-Z0-9]{1,4}'

# One path segment: letters, digits, "_" and "-".
DIR_FILE_NAME_PATTERN = r'[a-zA-Z0-9_-]+'

# A bare file name: one segment plus an extension.
FILE_PATTERN = ''.join((DIR_FILE_NAME_PATTERN, EXTENSION_PATTERN))

# A path with at least one "/" separator, ending in an extension.
RELATIVE_PATH_PATTERN = ''.join((
    DIR_FILE_NAME_PATTERN,
    r'(?:/(?:', DIR_FILE_NAME_PATTERN, r'))+',
    EXTENSION_PATTERN,
))


# Maps a regular expression to the replacement template (using \N back-references)
# that rewrites the matched phrasing.
# NOTE(review): presumably each pair is applied with re.sub, in insertion order, by
# code outside this section -- confirm before reordering entries.
#
# Back-reference numbering: CONFIGURATION_PATTERN contributes three capture groups
# (the named `configuration` group plus two inner ones) and
# CONFIGURATION_BEFORE_STOP_WORDS contributes two, which is why the templates below
# skip some group numbers.
REPLACEMENT_PHRASES = {
    # --- "via the <function/component> ..." at the end of the text -------------
    r'via the (' + FUNCTION_NAME_PATTERN + r') function in the component (.+?)\.$': r'in the \1 function at \2 component.',
    r'via the (' + FUNCTION_NAME_PATTERN + r') (function|component) (at|in) (.+?)\.$': r'in the \1 at \4.',
    r'via the (function|component) (' + FUNCTION_NAME_PATTERN + r') (at|in) (.+?)\.$': r'in the \2 \1 at \4.',
    r'via (' + FUNCTION_NAME_PATTERN + r') class in (.+?)\.$': r'in the \1 at \2.',
    # The final period is escaped in the next two patterns. Unescaped, `.` matched any
    # character, so on text without a trailing period the greedy function name
    # backtracked and lost its last character ("foobar" -> "fooba").
    r'via the (' + FUNCTION_NAME_PATTERN + r') function\.$': r'in the \1 function.',
    r'via the function (' + FUNCTION_NAME_PATTERN + r')\.$': r'in the \1 function.',
    r'via the component (.+?)\.$': r'in the \1.',

    # --- "in function X in ..." / "at <path> in X in ..." ----------------------
    r'in function (' + FUNCTION_NAME_PATTERN + r') in (.+?) in ': r'in \1 function at \2 in ',
    r'in function (' + FUNCTION_NAME_PATTERN + r') in ' + CONFIGURATION_PATTERN: r'in \1 function in \2',
    r'at (.+?) in (' + FUNCTION_NAME_PATTERN + r') in ' + CONFIGURATION_PATTERN: r'in \2 at \1 in \3',

    # --- fixed wording substitutions -------------------------------------------
    r'out of bounds': r'out-of-bounds',
    r'has been identified in': r'exists in',
    r'(that |)leads to( a|)': r'leading to',
    r'in the context of': r'within',
    r'that( can|) result[s]?( in|)': r'resulting in',
    r'^There is a': r'A',
    r' to to ': ' to ',
    r',(?=\S)': ', ',  # fixes commas missing a space after them
    # drops the quotes around a quoted token that follows "in "
    r'in ' + QUOTES_PATTERN + r'([a-zA-Z0-9-\/\(\),]{2,})' + QUOTES_PATTERN: r'in \1',
    r' <= (' + VERSION_PATTERN + ')': r' up to and including \1',
    r'is vulnerable to': r'has',
    r'may be vulnerable to': r"could cause",
    r' attackers to Injecting ': ' attackers to inject ',
    r'which can be used by an attacker': 'allows an attacker',
    r'resulting in (.*?) vulnerability': r'introduces \1 vulnerability',
    r'is affected by (.*?) vulnerability': r'has \1 vulnerability',
    # removes the comma between a digit run and a following lowercase word
    r'(\d+), ([a-z]+)': r'\1 \2',

    # --- sentences that open with the configuration ----------------------------
    r'^In (applications using |)' + CONFIGURATION_PATTERN + r'(,|) there is a': r'\2 has a',
    r'^(?:In |)' + CONFIGURATION_PATTERN + r' ((?:a |).+?)(?: vulnerability|) exists in': r'\1 has \4 in',

    # --- function / file / path followed by " in|of|on <article>" --------------
    # These insert the word "file" after the path; the optional article captured by
    # CONFIGURATION_BEFORE_STOP_WORDS is not carried into the replacement.
    r'^(' + FUNCTION_NAME_PATTERN + r') (in) (' + RELATIVE_PATH_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 \2 \3 file \4 ',
    r'(' + FUNCTION_NAME_PATTERN + r') function in (' + RELATIVE_PATH_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 function at \2 file \3 ',
    r'(' + FUNCTION_NAME_PATTERN + r') function in (' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 function at \2 file \3 ',
    r'(' + FUNCTION_NAME_PATTERN + r') (in) (' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 at \3 file \4 ',
    r'^(' + RELATIVE_PATH_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 file \2 ',
    r'^(' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS: r'\1 file \2 '
}

# Regular expressions for text fragments treated as noise.
# NOTE(review): presumably removed from the text by code outside this section -- confirm.
NOISE_PATTERNS = (
    [
        r'\s+\(.*?\)',                # whitespace + a parenthesised aside
        r'\s+\[CWE-\d{1,4}\]',        # whitespace + bracketed CWE id
        r'( of type|)\s+CWE-\d{1,4}', # bare CWE id, optionally preceded by " of type"
    ]
    + [
        # cross-references to other CVE ids
        r' a different (?:vulnerability|vector) than (CVE-\d{4}-\d{4,7}(?:,\s*CVE-\d{4}-\d{4,7})*)',
        r' a related issue to CVE-\d{4}-\d{4,7}',
        r' and CVE-\d{4}-\d{4,7}',
    ]
    + [
        ''.join((r' fixed in ', VERSION_PATTERN)),  # " fixed in <version>"
        r'can be abused ',
    ]
)
