import os
import re

# Flat list of column names, each prefixed with "src_" or "dst_".  Every
# "src_" name comes first, followed by every "dst_" name; the relative order
# inside each half is significant and is kept exactly as it was.
# NOTE(review): the names look like lower-cased HTTP header names with "-"
# written as "_" -- presumably one column per header seen on each side of a
# flow; confirm with the code that consumes this list.  Odd spellings such as
# "dst_pragrma" or "src_warded_for" are kept verbatim on purpose.
src_dst_header = [
    # ---- "src_" columns ----
    "src_host", "src_user_agent", "src_content_length", "src_connection", "src_content_type",
    "src_x_forwarded_for", "src_accept", "src_accept_encoding", "src_accept_language", "src_referer",
    "src_cookie", "src_origin", "src_x_requested_with", "src_sec_fetch_mode", "src_sec_fetch_site",
    "src_sec_fetch_dest", "src_content_encoding", "src_sec_ch_ua", "src_sec_ch_ua_mobile",
    "src_sec_ch_ua_platform", "src_cache_control", "src_upgrade_insecure_requests", "src_pragma",
    "src_x_real_ip", "src_soapaction", "src_x_forwarded_proto", "src_x_forwarded_host",
    "src_x_request_id",
    "src_x_forwarded_port", "src_x_forwarded_scheme", "src_x_original_forwarded_for", "src_x_scheme",
    "src_x_forwarded_scheme_diy", "src_authorization", "src_trace_id", "src_distinct_id", "src_url_path",
    "src_if_modified_since", "src_ecid_context", "src_token", "src_access_control_request_method",
    "src_access_token", "src_expect", "src_access_control_request_headers", "src_sec_fetch_user",
    "src_snk_location", "src_accept_charset", "src_date", "src_usercode", "src_logincode", "src_range",
    "src_checktime", "src_client_lang", "src_requestsource", "src_if_range", "src_if_none_match",
    "src_sw8",
    "src_sw8_correlation", "src_sw8_x", "src_x_sign", "src_x_timestamp", "src_charset",
    "src_x_custom_header", "src_sec_websocket_key", "src_sec_websocket_version", "src_upgrade",
    "src_sec_websocket_extensions", "src_xms_auth_token", "src_xms_cluster_id", "src_purpose",
    "src_x_ti_app_id", "src_x_ti_secret_code", "src_x_prototype_version", "src_systemcode",
    "src_access_control_request_private_network", "src_wxr", "src_client_version", "src_requestid",
    "src_request_module", "src_x_mule_encoding", "src_x_mule_session", "src_elite_tag", "src_dnt",
    "src_x_mule_endpoint", "src_x_mule_root_message_id", "src_x_mass_tappid",
    "src_x_elastic_product_origin",
    "src_priority", "src_server", "src_transfer_encoding", "src_xxl_job_access_token", "src_te",
    "src_access_control_allow_origin", "src_eset_spread_control", "src_x_splunk_digest",
    "src_x_splunk_lm_nonce", "src_x_splunk_lm_signature", "src_x_splunk_lm_timestamp", "src_avb_version",
    "src_set_cookie", "src_x_oracle_dms_ecid", "src_x_oracle_dms_rid",
    "src_x_prometheus_scrape_timeout_seconds", "src_client", "src_post", "src_ucosessionid", "src_user",
    "src_winuser", "src_showloading", "src_digest", "src_ocrimageflag", "src_x_content_type_options",
    "src_access_control_allow_methods", "src_access_control_allow_credentials", "src_expires",
    "src_keep_alive", "src_url_token", "src_postman_token", "src_x_xss_protection", "src_vary",
    "src_access_control_allow_headers", "src_signdata", "src_ua_cpu", "src_authorg", "src_yssip",
    "src_yssmac", "src_location", "src_content_language", "src_git_protocol", "src_x_forwarded_prefix",
    "src_invoke_type", "src_x_ua_compatible", "src_x_bd_traceid", "src_x_from_h3_trnet", "src_n",
    "src_nx_anti_csrf_token", "src_proxy_connection", "src_via", "src_x_nexus_ui", "src_pragma_type",
    "src_appkey", "src_area", "src_check", "src_clientname", "src_lang", "src_networktype", "src_nonce",
    "src_osversion", "src_screen", "src_st", "src_timestamp", "src_userid", "src_x_frame_options",
    "src_x_b3_parentspanid", "src_x_b3_sampled", "src_x_b3_spanid", "src_x_b3_traceid", "src_sec_purpose",
    "src_kbn_version", "src_access_control_expose_headers", "src_iszip", "src_from", "src_state",
    "src_access_control_max_age", "src_allow", "src_kbn_system_request", "src_tracelogcontext",
    "src_etag",
    "src_ms_cv", "src_x_application_context", "src_x_csrftoken", "src_tlogtraceid", "src_link_pwd_token",
    "src_preip", "src_preivkapp", "src_preivkhost", "src_tlogspanid", "src_accesstoken",
    "src_icy_metadata",
    "src_vivo_browser_text_zoom", "src_contents_client_adapter_id", "src_last_modified",
    "src_need_select_sub_url", "src_x_bd_quic", "src_x_bdboxapp_netengine", "src_x_playback_session_id",
    "src_x_turbonet_info", "src_cas_tgc", "src_imgids", "src_jsessionid", "src_accept_ranges",
    "src_am_traceid", "src_xms_backend_server", "src_ss_language", "src_sssessionid", "src_username",
    "src_content_md5", "src_x_acs_security_token", "src_x_log_apiversion", "src_x_log_bodyrawsize",
    "src_x_log_compresstype", "src_x_log_signaturemethod", "src_x_nod32_mode", "src_client_appname",
    "src_client_requesttoken", "src_client_requestts", "src_exconfiginfo", "src_long_pulling_timeout",
    "src_dispatch_header", "src_sign", "src_strict_transport_security", "src_ts", "src_bdpparallelload",
    "src_referrer_policy", "src_x_amz_request_id", "src_x_cm_service", "src_x_miorigin",
    "src_x_seafile_client_version", "src_amz_sdk_invocation_id", "src_amz_sdk_request",
    "src_amz_sdk_retry",
    "src_bussno", "src_busstype", "src_classcode", "src_comcode", "src_createtime", "src_createuser",
    "src_imgfilepath", "src_imgid", "src_imgtype", "src_s_cnection", "src_storagecondition",
    "src_x_eset_updateid", "src_x_powered_by", "src_allow_cross_domain_redirect",
    "src_amp_cache_transform",
    "src_x_csrf_token", "src_x_check_exist", "src_sec_gpc", "src_sf_ajax", "src_request_type",
    "src_x_envoy_decorator_operation", "src_x_envoy_upstream_service_time", "src_x_from_cdn",
    "src_x_download_options", "src_x_permitted_cross_domain_policies", "src_busscode", "src_companyno",
    "src_fcpos", "src_forwarded", "src_prod_sw8", "src_prod_sw8_correlation", "src_prod_sw8_x",
    "src_rewritepath", "src_x_elastic_product", "src_x_forwarded_server", "src_in_form_img", "src_x_pjax",
    "src_age", "src_channel", "src_client_ip", "src_content_transfer_encoding", "src_guid",
    "src_oldchannel",
    "src_product", "src_remote_addr", "src_seafile_repo_token", "src_starttag", "src_starttype",
    "src_traceid", "src_warded_for", "src_x_aspnet_version", "src_zcid", "src_agent_version",
    "src_alt_svc",
    "src_cf_ray", "src_chrome_proxy", "src_content_disposition", "src_content_range",
    "src_content_security_policy", "src_d_for", "src_grpc_accept_encoding", "src_grpc_timeout",
    "src_gslb_okhttp", "src_jenkins_crumb", "src_nel", "src_orderid", "src_p3p", "src_report_to",
    "src_route_data", "src_service_worker", "src_tap_app_conf_ver", "src_tap_gslb", "src_tc_anp",
    "src_tc_entsig", "src_tc_spanid", "src_tc_traceid", "src_www_authenticate", "src_x_aggregate_auth",
    "src_x_amz_storage_class", "src_x_cache_status", "src_x_ccc", "src_x_cdn_request_id", "src_x_cid",
    "src_x_clickhouse_format", "src_x_clickhouse_query_id", "src_x_clickhouse_server_display_name",
    "src_x_clickhouse_summary", "src_x_clickhouse_timezone", "src_x_gitlab_feature_category",
    "src_x_link_via", "src_x_ucbrowser_ua",
] + [
    # ---- "dst_" columns ----
    "dst_date", "dst_content_type", "dst_content_length",
    "dst_transfer_encoding", "dst_connection", "dst_set_cookie", "dst_x_oracle_dms_ecid",
    "dst_x_oracle_dms_rid", "dst_content_language", "dst_cache_control", "dst_server", "dst_expires",
    "dst_pragma", "dst_access_control_allow_origin", "dst_keep_alive", "dst_vary", "dst_last_modified",
    "dst_access_control_allow_credentials", "dst_access_control_allow_methods", "dst_accept_ranges",
    "dst_access_control_allow_headers", "dst_x_content_type_options", "dst_etag",
    "dst_access_control_max_age", "dst_content_encoding", "dst_access_control_expose_headers",
    "dst_x_xss_protection", "dst_access_control_request_headers", "dst_x_frame_options", "dst_location",
    "dst_x_powered_by", "dst_x_application_context", "dst_p3p", "dst_x_ua_compatible",
    "dst_x_mule_encoding",
    "dst_x_mule_session", "dst_s_cnection", "dst_x_amz_request_id", "dst_content_disposition",
    "dst_x_amz_storage_class", "dst_content_range", "dst_pragma_type", "dst_allow",
    "dst_xms_backend_server",
    "dst_x_clickhouse_server_display_name", "dst_x_clickhouse_summary", "dst_x_clickhouse_format",
    "dst_x_clickhouse_query_id", "dst_x_clickhouse_timezone", "dst_referrer_policy",
    "dst_content_security_policy", "dst_content_id", "dst_sec_websocket_accept", "dst_upgrade",
    "dst_tlogtraceid", "dst_sec_websocket_extensions", "dst_x_elastic_product", "dst_via", "dst_x_cache",
    "dst_x_amz_cf_id", "dst_x_amz_cf_pop", "dst_rgwx_embedded_metadata_len", "dst_rgwx_mtime",
    "dst_rgwx_obj_pg_ver", "dst_rgwx_object_size", "dst_rgwx_source_zone_short_id",
    "dst_x_amz_version_id",
    "dst_x_request_id", "dst_x_envoy_decorator_operation", "dst_x_envoy_upstream_service_time", "dst_age",
    "dst_www_authenticate", "dst_strict_transport_security", "dst_terminationurl", "dst_x_splunk_digest",
    "dst_x_splunk_lm_nonce", "dst_x_splunk_lm_timestamp", "dst_x_download_options",
    "dst_x_permitted_cross_domain_policies", "dst_pragrma", "dst_content_transfer_encoding",
    "dst_x_runtime",
    "dst_x_arequestid", "dst_x_ausername", "dst_x_asessionid", "dst_x_aspnet_version",
    "dst_x_seraph_loginreason", "dst_accept_charset", "dst_n", "dst_error_code", "dst_error_msg",
    "dst_x_w_no", "dst_kbn_license_sig", "dst_kbn_name", "dst_cross_origin_opener_policy",
    "dst_xdomainrequestallowed", "dst_x_ratelimit_limit_vass_zuul_api_user_24",
    "dst_x_ratelimit_remaining_vass_zuul_api_use", "dst_x_ratelimit_remaining_vass_zuul_wx_port",
    "dst_x_ratelimit_reset_vass_zuul_api_user_24", "dst_audit_id", "dst_x_kubernetes_pf_flowschema_uid",
    "dst_x_kubernetes_pf_prioritylevel_uid", "dst_alt_svc", "dst_cf_ray", "dst_nel", "dst_report_to",
    "dst_x_ratelimit_remaining_vass_zuul_api_ord", "dst_x_protected_by", "dst_traceresponse",
    "dst_x_br_response", "dst_x_cache_status", "dst_x_ratelimit_limit_vass_zuul_api_order_1",
    "dst_x_ratelimit_reset_vass_zuul_api_order_1", "dst_grpc_metadata_accept_encoding",
    "dst_grpc_metadata_content_type", "dst_grpc_metadata_grpc_accept_encoding",
    "dst_x_ratelimit_limit_vass_zuul_wx_port_240", "dst_x_ratelimit_reset_vass_zuul_wx_port_240",
    "dst_permissions_policy", "dst_ctl_cache_status", "dst_progma", "dst_request_id", "dst_x_csrf_token",
    "dst_x_log_append_meta", "dst_x_log_requestid", "dst_x_log_time", "dst_hostname",
    "dst_x_networkmanager_status", "dst_browseruid", "dst_k_cache_status", "dst_kcs_via", "dst_x_ccc",
    "dst_x_cid", "dst_page_title", "dst_x_via", "dst_x_ws_request_id", "dst_enable_encrypted_library",
    "dst_expire", "dst_x_ratelimit_limit_vass_zuul_wx_port_39.",
    "dst_x_ratelimit_reset_vass_zuul_wx_port_39.", "dst_content_location", "dst_gsid", "dst_sc",
    "dst_x_response_timestrap", "dst_link", "dst_x_ratelimit_limit_vass_zuul_api_order_2",
    "dst_x_ratelimit_limit_vass_zuul_wx_port_10.", "dst_x_ratelimit_reset_vass_zuul_api_order_2",
    "dst_x_ratelimit_reset_vass_zuul_wx_port_10.", "dst_x_reqid", "dst_login", "dst_x_cache_lookup",
    "dst_x_cdn_request_id", "dst_x_link_via", "dst_x_nws_log_uuid",
    "dst_x_ratelimit_limit_vass_zuul_wx_port_111", "dst_x_ratelimit_reset_vass_zuul_wx_port_111",
    "dst_cache_contror", "dst_cdn_cache", "dst_cdn_cachedat", "dst_cdn_edgestorageid", "dst_cdn_proxyver",
    "dst_cdn_pullzone", "dst_cdn_requestcountrycode", "dst_cdn_requestid", "dst_cdn_requestpullcode",
    "dst_cdn_requestpullsuccess", "dst_cdn_status", "dst_cdn_uid", "dst_gitlab_ci_builds_polling",
    "dst_new_jwt", "dst_ohc_cache_hit", "dst_ohc_file_size", "dst_ohc_global_saved_time", "dst_x_hudson",
    "dst_x_instance_identity", "dst_x_jenkins", "dst_x_jenkins_session", "dst_dir_perm",
    "dst_fndfs_error",
    "dst_oid", "dst_x_errno", "dst_x_hudson_theme", "dst_x_oss_hash_crc64ecma", "dst_x_ser",
    "dst_content_md5", "dst_exception", "dst_exceptiontype", "dst_praga", "dst_x_oss_object_type",
    "dst_x_oss_request_id", "dst_x_oss_server_time", "dst_x_oss_storage_class",
    "dst_x_ratelimit_limit_vass_zuul_api_order_3", "dst_x_ratelimit_reset_vass_zuul_api_order_3",
]
# Known DNS suffixes (generic TLDs, ".cn" second-level suffixes and Chinese
# IDN TLDs), written without a leading dot.
#
# Many entries used to end in an upper-case "NEW" or "HOT" marker
# ('comHOT', 'cnHOT', 'ac.cnNEW', '中国HOT', ...).  No real suffix ends that
# way, so those entries could never equal or terminate an actual domain name.
# The markers are stripped here; order and element count are unchanged, so
# positional uses (e.g. list.index) stay stable for the entries that already
# matched.
# NOTE(review): if a downstream consumer was fitted against the old, polluted
# values (e.g. as categorical labels), re-validate it after this change.
dns_domain_list = [
    'ac.cn', 'ah.cn', 'archi', 'art', 'asia', 'auto', 'autos', 'baby', 'band',
    'beauty',
    'beer', 'bio', 'biz', 'bj.cn', 'black', 'blue', 'bond', 'cab', 'cafe', 'car',
    'cars',
    'cash', 'cc', 'center', 'chat', 'city', 'click', 'cloud', 'club', 'cn',
    'college',
    'com', 'com.cn', 'company', 'cool', 'cq.cn', 'cyou', 'design', 'email', 'fan', 'fans',
    'fashion',
    'fit', 'fj.cn', 'fun', 'fund', 'fyi', 'games', 'gd.cn', 'global', 'gold', 'gov.cn', 'green',
    'group',
    'gs.cn', 'guru', 'gx.cn', 'gz.cn', 'ha.cn', 'hair', 'hb.cn', 'he.cn', 'hi.cn', 'hk.cn', 'hl.cn',
    'hn.cn',
    'homes', 'host', 'icu', 'info', 'ink', 'jl.cn', 'js.cn', 'jx.cn', 'kim', 'law', 'life', 'live',
    'ln.cn',
    'lotto', 'love', 'ltd', 'luxe', 'makeup', 'market', 'mba', 'me', 'media', 'mo.cn',
    'mobi',
    'monster', 'motorcycles', 'net', 'net.cn', 'news', 'nm.cn', 'nx.cn', 'online', 'org.cn',
    'organic',
    'pink', 'plus', 'poker', 'press', 'pro', 'promo', 'protection', 'pub', 'pw', 'qh.cn',
    'quest', 'red', 'ren', 'rent', 'run', 'sc.cn', 'school', 'sd.cn', 'security', 'sh.cn',
    'shop',
    'shopping', 'show', 'site', 'ski', 'skin', 'sn.cn', 'social', 'space', 'storage',
    'store',
    'studio', 'sx.cn', 'tax', 'team', 'tech', 'technology', 'theatre', 'tickets', 'tj.cn',
    'today',
    'top', 'tv', 'tw.cn', 'uno', 'video', 'vin', 'vip', 'vote', 'voto', 'wang',
    'website',
    'wiki', 'work', 'world', 'xin', 'xj.cn', 'xyz', 'xz.cn', 'yachts', 'yn.cn', 'yoga', 'zj.cn',
    'zone',
    '餐厅', '佛山', '公司', '广东', '集团', '企业', '商标', '商城', '商店', '网店', '网络', '网址',
    '我爱你', '游戏', '娱乐', '在线', '招聘', '中国', '中文网',
]
# Ordered names of the statistical feature columns.
# Layout: four size statistics, then one group of six statistics
# (count/average/min/max/rate/percent) for each packet-length bucket, then
# ten request/response aggregate statistics.  The bucket columns follow the
# regular pattern "packet_len_<bucket>_<statistic>", so they are generated
# instead of being spelled out; the resulting list is identical.
statisticHeader = (
    [
        'packet_size_mean',
        'same_src_dst_size_mean',
        'same_src_dst_size_var',
        'packet_size_variance',
    ]
    + [
        'packet_len_{}_{}'.format(bucket, statistic)
        for bucket in (
            'total', '0_19', '20_39', '40_79', '80_159', '160_319', '320_639',
            '640_1279', '1280_2559', '2560_5119', 'more_than_5120',
        )
        for statistic in ('count', 'average', 'min', 'max', 'rate', 'percent')
    ]
    + [
        'all_req_packet_size_mean',
        'all_req_packet_size_var',
        'all_res_packet_size_mean',
        'all_res_packet_size_var',
        'all_req_packet_time_period_mean',
        'all_res_packet_time_period_mean',
        'all_req_packet_time_period_var',
        'all_res_packet_time_period_var',
        'req_header_count_mean',
        'req_header_count_var',
    ]
)
# Ordered feature names in two families: "URI_FEATURES_EXTRA_*" and
# "UserAgent_*".  The "contains_<x>" suffixes use the same words as the keys
# of `regex_patterns` in this module.
features_key = [
    # URI-derived features
    'URI_FEATURES_EXTRA_contains_sql',
    'URI_FEATURES_EXTRA_contains_xss',
    'URI_FEATURES_EXTRA_contains_cmd',
    'URI_FEATURES_EXTRA_contains_path',
    'URI_FEATURES_EXTRA_contains_redirect',
    'URI_FEATURES_EXTRA_contains_danger',
    'URI_FEATURES_EXTRA_contains_suspicious_ext',
    'URI_FEATURES_EXTRA_param_count',
    'URI_FEATURES_EXTRA_path_depth',
    'URI_FEATURES_EXTRA_param_length_avg',
    'URI_FEATURES_EXTRA_param_length_max',
    # User-Agent-derived features
    'UserAgent_is_attack',
    'UserAgent_is_enterprise',
    'UserAgent_browser',
    'UserAgent_browser_version',
    'UserAgent_os',
    'UserAgent_os_version',
    'UserAgent_device_type',
    'UserAgent_platform',
    'UserAgent_is_bot',
    'UserAgent_language',
    'UserAgent_special_char_count',
    'UserAgent_is_unknown',
]
# Case-insensitive detectors keyed by category name.  Each value is a
# compiled pattern; apart from "suspicious_ext", group 1 spans the whole match.
regex_patterns = {
    # SQL-injection hints.
    # "\b" only makes sense next to a word character.  The previous pattern
    # wrapped *every* alternative in "\b...\b", so the punctuation tokens
    # ("--", "#", "database()", "version()") matched only when glued to word
    # characters: "a--b" hit while "1'-- " or a trailing "database()" did not.
    # Boundaries are now applied only where a word character sits at the edge.
    "sql": re.compile(
        r"(\b(?:select|union|insert|update|delete|drop|information_schema)\b"
        r"|\b or \b"
        r"|--|#|' or '"
        r"|\b(?:database|version)\(\))",
        re.IGNORECASE),
    # Script / event-handler markers.
    "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
    # Shell metacharacters, sensitive files and common command names.
    "cmd": re.compile(
        r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
        re.IGNORECASE),
    # Directory-traversal sequences, plain and percent-encoded.
    "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
    # Parameter names commonly carrying a redirect target.
    "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
    # Percent-encoded special characters and a few literal markers.
    "danger": re.compile(
        r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
        re.IGNORECASE),
    # File extensions listed as suspicious; group 1 is the extension
    # without the leading dot.
    "suspicious_ext": re.compile(
        r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
        re.IGNORECASE)
}
# Visible (human-readable) content-type values.
# Entries are kept verbatim and in their original order, including repeated
# values and misspelled parameters such as "charse=" / "chartset=".
# NOTE(review): values are spelled with "_" where a real Content-Type header
# has "-" (e.g. "utf_8"); presumably they are compared against an already
# normalised header value -- confirm with the consumer.
plain_content_type_columns = [
    'text/json;charset=gbk',
    'text/javascript',
    'text/css',
    'text/html;charset=gb2312',
    'application/xml;charset=gbk',
    'application/xml;charset=utf_8',
    'application/tlt_notify',
    'application/json;charset=gbk',
    'text/xml;charset=utf_8',
    'application/json',
    'text/csv;charset=utf_8',
    'application/json;charse=utf_8',
    'application/soap+xml;charset=utf_8;action="urn:dopricetaxseparated"',
    'text/xml;charset=gbk',
    'text/xml',
    'application/x_cm_json;charset=utf_8',
    'application/xml;tz=utc',
    'text/xml;charset="utf_8"',
    'application/x_java_archive',
    'application/msword',
    'application/xml',
    'application/x_stapler_method_invocation;charset=utf_8',
    'text/plain;charset=iso_8859_1',
    'application/x_www_form_urlencoded;charset=utf_8',
    'text/plain;charset=gbk',
    'application/octet_stream;charset=utf_8',
    'application/x_tika_ooxml',
    'application/soap+xml;charset=utf_8;action="urn:sendcommand"',
    'application/dns_message',
    'application/json;charset=utf_8',
    'application/vnd.docker.distribution.manifest.v2+json',
    'application/vnd.elasticsearch+json;compatible_with=8',
    'off/ping',
    'text/plain',
    'application/x_git_upload_pack_request',
    'application/json;charset=gbk',
    'text/html;charset=iso_8859_1',
    'text/http;charset=utf_8',
    'application/soap+xml;charset=gbk',
    'text/html',
    'application/vnd.openxmlformats_officedocument.spreadsheetml.sheet',
    'application/x_www_form_urlencoded;charset=gbk',
    'text/plain;charset=utf_8',
    'text/html;charset=gbk',
    'application/soap+xml;charset=gbk;',
    'application/x_www_form_urlencoded',
    'application/x_ndjson',
    'text/xml;charset=gbk',
    'application/json;chartset=utf_8',
    'application/soap+xml;charset=utf_8;action="urn:getcostbyruleengine"',
    'application/json_rpc',
    'text/json;charset=utf_8',
    'application/json;charset=utf8',
    'application/xml;charset=utf_8',
    'application/x_www_form_urlencoded;charset=gbk',
    'application/soap+xml;charset=utf_8;',
    'application/merge_patch+json',
    'application/json;',
    'text/xml;charset="utf_16le"',
    'text/html;charset=utf_8',
]
# Ordered key names of one packet/session record.  Section comments below
# only describe the key prefixes; the order of the entries is unchanged.
packetKeyname = [
    # identifiers, TCP flags, endpoints and byte/packet counters
    'id', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg', 'tcpflags.psh',
    'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port', 'source.packets',
    'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets', 'initRTT',
    'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
    'network.packets', 'network.bytes', 'length', 'client.bytes',
    # "http." keys
    'http.uri',
    'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt', 'http.clientVersionCnt',
    'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt', 'http.serverVersion',
    'http.responseHeaderCnt', 'http.xffIp', 'http.clientVersion',
    # 'http.uriTokens',  (disabled in the original list)
    'http.request-refererCnt', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic', 'http.methodCnt',
    'http.request-content-type', 'http.uriCnt', 'http.serverVersionCnt', 'http.useragent',
    'http.keyCnt',
    'http.request-referer', 'http.path', 'http.hostCnt', 'http.response-server', 'http.pathCnt',
    # 'http.useragentTokens',  (disabled in the original list)
    'http.method-GET', 'http.method',
    'http.key',
    'http.hostTokens',
    'http.requestHeader', 'http.responseHeader', 'http.method-POST',
    # "dns." keys
    'dns.ASN', 'dns.RIR', 'dns.GEO',
    'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.host', 'dns.ipCnt', 'dns.OpCode', 'dns.OpCodeCnt',
    'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType', 'dns.QueryTypeCnt',
    'dns.status', 'dns.statusCnt',
    # "tls." keys
    'tls.cipher', 'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3',
    'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version',
    'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt',
    # packet position and per-endpoint country/city/location keys
    'packetPos', 'source.ip_Country_IsoCode',
    'source.ip_Country_Name', 'source.ip_Country_SpecificName',
    'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
    'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
    'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
    'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name',
    'destination.ip_City_PostalCode', 'destination.ip_Location_Latitude',
    'destination.ip_Location_Longitude',
    # mean/variance, label-like and counter keys
    'http.uri_length_mean', 'http.uri_length_var',
    "http.uri_param_count_mean", "http.uri_param_count_var", "http.uri_depth_mean", "http.uri_depth_var",
    "http.uri_filename_length_mean", "http.uri_filename_length_var", "dns_domain_length_mean",
    "dns_domain_length_var", "traffic_type", "PROTOCOL", "DENY_METHOD", "THREAT_SUMMARY", "SEVERITY",
    "dns_domain_length", "dns_domain_suffix", "dns_domain", "dns_domain_suffix_length", "dns_base_domain",
    "dns_base_domain_length", "req_res_period_mean", "req_res_period_var", "status_code_1x_count",
    "status_code_2x_count", "status_code_3x_count", "status_code_4x_count", "status_code_5x_count",
    "req_bytes_percentage", "res_bytes_percentage", "cookie_end_with_semicolon_count",
    "ua_duplicate_count",
]
# Column names for the plain body text, one "src" and one "dst" column.
plain_body_columns = ["plain_body_src", "plain_body_dst"]
# Column names of the "abnormal_has_*" flags, in their fixed order.
abnormal_features_column = [
    'abnormal_has_xff',
    'abnormal_has_dir_penetration',
    'abnormal_has_templates_injection',
    'abnormal_has_crlf_injection',
    'abnormal_has_xxe_attack',
    'abnormal_has_code_injection_or_execute',
    'abnormal_has_sql_injection',
]
# Single-element list holding the name of the flow-text column.
pcap_flow_text_column = [
    "pcap_flow_text",
]

# Bytes-level, case-insensitive matchers for two header lines:
# "Transfer-Encoding: chunked" and "Content-Encoding: gzip".  Any amount of
# whitespace (including none) is accepted after the colon.
# The spelling "chuncked" is part of the public name and is kept.
pattern_chuncked = re.compile(
    rb"Transfer-Encoding:\s*chunked",
    flags=re.IGNORECASE,
)
pattern_gzip = re.compile(
    rb"Content-Encoding:\s*gzip",
    flags=re.IGNORECASE,
)

# Text patterns for HTTP/1.x request and status lines.
# The method alternation and the "HTTP/<d>.<d>" token are shared by several
# patterns, so they are kept in two private fragments; the compiled pattern
# strings are exactly the same as when they were written out in full.
_HTTP_METHOD_ALTERNATION = r"GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH"
_HTTP_VERSION_TOKEN = r"HTTP\/\d\.\d"

# group(1): version digits, e.g. "1.1"
http_version_pattern = re.compile(r"HTTP\/(\d\.\d)")
# group(1): request method; requires single spaces and a target starting with "/"
http_req_method_pattern = re.compile(
    r"(" + _HTTP_METHOD_ALTERNATION + r") \/[^\s]* " + _HTTP_VERSION_TOKEN
)
# group(1): request target (path plus query), starting with "/"
http_req_path_pattern = re.compile(
    r"(?:" + _HTTP_METHOD_ALTERNATION + r")\s+(\/[^\s]*)\s+" + _HTTP_VERSION_TOKEN
)
# group(1): three-digit status code of a status line
res_status_code_pattern = re.compile(_HTTP_VERSION_TOKEN + r"\s+(\d{3})\s+.*")
# Directory containing this module; the two text files below are looked up
# next to it.
xbase_const_dir = os.path.dirname(__file__)

# Paths of the data files "changtingwaf.txt" and "pa.txt" (built here only,
# not opened).
waf_exp_file = os.path.join(xbase_const_dir, 'changtingwaf.txt')
pa_exp_file = os.path.join(xbase_const_dir, 'pa.txt')