xbase-util 0.3.8__tar.gz → 0.4.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xbase_util-0.3.8 → xbase_util-0.4.0}/PKG-INFO +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.py +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/add_column_util.py +33 -17
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/handle_features_util.py +14 -9
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_util.py +9 -10
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/README.md +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.cfg +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/esreq.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/SOURCES.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/arkimeparse.js +0 -0
@@ -128,24 +128,40 @@ def parse_list(x):
|
|
128
128
|
return x
|
129
129
|
|
130
130
|
|
131
|
-
def handle_dns(origin_list, isDataFrame=False):
|
132
|
-
print("handle_dnslist")
|
131
|
+
def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
|
133
132
|
if not isDataFrame:
|
134
133
|
origin_list = pd.DataFrame(origin_list)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
134
|
+
if use_tqdm:
|
135
|
+
origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
|
136
|
+
origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
|
137
|
+
lambda x: any(is_long_domain(domain) for domain in x))
|
138
|
+
origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
|
139
|
+
lambda x: any(has_random_characters(domain) for domain in x))
|
140
|
+
origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
|
141
|
+
lambda x: any(has_special_characters(domain) for domain in x))
|
142
|
+
origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
|
143
|
+
lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
|
144
|
+
origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
|
145
|
+
lambda x: any(is_danger_domain(domain) for domain in x))
|
146
|
+
origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
|
147
|
+
lambda x: any(is_danger_subdomain(domain) for domain in x))
|
148
|
+
origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
|
149
|
+
lambda x: any(has_uncommon_tld(domain) for domain in x))
|
150
|
+
else:
|
151
|
+
origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
|
152
|
+
origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
|
153
|
+
lambda x: any(is_long_domain(domain) for domain in x))
|
154
|
+
origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
|
155
|
+
lambda x: any(has_random_characters(domain) for domain in x))
|
156
|
+
origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
|
157
|
+
lambda x: any(has_special_characters(domain) for domain in x))
|
158
|
+
origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
|
159
|
+
lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
|
160
|
+
origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
|
161
|
+
lambda x: any(is_danger_domain(domain) for domain in x))
|
162
|
+
origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
|
163
|
+
lambda x: any(is_danger_subdomain(domain) for domain in x))
|
164
|
+
origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
|
165
|
+
lambda x: any(has_uncommon_tld(domain) for domain in x))
|
150
166
|
origin_list.drop(columns=['dnslist'], inplace=True)
|
151
167
|
return origin_list
|
@@ -8,9 +8,7 @@ from tqdm import tqdm
|
|
8
8
|
|
9
9
|
|
10
10
|
|
11
|
-
def handle_uri(data):
|
12
|
-
tqdm.pandas()
|
13
|
-
print(f"处理URI:{len(data)}")
|
11
|
+
def handle_uri(data,use_tqdm=True):
|
14
12
|
# 定义正则表达式,确保精确匹配各种攻击特征
|
15
13
|
regex_patterns = {
|
16
14
|
"sql": re.compile(
|
@@ -97,14 +95,15 @@ def handle_uri(data):
|
|
97
95
|
result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
|
98
96
|
|
99
97
|
return result
|
100
|
-
|
98
|
+
if use_tqdm:
|
99
|
+
feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
|
100
|
+
else:
|
101
|
+
feature_data = data.apply(process_row, axis=1, result_type="expand")
|
101
102
|
data = pd.concat([data, feature_data], axis=1)
|
102
103
|
return data
|
103
104
|
|
104
105
|
|
105
|
-
def handle_ua(data):
|
106
|
-
tqdm.pandas()
|
107
|
-
print("处理UA")
|
106
|
+
def handle_ua(data,use_tqdm=True):
|
108
107
|
data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
|
109
108
|
# 处理换行符及多余空格
|
110
109
|
data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
|
@@ -157,8 +156,14 @@ def handle_ua(data):
|
|
157
156
|
data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
|
158
157
|
flags=re.IGNORECASE).fillna("Unknown")
|
159
158
|
# 统计 User-Agent 中的特殊字符个数
|
160
|
-
|
161
|
-
|
159
|
+
|
160
|
+
if use_tqdm:
|
161
|
+
data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
|
162
|
+
lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
|
163
|
+
else:
|
164
|
+
data['UserAgent_special_char_count'] = data['http.useragent'].apply(
|
165
|
+
lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
|
166
|
+
|
162
167
|
# 更新 UserAgent_is_unknown 的计算逻辑
|
163
168
|
data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
|
164
169
|
axis=1).fillna("Unknown")
|
@@ -402,16 +402,15 @@ def get_uri_filename_length(uri):
|
|
402
402
|
return 0
|
403
403
|
|
404
404
|
|
405
|
-
def get_dns_domain_suffix(domain
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
return ""
|
405
|
+
def get_dns_domain_suffix(domain):
|
406
|
+
try:
|
407
|
+
for tmp_suffix in dns_domain_list:
|
408
|
+
if tmp_suffix in domain:
|
409
|
+
return tmp_suffix
|
410
|
+
extracted = tldextract.extract(domain)
|
411
|
+
return extracted.suffix
|
412
|
+
except Exception as e:
|
413
|
+
return ""
|
415
414
|
|
416
415
|
|
417
416
|
def check_path(file_path: str):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|