xbase-util 0.3.8__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xbase_util-0.3.8 → xbase_util-0.4.0}/PKG-INFO +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.py +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/add_column_util.py +33 -17
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/handle_features_util.py +14 -9
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_util.py +9 -10
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.3.8 → xbase_util-0.4.0}/README.md +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.cfg +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/esreq.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/SOURCES.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/arkimeparse.js +0 -0
{xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/add_column_util.py

```diff
@@ -128,24 +128,40 @@ def parse_list(x):
     return x
 
 
-def handle_dns(origin_list, isDataFrame=False):
-    print("handle_dnslist")
+def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
     if not isDataFrame:
         origin_list = pd.DataFrame(origin_list)
-    [old lines 135-149 removed; their content is not shown in this rendering]
+    if use_tqdm:
+        origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
+        origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(is_long_domain(domain) for domain in x))
+        origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(has_random_characters(domain) for domain in x))
+        origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(has_special_characters(domain) for domain in x))
+        origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
+        origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(is_danger_domain(domain) for domain in x))
+        origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(is_danger_subdomain(domain) for domain in x))
+        origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
+            lambda x: any(has_uncommon_tld(domain) for domain in x))
+    else:
+        origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
+        origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
+            lambda x: any(is_long_domain(domain) for domain in x))
+        origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
+            lambda x: any(has_random_characters(domain) for domain in x))
+        origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
+            lambda x: any(has_special_characters(domain) for domain in x))
+        origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
+            lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
+        origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
+            lambda x: any(is_danger_domain(domain) for domain in x))
+        origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
+            lambda x: any(is_danger_subdomain(domain) for domain in x))
+        origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
+            lambda x: any(has_uncommon_tld(domain) for domain in x))
     origin_list.drop(columns=['dnslist'], inplace=True)
     return origin_list
```
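For orientation, here is a minimal usage sketch of the reworked handle_dns. The import path, the sample 'dns.host' values, and the assumption that the caller registers tqdm's pandas integration (tqdm.pandas() is what makes progress_apply available) are assumptions for illustration, not something this diff confirms.

```python
# Hypothetical usage sketch of handle_dns with the new use_tqdm flag.
# Assumptions: handle_dns is importable from xbase_util.add_column_util,
# parse_list accepts the raw 'dns.host' value, and tqdm.pandas() has been
# registered so that Series.progress_apply exists.
from tqdm import tqdm
from xbase_util.add_column_util import handle_dns

tqdm.pandas()  # register progress_apply on pandas objects

records = [
    {"dns.host": "login.example.com"},        # hypothetical sample values
    {"dns.host": "xj3k9q2z.suspicious.biz"},
]
df = handle_dns(records, isDataFrame=False, use_tqdm=True)
print(df.filter(like="dns_host_is_").head())
```

With use_tqdm=False the same feature columns are computed via plain .apply(), which avoids progress-bar output in batch jobs.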
{xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/handle_features_util.py

```diff
@@ -8,9 +8,7 @@ from tqdm import tqdm
 
 
 
-def handle_uri(data):
-    tqdm.pandas()
-    print(f"处理URI:{len(data)}")
+def handle_uri(data,use_tqdm=True):
     # 定义正则表达式,确保精确匹配各种攻击特征
     regex_patterns = {
         "sql": re.compile(
@@ -97,14 +95,15 @@ def handle_uri(data):
             result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
 
         return result
-
+    if use_tqdm:
+        feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
+    else:
+        feature_data = data.apply(process_row, axis=1, result_type="expand")
     data = pd.concat([data, feature_data], axis=1)
     return data
 
 
-def handle_ua(data):
-    tqdm.pandas()
-    print("处理UA")
+def handle_ua(data,use_tqdm=True):
     data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
     # 处理换行符及多余空格
     data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
@@ -157,8 +156,14 @@ def handle_ua(data):
     data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
                                                                     flags=re.IGNORECASE).fillna("Unknown")
     # 统计 User-Agent 中的特殊字符个数
-    [old lines 160-161 removed; their content is not shown in this rendering]
+
+    if use_tqdm:
+        data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
+            lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
+    else:
+        data['UserAgent_special_char_count'] = data['http.useragent'].apply(
+            lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
+
     # 更新 UserAgent_is_unknown 的计算逻辑
     data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
         axis=1).fillna("Unknown")
```
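The same flag pattern appears in handle_uri and handle_ua: the internal tqdm.pandas() calls and progress prints were dropped, so a caller that wants progress bars now registers tqdm itself. A hedged sketch of the new calling convention, assuming both functions take a pandas DataFrame; only the 'http.useragent' column is confirmed by the diff, the 'http.uri' column name and sample rows are guesses.

```python
# Hypothetical calling convention for the 0.4.0 signatures.
# Assumptions: the import path, the 'http.uri' column name, and the sample
# rows are illustrative; only 'http.useragent' is visible in the diff.
import pandas as pd
from tqdm import tqdm
from xbase_util.handle_features_util import handle_uri, handle_ua

tqdm.pandas()  # 0.4.0 no longer calls this inside handle_uri/handle_ua

df = pd.DataFrame({
    "http.uri": ["/index.php?id=1 union select 1"],
    "http.useragent": ["Mozilla/5.0 (Windows NT 10.0; Win64; x64)"],
})
df = handle_uri(df, use_tqdm=True)    # row-wise progress_apply
df = handle_ua(df, use_tqdm=False)    # plain .apply(), no progress bar
```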
{xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_util.py

```diff
@@ -402,16 +402,15 @@ def get_uri_filename_length(uri):
     return 0
 
 
-def get_dns_domain_suffix(domain
-    [old lines 406-413 removed; their content is not shown in this rendering]
-    return ""
+def get_dns_domain_suffix(domain):
+    try:
+        for tmp_suffix in dns_domain_list:
+            if tmp_suffix in domain:
+                return tmp_suffix
+        extracted = tldextract.extract(domain)
+        return extracted.suffix
+    except Exception as e:
+        return ""
 
 
 def check_path(file_path: str):
```
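The rewritten get_dns_domain_suffix first scans dns_domain_list for a known suffix and then falls back to tldextract, which resolves suffixes against the Public Suffix List. A quick illustration of that fallback (standalone tldextract behavior, not anything specific to xbase_util):

```python
# What the tldextract fallback returns for a few inputs.
import tldextract

print(tldextract.extract("forums.news.cnn.com").suffix)  # 'com'
print(tldextract.extract("example.co.uk").suffix)        # 'co.uk'
print(tldextract.extract("localhost").suffix)            # '' (no public suffix)
```

Because the except branch swallows all exceptions and returns an empty string, malformed inputs degrade quietly instead of raising.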