xbase-util 0.3.9__tar.gz → 0.4.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xbase_util-0.3.9 → xbase_util-0.4.1}/PKG-INFO +1 -1
- {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.py +1 -1
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/add_column_util.py +33 -17
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/handle_features_util.py +18 -32
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_constant.py +19 -1
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.3.9 → xbase_util-0.4.1}/README.md +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.cfg +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/__init__.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/esreq.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_util.py +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/SOURCES.txt +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/arkimeparse.js +0 -0
@@ -128,24 +128,40 @@ def parse_list(x):
|
|
128
128
|
return x
|
129
129
|
|
130
130
|
|
131
|
-
def handle_dns(origin_list, isDataFrame=False):
|
132
|
-
print("handle_dnslist")
|
131
|
+
def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
|
133
132
|
if not isDataFrame:
|
134
133
|
origin_list = pd.DataFrame(origin_list)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
134
|
+
if use_tqdm:
|
135
|
+
origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
|
136
|
+
origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
|
137
|
+
lambda x: any(is_long_domain(domain) for domain in x))
|
138
|
+
origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
|
139
|
+
lambda x: any(has_random_characters(domain) for domain in x))
|
140
|
+
origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
|
141
|
+
lambda x: any(has_special_characters(domain) for domain in x))
|
142
|
+
origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
|
143
|
+
lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
|
144
|
+
origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
|
145
|
+
lambda x: any(is_danger_domain(domain) for domain in x))
|
146
|
+
origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
|
147
|
+
lambda x: any(is_danger_subdomain(domain) for domain in x))
|
148
|
+
origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
|
149
|
+
lambda x: any(has_uncommon_tld(domain) for domain in x))
|
150
|
+
else:
|
151
|
+
origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
|
152
|
+
origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
|
153
|
+
lambda x: any(is_long_domain(domain) for domain in x))
|
154
|
+
origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
|
155
|
+
lambda x: any(has_random_characters(domain) for domain in x))
|
156
|
+
origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
|
157
|
+
lambda x: any(has_special_characters(domain) for domain in x))
|
158
|
+
origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
|
159
|
+
lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
|
160
|
+
origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
|
161
|
+
lambda x: any(is_danger_domain(domain) for domain in x))
|
162
|
+
origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
|
163
|
+
lambda x: any(is_danger_subdomain(domain) for domain in x))
|
164
|
+
origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
|
165
|
+
lambda x: any(has_uncommon_tld(domain) for domain in x))
|
150
166
|
origin_list.drop(columns=['dnslist'], inplace=True)
|
151
167
|
return origin_list
|
@@ -4,32 +4,11 @@ import traceback
|
|
4
4
|
from urllib.parse import unquote
|
5
5
|
|
6
6
|
import pandas as pd
|
7
|
-
from tqdm import tqdm
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def handle_uri(data):
|
12
|
-
tqdm.pandas()
|
13
|
-
print(f"处理URI:{len(data)}")
|
14
|
-
# 定义正则表达式,确保精确匹配各种攻击特征
|
15
|
-
regex_patterns = {
|
16
|
-
"sql": re.compile(
|
17
|
-
r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
|
18
|
-
re.IGNORECASE),
|
19
|
-
"xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
|
20
|
-
"cmd": re.compile(
|
21
|
-
r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
|
22
|
-
re.IGNORECASE),
|
23
|
-
"path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
|
24
|
-
"redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
|
25
|
-
"danger": re.compile(
|
26
|
-
r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
|
27
|
-
re.IGNORECASE),
|
28
|
-
"suspicious_ext": re.compile(
|
29
|
-
r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
|
30
|
-
re.IGNORECASE)
|
31
|
-
}
|
32
7
|
|
8
|
+
from xbase_util.xbase_constant import regex_patterns
|
9
|
+
|
10
|
+
|
11
|
+
def handle_uri(data, use_tqdm=True):
|
33
12
|
# 定义多层解码函数,确保完全解码 URI
|
34
13
|
def fully_decode_uri(uri):
|
35
14
|
try:
|
@@ -55,7 +34,6 @@ def handle_uri(data):
|
|
55
34
|
traceback.print_exc()
|
56
35
|
exit(0)
|
57
36
|
|
58
|
-
|
59
37
|
# 初始化统计变量
|
60
38
|
param_count = 0
|
61
39
|
path_depth = 0
|
@@ -97,14 +75,16 @@ def handle_uri(data):
|
|
97
75
|
result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
|
98
76
|
|
99
77
|
return result
|
100
|
-
|
78
|
+
|
79
|
+
if use_tqdm:
|
80
|
+
feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
|
81
|
+
else:
|
82
|
+
feature_data = data.apply(process_row, axis=1, result_type="expand")
|
101
83
|
data = pd.concat([data, feature_data], axis=1)
|
102
84
|
return data
|
103
85
|
|
104
86
|
|
105
|
-
def handle_ua(data):
|
106
|
-
tqdm.pandas()
|
107
|
-
print("处理UA")
|
87
|
+
def handle_ua(data, use_tqdm=True):
|
108
88
|
data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
|
109
89
|
# 处理换行符及多余空格
|
110
90
|
data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
|
@@ -157,8 +137,14 @@ def handle_ua(data):
|
|
157
137
|
data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
|
158
138
|
flags=re.IGNORECASE).fillna("Unknown")
|
159
139
|
# 统计 User-Agent 中的特殊字符个数
|
160
|
-
|
161
|
-
|
140
|
+
|
141
|
+
if use_tqdm:
|
142
|
+
data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
|
143
|
+
lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
|
144
|
+
else:
|
145
|
+
data['UserAgent_special_char_count'] = data['http.useragent'].apply(
|
146
|
+
lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
|
147
|
+
|
162
148
|
# 更新 UserAgent_is_unknown 的计算逻辑
|
163
149
|
data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
|
164
150
|
axis=1).fillna("Unknown")
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import os
|
2
|
+
import re
|
2
3
|
|
3
4
|
current_dir = os.path.dirname(__file__)
|
4
5
|
parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
|
@@ -203,4 +204,21 @@ features_key = [
|
|
203
204
|
'URI_FEATURES_EXTRA_param_length_max', 'UserAgent_is_attack', 'UserAgent_is_enterprise', 'UserAgent_browser',
|
204
205
|
'UserAgent_browser_version', 'UserAgent_os', 'UserAgent_os_version', 'UserAgent_device_type',
|
205
206
|
'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
|
206
|
-
'UserAgent_is_unknown']
|
207
|
+
'UserAgent_is_unknown']
|
208
|
+
regex_patterns = {
|
209
|
+
"sql": re.compile(
|
210
|
+
r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
|
211
|
+
re.IGNORECASE),
|
212
|
+
"xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
|
213
|
+
"cmd": re.compile(
|
214
|
+
r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
|
215
|
+
re.IGNORECASE),
|
216
|
+
"path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
|
217
|
+
"redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
|
218
|
+
"danger": re.compile(
|
219
|
+
r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
|
220
|
+
re.IGNORECASE),
|
221
|
+
"suspicious_ext": re.compile(
|
222
|
+
r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
|
223
|
+
re.IGNORECASE)
|
224
|
+
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|