xbase-util 0.4.0__tar.gz → 0.4.2__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xbase_util-0.4.0 → xbase_util-0.4.2}/PKG-INFO +1 -1
- {xbase_util-0.4.0 → xbase_util-0.4.2}/setup.py +1 -1
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/handle_features_util.py +6 -25
- xbase_util-0.4.2/xbase_util/packet_util.py +93 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/pcap_util.py +0 -1
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/xbase_constant.py +82 -1
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util.egg-info/SOURCES.txt +1 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/README.md +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/setup.cfg +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/__init__.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/add_column_util.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/esreq.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util/xbase_util.py +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.4.0 → xbase_util-0.4.2}/xbase_util_assets/arkimeparse.js +0 -0
@@ -4,30 +4,11 @@ import traceback
|
|
4
4
|
from urllib.parse import unquote
|
5
5
|
|
6
6
|
import pandas as pd
|
7
|
-
from tqdm import tqdm
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def handle_uri(data,use_tqdm=True):
|
12
|
-
# 定义正则表达式,确保精确匹配各种攻击特征
|
13
|
-
regex_patterns = {
|
14
|
-
"sql": re.compile(
|
15
|
-
r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
|
16
|
-
re.IGNORECASE),
|
17
|
-
"xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
|
18
|
-
"cmd": re.compile(
|
19
|
-
r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
|
20
|
-
re.IGNORECASE),
|
21
|
-
"path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
|
22
|
-
"redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
|
23
|
-
"danger": re.compile(
|
24
|
-
r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
|
25
|
-
re.IGNORECASE),
|
26
|
-
"suspicious_ext": re.compile(
|
27
|
-
r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
|
28
|
-
re.IGNORECASE)
|
29
|
-
}
|
30
7
|
|
8
|
+
from xbase_util.xbase_constant import regex_patterns
|
9
|
+
|
10
|
+
|
11
|
+
def handle_uri(data, use_tqdm=True):
|
31
12
|
# 定义多层解码函数,确保完全解码 URI
|
32
13
|
def fully_decode_uri(uri):
|
33
14
|
try:
|
@@ -53,7 +34,6 @@ def handle_uri(data,use_tqdm=True):
|
|
53
34
|
traceback.print_exc()
|
54
35
|
exit(0)
|
55
36
|
|
56
|
-
|
57
37
|
# 初始化统计变量
|
58
38
|
param_count = 0
|
59
39
|
path_depth = 0
|
@@ -95,6 +75,7 @@ def handle_uri(data,use_tqdm=True):
|
|
95
75
|
result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
|
96
76
|
|
97
77
|
return result
|
78
|
+
|
98
79
|
if use_tqdm:
|
99
80
|
feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
|
100
81
|
else:
|
@@ -103,7 +84,7 @@ def handle_uri(data,use_tqdm=True):
|
|
103
84
|
return data
|
104
85
|
|
105
86
|
|
106
|
-
def handle_ua(data,use_tqdm=True):
|
87
|
+
def handle_ua(data, use_tqdm=True):
|
107
88
|
data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
|
108
89
|
# 处理换行符及多余空格
|
109
90
|
data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
|
@@ -0,0 +1,93 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
from scapy.layers.inet import TCP
|
4
|
+
|
5
|
+
from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
|
6
|
+
features_key, plain_body_columns
|
7
|
+
|
8
|
+
|
9
|
+
def content_type_is_plain(packet):
    """
    Tell whether a single HTTP message declares a "plain" (visible) Content-Type.

    The message is normalised the same way the entries of
    ``plain_content_type_columns`` are written (``-`` replaced by ``_``, spaces
    removed, lower-cased) and scanned line by line for the Content-Type header.

    :param packet: text of one HTTP message (headers, optionally followed by the body)
    :return: True if the first Content-Type header's normalised value is listed
        in ``plain_content_type_columns``, otherwise False
    """
    if ":" not in packet:
        return False
    normalized = packet.replace("-", "_").replace(" ", "").lower()
    for line in normalized.split("\n"):
        # Split on the FIRST colon only: values such as
        # 'application/soap+xml;charset=utf_8;action="urn:sendcommand"' contain
        # colons themselves and would otherwise be truncated and never match.
        name, separator, value = line.partition(":")
        if not separator:
            continue
        # Compare the header name exactly so that unrelated headers like
        # X-Content-Type-Options do not decide the result.
        if name.strip() != "content_type":
            continue
        content_type = value.replace("\r", "").strip()
        return content_type in plain_content_type_columns
    return False
|
24
|
+
|
25
|
+
|
26
|
+
def filter_visible_chars(data):
    """
    Drop every byte that is neither printable ASCII nor basic whitespace.

    :param data: iterable of integer byte values (for example a ``bytes`` object)
    :return: str made of the bytes in the range 32..126 plus tab (9),
        line feed (10) and carriage return (13), in their original order
    """
    kept = []
    for byte in data:
        if not (32 <= byte <= 126):
            # Outside the printable range: only tab, LF and CR survive.
            if byte not in (9, 10, 13):
                continue
        kept.append(chr(byte))
    return ''.join(kept)
|
33
|
+
|
34
|
+
|
35
|
+
def get_all_columns(
        contains_packet_column=False,
        contains_src_dst_column=False,
        contains_statistic_column=False,
        contains_features_column=False,
        contains_plain_body_column=False,
        contains_pcap_flow_text=False
):
    """
    Build the list of column names for the requested column groups.

    Groups are appended in a fixed order: packet, src/dst, statistic, features,
    plain body, pcap flow text. A new list is returned on every call, so the
    module-level column constants are never modified.

    :param contains_packet_column: include ``packetKeyname``
    :param contains_src_dst_column: include ``src_dst_header``
    :param contains_statistic_column: include ``statisticHeader``
    :param contains_features_column: include ``features_key``
    :param contains_plain_body_column: include ``plain_body_columns``
    :param contains_pcap_flow_text: include the pcap flow text column; a string
        value is used as the column name itself
    :return: list of column names
    """
    result_columns = []
    if contains_packet_column:
        result_columns.extend(packetKeyname)
    if contains_src_dst_column:
        result_columns.extend(src_dst_header)
    if contains_statistic_column:
        result_columns.extend(statisticHeader)
    if contains_features_column:
        result_columns.extend(features_key)
    if contains_plain_body_column:
        result_columns.extend(plain_body_columns)
    if contains_pcap_flow_text:
        # The flag itself used to be appended, which put the boolean True into
        # the column list. Append a column name instead; a caller-supplied
        # string is kept unchanged so that usage keeps working.
        # NOTE(review): the name "pcap_flow_text" is inferred from the flag
        # name - confirm it against the code that produces this column.
        if isinstance(contains_pcap_flow_text, str):
            result_columns.append(contains_pcap_flow_text)
        else:
            result_columns.append("pcap_flow_text")
    return result_columns
|
57
|
+
|
58
|
+
|
59
|
+
def get_all_packets_by_regx(packets):
    """
    Extract HTTP request/response texts from the TCP payloads of a capture.

    All TCP payloads are concatenated in packet order, reduced to visible
    characters and split at every HTTP request line. Each resulting segment
    yields one entry.

    :param packets: iterable of scapy packets; packets without a TCP layer are ignored
    :return: list of dicts ``{"req": <request text>, "res": <response text>}``;
        ``res`` is "" when the segment holds no response status line, and
        ``req`` is "" when a response exists but the request has no
        terminating blank line
    """
    # Join once instead of growing a bytes object packet by packet.
    streams = b"".join(bytes(pkt[TCP].payload) for pkt in packets if TCP in pkt)
    text = filter_visible_chars(streams)
    # Non-capturing group: a capturing group would make re.split() emit the
    # bare method names as extra list items.
    request_line = r"(?:GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d"
    # The third positional argument of re.split() is maxsplit, not flags.
    # Passing re.M there (value 8) capped the result at 8 splits, so every
    # request after the eighth was merged into a single segment.
    segments = re.split(f"(?={request_line})", text)
    all_packets = []
    for item in segments:
        if re.search(request_line, item) is None:
            continue
        response_match = re.search(r"HTTP/\d\.\d \d{3}[\s\S]*", item)
        if response_match is None:
            # No response in this segment: everything belongs to the request.
            request_text = item
            response_text = ""
        else:
            response_text = response_match.group(0)
            # A response is present: the request is the request line plus
            # everything up to the first blank line.
            request_match = re.search(request_line + r"[\s\S]*?\r\n\r\n", item)
            request_text = request_match.group(0) if request_match else ""
        all_packets.append({"req": request_text, "res": response_text})
    return all_packets
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import os
|
2
|
+
import re
|
2
3
|
|
3
4
|
current_dir = os.path.dirname(__file__)
|
4
5
|
parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
|
@@ -203,4 +204,84 @@ features_key = [
|
|
203
204
|
'URI_FEATURES_EXTRA_param_length_max', 'UserAgent_is_attack', 'UserAgent_is_enterprise', 'UserAgent_browser',
|
204
205
|
'UserAgent_browser_version', 'UserAgent_os', 'UserAgent_os_version', 'UserAgent_device_type',
|
205
206
|
'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
|
206
|
-
'UserAgent_is_unknown']
|
207
|
+
'UserAgent_is_unknown']
|
208
|
+
# Compiled, case-insensitive patterns keyed by attack category. Imported by
# handle_features_util; presumably each key feeds one
# "URI_FEATURES_EXTRA_contains_<key>" feature - verify against handle_uri.
regex_patterns = {
    # NOTE(review): the whole alternation is wrapped in \b...\b. \b only matches
    # between a word and a non-word character, so alternatives that start or end
    # with a non-word character (--, #, " or ", database(), version()) match only
    # when the adjacent character in the URI is a word character.
    "sql": re.compile(
        r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
        re.IGNORECASE),
    "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
    # Besides file paths and command names, a bare ";", "&&" or "|" is enough to match.
    "cmd": re.compile(
        r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
        re.IGNORECASE),
    "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
    "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
    # Percent-encoded special characters plus a few literal markers.
    "danger": re.compile(
        r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
        re.IGNORECASE),
    # A dot followed by one of the listed extensions, ending at a word boundary.
    "suspicious_ext": re.compile(
        r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
        re.IGNORECASE)
}
|
225
|
+
# Content-Type values that are treated as visible (plain) content.
# Entries are written in the normalised form that
# packet_util.content_type_is_plain compares against: lower-case, "-" replaced
# by "_", spaces removed.
# NOTE(review): a few entries occur more than once (for example
# 'application/json;charset=gbk' and 'text/xml;charset=gbk'); this does not
# affect membership tests.
plain_content_type_columns = ['text/json;charset=gbk',
                              'application/xml;charset=gbk', 'application/xml;charset=utf_8', 'application/tlt_notify',
                              'application/json;charset=gbk', 'text/xml;charset=utf_8', 'application/json',
                              'text/csv;charset=utf_8', 'application/json;charse=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:dopricetaxseparated"',
                              'text/xml;charset=gbk', 'text/xml', 'application/x_cm_json;charset=utf_8',
                              'application/xml;tz=utc', 'text/xml;charset="utf_8"', 'application/x_java_archive',
                              'application/msword', 'image/png', 'application/xml',
                              'application/x_stapler_method_invocation;charset=utf_8', 'text/plain;charset=iso_8859_1',
                              'application/x_www_form_urlencoded;charset=utf_8', 'text/plain;charset=gbk',
                              'application/octet_stream;charset=utf_8', 'application/x_tika_ooxml',
                              'application/soap+xml;charset=utf_8;action="urn:sendcommand"', 'application/dns_message',
                              'application/json;charset=utf_8', 'application/vnd.docker.distribution.manifest.v2+json',
                              'application/vnd.elasticsearch+json;compatible_with=8', 'off/ping', 'text/plain',
                              'application/x_git_upload_pack_request', 'application/json;charset=gbk',
                              'text/html;charset=iso_8859_1', 'text/http;charset=utf_8',
                              'application/soap+xml;charset=gbk', 'text/html',
                              'application/vnd.openxmlformats_officedocument.spreadsheetml.sheet',
                              'application/x_www_form_urlencoded;charset=gbk', 'text/plain;charset=utf_8',
                              'text/html;charset=gbk', 'application/soap+xml;charset=gbk;',
                              'application/x_www_form_urlencoded', 'application/x_ndjson', 'text/xml;charset=gbk',
                              'application/json;chartset=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:getcostbyruleengine"',
                              'application/json_rpc', 'text/json;charset=utf_8', 'application/json;charset=utf8',
                              'application/xml;charset=utf_8', 'application/x_www_form_urlencoded;charset=gbk',
                              'application/soap+xml;charset=utf_8;', 'application/merge_patch+json',
                              'application/json;', 'text/xml;charset="utf_16le"', 'text/html;charset=utf_8']
|
253
|
+
# Column names of the "packet" group; packet_util.get_all_columns returns them
# when contains_packet_column is truthy.
packetKeyname = ['id', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg', 'tcpflags.psh',
                 'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port', 'source.packets',
                 'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets', 'initRTT',
                 'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
                 'network.packets', 'network.bytes', 'length', 'client.bytes', 'http.uri',
                 'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt', 'http.clientVersionCnt',
                 'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt', 'http.serverVersion',
                 'http.responseHeaderCnt', 'http.xffIp', 'http.clientVersion', 'http.uriTokens',
                 'http.request-refererCnt', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic', 'http.methodCnt',
                 'http.request-content-type', 'http.uriCnt', 'http.serverVersionCnt', 'http.useragent', 'http.keyCnt',
                 'http.request-referer', 'http.path', 'http.hostCnt', 'http.response-server', 'http.pathCnt',
                 'http.useragentTokens', 'http.method-GET', 'http.method', 'http.key', 'http.hostTokens',
                 'http.requestHeader', 'http.responseHeader', 'http.method-POST', 'dns.ASN', 'dns.RIR', 'dns.GEO',
                 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.host', 'dns.ipCnt', 'dns.OpCode', 'dns.OpCodeCnt',
                 'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType', 'dns.QueryTypeCnt',
                 'dns.status', 'dns.statusCnt', 'tls.cipher', 'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3',
                 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version',
                 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt', 'packetPos', 'source.ip_Country_IsoCode',
                 'source.ip_Country_Name', 'source.ip_Country_SpecificName',
                 'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
                 'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
                 'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
                 'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name',
                 'destination.ip_City_PostalCode', 'destination.ip_Location_Latitude',
                 'destination.ip_Location_Longitude', 'http.uri_length_mean', 'http.uri_length_var',
                 "http.uri_param_count_mean", "http.uri_param_count_var", "http.uri_depth_mean", "http.uri_depth_var",
                 "http.uri_filename_length_mean", "http.uri_filename_length_var", "dns_domain_length_mean",
                 "dns_domain_length_var", "traffic_type", "PROTOCOL", "DENY_METHOD", "THREAT_SUMMARY", "SEVERITY",
                 "dns_domain_length", "dns_domain_suffix", "dns_domain", "dns_domain_suffix_length", "dns_base_domain",
                 "dns_base_domain_length", "req_res_period_mean", "req_res_period_var", "status_code_1x_count",
                 "status_code_2x_count", "status_code_3x_count", "status_code_4x_count", "status_code_5x_count",
                 "req_bytes_percentage", "res_bytes_percentage", "cookie_end_with_semicolon_count",
                 "ua_duplicate_count"]
|
286
|
+
# Column names of the "plain body" group; packet_util.get_all_columns returns
# them when contains_plain_body_column is truthy.
plain_body_columns = [
    "plain_body_src",
    "plain_body_dst",
]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|