xbase-util 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xbase_util-0.4.1 → xbase_util-0.4.3}/PKG-INFO +1 -1
- {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.py +1 -1
- xbase_util-0.4.3/xbase_util/packet_util.py +171 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/pcap_util.py +0 -1
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_constant.py +79 -16
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/SOURCES.txt +1 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/README.md +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.cfg +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/__init__.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/add_column_util.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/esreq.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_util.py +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/arkimeparse.js +0 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
from scapy.layers.inet import TCP
|
4
|
+
|
5
|
+
from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
|
6
|
+
features_key, plain_body_columns
|
7
|
+
from xbase_util.xbase_util import firstOrZero
|
8
|
+
|
9
|
+
|
10
|
+
def content_type_is_plain(packet, plain_types=None):
    """
    Decide whether one HTTP message declares a plain (visible) Content-Type.

    The message text is normalised before the lookup: "-" becomes "_", spaces
    are removed and everything is lower-cased, so the allow-list entries must
    be written in that same form. The first ``Content-Type`` header found
    decides the result.

    :param packet: raw text of a single HTTP message (headers and body)
    :param plain_types: optional collection of normalised content-type values
        to accept; defaults to ``plain_content_type_columns``
    :return: True if the Content-Type value is in the allow-list, else False
    """
    if ":" not in packet:
        return False
    if plain_types is None:
        plain_types = plain_content_type_columns
    for line in packet.replace("-", "_").replace(" ", "").lower().split("\n"):
        # Split on the first ":" only, so values that contain ":" themselves
        # (e.g. action="urn:...") are kept whole.
        name, sep, value = line.partition(":")
        # Require the exact header name; a substring test would let headers
        # such as X-Content-Type-Options decide the result.
        if not sep or name.strip() != "content_type":
            continue
        return value.replace("\r", "").strip() in plain_types
    return False
|
25
|
+
|
26
|
+
|
27
|
+
def filter_visible_chars(data):
    """
    Drop non-printable bytes and return what is left as text.

    Only printable ASCII (32..126) plus tab, line feed and carriage return
    are kept.

    :param data: iterable of integer byte values, e.g. a ``bytes`` object
    :return: str built from the bytes that were kept
    """
    kept = []
    for byte in data:
        printable = 32 <= byte <= 126
        if printable or byte in (9, 10, 13):
            kept.append(chr(byte))
    return ''.join(kept)
|
34
|
+
|
35
|
+
|
36
|
+
def get_all_columns(
        contains_packet_column=False,
        contains_src_dst_column=False,
        contains_statistic_column=False,
        contains_features_column=False,
        contains_plain_body_column=False,
        contains_pcap_flow_text=False
):
    """
    Build the list of column names for the requested column groups.

    Groups are appended in a fixed order: packet, src/dst header, statistic,
    features, plain body, then the pcap flow text entry.

    :param contains_packet_column: include ``packetKeyname``
    :param contains_src_dst_column: include ``src_dst_header``
    :param contains_statistic_column: include ``statisticHeader``
    :param contains_features_column: include ``features_key``
    :param contains_plain_body_column: include ``plain_body_columns``
    :param contains_pcap_flow_text: when truthy, this value itself is appended
    :return: a new list with the selected column names
    """
    columns = []
    if contains_packet_column:
        columns.extend(packetKeyname)
    if contains_src_dst_column:
        columns.extend(src_dst_header)
    if contains_statistic_column:
        columns.extend(statisticHeader)
    if contains_features_column:
        columns.extend(features_key)
    if contains_plain_body_column:
        columns.extend(plain_body_columns)
    if contains_pcap_flow_text:
        # NOTE(review): the argument value is appended, so passing True adds
        # the boolean True rather than a column name - confirm whether a fixed
        # column name was intended here.
        columns.append(contains_pcap_flow_text)
    return columns
|
58
|
+
|
59
|
+
|
60
|
+
def _split_http_exchanges(text):
    """Split reassembled HTTP text into one {"req", "res"} dict per request line."""
    # Non-capturing group: a capturing group would make re.split() insert the
    # matched method name as an extra list element.
    request_line = r"(?:GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d"
    exchanges = []
    # No third positional argument: that slot is maxsplit, not flags, and
    # passing re.M there silently capped the result at 8 splits.
    for chunk in re.split(f"(?={request_line})", text):
        if re.search(request_line, chunk) is None:
            # Text in front of the first request line.
            continue
        status_line = re.search(r"HTTP/\d\.\d \d{3}", chunk)
        if status_line is None:
            # No response captured: the whole chunk is request data.
            exchanges.append({"req": chunk, "res": ""})
        else:
            # Everything before the status line belongs to the request
            # (headers and body); the rest is the response.
            cut = status_line.start()
            exchanges.append({"req": chunk[:cut], "res": chunk[cut:]})
    return exchanges


def get_all_packets_by_regx(packets):
    """
    Extract HTTP request/response texts from pcap packets with regular expressions.

    The TCP payloads of all packets are concatenated in order, reduced to
    visible characters and then cut at every HTTP request line.

    :param packets: iterable of scapy packets; packets without a TCP layer are ignored
    :return: list of dicts ``{"req": <request text>, "res": <response text>}``;
        ``res`` is "" when no response status line follows the request
    """
    payload = b"".join(bytes(pkt[TCP].payload) for pkt in packets if TCP in pkt)
    return _split_http_exchanges(filter_visible_chars(payload))
|
95
|
+
|
96
|
+
|
97
|
+
def get_body(param, is_src):
    """
    Return the body part of a raw HTTP message.

    The body is everything after the first blank line (CRLF CRLF), with
    surrounding whitespace removed.

    :param param: raw text of one HTTP request or response
    :param is_src: not used by this function; kept for existing callers
    :return: the body text, or "" when the message has no blank line or no body
    """
    # partition() never raises when the separator is missing and keeps blank
    # lines that occur inside the body itself.
    _, _, body = param.partition("\r\n\r\n")
    return body.strip()
|
100
|
+
|
101
|
+
|
102
|
+
def get_header_value(header_set, value):
    """
    Look up a header value in a list of raw header lines.

    A line matches when the text before its first ":" equals the requested
    header name, ignoring case and surrounding whitespace. The first matching
    line wins.

    :param header_set: iterable of header lines such as "Host: example.com"
    :param value: name of the header to look up, e.g. "Host"
    :return: the stripped header value, or "" when the header is absent
    """
    wanted = value.strip().lower()
    for line in header_set:
        # Compare the header name only; a substring test over the whole line
        # would also hit other headers (X-Forwarded-Host) or the request line.
        name, sep, content = line.partition(":")
        if sep and name.strip().lower() == wanted:
            return content.strip()
    return ""
|
108
|
+
|
109
|
+
|
110
|
+
def _collect_headers(lines):
    """Map normalised header names to space-stripped values for each "name: value" line."""
    headers = {}
    for line in lines:
        # Split on the first ":" only so values containing ":" (host:port,
        # URLs, timestamps) are not discarded.
        name, sep, content = line.replace("\r", "").partition(":")
        if sep:
            key = name.replace(" ", "").replace("-", "_").lower()
            # First occurrence wins, so real headers take precedence over
            # look-alike lines further down in the body.
            headers.setdefault(key, content.replace(" ", ""))
    return headers


def get_detail_by_package(packets_from_pcap, publicField, use_regx):
    """
    Complete one session record with fields parsed from one HTTP request/response pair.

    :param packets_from_pcap: mapping with the request and response text; read
        from keys "req"/"res" when ``use_regx`` is true, otherwise from
        "request"/"response"
    :param publicField: the original session record; it is copied, not modified
    :param use_regx: selects the key names above and, when true, also fills
        ``plain_body_src`` / ``plain_body_dst``
    :return: the completed copy of ``publicField``
    """
    res_field = publicField.copy()
    if use_regx:
        req = packets_from_pcap['req']
        res = packets_from_pcap['res']
    else:
        res = packets_from_pcap["response"]
        req = packets_from_pcap["request"]
    res_field["initRTT"] = firstOrZero(res_field.get("initRTT", 0))
    res_field["length"] = firstOrZero(res_field.get("length", 0))

    # --- request start line and selected request headers ---
    request_lines = req.strip().split("\n")
    res_field['http.clientVersion'] = ''
    res_field['http.path'] = ''
    res_field['http.method'] = ''
    http_request_lines = [item for item in request_lines if "HTTP" in item]
    if http_request_lines:
        first_line = http_request_lines[0].split(" ")
        # A line that merely mentions "HTTP" may have fewer than three tokens;
        # leave the fields empty instead of raising IndexError.
        if len(first_line) >= 3:
            res_field['http.clientVersion'] = str(first_line[2]).replace("\n", "").replace("\r", "")
            res_field['http.path'] = first_line[1]
            res_field['http.method'] = first_line[0]
    res_field['http.request-referer'] = get_header_value(header_set=request_lines, value="Referer")
    res_field['http.request-content-type'] = get_header_value(header_set=request_lines,
                                                              value="Content-Type")
    res_field['http.hostTokens'] = get_header_value(header_set=request_lines, value="Host")

    if use_regx:
        res_field['plain_body_src'] = ""
        res_field['plain_body_dst'] = ""
        if content_type_is_plain(req):
            res_field['plain_body_src'] = get_body(req, is_src=True)
        if content_type_is_plain(res):
            res_field['plain_body_dst'] = get_body(res, is_src=False)

    # --- response status line and selected response headers ---
    response_lines = res.strip().split("\n")
    res_field['http.statuscode'] = ""
    res_field['http.serverVersion'] = ""
    http_response_lines = [item for item in response_lines if "HTTP" in item]
    if http_response_lines:
        first_line = http_response_lines[0].strip().split(" ")
        version_parts = first_line[0].split("/")
        # Same guard as for the request line: only accept "<proto>/<ver> <code> ...".
        if len(first_line) >= 2 and len(version_parts) >= 2:
            res_field['http.statuscode'] = first_line[1]
            res_field['http.serverVersion'] = version_parts[1]
    res_field['http.response-server'] = get_header_value(header_set=response_lines, value="Server")
    res_field['http.response-content-type'] = get_header_value(header_set=response_lines,
                                                               value="Content-Type")

    # --- generic src_/dst_ header columns ---
    # Headers from either message may fill either column. When the same header
    # appears in both, src_ takes the request's value and dst_ the response's,
    # which makes the result deterministic.
    request_headers = _collect_headers(request_lines)
    response_headers = _collect_headers(response_lines)
    for key, value in {**response_headers, **request_headers}.items():
        if f"src_{key}" in src_dst_header:
            res_field[f"src_{key}"] = value
    for key, value in {**request_headers, **response_headers}.items():
        if f"dst_{key}" in src_dst_header:
            res_field[f"dst_{key}"] = value
    return res_field
|
@@ -206,19 +206,82 @@ features_key = [
|
|
206
206
|
'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
|
207
207
|
'UserAgent_is_unknown']
|
208
208
|
# Compiled, case-insensitive patterns keyed by the kind of content they look for.
regex_patterns = {
    "sql": re.compile(
        # \b wraps only the keyword alternatives. Around "--", "#" or a
        # trailing ")" a word boundary almost never exists, so wrapping the
        # whole group in \b...\b kept those alternatives from matching.
        r"(\b(?:select|union|insert|update|delete|drop|information_schema)\b"
        r"|--|#| or |' or '|\b(?:database|version)\(\))",
        re.IGNORECASE),
    "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
    "cmd": re.compile(
        r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
        re.IGNORECASE),
    "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
    "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
    "danger": re.compile(
        r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
        re.IGNORECASE),
    "suspicious_ext": re.compile(
        r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
        re.IGNORECASE)
}
|
225
|
+
# Content-Type values that are treated as visible (plain) content.
# Entries are written in normalised form: lower case, "-" replaced by "_",
# no spaces. content_type_is_plain() normalises a message the same way before
# testing membership, so new entries must follow that form.
# NOTE(review): several values are listed more than once (for example
# 'application/json;charset=gbk' and 'text/xml;charset=gbk'); harmless for
# membership tests - confirm before using this list for anything else.
plain_content_type_columns = ['text/json;charset=gbk',
                              'application/xml;charset=gbk', 'application/xml;charset=utf_8', 'application/tlt_notify',
                              'application/json;charset=gbk', 'text/xml;charset=utf_8', 'application/json',
                              'text/csv;charset=utf_8', 'application/json;charse=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:dopricetaxseparated"',
                              'text/xml;charset=gbk', 'text/xml', 'application/x_cm_json;charset=utf_8',
                              'application/xml;tz=utc', 'text/xml;charset="utf_8"', 'application/x_java_archive',
                              'application/msword', 'image/png', 'application/xml',
                              'application/x_stapler_method_invocation;charset=utf_8', 'text/plain;charset=iso_8859_1',
                              'application/x_www_form_urlencoded;charset=utf_8', 'text/plain;charset=gbk',
                              'application/octet_stream;charset=utf_8', 'application/x_tika_ooxml',
                              'application/soap+xml;charset=utf_8;action="urn:sendcommand"', 'application/dns_message',
                              'application/json;charset=utf_8', 'application/vnd.docker.distribution.manifest.v2+json',
                              'application/vnd.elasticsearch+json;compatible_with=8', 'off/ping', 'text/plain',
                              'application/x_git_upload_pack_request', 'application/json;charset=gbk',
                              'text/html;charset=iso_8859_1', 'text/http;charset=utf_8',
                              'application/soap+xml;charset=gbk', 'text/html',
                              'application/vnd.openxmlformats_officedocument.spreadsheetml.sheet',
                              'application/x_www_form_urlencoded;charset=gbk', 'text/plain;charset=utf_8',
                              'text/html;charset=gbk', 'application/soap+xml;charset=gbk;',
                              'application/x_www_form_urlencoded', 'application/x_ndjson', 'text/xml;charset=gbk',
                              'application/json;chartset=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:getcostbyruleengine"',
                              'application/json_rpc', 'text/json;charset=utf_8', 'application/json;charset=utf8',
                              'application/xml;charset=utf_8', 'application/x_www_form_urlencoded;charset=gbk',
                              'application/soap+xml;charset=utf_8;', 'application/merge_patch+json',
                              'application/json;', 'text/xml;charset="utf_16le"', 'text/html;charset=utf_8']
|
253
|
+
# Column names of the packet column group; get_all_columns() appends this list
# when contains_packet_column is set.
packetKeyname = ['id', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg', 'tcpflags.psh',
                 'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port', 'source.packets',
                 'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets', 'initRTT',
                 'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
                 'network.packets', 'network.bytes', 'length', 'client.bytes', 'http.uri',
                 'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt', 'http.clientVersionCnt',
                 'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt', 'http.serverVersion',
                 'http.responseHeaderCnt', 'http.xffIp', 'http.clientVersion', 'http.uriTokens',
                 'http.request-refererCnt', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic', 'http.methodCnt',
                 'http.request-content-type', 'http.uriCnt', 'http.serverVersionCnt', 'http.useragent', 'http.keyCnt',
                 'http.request-referer', 'http.path', 'http.hostCnt', 'http.response-server', 'http.pathCnt',
                 'http.useragentTokens', 'http.method-GET', 'http.method', 'http.key', 'http.hostTokens',
                 'http.requestHeader', 'http.responseHeader', 'http.method-POST', 'dns.ASN', 'dns.RIR', 'dns.GEO',
                 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.host', 'dns.ipCnt', 'dns.OpCode', 'dns.OpCodeCnt',
                 'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType', 'dns.QueryTypeCnt',
                 'dns.status', 'dns.statusCnt', 'tls.cipher', 'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3',
                 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version',
                 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt', 'packetPos', 'source.ip_Country_IsoCode',
                 'source.ip_Country_Name', 'source.ip_Country_SpecificName',
                 'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
                 'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
                 'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
                 'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name',
                 'destination.ip_City_PostalCode', 'destination.ip_Location_Latitude',
                 'destination.ip_Location_Longitude', 'http.uri_length_mean', 'http.uri_length_var',
                 "http.uri_param_count_mean", "http.uri_param_count_var", "http.uri_depth_mean", "http.uri_depth_var",
                 "http.uri_filename_length_mean", "http.uri_filename_length_var", "dns_domain_length_mean",
                 "dns_domain_length_var", "traffic_type", "PROTOCOL", "DENY_METHOD", "THREAT_SUMMARY", "SEVERITY",
                 "dns_domain_length", "dns_domain_suffix", "dns_domain", "dns_domain_suffix_length", "dns_base_domain",
                 "dns_base_domain_length", "req_res_period_mean", "req_res_period_var", "status_code_1x_count",
                 "status_code_2x_count", "status_code_3x_count", "status_code_4x_count", "status_code_5x_count",
                 "req_bytes_percentage", "res_bytes_percentage", "cookie_end_with_semicolon_count",
                 "ua_duplicate_count"]
|
286
|
+
# Columns for the extracted message bodies. get_detail_by_package() fills
# plain_body_src from the request and plain_body_dst from the response when
# use_regx is true.
plain_body_columns = ["plain_body_src",
                      "plain_body_dst"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|