xbase-util 0.1.1__tar.gz → 0.1.2__tar.gz
This diff shows the changes between publicly available package versions as published to one of the supported registries. It is provided for informational purposes only.
- {xbase_util-0.1.1 → xbase_util-0.1.2}/PKG-INFO +1 -1
- {xbase_util-0.1.1 → xbase_util-0.1.2}/setup.py +1 -1
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/geo_util.py +1 -1
- xbase_util-0.1.2/xbase_util/xbase_util.py +381 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util.egg-info/PKG-INFO +1 -1
- xbase_util-0.1.1/xbase_util/xbase_util.py +0 -82
- {xbase_util-0.1.1 → xbase_util-0.1.2}/README.md +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/setup.cfg +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/__init__.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/esreq.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util.egg-info/SOURCES.txt +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.1.1 → xbase_util-0.1.2}/xbase_util_assets/arkimeparse.js +0 -0
xbase_util-0.1.2/xbase_util/xbase_util.py (new file)
@@ -0,0 +1,381 @@
+import json
+import re
+from urllib.parse import urlparse, parse_qs
+
+import execjs
+import numpy as np
+from scapy.layers.dns import DNS
+
+from xbase_util.xbase_constant import parse_path
+
+
+def parse_expression(expression):
+    if expression:
+        with open(parse_path, "r") as f:
+            ctx = execjs.compile(f.read())
+            return ctx.call("parse_exp", expression)
+    else:
+        return None
+
+
+def get_cookie_end_with_semicolon_count(text_data):
+    count = 0
+    for text in text_data.replace("-", "_").lower().split("\n"):
+        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+        if "cookie:" in item_text and f"{item_text}".endswith(";"):
+            count = count + 1
+    if count == 0:
+        return -1
+    return len(count)
+
+
+def get_ua_duplicate_count(text_data):
+    ua_list = []
+    for text in text_data.replace("-", "_").lower().split("\n"):
+        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+        if "user_agent:" in item_text and f"{item_text}".endswith(";"):
+            ua_list.append(item_text.replace("user_agent:", ""))
+    count = list(set(ua_list))
+    if count == 0:
+        return -1
+    return sum(count)
+
+
+def get_res_status_code_list(text_data):
+    value_res = []
+    res = []
+    num_1 = 0
+    num_2 = 0
+    num_3 = 0
+    num_4 = 0
+    num_5 = 0
+
+    res.extend([item for item in text_data.split("\n") if item.startswith("HTTP/")])
+    for item in res:
+        m = re.search(r"\b(\d{3})\b", item)
+        if m:
+            value_res.append(int(m.group(0)))
+    for value in value_res:
+        if 0 <= value < 200:
+            num_1 = num_1 + 1
+        if 200 <= value < 300:
+            num_2 = num_2 + 1
+        if 300 <= value < 400:
+            num_3 = num_3 + 1
+        if 400 <= value < 500:
+            num_4 = num_4 + 1
+        if 500 <= value < 600:
+            num_5 = num_5 + 1
+    return num_1, num_2, num_3, num_4, num_5
+
+
+def get_packets_percentage(session, isReq):
+    if "source.bytes" in session and "destination.bytes" in session:
+        total_bytes = session["source.bytes"] + session["destination.bytes"]
+        if total_bytes > 0:
+            if isReq:
+                return session["source.bytes"] / total_bytes
+            else:
+                return session["destination.bytes"] / total_bytes
+        else:
+            return 0.0  # 避免除以0的情况
+    else:
+        return 0.5
+
+
+def split_samples(sample, per_subsection):
+    num_subsections = len(sample) // per_subsection
+    remainder = len(sample) % per_subsection
+    subsection_sizes = [per_subsection] * num_subsections
+    if remainder > 0:
+        subsection_sizes.append(remainder)
+        num_subsections += 1
+    return num_subsections, subsection_sizes
+
+
+def split_process(subsection, process_count):
+    subsection_per_process = len(subsection) // process_count
+    remainder = len(subsection) % process_count
+    lengths = []
+    start = 0
+    for i in range(process_count):
+        end = start + subsection_per_process + (1 if i < remainder else 0)
+        lengths.append(end - start)
+        start = end
+    return lengths
+
+
+def build_es_expression(size, start_time, end_time, arkime_expression):
+    expression = {"query": {"bool": {"filter": []}}}
+    try:
+        if size:
+            expression['size'] = size
+        if start_time:
+            expression['query']['bool']['filter'].append(
+                {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
+        if end_time:
+            expression['query']['bool']['filter'].append(
+                {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
+        arkime_2_es = parse_expression(arkime_expression)
+        if arkime_2_es:
+            expression['query']['bool']['filter'].append(arkime_2_es)
+        return expression
+    except Exception as e:
+        print(f"请安装nodejs{e}")
+        print(arkime_expression)
+        exit(1)
+
+
+def get_uri_depth(url):
+    match = re.match(r'^[^?]*', url)
+    if match:
+        path = match.group(0)
+        # 去除协议和域名部分
+        path = re.sub(r'^https?://[^/]+', '', path)
+        segments = [segment for segment in path.split('/') if segment]
+        return len(segments)
+    return 0
+
+
+def firstOrZero(param):
+    if type(param).__name__ == 'list':
+        if (len(param)) != 0:
+            return param[0]
+        else:
+            return 0
+    else:
+        return 0
+
+
+def get_statistic_fields(packets):
+    length_ranges = {
+        "0_19": (0, 19),
+        "20_39": (20, 39),
+        "40_79": (40, 79),
+        "80_159": (80, 159),
+        "160_319": (160, 319),
+        "320_639": (320, 639),
+        "640_1279": (640, 1279),
+        "1280_2559": (1280, 2559),
+        "2560_5119": (2560, 5119),
+        "more_than_5120": (5120, float('inf'))
+    }
+
+    def get_length_range(le):
+        for key, (min_len, max_len) in length_ranges.items():
+            if min_len <= le <= max_len:
+                return key
+        return "more_than_5120"
+
+    packet_lengths = {key: [] for key in length_ranges}
+    total_length = 0
+    packet_len_total_count = len(packets)
+    for packet_item in packets:
+        length = len(packet_item)
+        length_range = get_length_range(length)
+        packet_lengths[length_range].append(length)
+        total_length += length
+    total_time = packets[-1].time - packets[0].time if packet_len_total_count > 1 else 1
+    packet_len_average = round(total_length / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+    packet_len_min = min(len(packet_item) for packet_item in packets) if packets else 0
+    packet_len_max = max(len(packet_item) for packet_item in packets) if packets else 0
+    packet_len_rate = round((packet_len_total_count / total_time) / 1000, 5) if total_time > 0 else 0
+    packet_size = [len(p) for p in packets]
+    field_map = {
+        "packet_size_mean": float(round(np.mean(packet_size), 5)),
+        "packet_size_variance": float(round(np.var(packet_size), 5)),
+        'packet_len_total_count': packet_len_total_count,
+        'packet_len_total_average': packet_len_average,
+        'packet_len_total_min': packet_len_min,
+        'packet_len_total_max': packet_len_max,
+        'packet_len_total_rate': float(packet_len_rate),
+        'packet_len_total_percent': 1,
+    }
+    for length_range, lengths in packet_lengths.items():
+        count = len(lengths)
+        if count > 0:
+            average = round(sum(lengths) / count, 5)
+            min_val = min(lengths)
+            max_val = max(lengths)
+        else:
+            average = min_val = max_val = 0
+        packet_len_rate = round((count / total_time) / 1000, 5) if total_time > 0 else 0
+        percent = round(count / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+        field_map.update({
+            f"packet_len_{length_range}_count": count,
+            f"packet_len_{length_range}_average": average,
+            f"packet_len_{length_range}_min": min_val,
+            f"packet_len_{length_range}_max": max_val,
+            f"packet_len_{length_range}_rate": float(packet_len_rate),
+            f"packet_len_{length_range}_percent": percent
+        })
+    return field_map
+
+
+def get_dns_domain(packets):
+    domain_name = ""
+    for packet_item in packets:
+        if DNS in packet_item:
+            dns_layer = packet_item[DNS]
+            if dns_layer.qd:
+                try:
+                    domain_name = dns_layer.qd.qname.decode('utf-8')
+                    # print(f"dns域名:{domain_name}")
+                except Exception:
+                    domain_name = str(dns_layer.qd.qname)
+                    print(f"dns域名编码失败的字符串:{domain_name}")
+                break
+    if domain_name.endswith("."):
+        domain_name = domain_name[:-1]
+    return domain_name
+
+
+def extract_session_fields(cls, origin_list, geoUtil):
+    res = []
+    for item in origin_list:
+        _source = item.get("_source", {})
+        source = _source.get("source", {})
+        tcpflags = _source.get("tcpflags", {})
+        destination = _source.get("destination", {})
+        http = _source.get("http", {})
+        dns = _source.get("dns", {})
+        tls = _source.get("tls", {})
+        uri = http.get('uri', [])
+        uri_length = [len(u) for u in uri]
+        uri_depth = [get_uri_depth(u) for u in uri]
+        uri_filename_length = [cls.get_uri_filename_length(u) for u in uri]
+        uri_params = [cls.get_url_param_count(u) for u in uri]
+        res.append(geoUtil.get_geo_by_ip({
+            "id": item["_id"],
+            "node": _source.get("node", ""),
+            "segmentCnt": _source.get("segmentCnt", 0),
+            "tcpflags.rst": tcpflags.get("rst", 0),
+            "tcpflags.ack": tcpflags.get("ack", 0),
+            "tcpflags.syn": tcpflags.get("syn", 0),
+            "tcpflags.urg": tcpflags.get("urg", 0),
+            "tcpflags.psh": tcpflags.get("psh", 0),
+            "tcpflags.syn-ack": tcpflags.get("syn-ack", 0),
+            "tcpflags.fin": tcpflags.get("fin", 0),
+            "source.ip": source.get("ip", ""),
+            "destination.ip": destination.get("ip", ""),
+            "source.port": source.get("port", ""),
+            "source.packets": source.get("packets", ""),
+            "source.bytes": source.get("bytes", 0),
+            "destination.port": destination.get("port", ""),
+            "destination.bytes": destination.get("bytes", 0),
+            "destination.packets": destination.get("packets", 0),
+            "initRTT": _source.get("initRTT", ""),
+            "firstPacket": _source.get("firstPacket", 0),
+            "lastPacket": _source.get("lastPacket", 0),
+            "ipProtocol": _source.get("ipProtocol", 0),
+            "protocolCnt": _source.get("protocolCnt", 0),
+            "protocol": _source.get("protocol", []),
+            "server.bytes": _source.get("server", {}).get("bytes", 0),
+            "totDataBytes": _source.get("totDataBytes", 0),
+            "network.packets": _source.get("network", {}).get("packets", 0),
+            "network.bytes": _source.get("network", {}).get("bytes", 0),
+            "length": _source.get("length", 0),
+            "client.bytes": _source.get("client", {}).get("bytes", 0),
+            "http.uri": uri,
+            "http.uri_length_mean": round(np.nan_to_num(np.mean(uri_length)), 5),
+            "http.uri_length_var": round(np.nan_to_num(np.var(uri_length)), 5),
+            "http.uri_param_count_mean": round(np.nan_to_num(np.mean(uri_params)), 5),
+            "http.uri_param_count_var": round(np.nan_to_num(np.var(uri_params)), 5),
+            "http.uri_depth_mean": round(np.nan_to_num(np.mean(uri_depth)), 5),
+            "http.uri_depth_var": round(np.nan_to_num(np.var(uri_depth)), 5),
+            "http.uri_filename_length_mean": round(np.nan_to_num(np.mean(uri_filename_length)), 5),
+            "http.uri_filename_length_var": round(np.nan_to_num(np.var(uri_filename_length)), 5),
+
+            "http.response-content-type": http.get("response-content-type", []),
+            "http.bodyMagicCnt": http.get("bodyMagicCnt", 0),
+            "http.statuscodeCnt": http.get("statusCodeCnt", 0),
+            "http.clientVersionCnt": http.get("clientVersionCnt", 0),
+            "http.response-content-typeCnt": http.get("response-content-typeCnt", 0),
+            "http.xffIpCnt": http.get("xffIpCnt", 0),
+            "http.requestHeaderCnt": http.get("requestHeaderCnt", 0),
+            "http.serverVersion": http.get("serverVersion", []),
+            "http.serverVersionCnt": http.get("serverVersionCnt", 0),
+            "http.responseHeaderCnt": http.get("responseHeaderCnt", 0),
+            "http.xffIp": http.get("xffIp", []),
+            "http.clientVersion": http.get("clientVersion", []),
+            "http.uriTokens": http.get("uriTokens", ""),
+            "http.useragentCnt": http.get("useragentCnt", 0),
+            "http.statuscode": http.get("statusCode", []),
+            "http.bodyMagic": http.get("bodyMagic", []),
+            "http.request-content-type": http.get("request-content-type", []),
+            "http.uriCnt": http.get("uriCnt", 0),
+
+            "http.useragent": http.get("useragent", ""),
+            "http.keyCnt": http.get("keyCnt", 0),
+            "http.request-referer": http.get("requestReferer", []),
+            "http.request-refererCnt": http.get("requestRefererCnt", 0),
+            "http.path": http.get("path", []),
+            "http.hostCnt": http.get("hostCnt", 0),
+            "http.response-server": http.get("response-server", []),
+            "http.pathCnt": http.get("pathCnt", 0),
+            "http.useragentTokens": http.get("useragentTokens", ""),
+            "http.methodCnt": http.get("methodCnt", 0),
+            "http.method": http.get("method", []),
+            "http.method-GET": http.get("method-GET", 0),
+            "http.method-POST": http.get("method-POST", 0),
+            "http.key": http.get("key", []),
+            "http.hostTokens": http.get("hostTokens", ""),
+            "http.requestHeader": http.get("requestHeader", []),
+            "http.responseHeader": http.get("responseHeader", []),
+
+            "dns.ASN": dns.get("ASN", []),
+            "dns.RIR": dns.get("RIR", []),
+            "dns.GEO": dns.get("GEO", []),
+            "dns.alpn": dns.get("https.alpn", []),
+            "dns.alpnCnt": dns.get("https.alpnCnt", 0),
+            "dns.ip": dns.get("ip", []),
+            "dns.ipCnt": dns.get("ipCnt", 0),
+            "dns.OpCode": dns.get("opcode", []),
+            "dns.OpCodeCnt": dns.get("opcodeCnt", 0),
+            "dns.Puny": dns.get("puny", []),
+            "dns.PunyCnt": dns.get("puntCnt", 0),
+            "dns.QueryClass": dns.get("qc", []),
+            "dns.QueryClassCnt": dns.get("qcCnt", 0),
+            "dns.QueryType": dns.get("qt", []),
+            "dns.QueryTypeCnt": dns.get("qtCnt", 0),
+            "dns.status": dns.get("status", []),
+            "dns.hostCnt": json.dumps(dns.get("hostCnt", 0)),
+            "dns.host": json.dumps(dns.get("host", [])),
+            "dns.statusCnt": dns.get("statusCnt", 0),
+
+            "tls.cipher": tls.get("cipher", []),
+            "tls.cipherCnt": tls.get("cipherCnt", 0),
+            "tls.dstSessionId": tls.get("dstSessionId", []),
+            "tls.ja3": tls.get("ja3", []),
+            "tls.ja3Cnt": tls.get("ja3Cnt", 0),
+            "tls.ja3s": tls.get("ja3s", []),
+            "tls.ja3sCnt": tls.get("ja3sCnt", 0),
+            "tls.ja4": tls.get("ja4", []),
+            "tls.ja4Cnt": tls.get("ja4Cnt", 0),
+            "tls.srcSessionId": tls.get("srcSessionId", []),
+            "tls.version": tls.get("version", []),
+            "tls.versionCnt": tls.get("versionCnt", 0),
+            "tls.ja4_r": tls.get("versionCnt", 0),
+            "tls.ja4_rCnt": tls.get("versionCnt", 0),
+            "packetPos": json.dumps(_source.get("packetPos", [])),
+            "traffic_type": item.get("traffic_type", ""),
+            "PROTOCOL": item.get("PROTOCOL", ""),
+            "DENY_METHOD": item.get("DENY_METHOD", ""),
+            "THREAT_SUMMARY": item.get("THREAT_SUMMARY", ""),
+            "SEVERITY": item.get("SEVERITY", ""),
+        }))
+    return res
+
+
+def get_url_param_count(url):
+    query = urlparse(url).query  # 解析 URL 中的查询字符串
+    params = parse_qs(query)  # 解析查询字符串为字典
+    return len(params)
+
+
+def get_uri_filename_length(uri):
+    match = re.search(r'\.([^./?#]+)$', uri)
+    if match:
+        extension = match.group(0)
+        return len(extension)
+    return 0
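The 0.1.2 module adds several pure-Python helpers alongside the execjs/scapy-based ones. The snippet below is a minimal usage sketch, not part of the package: it assumes xbase_util 0.1.2 is installed together with the dependencies its module-level imports require (PyExecJS, numpy, scapy), and the sample HTTP text and numbers are made up for illustration.

```python
# Illustrative use of a few helpers added in xbase_util 0.1.2.
# Importing xbase_util.xbase_util pulls in execjs, numpy and scapy at module
# level, so those packages must be installed for the import to succeed.
from xbase_util.xbase_util import (
    get_res_status_code_list,
    get_uri_depth,
    split_process,
    split_samples,
)

# Bucket HTTP response status lines into 1xx/2xx/3xx/4xx/5xx counters.
raw_http = "HTTP/1.1 200 OK\nHTTP/1.1 404 Not Found\nHTTP/1.1 302 Found\n"
print(get_res_status_code_list(raw_http))  # -> (0, 1, 1, 1, 0)

# Path depth of a URI, ignoring the query string and the scheme/host part.
print(get_uri_depth("https://example.com/a/b/c?x=1"))  # -> 3

# Split 10 samples into chunks of 4, then spread one chunk over 3 workers.
num_chunks, chunk_sizes = split_samples(list(range(10)), per_subsection=4)
print(num_chunks, chunk_sizes)           # -> 3 [4, 4, 2]
print(split_process(list(range(4)), 3))  # -> [2, 1, 1]
```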
xbase_util-0.1.1/xbase_util/xbase_util.py (removed)
@@ -1,82 +0,0 @@
-import re
-
-import execjs
-
-from xbase_util.xbase_constant import parse_path
-
-
-def parse_expression(expression):
-    if expression:
-        with open(parse_path, "r") as f:
-            ctx = execjs.compile(f.read())
-            return ctx.call("parse_exp", expression)
-    else:
-        return None
-
-
-# def geo_reader():
-#     return geoip2.database.Reader(geo_path)
-
-
-def split_samples(sample, per_subsection):
-    num_subsections = len(sample) // per_subsection
-    remainder = len(sample) % per_subsection
-    subsection_sizes = [per_subsection] * num_subsections
-    if remainder > 0:
-        subsection_sizes.append(remainder)
-        num_subsections += 1
-    return num_subsections, subsection_sizes
-
-
-def split_process(subsection, process_count):
-    subsection_per_process = len(subsection) // process_count
-    remainder = len(subsection) % process_count
-    lengths = []
-    start = 0
-    for i in range(process_count):
-        end = start + subsection_per_process + (1 if i < remainder else 0)
-        lengths.append(end - start)
-        start = end
-    return lengths
-
-
-def build_es_expression(size, start_time, end_time, arkime_expression):
-    expression = {"query": {"bool": {"filter": []}}}
-    try:
-        if size:
-            expression['size'] = size
-        if start_time:
-            expression['query']['bool']['filter'].append(
-                {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
-        if end_time:
-            expression['query']['bool']['filter'].append(
-                {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
-        arkime_2_es = parse_expression(arkime_expression)
-        if arkime_2_es:
-            expression['query']['bool']['filter'].append(arkime_2_es)
-        return expression
-    except Exception as e:
-        print(f"请安装nodejs{e}")
-        print(arkime_expression)
-        exit(1)
-
-
-def get_uri_depth(url):
-    match = re.match(r'^[^?]*', url)
-    if match:
-        path = match.group(0)
-        # 去除协议和域名部分
-        path = re.sub(r'^https?://[^/]+', '', path)
-        segments = [segment for segment in path.split('/') if segment]
-        return len(segments)
-    return 0
-
-
-def firstOrZero(param):
-    if type(param).__name__ == 'list':
-        if (len(param)) != 0:
-            return param[0]
-        else:
-            return 0
-    else:
-        return 0
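Everything in the removed 0.1.1 module (parse_expression, split_samples, split_process, build_es_expression, get_uri_depth, firstOrZero) reappears in the 0.1.2 file above, so the change is effectively an expansion of xbase_util.py rather than a removal. For reference, below is a hedged sketch of the Elasticsearch query body that build_es_expression assembles for a time window; the timestamps and size are made up, and the extra filter clause produced by the Node.js Arkime expression parser (invoked through execjs) is omitted.

```python
from datetime import datetime, timezone

# Sketch of the query body build_es_expression assembles for a time window.
# Made-up window; the real function also appends the clause returned by the
# execjs-driven Arkime parser, which requires Node.js to be installed.
start_time = datetime(2024, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2024, 1, 2, tzinfo=timezone.utc)
expression = {
    "size": 100,
    "query": {"bool": {"filter": [
        {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}},
        {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}},
    ]}},
}
print(expression)  # body suitable for an Elasticsearch search request
```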