xbase-util 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.1.1",
6
+ version="0.1.2",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -6,7 +6,7 @@ from xbase_util.xbase_constant import geo_path
6
6
 
7
7
 
8
8
  class GeoUtil:
9
- def __init__(self, reader):
9
+ def __init__(self):
10
10
  self.reader = geoip2.database.Reader(geo_path)
11
11
  print("初始化:GeoUtil")
12
12
 
@@ -0,0 +1,381 @@
1
+ import json
2
+ import re
3
+ from urllib.parse import urlparse, parse_qs
4
+
5
+ import execjs
6
+ import numpy as np
7
+ from scapy.layers.dns import DNS
8
+
9
+ from xbase_util.xbase_constant import parse_path
10
+
11
+
12
def parse_expression(expression):
    """Translate an Arkime search expression into an ES query fragment.

    Delegates to the ``parse_exp`` function of the bundled JS parser
    (compiled via execjs, requires nodejs). Returns None for a falsy
    expression.
    """
    if not expression:
        return None
    with open(parse_path, "r") as f:
        ctx = execjs.compile(f.read())
    return ctx.call("parse_exp", expression)
19
+
20
+
21
def get_cookie_end_with_semicolon_count(text_data):
    """Count header lines containing a Cookie header that ends with ';'.

    Lines are lower-cased, '-' is mapped to '_' and all whitespace is
    stripped before matching. Returns -1 when no such line exists.

    Bug fix: the previous version ended with ``return len(count)`` where
    ``count`` is an int, raising TypeError for any positive count.
    """
    count = 0
    for text in text_data.replace("-", "_").lower().split("\n"):
        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
        if "cookie:" in item_text and item_text.endswith(";"):
            count += 1
    return count if count else -1
30
+
31
+
32
def get_ua_duplicate_count(text_data):
    """Count duplicated User-Agent header values in raw header text.

    Only lines that end with ';' after whitespace removal are considered
    (mirrors the original filter). Returns -1 when no UA line matched.

    NOTE(review): the previous version crashed with TypeError on
    ``sum(list_of_str)`` and compared a list to 0. The duplicate count
    (total occurrences minus distinct values) matches the function name's
    apparent intent -- confirm against callers.
    """
    ua_list = []
    for text in text_data.replace("-", "_").lower().split("\n"):
        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
        if "user_agent:" in item_text and item_text.endswith(";"):
            ua_list.append(item_text.replace("user_agent:", ""))
    if not ua_list:
        return -1
    return len(ua_list) - len(set(ua_list))
42
+
43
+
44
def get_res_status_code_list(text_data):
    """Bucket HTTP response status codes found in raw response text.

    Scans lines starting with "HTTP/" for the first standalone 3-digit
    number and returns a 5-tuple of counts for the ranges 0-199,
    200-299, 300-399, 400-499 and 500-599.
    """
    buckets = [0, 0, 0, 0, 0]
    status_lines = (line for line in text_data.split("\n") if line.startswith("HTTP/"))
    for line in status_lines:
        found = re.search(r"\b(\d{3})\b", line)
        if not found:
            continue
        code = int(found.group(0))
        # Ranges are disjoint, so an elif chain is equivalent to the
        # original run of independent ifs.
        if 0 <= code < 200:
            buckets[0] += 1
        elif 200 <= code < 300:
            buckets[1] += 1
        elif 300 <= code < 400:
            buckets[2] += 1
        elif 400 <= code < 500:
            buckets[3] += 1
        elif 500 <= code < 600:
            buckets[4] += 1
    return buckets[0], buckets[1], buckets[2], buckets[3], buckets[4]
70
+
71
+
72
def get_packets_percentage(session, isReq):
    """Fraction of session bytes sent in the requested direction.

    Returns the source share when isReq is truthy, otherwise the
    destination share; 0.0 when the session carried no bytes (avoids
    division by zero), and the neutral 0.5 when the byte counters are
    absent from the session dict.
    """
    if "source.bytes" not in session or "destination.bytes" not in session:
        return 0.5
    total = session["source.bytes"] + session["destination.bytes"]
    if total <= 0:
        return 0.0
    numerator = session["source.bytes"] if isReq else session["destination.bytes"]
    return numerator / total
84
+
85
+
86
def split_samples(sample, per_subsection):
    """Split len(sample) items into chunks of per_subsection.

    Returns (number_of_chunks, list_of_chunk_sizes); the last chunk
    holds the remainder when the division is not exact.
    """
    full, rest = divmod(len(sample), per_subsection)
    sizes = [per_subsection] * full
    if rest:
        sizes.append(rest)
    return len(sizes), sizes
94
+
95
+
96
def split_process(subsection, process_count):
    """Distribute len(subsection) items as evenly as possible.

    Returns a list of process_count lengths; the first ``extra``
    processes receive one additional item.
    """
    base, extra = divmod(len(subsection), process_count)
    return [base + 1 if i < extra else base for i in range(process_count)]
106
+
107
+
108
def build_es_expression(size, start_time, end_time, arkime_expression):
    """Build an Elasticsearch query dict from time bounds and an Arkime expression.

    Timestamps are converted to epoch milliseconds for firstPacket/lastPacket
    range filters; the Arkime expression is translated via parse_expression
    (requires nodejs). On any failure the process prints a hint and exits
    with status 1, preserving the original behavior.
    """
    filters = []
    expression = {"query": {"bool": {"filter": filters}}}
    try:
        if size:
            expression['size'] = size
        if start_time:
            filters.append(
                {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
        if end_time:
            filters.append(
                {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
        translated = parse_expression(arkime_expression)
        if translated:
            filters.append(translated)
        return expression
    except Exception as e:
        print(f"请安装nodejs{e}")
        print(arkime_expression)
        exit(1)
127
+
128
+
129
def get_uri_depth(url):
    """Return the number of non-empty path segments in a URL.

    The query string is dropped first, then any scheme+host prefix,
    and the remaining path is split on '/'.
    """
    head = re.match(r'^[^?]*', url)
    if not head:
        return 0
    # Strip protocol and domain, keep only the path portion.
    path = re.sub(r'^https?://[^/]+', '', head.group(0))
    return sum(1 for seg in path.split('/') if seg)
138
+
139
+
140
def firstOrZero(param):
    """Return the first element of a list, or 0 otherwise.

    0 is returned both for an empty list and for any non-list input.
    Uses isinstance so list subclasses are accepted as well (the old
    ``type(param).__name__ == 'list'`` check rejected them).
    """
    if isinstance(param, list) and param:
        return param[0]
    return 0
148
+
149
+
150
def get_statistic_fields(packets):
    """Build a flat dict of packet-length statistics for a capture.

    For the whole capture and for each length bucket the map contains
    count / average / min / max / rate / percent fields, where rate is
    (count / capture_duration) / 1000 and percent is the bucket's share
    of all packets. Each packet must support len() and expose a ``.time``
    timestamp (scapy packets do).

    Bug fix: an empty packet list previously produced NaN values (plus a
    numpy RuntimeWarning) for the mean/variance fields; it now returns
    zeroed statistics.
    """
    length_ranges = {
        "0_19": (0, 19),
        "20_39": (20, 39),
        "40_79": (40, 79),
        "80_159": (80, 159),
        "160_319": (160, 319),
        "320_639": (320, 639),
        "640_1279": (640, 1279),
        "1280_2559": (1280, 2559),
        "2560_5119": (2560, 5119),
        "more_than_5120": (5120, float('inf'))
    }

    def get_length_range(le):
        # First bucket whose [min, max] span contains the length.
        for key, (min_len, max_len) in length_ranges.items():
            if min_len <= le <= max_len:
                return key
        return "more_than_5120"

    packet_lengths = {key: [] for key in length_ranges}
    packet_len_total_count = len(packets)
    # Compute each packet's length once and reuse it everywhere below.
    packet_size = [len(p) for p in packets]
    total_length = sum(packet_size)
    for length in packet_size:
        packet_lengths[get_length_range(length)].append(length)
    # Capture duration; 1 avoids division by zero for 0 or 1 packets.
    total_time = packets[-1].time - packets[0].time if packet_len_total_count > 1 else 1
    packet_len_average = round(total_length / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
    packet_len_min = min(packet_size) if packet_size else 0
    packet_len_max = max(packet_size) if packet_size else 0
    packet_len_rate = round((packet_len_total_count / total_time) / 1000, 5) if total_time > 0 else 0
    field_map = {
        # Guard: np.mean / np.var of an empty sequence is NaN.
        "packet_size_mean": float(round(np.mean(packet_size), 5)) if packet_size else 0.0,
        "packet_size_variance": float(round(np.var(packet_size), 5)) if packet_size else 0.0,
        'packet_len_total_count': packet_len_total_count,
        'packet_len_total_average': packet_len_average,
        'packet_len_total_min': packet_len_min,
        'packet_len_total_max': packet_len_max,
        'packet_len_total_rate': float(packet_len_rate),
        'packet_len_total_percent': 1,
    }
    for length_range, lengths in packet_lengths.items():
        count = len(lengths)
        if count > 0:
            average = round(sum(lengths) / count, 5)
            min_val = min(lengths)
            max_val = max(lengths)
        else:
            average = min_val = max_val = 0
        packet_len_rate = round((count / total_time) / 1000, 5) if total_time > 0 else 0
        percent = round(count / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
        field_map.update({
            f"packet_len_{length_range}_count": count,
            f"packet_len_{length_range}_average": average,
            f"packet_len_{length_range}_min": min_val,
            f"packet_len_{length_range}_max": max_val,
            f"packet_len_{length_range}_rate": float(packet_len_rate),
            f"packet_len_{length_range}_percent": percent
        })
    return field_map
213
+
214
+
215
def get_dns_domain(packets):
    """Extract the first queried DNS domain name from a packet list.

    Decodes the qname of the first DNS packet carrying a question
    record; a single trailing root dot is stripped. Returns "" when no
    DNS query is present.
    """
    domain_name = ""
    for pkt in packets:
        if DNS not in pkt:
            continue
        question = pkt[DNS].qd
        if not question:
            continue
        try:
            domain_name = question.qname.decode('utf-8')
        except Exception:
            # Fall back to the raw repr when the qname is not valid UTF-8.
            domain_name = str(question.qname)
            print(f"dns域名编码失败的字符串:{domain_name}")
        break
    if domain_name.endswith("."):
        domain_name = domain_name[:-1]
    return domain_name
231
+
232
+
233
def extract_session_fields(cls, origin_list, geoUtil):
    """Flatten raw ES session hits into per-session feature dicts.

    cls supplies the get_uri_filename_length / get_url_param_count
    helpers applied to each HTTP URI; geoUtil.get_geo_by_ip enriches
    each flattened record before it is appended. Missing fields fall
    back to neutral defaults ([] / 0 / "").

    Bug fix: "tls.ja4_r" and "tls.ja4_rCnt" previously read
    tls["versionCnt"] — an apparent copy-paste slip; they now read the
    matching ja4_r / ja4_rCnt keys (confirm against the ES mapping).
    """
    res = []
    for item in origin_list:
        _source = item.get("_source", {})
        source = _source.get("source", {})
        tcpflags = _source.get("tcpflags", {})
        destination = _source.get("destination", {})
        http = _source.get("http", {})
        dns = _source.get("dns", {})
        tls = _source.get("tls", {})
        uri = http.get('uri', [])
        uri_length = [len(u) for u in uri]
        uri_depth = [get_uri_depth(u) for u in uri]
        uri_filename_length = [cls.get_uri_filename_length(u) for u in uri]
        uri_params = [cls.get_url_param_count(u) for u in uri]
        res.append(geoUtil.get_geo_by_ip({
            "id": item["_id"],
            "node": _source.get("node", ""),
            "segmentCnt": _source.get("segmentCnt", 0),
            "tcpflags.rst": tcpflags.get("rst", 0),
            "tcpflags.ack": tcpflags.get("ack", 0),
            "tcpflags.syn": tcpflags.get("syn", 0),
            "tcpflags.urg": tcpflags.get("urg", 0),
            "tcpflags.psh": tcpflags.get("psh", 0),
            "tcpflags.syn-ack": tcpflags.get("syn-ack", 0),
            "tcpflags.fin": tcpflags.get("fin", 0),
            "source.ip": source.get("ip", ""),
            "destination.ip": destination.get("ip", ""),
            "source.port": source.get("port", ""),
            "source.packets": source.get("packets", ""),
            "source.bytes": source.get("bytes", 0),
            "destination.port": destination.get("port", ""),
            "destination.bytes": destination.get("bytes", 0),
            "destination.packets": destination.get("packets", 0),
            "initRTT": _source.get("initRTT", ""),
            "firstPacket": _source.get("firstPacket", 0),
            "lastPacket": _source.get("lastPacket", 0),
            "ipProtocol": _source.get("ipProtocol", 0),
            "protocolCnt": _source.get("protocolCnt", 0),
            "protocol": _source.get("protocol", []),
            "server.bytes": _source.get("server", {}).get("bytes", 0),
            "totDataBytes": _source.get("totDataBytes", 0),
            "network.packets": _source.get("network", {}).get("packets", 0),
            "network.bytes": _source.get("network", {}).get("bytes", 0),
            "length": _source.get("length", 0),
            "client.bytes": _source.get("client", {}).get("bytes", 0),
            "http.uri": uri,
            # nan_to_num maps the NaN produced for an empty uri list to 0.
            "http.uri_length_mean": round(np.nan_to_num(np.mean(uri_length)), 5),
            "http.uri_length_var": round(np.nan_to_num(np.var(uri_length)), 5),
            "http.uri_param_count_mean": round(np.nan_to_num(np.mean(uri_params)), 5),
            "http.uri_param_count_var": round(np.nan_to_num(np.var(uri_params)), 5),
            "http.uri_depth_mean": round(np.nan_to_num(np.mean(uri_depth)), 5),
            "http.uri_depth_var": round(np.nan_to_num(np.var(uri_depth)), 5),
            "http.uri_filename_length_mean": round(np.nan_to_num(np.mean(uri_filename_length)), 5),
            "http.uri_filename_length_var": round(np.nan_to_num(np.var(uri_filename_length)), 5),

            "http.response-content-type": http.get("response-content-type", []),
            "http.bodyMagicCnt": http.get("bodyMagicCnt", 0),
            "http.statuscodeCnt": http.get("statusCodeCnt", 0),
            "http.clientVersionCnt": http.get("clientVersionCnt", 0),
            "http.response-content-typeCnt": http.get("response-content-typeCnt", 0),
            "http.xffIpCnt": http.get("xffIpCnt", 0),
            "http.requestHeaderCnt": http.get("requestHeaderCnt", 0),
            "http.serverVersion": http.get("serverVersion", []),
            "http.serverVersionCnt": http.get("serverVersionCnt", 0),
            "http.responseHeaderCnt": http.get("responseHeaderCnt", 0),
            "http.xffIp": http.get("xffIp", []),
            "http.clientVersion": http.get("clientVersion", []),
            "http.uriTokens": http.get("uriTokens", ""),
            "http.useragentCnt": http.get("useragentCnt", 0),
            "http.statuscode": http.get("statusCode", []),
            "http.bodyMagic": http.get("bodyMagic", []),
            "http.request-content-type": http.get("request-content-type", []),
            "http.uriCnt": http.get("uriCnt", 0),

            "http.useragent": http.get("useragent", ""),
            "http.keyCnt": http.get("keyCnt", 0),
            "http.request-referer": http.get("requestReferer", []),
            "http.request-refererCnt": http.get("requestRefererCnt", 0),
            "http.path": http.get("path", []),
            "http.hostCnt": http.get("hostCnt", 0),
            "http.response-server": http.get("response-server", []),
            "http.pathCnt": http.get("pathCnt", 0),
            "http.useragentTokens": http.get("useragentTokens", ""),
            "http.methodCnt": http.get("methodCnt", 0),
            "http.method": http.get("method", []),
            "http.method-GET": http.get("method-GET", 0),
            "http.method-POST": http.get("method-POST", 0),
            "http.key": http.get("key", []),
            "http.hostTokens": http.get("hostTokens", ""),
            "http.requestHeader": http.get("requestHeader", []),
            "http.responseHeader": http.get("responseHeader", []),

            "dns.ASN": dns.get("ASN", []),
            "dns.RIR": dns.get("RIR", []),
            "dns.GEO": dns.get("GEO", []),
            "dns.alpn": dns.get("https.alpn", []),
            "dns.alpnCnt": dns.get("https.alpnCnt", 0),
            "dns.ip": dns.get("ip", []),
            "dns.ipCnt": dns.get("ipCnt", 0),
            "dns.OpCode": dns.get("opcode", []),
            "dns.OpCodeCnt": dns.get("opcodeCnt", 0),
            "dns.Puny": dns.get("puny", []),
            "dns.PunyCnt": dns.get("puntCnt", 0),
            "dns.QueryClass": dns.get("qc", []),
            "dns.QueryClassCnt": dns.get("qcCnt", 0),
            "dns.QueryType": dns.get("qt", []),
            "dns.QueryTypeCnt": dns.get("qtCnt", 0),
            "dns.status": dns.get("status", []),
            "dns.hostCnt": json.dumps(dns.get("hostCnt", 0)),
            "dns.host": json.dumps(dns.get("host", [])),
            "dns.statusCnt": dns.get("statusCnt", 0),

            "tls.cipher": tls.get("cipher", []),
            "tls.cipherCnt": tls.get("cipherCnt", 0),
            "tls.dstSessionId": tls.get("dstSessionId", []),
            "tls.ja3": tls.get("ja3", []),
            "tls.ja3Cnt": tls.get("ja3Cnt", 0),
            "tls.ja3s": tls.get("ja3s", []),
            "tls.ja3sCnt": tls.get("ja3sCnt", 0),
            "tls.ja4": tls.get("ja4", []),
            "tls.ja4Cnt": tls.get("ja4Cnt", 0),
            "tls.srcSessionId": tls.get("srcSessionId", []),
            "tls.version": tls.get("version", []),
            "tls.versionCnt": tls.get("versionCnt", 0),
            # Fixed copy-paste: these two previously read "versionCnt".
            "tls.ja4_r": tls.get("ja4_r", []),
            "tls.ja4_rCnt": tls.get("ja4_rCnt", 0),
            "packetPos": json.dumps(_source.get("packetPos", [])),
            "traffic_type": item.get("traffic_type", ""),
            "PROTOCOL": item.get("PROTOCOL", ""),
            "DENY_METHOD": item.get("DENY_METHOD", ""),
            "THREAT_SUMMARY": item.get("THREAT_SUMMARY", ""),
            "SEVERITY": item.get("SEVERITY", ""),
        }))
    return res
368
+
369
+
370
def get_url_param_count(url):
    """Return the number of distinct query-string parameter names in a URL.

    parse_qs drops parameters with blank values, matching the original
    behavior.
    """
    return len(parse_qs(urlparse(url).query))
374
+
375
+
376
def get_uri_filename_length(uri):
    """Return the length of the URI's trailing extension, or 0 if none.

    NOTE(review): ``group(0)`` includes the leading '.', so '/a/b.html'
    yields 5 — confirm whether the dot is meant to count toward the
    length.
    """
    tail = re.search(r'\.([^./?#]+)$', uri)
    return len(tail.group(0)) if tail else 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -1,82 +0,0 @@
1
- import re
2
-
3
- import execjs
4
-
5
- from xbase_util.xbase_constant import parse_path
6
-
7
-
8
- def parse_expression(expression):
9
- if expression:
10
- with open(parse_path, "r") as f:
11
- ctx = execjs.compile(f.read())
12
- return ctx.call("parse_exp", expression)
13
- else:
14
- return None
15
-
16
-
17
- # def geo_reader():
18
- # return geoip2.database.Reader(geo_path)
19
-
20
-
21
- def split_samples(sample, per_subsection):
22
- num_subsections = len(sample) // per_subsection
23
- remainder = len(sample) % per_subsection
24
- subsection_sizes = [per_subsection] * num_subsections
25
- if remainder > 0:
26
- subsection_sizes.append(remainder)
27
- num_subsections += 1
28
- return num_subsections, subsection_sizes
29
-
30
-
31
- def split_process(subsection, process_count):
32
- subsection_per_process = len(subsection) // process_count
33
- remainder = len(subsection) % process_count
34
- lengths = []
35
- start = 0
36
- for i in range(process_count):
37
- end = start + subsection_per_process + (1 if i < remainder else 0)
38
- lengths.append(end - start)
39
- start = end
40
- return lengths
41
-
42
-
43
- def build_es_expression(size, start_time, end_time, arkime_expression):
44
- expression = {"query": {"bool": {"filter": []}}}
45
- try:
46
- if size:
47
- expression['size'] = size
48
- if start_time:
49
- expression['query']['bool']['filter'].append(
50
- {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
51
- if end_time:
52
- expression['query']['bool']['filter'].append(
53
- {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
54
- arkime_2_es = parse_expression(arkime_expression)
55
- if arkime_2_es:
56
- expression['query']['bool']['filter'].append(arkime_2_es)
57
- return expression
58
- except Exception as e:
59
- print(f"请安装nodejs{e}")
60
- print(arkime_expression)
61
- exit(1)
62
-
63
-
64
- def get_uri_depth(url):
65
- match = re.match(r'^[^?]*', url)
66
- if match:
67
- path = match.group(0)
68
- # 去除协议和域名部分
69
- path = re.sub(r'^https?://[^/]+', '', path)
70
- segments = [segment for segment in path.split('/') if segment]
71
- return len(segments)
72
- return 0
73
-
74
-
75
- def firstOrZero(param):
76
- if type(param).__name__ == 'list':
77
- if (len(param)) != 0:
78
- return param[0]
79
- else:
80
- return 0
81
- else:
82
- return 0
File without changes
File without changes