xbase-util 0.1.1.tar.gz → 0.1.2.tar.gz

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: xbase_util
- Version: 0.1.1
+ Version: 0.1.2
  Summary: 网络安全基础工具
  Home-page: https://gitee.com/jimonik/xbase_util.git
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
  from setuptools import find_packages
 
  setup(name="xbase_util",
-       version="0.1.1",
+       version="0.1.2",
        description="网络安全基础工具",
        long_description="包含提取,预测,训练的基础工具",
        author="xyt",
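The setup.py and metadata hunks carry nothing but the 0.1.1 → 0.1.2 version bump, so picking the release up on the consumer side should just be:

    pip install xbase_util==0.1.2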
@@ -6,7 +6,7 @@ from xbase_util.xbase_constant import geo_path
 
 
  class GeoUtil:
-     def __init__(self, reader):
+     def __init__(self):
          self.reader = geoip2.database.Reader(geo_path)
          print("初始化:GeoUtil")
 
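With this change GeoUtil no longer takes a reader argument; it opens the geoip2 Reader on geo_path itself. A minimal usage sketch, assuming the class is importable as xbase_util.geo_util.GeoUtil (the file name is not shown in this diff) and that geo_path points at a GeoIP2 City database:

    from xbase_util.geo_util import GeoUtil   # assumed module path

    geo = GeoUtil()                            # 0.1.2: no `reader` argument, the Reader is built internally
    record = geo.reader.city("8.8.8.8")        # geo.reader is the underlying geoip2 Reader
    print(record.country.iso_code)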
 
@@ -0,0 +1,381 @@
+ import json
+ import re
+ from urllib.parse import urlparse, parse_qs
+
+ import execjs
+ import numpy as np
+ from scapy.layers.dns import DNS
+
+ from xbase_util.xbase_constant import parse_path
+
+
+ def parse_expression(expression):
+     if expression:
+         with open(parse_path, "r") as f:
+             ctx = execjs.compile(f.read())
+             return ctx.call("parse_exp", expression)
+     else:
+         return None
+
+
+ def get_cookie_end_with_semicolon_count(text_data):
+     count = 0
+     for text in text_data.replace("-", "_").lower().split("\n"):
+         item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+         if "cookie:" in item_text and f"{item_text}".endswith(";"):
+             count = count + 1
+     if count == 0:
+         return -1
+     return count
+
+
+ def get_ua_duplicate_count(text_data):
+     ua_list = []
+     for text in text_data.replace("-", "_").lower().split("\n"):
+         item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+         if "user_agent:" in item_text and f"{item_text}".endswith(";"):
+             ua_list.append(item_text.replace("user_agent:", ""))
+     count = len(set(ua_list))
+     if count == 0:
+         return -1
+     return count
+
+
+ def get_res_status_code_list(text_data):
+     value_res = []
+     res = []
+     num_1 = 0
+     num_2 = 0
+     num_3 = 0
+     num_4 = 0
+     num_5 = 0
+
+     res.extend([item for item in text_data.split("\n") if item.startswith("HTTP/")])
+     for item in res:
+         m = re.search(r"\b(\d{3})\b", item)
+         if m:
+             value_res.append(int(m.group(0)))
+     for value in value_res:
+         if 0 <= value < 200:
+             num_1 = num_1 + 1
+         if 200 <= value < 300:
+             num_2 = num_2 + 1
+         if 300 <= value < 400:
+             num_3 = num_3 + 1
+         if 400 <= value < 500:
+             num_4 = num_4 + 1
+         if 500 <= value < 600:
+             num_5 = num_5 + 1
+     return num_1, num_2, num_3, num_4, num_5
+
+
+ def get_packets_percentage(session, isReq):
+     if "source.bytes" in session and "destination.bytes" in session:
+         total_bytes = session["source.bytes"] + session["destination.bytes"]
+         if total_bytes > 0:
+             if isReq:
+                 return session["source.bytes"] / total_bytes
+             else:
+                 return session["destination.bytes"] / total_bytes
+         else:
+             return 0.0  # 避免除以0的情况
+     else:
+         return 0.5
+
+
+ def split_samples(sample, per_subsection):
+     num_subsections = len(sample) // per_subsection
+     remainder = len(sample) % per_subsection
+     subsection_sizes = [per_subsection] * num_subsections
+     if remainder > 0:
+         subsection_sizes.append(remainder)
+         num_subsections += 1
+     return num_subsections, subsection_sizes
+
+
+ def split_process(subsection, process_count):
+     subsection_per_process = len(subsection) // process_count
+     remainder = len(subsection) % process_count
+     lengths = []
+     start = 0
+     for i in range(process_count):
+         end = start + subsection_per_process + (1 if i < remainder else 0)
+         lengths.append(end - start)
+         start = end
+     return lengths
+
+
+ def build_es_expression(size, start_time, end_time, arkime_expression):
+     expression = {"query": {"bool": {"filter": []}}}
+     try:
+         if size:
+             expression['size'] = size
+         if start_time:
+             expression['query']['bool']['filter'].append(
+                 {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
+         if end_time:
+             expression['query']['bool']['filter'].append(
+                 {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
+         arkime_2_es = parse_expression(arkime_expression)
+         if arkime_2_es:
+             expression['query']['bool']['filter'].append(arkime_2_es)
+         return expression
+     except Exception as e:
+         print(f"请安装nodejs{e}")
+         print(arkime_expression)
+         exit(1)
+
+
+ def get_uri_depth(url):
+     match = re.match(r'^[^?]*', url)
+     if match:
+         path = match.group(0)
+         # 去除协议和域名部分
+         path = re.sub(r'^https?://[^/]+', '', path)
+         segments = [segment for segment in path.split('/') if segment]
+         return len(segments)
+     return 0
+
+
+ def firstOrZero(param):
+     if type(param).__name__ == 'list':
+         if (len(param)) != 0:
+             return param[0]
+         else:
+             return 0
+     else:
+         return 0
+
+
+ def get_statistic_fields(packets):
+     length_ranges = {
+         "0_19": (0, 19),
+         "20_39": (20, 39),
+         "40_79": (40, 79),
+         "80_159": (80, 159),
+         "160_319": (160, 319),
+         "320_639": (320, 639),
+         "640_1279": (640, 1279),
+         "1280_2559": (1280, 2559),
+         "2560_5119": (2560, 5119),
+         "more_than_5120": (5120, float('inf'))
+     }
+
+     def get_length_range(le):
+         for key, (min_len, max_len) in length_ranges.items():
+             if min_len <= le <= max_len:
+                 return key
+         return "more_than_5120"
+
+     packet_lengths = {key: [] for key in length_ranges}
+     total_length = 0
+     packet_len_total_count = len(packets)
+     for packet_item in packets:
+         length = len(packet_item)
+         length_range = get_length_range(length)
+         packet_lengths[length_range].append(length)
+         total_length += length
+     total_time = packets[-1].time - packets[0].time if packet_len_total_count > 1 else 1
+     packet_len_average = round(total_length / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+     packet_len_min = min(len(packet_item) for packet_item in packets) if packets else 0
+     packet_len_max = max(len(packet_item) for packet_item in packets) if packets else 0
+     packet_len_rate = round((packet_len_total_count / total_time) / 1000, 5) if total_time > 0 else 0
+     packet_size = [len(p) for p in packets]
+     field_map = {
+         "packet_size_mean": float(round(np.mean(packet_size), 5)),
+         "packet_size_variance": float(round(np.var(packet_size), 5)),
+         'packet_len_total_count': packet_len_total_count,
+         'packet_len_total_average': packet_len_average,
+         'packet_len_total_min': packet_len_min,
+         'packet_len_total_max': packet_len_max,
+         'packet_len_total_rate': float(packet_len_rate),
+         'packet_len_total_percent': 1,
+     }
+     for length_range, lengths in packet_lengths.items():
+         count = len(lengths)
+         if count > 0:
+             average = round(sum(lengths) / count, 5)
+             min_val = min(lengths)
+             max_val = max(lengths)
+         else:
+             average = min_val = max_val = 0
+         packet_len_rate = round((count / total_time) / 1000, 5) if total_time > 0 else 0
+         percent = round(count / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+         field_map.update({
+             f"packet_len_{length_range}_count": count,
+             f"packet_len_{length_range}_average": average,
+             f"packet_len_{length_range}_min": min_val,
+             f"packet_len_{length_range}_max": max_val,
+             f"packet_len_{length_range}_rate": float(packet_len_rate),
+             f"packet_len_{length_range}_percent": percent
+         })
+     return field_map
+
+
+ def get_dns_domain(packets):
+     domain_name = ""
+     for packet_item in packets:
+         if DNS in packet_item:
+             dns_layer = packet_item[DNS]
+             if dns_layer.qd:
+                 try:
+                     domain_name = dns_layer.qd.qname.decode('utf-8')
+                     # print(f"dns域名:{domain_name}")
+                 except Exception:
+                     domain_name = str(dns_layer.qd.qname)
+                     print(f"dns域名编码失败的字符串:{domain_name}")
+                 break
+     if domain_name.endswith("."):
+         domain_name = domain_name[:-1]
+     return domain_name
+
+
+ def extract_session_fields(cls, origin_list, geoUtil):
+     res = []
+     for item in origin_list:
+         _source = item.get("_source", {})
+         source = _source.get("source", {})
+         tcpflags = _source.get("tcpflags", {})
+         destination = _source.get("destination", {})
+         http = _source.get("http", {})
+         dns = _source.get("dns", {})
+         tls = _source.get("tls", {})
+         uri = http.get('uri', [])
+         uri_length = [len(u) for u in uri]
+         uri_depth = [get_uri_depth(u) for u in uri]
+         uri_filename_length = [cls.get_uri_filename_length(u) for u in uri]
+         uri_params = [cls.get_url_param_count(u) for u in uri]
+         res.append(geoUtil.get_geo_by_ip({
+             "id": item["_id"],
+             "node": _source.get("node", ""),
+             "segmentCnt": _source.get("segmentCnt", 0),
+             "tcpflags.rst": tcpflags.get("rst", 0),
+             "tcpflags.ack": tcpflags.get("ack", 0),
+             "tcpflags.syn": tcpflags.get("syn", 0),
+             "tcpflags.urg": tcpflags.get("urg", 0),
+             "tcpflags.psh": tcpflags.get("psh", 0),
+             "tcpflags.syn-ack": tcpflags.get("syn-ack", 0),
+             "tcpflags.fin": tcpflags.get("fin", 0),
+             "source.ip": source.get("ip", ""),
+             "destination.ip": destination.get("ip", ""),
+             "source.port": source.get("port", ""),
+             "source.packets": source.get("packets", ""),
+             "source.bytes": source.get("bytes", 0),
+             "destination.port": destination.get("port", ""),
+             "destination.bytes": destination.get("bytes", 0),
+             "destination.packets": destination.get("packets", 0),
+             "initRTT": _source.get("initRTT", ""),
+             "firstPacket": _source.get("firstPacket", 0),
+             "lastPacket": _source.get("lastPacket", 0),
+             "ipProtocol": _source.get("ipProtocol", 0),
+             "protocolCnt": _source.get("protocolCnt", 0),
+             "protocol": _source.get("protocol", []),
+             "server.bytes": _source.get("server", {}).get("bytes", 0),
+             "totDataBytes": _source.get("totDataBytes", 0),
+             "network.packets": _source.get("network", {}).get("packets", 0),
+             "network.bytes": _source.get("network", {}).get("bytes", 0),
+             "length": _source.get("length", 0),
+             "client.bytes": _source.get("client", {}).get("bytes", 0),
+             "http.uri": uri,
+             "http.uri_length_mean": round(np.nan_to_num(np.mean(uri_length)), 5),
+             "http.uri_length_var": round(np.nan_to_num(np.var(uri_length)), 5),
+             "http.uri_param_count_mean": round(np.nan_to_num(np.mean(uri_params)), 5),
+             "http.uri_param_count_var": round(np.nan_to_num(np.var(uri_params)), 5),
+             "http.uri_depth_mean": round(np.nan_to_num(np.mean(uri_depth)), 5),
+             "http.uri_depth_var": round(np.nan_to_num(np.var(uri_depth)), 5),
+             "http.uri_filename_length_mean": round(np.nan_to_num(np.mean(uri_filename_length)), 5),
+             "http.uri_filename_length_var": round(np.nan_to_num(np.var(uri_filename_length)), 5),
+
+             "http.response-content-type": http.get("response-content-type", []),
+             "http.bodyMagicCnt": http.get("bodyMagicCnt", 0),
+             "http.statuscodeCnt": http.get("statusCodeCnt", 0),
+             "http.clientVersionCnt": http.get("clientVersionCnt", 0),
+             "http.response-content-typeCnt": http.get("response-content-typeCnt", 0),
+             "http.xffIpCnt": http.get("xffIpCnt", 0),
+             "http.requestHeaderCnt": http.get("requestHeaderCnt", 0),
+             "http.serverVersion": http.get("serverVersion", []),
+             "http.serverVersionCnt": http.get("serverVersionCnt", 0),
+             "http.responseHeaderCnt": http.get("responseHeaderCnt", 0),
+             "http.xffIp": http.get("xffIp", []),
+             "http.clientVersion": http.get("clientVersion", []),
+             "http.uriTokens": http.get("uriTokens", ""),
+             "http.useragentCnt": http.get("useragentCnt", 0),
+             "http.statuscode": http.get("statusCode", []),
+             "http.bodyMagic": http.get("bodyMagic", []),
+             "http.request-content-type": http.get("request-content-type", []),
+             "http.uriCnt": http.get("uriCnt", 0),
+
+             "http.useragent": http.get("useragent", ""),
+             "http.keyCnt": http.get("keyCnt", 0),
+             "http.request-referer": http.get("requestReferer", []),
+             "http.request-refererCnt": http.get("requestRefererCnt", 0),
+             "http.path": http.get("path", []),
+             "http.hostCnt": http.get("hostCnt", 0),
+             "http.response-server": http.get("response-server", []),
+             "http.pathCnt": http.get("pathCnt", 0),
+             "http.useragentTokens": http.get("useragentTokens", ""),
+             "http.methodCnt": http.get("methodCnt", 0),
+             "http.method": http.get("method", []),
+             "http.method-GET": http.get("method-GET", 0),
+             "http.method-POST": http.get("method-POST", 0),
+             "http.key": http.get("key", []),
+             "http.hostTokens": http.get("hostTokens", ""),
+             "http.requestHeader": http.get("requestHeader", []),
+             "http.responseHeader": http.get("responseHeader", []),
+
+             "dns.ASN": dns.get("ASN", []),
+             "dns.RIR": dns.get("RIR", []),
+             "dns.GEO": dns.get("GEO", []),
+             "dns.alpn": dns.get("https.alpn", []),
+             "dns.alpnCnt": dns.get("https.alpnCnt", 0),
+             "dns.ip": dns.get("ip", []),
+             "dns.ipCnt": dns.get("ipCnt", 0),
+             "dns.OpCode": dns.get("opcode", []),
+             "dns.OpCodeCnt": dns.get("opcodeCnt", 0),
+             "dns.Puny": dns.get("puny", []),
+             "dns.PunyCnt": dns.get("puntCnt", 0),
+             "dns.QueryClass": dns.get("qc", []),
+             "dns.QueryClassCnt": dns.get("qcCnt", 0),
+             "dns.QueryType": dns.get("qt", []),
+             "dns.QueryTypeCnt": dns.get("qtCnt", 0),
+             "dns.status": dns.get("status", []),
+             "dns.hostCnt": json.dumps(dns.get("hostCnt", 0)),
+             "dns.host": json.dumps(dns.get("host", [])),
+             "dns.statusCnt": dns.get("statusCnt", 0),
+
+             "tls.cipher": tls.get("cipher", []),
+             "tls.cipherCnt": tls.get("cipherCnt", 0),
+             "tls.dstSessionId": tls.get("dstSessionId", []),
+             "tls.ja3": tls.get("ja3", []),
+             "tls.ja3Cnt": tls.get("ja3Cnt", 0),
+             "tls.ja3s": tls.get("ja3s", []),
+             "tls.ja3sCnt": tls.get("ja3sCnt", 0),
+             "tls.ja4": tls.get("ja4", []),
+             "tls.ja4Cnt": tls.get("ja4Cnt", 0),
+             "tls.srcSessionId": tls.get("srcSessionId", []),
+             "tls.version": tls.get("version", []),
+             "tls.versionCnt": tls.get("versionCnt", 0),
+             "tls.ja4_r": tls.get("versionCnt", 0),
+             "tls.ja4_rCnt": tls.get("versionCnt", 0),
+             "packetPos": json.dumps(_source.get("packetPos", [])),
+             "traffic_type": item.get("traffic_type", ""),
+             "PROTOCOL": item.get("PROTOCOL", ""),
+             "DENY_METHOD": item.get("DENY_METHOD", ""),
+             "THREAT_SUMMARY": item.get("THREAT_SUMMARY", ""),
+             "SEVERITY": item.get("SEVERITY", ""),
+         }))
+     return res
+
+
+ def get_url_param_count(url):
+     query = urlparse(url).query  # 解析 URL 中的查询字符串
+     params = parse_qs(query)  # 解析查询字符串为字典
+     return len(params)
+
+
+ def get_uri_filename_length(uri):
+     match = re.search(r'\.([^./?#]+)$', uri)
+     if match:
+         extension = match.group(0)
+         return len(extension)
+     return 0
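The new module above is added wholesale; its file name is not shown in this diff, so the import path below is a guess. A short sketch of the URL and status-line helpers it introduces:

    # Sketch only: the module path is assumed, the functions are the ones added above.
    from xbase_util.packet_util import (get_res_status_code_list, get_uri_depth,
                                        get_url_param_count, split_samples)

    raw = "GET /a/b/c?x=1&y=2 HTTP/1.1\nHTTP/1.1 200 OK\nHTTP/1.1 404 Not Found\n"
    print(get_res_status_code_list(raw))                   # (0, 1, 0, 1, 0) by status class
    print(get_uri_depth("https://example.com/a/b/c?x=1"))  # 3 path segments
    print(get_url_param_count("/a/b/c?x=1&y=2"))           # 2 query parameters
    print(split_samples(list(range(10)), 4))               # (3, [4, 4, 2])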
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: xbase-util
- Version: 0.1.1
+ Version: 0.1.2
  Summary: 网络安全基础工具
  Home-page: https://gitee.com/jimonik/xbase_util.git
  Author: xyt
@@ -1,82 +0,0 @@
- import re
-
- import execjs
-
- from xbase_util.xbase_constant import parse_path
-
-
- def parse_expression(expression):
-     if expression:
-         with open(parse_path, "r") as f:
-             ctx = execjs.compile(f.read())
-             return ctx.call("parse_exp", expression)
-     else:
-         return None
-
-
- # def geo_reader():
- #     return geoip2.database.Reader(geo_path)
-
-
- def split_samples(sample, per_subsection):
-     num_subsections = len(sample) // per_subsection
-     remainder = len(sample) % per_subsection
-     subsection_sizes = [per_subsection] * num_subsections
-     if remainder > 0:
-         subsection_sizes.append(remainder)
-         num_subsections += 1
-     return num_subsections, subsection_sizes
-
-
- def split_process(subsection, process_count):
-     subsection_per_process = len(subsection) // process_count
-     remainder = len(subsection) % process_count
-     lengths = []
-     start = 0
-     for i in range(process_count):
-         end = start + subsection_per_process + (1 if i < remainder else 0)
-         lengths.append(end - start)
-         start = end
-     return lengths
-
-
- def build_es_expression(size, start_time, end_time, arkime_expression):
-     expression = {"query": {"bool": {"filter": []}}}
-     try:
-         if size:
-             expression['size'] = size
-         if start_time:
-             expression['query']['bool']['filter'].append(
-                 {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
-         if end_time:
-             expression['query']['bool']['filter'].append(
-                 {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
-         arkime_2_es = parse_expression(arkime_expression)
-         if arkime_2_es:
-             expression['query']['bool']['filter'].append(arkime_2_es)
-         return expression
-     except Exception as e:
-         print(f"请安装nodejs{e}")
-         print(arkime_expression)
-         exit(1)
-
-
- def get_uri_depth(url):
-     match = re.match(r'^[^?]*', url)
-     if match:
-         path = match.group(0)
-         # 去除协议和域名部分
-         path = re.sub(r'^https?://[^/]+', '', path)
-         segments = [segment for segment in path.split('/') if segment]
-         return len(segments)
-     return 0
-
-
- def firstOrZero(param):
-     if type(param).__name__ == 'list':
-         if (len(param)) != 0:
-             return param[0]
-         else:
-             return 0
-     else:
-         return 0