xbase-util 0.1.0__tar.gz → 0.1.2__tar.gz

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: xbase_util
- Version: 0.1.0
+ Version: 0.1.2
  Summary: 网络安全基础工具
  Home-page: https://gitee.com/jimonik/xbase_util.git
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
  from setuptools import find_packages

  setup(name="xbase_util",
-       version="0.1.0",
+       version="0.1.2",
        description="网络安全基础工具",
        long_description="包含提取,预测,训练的基础工具",
        author="xyt",
@@ -0,0 +1,107 @@
+ import re
+
+ import geoip2.database
+
+ from xbase_util.xbase_constant import geo_path
+
+
+ class GeoUtil:
+     def __init__(self):
+         self.reader = geoip2.database.Reader(geo_path)
+         print("初始化:GeoUtil")
+
+     @staticmethod
+     def is_stable_name(ip):
+         ip_match = r"^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[1-9]|0?[1-9]0)\.)(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){2}(?:25[0-4]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[1-9]|0?[1-9]0)$"
+         if re.match(ip_match, ip):
+             # Split the IPv4 address into octets
+             octets = ip.split('.')
+             first_octet = int(octets[0])
+             second_octet = int(octets[1])
+             # Local (intranet) address?
+             if ip == "10.28.0.0" or ip.startswith("10.28.0.") or ip.startswith("10.28.0.0/16"):
+                 return "LOCAL_ADDRESS"
+             # VPN address?
+             if ip.startswith("10.28.15"):
+                 return "VPN_ADDRESS"
+             # Branch-office address?
+             if (192 <= first_octet <= 195) or (first_octet == 192 and 144 <= second_octet <= 255):
+                 return "DEPARTMENT_ADDRESS"
+         # No stable label applies: fall back to a GeoIP lookup
+         return False
+
+     @staticmethod
+     def fill_geo_empty(value):
+         if value == "" or value is None:
+             return "IP_GEO_EMPTY"
+         else:
+             return value
+
+     def get_geo_by_ip(self, geo_map):
+         source_ip = geo_map["source.ip"]
+         source_ip_name = self.is_stable_name(source_ip)
+         if source_ip_name is False:  # query GeoIP only when no internal label applies
+             try:
+                 response = self.reader.city(source_ip)
+                 geo_map["source.ip_Country_IsoCode"] = self.fill_geo_empty(response.country.iso_code)
+                 geo_map['source.ip_Country_Name'] = self.fill_geo_empty(response.country.name)
+                 geo_map["source.ip_Country_SpecificName"] = self.fill_geo_empty(
+                     response.subdivisions.most_specific.name)
+                 geo_map['source.ip_Country_SpecificIsoCode'] = self.fill_geo_empty(
+                     response.subdivisions.most_specific.iso_code)
+                 geo_map['source.ip_City_Name'] = self.fill_geo_empty(response.city.name)
+                 geo_map['source.ip_City_PostalCode'] = self.fill_geo_empty(response.postal.code)
+                 geo_map['source.ip_Location_Latitude'] = self.fill_geo_empty(response.location.latitude)
+                 geo_map["source.ip_Location_Longitude"] = self.fill_geo_empty(response.location.longitude)
+             except Exception:
+                 geo_map["source.ip_Country_IsoCode"] = "IP_GEO_EMPTY"
+                 geo_map['source.ip_Country_Name'] = "IP_GEO_EMPTY"
+                 geo_map["source.ip_Country_SpecificName"] = "IP_GEO_EMPTY"
+                 geo_map['source.ip_Country_SpecificIsoCode'] = "IP_GEO_EMPTY"
+                 geo_map['source.ip_City_Name'] = "IP_GEO_EMPTY"
+                 geo_map['source.ip_City_PostalCode'] = "IP_GEO_EMPTY"
+                 geo_map['source.ip_Location_Latitude'] = "IP_GEO_EMPTY"
+                 geo_map["source.ip_Location_Longitude"] = "IP_GEO_EMPTY"
+         else:  # internal address: reuse the stable label for every geo field
+             geo_map["source.ip_Country_IsoCode"] = source_ip_name
+             geo_map['source.ip_Country_Name'] = source_ip_name
+             geo_map["source.ip_Country_SpecificName"] = source_ip_name
+             geo_map['source.ip_Country_SpecificIsoCode'] = source_ip_name
+             geo_map['source.ip_City_Name'] = source_ip_name
+             geo_map['source.ip_City_PostalCode'] = source_ip_name
+             geo_map['source.ip_Location_Latitude'] = source_ip_name
+             geo_map["source.ip_Location_Longitude"] = source_ip_name
+         destination_ip = geo_map["destination.ip"]
+         destination_ip_name = self.is_stable_name(destination_ip)
+         if destination_ip_name is False:
+             try:
+                 response = self.reader.city(destination_ip)
+                 geo_map["destination.ip_Country_IsoCode"] = self.fill_geo_empty(response.country.iso_code)
+                 geo_map['destination.ip_Country_Name'] = self.fill_geo_empty(response.country.name)
+                 geo_map["destination.ip_Country_SpecificName"] = self.fill_geo_empty(
+                     response.subdivisions.most_specific.name)
+                 geo_map['destination.ip_Country_SpecificIsoCode'] = self.fill_geo_empty(
+                     response.subdivisions.most_specific.iso_code)
+                 geo_map['destination.ip_City_Name'] = self.fill_geo_empty(response.city.name)
+                 geo_map['destination.ip_City_PostalCode'] = self.fill_geo_empty(response.postal.code)
+                 geo_map['destination.ip_Location_Latitude'] = self.fill_geo_empty(response.location.latitude)
+                 geo_map["destination.ip_Location_Longitude"] = self.fill_geo_empty(response.location.longitude)
+             except Exception:
+                 geo_map["destination.ip_Country_IsoCode"] = "IP_GEO_EMPTY"
+                 geo_map['destination.ip_Country_Name'] = "IP_GEO_EMPTY"
+                 geo_map["destination.ip_Country_SpecificName"] = "IP_GEO_EMPTY"
+                 geo_map['destination.ip_Country_SpecificIsoCode'] = "IP_GEO_EMPTY"
+                 geo_map['destination.ip_City_Name'] = "IP_GEO_EMPTY"
+                 geo_map['destination.ip_City_PostalCode'] = "IP_GEO_EMPTY"
+                 geo_map['destination.ip_Location_Latitude'] = "IP_GEO_EMPTY"
+                 geo_map["destination.ip_Location_Longitude"] = "IP_GEO_EMPTY"
+         else:
+             geo_map["destination.ip_Country_IsoCode"] = destination_ip_name
+             geo_map['destination.ip_Country_Name'] = destination_ip_name
+             geo_map["destination.ip_Country_SpecificName"] = destination_ip_name
+             geo_map['destination.ip_Country_SpecificIsoCode'] = destination_ip_name
+             geo_map['destination.ip_City_Name'] = destination_ip_name
+             geo_map['destination.ip_City_PostalCode'] = destination_ip_name
+             geo_map['destination.ip_Location_Latitude'] = destination_ip_name
+             geo_map["destination.ip_Location_Longitude"] = destination_ip_name
+         return geo_map
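A minimal usage sketch for the new GeoUtil class, assuming the bundled GeoLite2-City.mmdb asset is installed with the package and that session dicts use the same "source.ip"/"destination.ip" keys as above (the addresses below are placeholders):

from xbase_util.geo_util import GeoUtil

geo = GeoUtil()
session = {"source.ip": "8.8.8.8", "destination.ip": "10.28.0.12"}  # placeholder addresses
enriched = geo.get_geo_by_ip(session)
# Public addresses get GeoIP fields; internal ranges get labels such as LOCAL_ADDRESS
print(enriched["source.ip_Country_IsoCode"], enriched["destination.ip_Country_IsoCode"])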
@@ -0,0 +1,5 @@
+ import os
+
+ current_dir = os.path.dirname(__file__)
+ parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
+ geo_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'GeoLite2-City.mmdb')
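Both constants resolve relative to the installed xbase_util package, so the xbase_util_assets directory (arkimeparse.js and GeoLite2-City.mmdb) must ship alongside it. A quick sanity check, assuming the package and its assets are installed:

import os

from xbase_util.xbase_constant import geo_path, parse_path

# Both should print True when the asset directory was packaged correctly
print(os.path.abspath(parse_path), os.path.exists(parse_path))
print(os.path.abspath(geo_path), os.path.exists(geo_path))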
@@ -0,0 +1,381 @@
+ import json
+ import re
+ from urllib.parse import urlparse, parse_qs
+
+ import execjs
+ import numpy as np
+ from scapy.layers.dns import DNS
+
+ from xbase_util.xbase_constant import parse_path
+
+
+ def parse_expression(expression):
+     if expression:
+         with open(parse_path, "r") as f:
+             ctx = execjs.compile(f.read())
+             return ctx.call("parse_exp", expression)
+     else:
+         return None
+
+
+ def get_cookie_end_with_semicolon_count(text_data):
+     count = 0
+     for text in text_data.replace("-", "_").lower().split("\n"):
+         item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+         if "cookie:" in item_text and item_text.endswith(";"):
+             count = count + 1
+     if count == 0:
+         return -1
+     return count  # number of Cookie headers that end with ';'
+
+
+ def get_ua_duplicate_count(text_data):
+     ua_list = []
+     for text in text_data.replace("-", "_").lower().split("\n"):
+         item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
+         if "user_agent:" in item_text and item_text.endswith(";"):
+             ua_list.append(item_text.replace("user_agent:", ""))
+     count = len(set(ua_list))  # number of distinct User-Agent values
+     if count == 0:
+         return -1
+     return count
+
+
+ def get_res_status_code_list(text_data):
+     value_res = []
+     res = []
+     num_1 = 0
+     num_2 = 0
+     num_3 = 0
+     num_4 = 0
+     num_5 = 0
+
+     res.extend([item for item in text_data.split("\n") if item.startswith("HTTP/")])
+     for item in res:
+         m = re.search(r"\b(\d{3})\b", item)
+         if m:
+             value_res.append(int(m.group(0)))
+     for value in value_res:
+         if 0 <= value < 200:
+             num_1 = num_1 + 1
+         if 200 <= value < 300:
+             num_2 = num_2 + 1
+         if 300 <= value < 400:
+             num_3 = num_3 + 1
+         if 400 <= value < 500:
+             num_4 = num_4 + 1
+         if 500 <= value < 600:
+             num_5 = num_5 + 1
+     return num_1, num_2, num_3, num_4, num_5
+
+
+ def get_packets_percentage(session, isReq):
+     if "source.bytes" in session and "destination.bytes" in session:
+         total_bytes = session["source.bytes"] + session["destination.bytes"]
+         if total_bytes > 0:
+             if isReq:
+                 return session["source.bytes"] / total_bytes
+             else:
+                 return session["destination.bytes"] / total_bytes
+         else:
+             return 0.0  # avoid division by zero
+     else:
+         return 0.5
+
+
+ def split_samples(sample, per_subsection):
+     num_subsections = len(sample) // per_subsection
+     remainder = len(sample) % per_subsection
+     subsection_sizes = [per_subsection] * num_subsections
+     if remainder > 0:
+         subsection_sizes.append(remainder)
+         num_subsections += 1
+     return num_subsections, subsection_sizes
+
+
+ def split_process(subsection, process_count):
+     subsection_per_process = len(subsection) // process_count
+     remainder = len(subsection) % process_count
+     lengths = []
+     start = 0
+     for i in range(process_count):
+         end = start + subsection_per_process + (1 if i < remainder else 0)
+         lengths.append(end - start)
+         start = end
+     return lengths
+
+
+ def build_es_expression(size, start_time, end_time, arkime_expression):
+     expression = {"query": {"bool": {"filter": []}}}
+     try:
+         if size:
+             expression['size'] = size
+         if start_time:
+             expression['query']['bool']['filter'].append(
+                 {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
+         if end_time:
+             expression['query']['bool']['filter'].append(
+                 {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
+         arkime_2_es = parse_expression(arkime_expression)
+         if arkime_2_es:
+             expression['query']['bool']['filter'].append(arkime_2_es)
+         return expression
+     except Exception as e:
+         print(f"请安装nodejs{e}")
+         print(arkime_expression)
+         exit(1)
+
+
+ def get_uri_depth(url):
+     match = re.match(r'^[^?]*', url)
+     if match:
+         path = match.group(0)
+         # strip the scheme and host
+         path = re.sub(r'^https?://[^/]+', '', path)
+         segments = [segment for segment in path.split('/') if segment]
+         return len(segments)
+     return 0
+
+
+ def firstOrZero(param):
+     if isinstance(param, list):
+         if len(param) != 0:
+             return param[0]
+         else:
+             return 0
+     else:
+         return 0
+
+
+ def get_statistic_fields(packets):
+     length_ranges = {
+         "0_19": (0, 19),
+         "20_39": (20, 39),
+         "40_79": (40, 79),
+         "80_159": (80, 159),
+         "160_319": (160, 319),
+         "320_639": (320, 639),
+         "640_1279": (640, 1279),
+         "1280_2559": (1280, 2559),
+         "2560_5119": (2560, 5119),
+         "more_than_5120": (5120, float('inf'))
+     }
+
+     def get_length_range(le):
+         for key, (min_len, max_len) in length_ranges.items():
+             if min_len <= le <= max_len:
+                 return key
+         return "more_than_5120"
+
+     packet_lengths = {key: [] for key in length_ranges}
+     total_length = 0
+     packet_len_total_count = len(packets)
+     for packet_item in packets:
+         length = len(packet_item)
+         length_range = get_length_range(length)
+         packet_lengths[length_range].append(length)
+         total_length += length
+     total_time = packets[-1].time - packets[0].time if packet_len_total_count > 1 else 1
+     packet_len_average = round(total_length / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+     packet_len_min = min(len(packet_item) for packet_item in packets) if packets else 0
+     packet_len_max = max(len(packet_item) for packet_item in packets) if packets else 0
+     packet_len_rate = round((packet_len_total_count / total_time) / 1000, 5) if total_time > 0 else 0
+     packet_size = [len(p) for p in packets]
+     field_map = {
+         "packet_size_mean": float(round(np.mean(packet_size), 5)),
+         "packet_size_variance": float(round(np.var(packet_size), 5)),
+         'packet_len_total_count': packet_len_total_count,
+         'packet_len_total_average': packet_len_average,
+         'packet_len_total_min': packet_len_min,
+         'packet_len_total_max': packet_len_max,
+         'packet_len_total_rate': float(packet_len_rate),
+         'packet_len_total_percent': 1,
+     }
+     for length_range, lengths in packet_lengths.items():
+         count = len(lengths)
+         if count > 0:
+             average = round(sum(lengths) / count, 5)
+             min_val = min(lengths)
+             max_val = max(lengths)
+         else:
+             average = min_val = max_val = 0
+         packet_len_rate = round((count / total_time) / 1000, 5) if total_time > 0 else 0
+         percent = round(count / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
+         field_map.update({
+             f"packet_len_{length_range}_count": count,
+             f"packet_len_{length_range}_average": average,
+             f"packet_len_{length_range}_min": min_val,
+             f"packet_len_{length_range}_max": max_val,
+             f"packet_len_{length_range}_rate": float(packet_len_rate),
+             f"packet_len_{length_range}_percent": percent
+         })
+     return field_map
+
+
+ def get_dns_domain(packets):
+     domain_name = ""
+     for packet_item in packets:
+         if DNS in packet_item:
+             dns_layer = packet_item[DNS]
+             if dns_layer.qd:
+                 try:
+                     domain_name = dns_layer.qd.qname.decode('utf-8')
+                     # print(f"DNS domain: {domain_name}")
+                 except Exception:
+                     domain_name = str(dns_layer.qd.qname)
+                     print(f"dns域名编码失败的字符串:{domain_name}")
+                 break
+     if domain_name.endswith("."):
+         domain_name = domain_name[:-1]
+     return domain_name
+
+
+ def extract_session_fields(cls, origin_list, geoUtil):
+     res = []
+     for item in origin_list:
+         _source = item.get("_source", {})
+         source = _source.get("source", {})
+         tcpflags = _source.get("tcpflags", {})
+         destination = _source.get("destination", {})
+         http = _source.get("http", {})
+         dns = _source.get("dns", {})
+         tls = _source.get("tls", {})
+         uri = http.get('uri', [])
+         uri_length = [len(u) for u in uri]
+         uri_depth = [get_uri_depth(u) for u in uri]
+         uri_filename_length = [cls.get_uri_filename_length(u) for u in uri]
+         uri_params = [cls.get_url_param_count(u) for u in uri]
+         res.append(geoUtil.get_geo_by_ip({
+             "id": item["_id"],
+             "node": _source.get("node", ""),
+             "segmentCnt": _source.get("segmentCnt", 0),
+             "tcpflags.rst": tcpflags.get("rst", 0),
+             "tcpflags.ack": tcpflags.get("ack", 0),
+             "tcpflags.syn": tcpflags.get("syn", 0),
+             "tcpflags.urg": tcpflags.get("urg", 0),
+             "tcpflags.psh": tcpflags.get("psh", 0),
+             "tcpflags.syn-ack": tcpflags.get("syn-ack", 0),
+             "tcpflags.fin": tcpflags.get("fin", 0),
+             "source.ip": source.get("ip", ""),
+             "destination.ip": destination.get("ip", ""),
+             "source.port": source.get("port", ""),
+             "source.packets": source.get("packets", ""),
+             "source.bytes": source.get("bytes", 0),
+             "destination.port": destination.get("port", ""),
+             "destination.bytes": destination.get("bytes", 0),
+             "destination.packets": destination.get("packets", 0),
+             "initRTT": _source.get("initRTT", ""),
+             "firstPacket": _source.get("firstPacket", 0),
+             "lastPacket": _source.get("lastPacket", 0),
+             "ipProtocol": _source.get("ipProtocol", 0),
+             "protocolCnt": _source.get("protocolCnt", 0),
+             "protocol": _source.get("protocol", []),
+             "server.bytes": _source.get("server", {}).get("bytes", 0),
+             "totDataBytes": _source.get("totDataBytes", 0),
+             "network.packets": _source.get("network", {}).get("packets", 0),
+             "network.bytes": _source.get("network", {}).get("bytes", 0),
+             "length": _source.get("length", 0),
+             "client.bytes": _source.get("client", {}).get("bytes", 0),
+             "http.uri": uri,
+             "http.uri_length_mean": round(np.nan_to_num(np.mean(uri_length)), 5),
+             "http.uri_length_var": round(np.nan_to_num(np.var(uri_length)), 5),
+             "http.uri_param_count_mean": round(np.nan_to_num(np.mean(uri_params)), 5),
+             "http.uri_param_count_var": round(np.nan_to_num(np.var(uri_params)), 5),
+             "http.uri_depth_mean": round(np.nan_to_num(np.mean(uri_depth)), 5),
+             "http.uri_depth_var": round(np.nan_to_num(np.var(uri_depth)), 5),
+             "http.uri_filename_length_mean": round(np.nan_to_num(np.mean(uri_filename_length)), 5),
+             "http.uri_filename_length_var": round(np.nan_to_num(np.var(uri_filename_length)), 5),
+
+             "http.response-content-type": http.get("response-content-type", []),
+             "http.bodyMagicCnt": http.get("bodyMagicCnt", 0),
+             "http.statuscodeCnt": http.get("statusCodeCnt", 0),
+             "http.clientVersionCnt": http.get("clientVersionCnt", 0),
+             "http.response-content-typeCnt": http.get("response-content-typeCnt", 0),
+             "http.xffIpCnt": http.get("xffIpCnt", 0),
+             "http.requestHeaderCnt": http.get("requestHeaderCnt", 0),
+             "http.serverVersion": http.get("serverVersion", []),
+             "http.serverVersionCnt": http.get("serverVersionCnt", 0),
+             "http.responseHeaderCnt": http.get("responseHeaderCnt", 0),
+             "http.xffIp": http.get("xffIp", []),
+             "http.clientVersion": http.get("clientVersion", []),
+             "http.uriTokens": http.get("uriTokens", ""),
+             "http.useragentCnt": http.get("useragentCnt", 0),
+             "http.statuscode": http.get("statusCode", []),
+             "http.bodyMagic": http.get("bodyMagic", []),
+             "http.request-content-type": http.get("request-content-type", []),
+             "http.uriCnt": http.get("uriCnt", 0),
+
+             "http.useragent": http.get("useragent", ""),
+             "http.keyCnt": http.get("keyCnt", 0),
+             "http.request-referer": http.get("requestReferer", []),
+             "http.request-refererCnt": http.get("requestRefererCnt", 0),
+             "http.path": http.get("path", []),
+             "http.hostCnt": http.get("hostCnt", 0),
+             "http.response-server": http.get("response-server", []),
+             "http.pathCnt": http.get("pathCnt", 0),
+             "http.useragentTokens": http.get("useragentTokens", ""),
+             "http.methodCnt": http.get("methodCnt", 0),
+             "http.method": http.get("method", []),
+             "http.method-GET": http.get("method-GET", 0),
+             "http.method-POST": http.get("method-POST", 0),
+             "http.key": http.get("key", []),
+             "http.hostTokens": http.get("hostTokens", ""),
+             "http.requestHeader": http.get("requestHeader", []),
+             "http.responseHeader": http.get("responseHeader", []),
+
+             "dns.ASN": dns.get("ASN", []),
+             "dns.RIR": dns.get("RIR", []),
+             "dns.GEO": dns.get("GEO", []),
+             "dns.alpn": dns.get("https.alpn", []),
+             "dns.alpnCnt": dns.get("https.alpnCnt", 0),
+             "dns.ip": dns.get("ip", []),
+             "dns.ipCnt": dns.get("ipCnt", 0),
+             "dns.OpCode": dns.get("opcode", []),
+             "dns.OpCodeCnt": dns.get("opcodeCnt", 0),
+             "dns.Puny": dns.get("puny", []),
+             "dns.PunyCnt": dns.get("puntCnt", 0),
+             "dns.QueryClass": dns.get("qc", []),
+             "dns.QueryClassCnt": dns.get("qcCnt", 0),
+             "dns.QueryType": dns.get("qt", []),
+             "dns.QueryTypeCnt": dns.get("qtCnt", 0),
+             "dns.status": dns.get("status", []),
+             "dns.hostCnt": json.dumps(dns.get("hostCnt", 0)),
+             "dns.host": json.dumps(dns.get("host", [])),
+             "dns.statusCnt": dns.get("statusCnt", 0),
+
+             "tls.cipher": tls.get("cipher", []),
+             "tls.cipherCnt": tls.get("cipherCnt", 0),
+             "tls.dstSessionId": tls.get("dstSessionId", []),
+             "tls.ja3": tls.get("ja3", []),
+             "tls.ja3Cnt": tls.get("ja3Cnt", 0),
+             "tls.ja3s": tls.get("ja3s", []),
+             "tls.ja3sCnt": tls.get("ja3sCnt", 0),
+             "tls.ja4": tls.get("ja4", []),
+             "tls.ja4Cnt": tls.get("ja4Cnt", 0),
+             "tls.srcSessionId": tls.get("srcSessionId", []),
+             "tls.version": tls.get("version", []),
+             "tls.versionCnt": tls.get("versionCnt", 0),
+             "tls.ja4_r": tls.get("versionCnt", 0),
+             "tls.ja4_rCnt": tls.get("versionCnt", 0),
+             "packetPos": json.dumps(_source.get("packetPos", [])),
+             "traffic_type": item.get("traffic_type", ""),
+             "PROTOCOL": item.get("PROTOCOL", ""),
+             "DENY_METHOD": item.get("DENY_METHOD", ""),
+             "THREAT_SUMMARY": item.get("THREAT_SUMMARY", ""),
+             "SEVERITY": item.get("SEVERITY", ""),
+         }))
+     return res
+
+
+ def get_url_param_count(url):
+     query = urlparse(url).query  # parse the query string out of the URL
+     params = parse_qs(query)  # parse the query string into a dict
+     return len(params)
+
+
+ def get_uri_filename_length(uri):
+     match = re.search(r'\.([^./?#]+)$', uri)
+     if match:
+         extension = match.group(0)
+         return len(extension)
+     return 0
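A hypothetical usage sketch for the helpers above, assuming they live in xbase_util.xbase_util (the import path is a guess from the file list), scapy is installed, and "sample.pcap" is a placeholder for a readable capture file:

from scapy.all import rdpcap

from xbase_util.xbase_util import get_dns_domain, get_statistic_fields, split_samples

packets = rdpcap("sample.pcap")            # PacketList of captured frames
fields = get_statistic_fields(packets)     # per-length-bucket counts, rates and percentages
print(fields["packet_len_total_count"], fields["packet_size_mean"])
print(get_dns_domain(packets))             # first DNS query name, if any

# Partition a sample list into fixed-size chunks, e.g. for multiprocessing
num_chunks, chunk_sizes = split_samples(list(range(1000)), per_subsection=300)
print(num_chunks, chunk_sizes)             # 4 [300, 300, 300, 100]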
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: xbase-util
- Version: 0.1.0
+ Version: 0.1.2
  Summary: 网络安全基础工具
  Home-page: https://gitee.com/jimonik/xbase_util.git
  Author: xyt
@@ -3,8 +3,10 @@ setup.py
  xbase_util/__init__.py
  xbase_util/es_db_util.py
  xbase_util/esreq.py
+ xbase_util/geo_util.py
  xbase_util/handle_features_util.py
  xbase_util/pcap_util.py
+ xbase_util/xbase_constant.py
  xbase_util/xbase_util.py
  xbase_util.egg-info/PKG-INFO
  xbase_util.egg-info/SOURCES.txt
@@ -1,86 +0,0 @@
- import os
- import re
-
- import execjs
- import geoip2.database
-
- current_dir = os.path.dirname(__file__)
- parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
- geo_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'GeoLite2-City.mmdb')
-
-
- def parse_expression(expression):
-     if expression:
-         with open(parse_path, "r") as f:
-             ctx = execjs.compile(f.read())
-             return ctx.call("parse_exp", expression)
-     else:
-         return None
-
-
- def geo_reader():
-     return geoip2.database.Reader(geo_path)
-
-
- def split_samples(sample, per_subsection):
-     num_subsections = len(sample) // per_subsection
-     remainder = len(sample) % per_subsection
-     subsection_sizes = [per_subsection] * num_subsections
-     if remainder > 0:
-         subsection_sizes.append(remainder)
-         num_subsections += 1
-     return num_subsections, subsection_sizes
-
-
- def split_process(subsection, process_count):
-     subsection_per_process = len(subsection) // process_count
-     remainder = len(subsection) % process_count
-     lengths = []
-     start = 0
-     for i in range(process_count):
-         end = start + subsection_per_process + (1 if i < remainder else 0)
-         lengths.append(end - start)
-         start = end
-     return lengths
-
-
- def build_es_expression(size, start_time, end_time, arkime_expression):
-     expression = {"query": {"bool": {"filter": []}}}
-     try:
-         if size:
-             expression['size'] = size
-         if start_time:
-             expression['query']['bool']['filter'].append(
-                 {"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
-         if end_time:
-             expression['query']['bool']['filter'].append(
-                 {"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
-         arkime_2_es = parse_expression(arkime_expression)
-         if arkime_2_es:
-             expression['query']['bool']['filter'].append(arkime_2_es)
-         return expression
-     except Exception as e:
-         print(f"请安装nodejs{e}")
-         print(arkime_expression)
-         exit(1)
-
-
- def get_uri_depth(url):
-     match = re.match(r'^[^?]*', url)
-     if match:
-         path = match.group(0)
-         # strip the scheme and host
-         path = re.sub(r'^https?://[^/]+', '', path)
-         segments = [segment for segment in path.split('/') if segment]
-         return len(segments)
-     return 0
-
-
- def firstOrZero(param):
-     if type(param).__name__ == 'list':
-         if (len(param)) != 0:
-             return param[0]
-         else:
-             return 0
-     else:
-         return 0