xbase-util 0.1.0__tar.gz → 0.1.2__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xbase_util-0.1.0 → xbase_util-0.1.2}/PKG-INFO +1 -1
- {xbase_util-0.1.0 → xbase_util-0.1.2}/setup.py +1 -1
- xbase_util-0.1.2/xbase_util/geo_util.py +107 -0
- xbase_util-0.1.2/xbase_util/xbase_constant.py +5 -0
- xbase_util-0.1.2/xbase_util/xbase_util.py +381 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util.egg-info/SOURCES.txt +2 -0
- xbase_util-0.1.0/xbase_util/xbase_util.py +0 -86
- {xbase_util-0.1.0 → xbase_util-0.1.2}/README.md +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/setup.cfg +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util/__init__.py +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util/esreq.py +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.1.0 → xbase_util-0.1.2}/xbase_util_assets/arkimeparse.js +0 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
import geoip2.database
|
4
|
+
|
5
|
+
from xbase_util.xbase_constant import geo_path
|
6
|
+
|
7
|
+
|
8
|
+
class GeoUtil:
    """Enriches a session dict with GeoIP information for its source and
    destination IP addresses.

    Addresses recognised as internal (see ``is_stable_name``) get their
    classification label written into every geo field; all other addresses
    are looked up in the GeoLite2 city database, with "IP_GEO_EMPTY" used
    for missing values or failed lookups.
    """

    # Suffixes appended to the "source.ip"/"destination.ip" key prefix.
    _SUFFIXES = (
        "_Country_IsoCode",
        "_Country_Name",
        "_Country_SpecificName",
        "_Country_SpecificIsoCode",
        "_City_Name",
        "_City_PostalCode",
        "_Location_Latitude",
        "_Location_Longitude",
    )

    def __init__(self):
        # One long-lived reader: geoip2 readers are cheap to query but
        # costly to open.
        self.reader = geoip2.database.Reader(geo_path)
        print("初始化:GeoUtil")

    @staticmethod
    def is_stable_name(ip):
        """Classify *ip* into a known internal category.

        Returns "LOCAL_ADDRESS", "VPN_ADDRESS" or "DEPARTMENT_ADDRESS" for
        the recognised internal ranges, and False otherwise (including
        valid IPv4 addresses outside those ranges).

        Bug fix: the original implicitly returned None for a valid IPv4
        address matching none of the ranges; callers compared the result
        against False only, so None silently took the wrong branch.
        """
        ip_match = r"^(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[1-9]|0?[1-9]0)\.)(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){2}(?:25[0-4]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[1-9]|0?[1-9]0)$"
        if re.match(ip_match, ip):
            octets = ip.split('.')
            first_octet = int(octets[0])
            second_octet = int(octets[1])
            # Local network.
            if ip == "10.28.0.0" or ip.startswith("10.28.0.") or ip.startswith("10.28.0.0/16"):
                return "LOCAL_ADDRESS"
            # VPN address pool.
            if ip.startswith("10.28.15"):
                return "VPN_ADDRESS"
            # Branch-office ("department") ranges.
            if (192 <= first_octet <= 195) or (first_octet == 192 and 144 <= second_octet <= 255):
                return "DEPARTMENT_ADDRESS"
        return False

    @staticmethod
    def fill_geo_empty(value):
        """Map empty-string/None lookup results to the "IP_GEO_EMPTY" placeholder."""
        if value == "" or value is None:
            return "IP_GEO_EMPTY"
        return value

    def _fill_geo_fields(self, geo_map, prefix):
        """Populate all geo fields for the IP stored at ``geo_map[prefix]``.

        Replaces the original's duplicated source/destination code paths.
        """
        ip = geo_map[prefix]
        name = self.is_stable_name(ip)
        if name:
            # Internal address: its classification replaces every geo value.
            # (Bug fix: the original branched on ``is not False``, which sent
            # classified addresses to the GeoIP lookup and wrote the boolean
            # False into the map for unclassified ones.)
            values = {suffix: name for suffix in self._SUFFIXES}
        else:
            try:
                response = self.reader.city(ip)
                values = {
                    "_Country_IsoCode": self.fill_geo_empty(response.country.iso_code),
                    "_Country_Name": self.fill_geo_empty(response.country.name),
                    "_Country_SpecificName": self.fill_geo_empty(response.subdivisions.most_specific.name),
                    "_Country_SpecificIsoCode": self.fill_geo_empty(response.subdivisions.most_specific.iso_code),
                    "_City_Name": self.fill_geo_empty(response.city.name),
                    "_City_PostalCode": self.fill_geo_empty(response.postal.code),
                    "_Location_Latitude": self.fill_geo_empty(response.location.latitude),
                    "_Location_Longitude": self.fill_geo_empty(response.location.longitude),
                }
            except Exception:
                # Lookup failed (unknown/private address): placeholder everywhere.
                values = {suffix: "IP_GEO_EMPTY" for suffix in self._SUFFIXES}
        for suffix, value in values.items():
            geo_map[prefix + suffix] = value

    def get_geo_by_ip(self, geo_map):
        """Fill geo fields for geo_map["source.ip"] and geo_map["destination.ip"].

        Mutates *geo_map* in place and returns it.
        """
        self._fill_geo_fields(geo_map, "source.ip")
        self._fill_geo_fields(geo_map, "destination.ip")
        return geo_map
|
@@ -0,0 +1,381 @@
|
|
1
|
+
import json
|
2
|
+
import re
|
3
|
+
from urllib.parse import urlparse, parse_qs
|
4
|
+
|
5
|
+
import execjs
|
6
|
+
import numpy as np
|
7
|
+
from scapy.layers.dns import DNS
|
8
|
+
|
9
|
+
from xbase_util.xbase_constant import parse_path
|
10
|
+
|
11
|
+
|
12
|
+
def parse_expression(expression):
    """Translate an Arkime search expression into an ES query fragment.

    Delegates to the ``parse_exp`` function in the bundled arkimeparse.js,
    executed through execjs.  Returns None when *expression* is falsy.
    """
    if not expression:
        return None
    with open(parse_path, "r") as script:
        engine = execjs.compile(script.read())
    return engine.call("parse_exp", expression)
|
19
|
+
|
20
|
+
|
21
|
+
def get_cookie_end_with_semicolon_count(text_data):
    """Count header lines containing a Cookie header that ends with ';'.

    Each line is normalised before matching: lowercased, '-' replaced by
    '_', and all whitespace removed.  Returns -1 when no such line exists.

    Bug fix: the original ended with ``return len(count)`` where ``count``
    is an int, raising TypeError whenever at least one match was found.
    """
    count = 0
    for text in text_data.replace("-", "_").lower().split("\n"):
        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
        if "cookie:" in item_text and item_text.endswith(";"):
            count += 1
    return count if count else -1
|
30
|
+
|
31
|
+
|
32
|
+
def get_ua_duplicate_count(text_data):
    """Count distinct User-Agent header values (ending with ';') in *text_data*.

    Lines are normalised (lowercased, '-' -> '_', whitespace removed), so the
    header is matched as "user_agent:".  Returns -1 when no matching line is
    found.

    Bug fixes: the original tested ``count == 0`` on a list (never true) and
    returned ``sum(count)`` over strings (TypeError for any non-empty
    result); the intent was clearly the number of distinct values.
    NOTE(review): the name says "duplicate count" but the original computed
    distinct values — that reading is kept.
    """
    ua_list = []
    for text in text_data.replace("-", "_").lower().split("\n"):
        item_text = text.replace("\n", "").replace("\t", "").replace(" ", "")
        if "user_agent:" in item_text and item_text.endswith(";"):
            ua_list.append(item_text.replace("user_agent:", ""))
    unique = set(ua_list)
    if not unique:
        return -1
    return len(unique)
|
42
|
+
|
43
|
+
|
44
|
+
def get_res_status_code_list(text_data):
    """Bucket HTTP response status codes found in *text_data* by class.

    Scans every line beginning with "HTTP/", extracts the first standalone
    three-digit number, and returns a 5-tuple with the counts of codes in
    [0,200), [200,300), [300,400), [400,500) and [500,600).
    """
    buckets = [0, 0, 0, 0, 0]
    for line in text_data.split("\n"):
        if not line.startswith("HTTP/"):
            continue
        found = re.search(r"\b(\d{3})\b", line)
        if not found:
            continue
        code = int(found.group(0))
        if 0 <= code < 200:
            buckets[0] += 1
        elif 200 <= code < 300:
            buckets[1] += 1
        elif 300 <= code < 400:
            buckets[2] += 1
        elif 400 <= code < 500:
            buckets[3] += 1
        elif 500 <= code < 600:
            buckets[4] += 1
    return tuple(buckets)
|
70
|
+
|
71
|
+
|
72
|
+
def get_packets_percentage(session, isReq):
    """Fraction of the session's bytes carried in the requested direction.

    *isReq* selects the source (request) side, otherwise the destination
    (response) side.  Returns 0.0 when both byte counters are zero and 0.5
    when either counter is missing from *session*.
    """
    if "source.bytes" not in session or "destination.bytes" not in session:
        return 0.5  # unknown split: assume symmetric traffic
    total = session["source.bytes"] + session["destination.bytes"]
    if total > 0:
        side = "source.bytes" if isReq else "destination.bytes"
        return session[side] / total
    return 0.0  # guard against division by zero
|
84
|
+
|
85
|
+
|
86
|
+
def split_samples(sample, per_subsection):
    """Split *sample* into chunks of at most *per_subsection* items.

    Returns ``(chunk_count, chunk_sizes)``; the final chunk carries the
    remainder when the length is not an exact multiple.
    """
    full_chunks, leftover = divmod(len(sample), per_subsection)
    sizes = [per_subsection] * full_chunks
    if leftover:
        sizes.append(leftover)
    return len(sizes), sizes
|
94
|
+
|
95
|
+
|
96
|
+
def split_process(subsection, process_count):
    """Distribute ``len(subsection)`` items as evenly as possible over
    *process_count* workers.

    Returns the per-worker item counts; the first ``len % process_count``
    workers each receive one extra item.
    """
    base, extra = divmod(len(subsection), process_count)
    return [base + (1 if worker < extra else 0) for worker in range(process_count)]
|
106
|
+
|
107
|
+
|
108
|
+
def build_es_expression(size, start_time, end_time, arkime_expression):
    """Assemble an Elasticsearch query dict from an optional result size,
    time window and Arkime expression.

    Time bounds become ``range`` filters on firstPacket/lastPacket (epoch
    milliseconds).  On any failure — typically a missing Node.js runtime for
    the Arkime expression parser — the process exits with status 1.
    """
    query = {"query": {"bool": {"filter": []}}}
    filters = query["query"]["bool"]["filter"]
    try:
        if size:
            query['size'] = size
        if start_time:
            filters.append({"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
        if end_time:
            filters.append({"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
        translated = parse_expression(arkime_expression)
        if translated:
            filters.append(translated)
        return query
    except Exception as e:
        print(f"请安装nodejs{e}")
        print(arkime_expression)
        exit(1)
|
127
|
+
|
128
|
+
|
129
|
+
def get_uri_depth(url):
    """Number of non-empty path segments in *url* (query string ignored)."""
    head = re.match(r'^[^?]*', url)
    if not head:
        return 0
    # Drop any scheme://host prefix, then count the remaining segments.
    path = re.sub(r'^https?://[^/]+', '', head.group(0))
    return sum(1 for segment in path.split('/') if segment)
|
138
|
+
|
139
|
+
|
140
|
+
def firstOrZero(param):
    """Return the first element of *param* when it is a non-empty list,
    otherwise 0.

    Improvement: uses ``isinstance`` instead of comparing
    ``type(param).__name__`` against the string 'list' — the idiomatic
    check, which also accepts list subclasses.
    """
    if isinstance(param, list) and param:
        return param[0]
    return 0
|
148
|
+
|
149
|
+
|
150
|
+
def get_statistic_fields(packets):
    """Compute packet-length statistics over a capture.

    *packets* is a sequence whose items support ``len(item)`` (frame length
    in bytes) and expose an ``item.time`` timestamp — NOTE(review): assumed
    from usage below (scapy packets fit); confirm against callers.

    Returns a flat dict of overall mean/variance/count/min/max/rate plus,
    per predefined length bucket, its count/average/min/max/rate and share
    of all packets.
    """
    # Inclusive byte-length buckets used to histogram the packets.
    length_ranges = {
        "0_19": (0, 19),
        "20_39": (20, 39),
        "40_79": (40, 79),
        "80_159": (80, 159),
        "160_319": (160, 319),
        "320_639": (320, 639),
        "640_1279": (640, 1279),
        "1280_2559": (1280, 2559),
        "2560_5119": (2560, 5119),
        "more_than_5120": (5120, float('inf'))
    }

    def get_length_range(le):
        # Map a byte length onto its bucket key; the final return is a
        # defensive fallback (the inf-capped bucket already matches all).
        for key, (min_len, max_len) in length_ranges.items():
            if min_len <= le <= max_len:
                return key
        return "more_than_5120"

    packet_lengths = {key: [] for key in length_ranges}
    total_length = 0
    packet_len_total_count = len(packets)
    for packet_item in packets:
        length = len(packet_item)
        length_range = get_length_range(length)
        packet_lengths[length_range].append(length)
        total_length += length
    # Capture duration between first and last packet; 1 when there are
    # fewer than two packets, which avoids a zero divisor below.
    total_time = packets[-1].time - packets[0].time if packet_len_total_count > 1 else 1
    packet_len_average = round(total_length / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
    packet_len_min = min(len(packet_item) for packet_item in packets) if packets else 0
    packet_len_max = max(len(packet_item) for packet_item in packets) if packets else 0
    # Rate is packets per millisecond: (count / duration) / 1000.
    packet_len_rate = round((packet_len_total_count / total_time) / 1000, 5) if total_time > 0 else 0
    packet_size = [len(p) for p in packets]
    # NOTE(review): with an empty *packets*, np.mean/np.var of [] yield nan
    # (with a RuntimeWarning), so the first two fields would be nan.
    field_map = {
        "packet_size_mean": float(round(np.mean(packet_size), 5)),
        "packet_size_variance": float(round(np.var(packet_size), 5)),
        'packet_len_total_count': packet_len_total_count,
        'packet_len_total_average': packet_len_average,
        'packet_len_total_min': packet_len_min,
        'packet_len_total_max': packet_len_max,
        'packet_len_total_rate': float(packet_len_rate),
        'packet_len_total_percent': 1,
    }
    # Per-bucket statistics mirror the totals above.
    for length_range, lengths in packet_lengths.items():
        count = len(lengths)
        if count > 0:
            average = round(sum(lengths) / count, 5)
            min_val = min(lengths)
            max_val = max(lengths)
        else:
            average = min_val = max_val = 0
        packet_len_rate = round((count / total_time) / 1000, 5) if total_time > 0 else 0
        percent = round(count / packet_len_total_count, 5) if packet_len_total_count > 0 else 0
        field_map.update({
            f"packet_len_{length_range}_count": count,
            f"packet_len_{length_range}_average": average,
            f"packet_len_{length_range}_min": min_val,
            f"packet_len_{length_range}_max": max_val,
            f"packet_len_{length_range}_rate": float(packet_len_rate),
            f"packet_len_{length_range}_percent": percent
        })
    return field_map
|
213
|
+
|
214
|
+
|
215
|
+
def get_dns_domain(packets):
    """Return the first DNS query name found in *packets*.

    The trailing root dot is stripped; an empty string is returned when no
    packet carries a DNS question section.
    """
    domain_name = ""
    for pkt in packets:
        if DNS not in pkt:
            continue
        question = pkt[DNS].qd
        if not question:
            continue
        try:
            domain_name = question.qname.decode('utf-8')
        except Exception:
            # Name is not valid UTF-8: keep its string representation.
            domain_name = str(question.qname)
            print(f"dns域名编码失败的字符串:{domain_name}")
        break
    if domain_name.endswith("."):
        domain_name = domain_name[:-1]
    return domain_name
|
231
|
+
|
232
|
+
|
233
|
+
def extract_session_fields(cls, origin_list, geoUtil):
    """Flatten raw Elasticsearch session hits into feature dicts.

    For each hit in *origin_list*, pulls the Arkime session fields (tcp
    flags, source/destination counters, http/dns/tls attributes), derives
    URI statistics, and runs the result through ``geoUtil.get_geo_by_ip``
    to append GeoIP fields.  Returns the list of enriched dicts.

    NOTE(review): despite the ``cls`` parameter name this is a module-level
    function; *cls* must be an object exposing ``get_uri_filename_length``
    and ``get_url_param_count`` — confirm against callers.
    """
    res = []
    for item in origin_list:
        _source = item.get("_source", {})
        source = _source.get("source", {})
        tcpflags = _source.get("tcpflags", {})
        destination = _source.get("destination", {})
        http = _source.get("http", {})
        dns = _source.get("dns", {})
        tls = _source.get("tls", {})
        uri = http.get('uri', [])
        # Per-URI derived features; np.nan_to_num turns the nan produced by
        # mean/var of an empty list into 0.0.
        uri_length = [len(u) for u in uri]
        uri_depth = [get_uri_depth(u) for u in uri]
        uri_filename_length = [cls.get_uri_filename_length(u) for u in uri]
        uri_params = [cls.get_url_param_count(u) for u in uri]
        res.append(geoUtil.get_geo_by_ip({
            "id": item["_id"],
            "node": _source.get("node", ""),
            "segmentCnt": _source.get("segmentCnt", 0),
            "tcpflags.rst": tcpflags.get("rst", 0),
            "tcpflags.ack": tcpflags.get("ack", 0),
            "tcpflags.syn": tcpflags.get("syn", 0),
            "tcpflags.urg": tcpflags.get("urg", 0),
            "tcpflags.psh": tcpflags.get("psh", 0),
            "tcpflags.syn-ack": tcpflags.get("syn-ack", 0),
            "tcpflags.fin": tcpflags.get("fin", 0),
            "source.ip": source.get("ip", ""),
            "destination.ip": destination.get("ip", ""),
            "source.port": source.get("port", ""),
            "source.packets": source.get("packets", ""),
            "source.bytes": source.get("bytes", 0),
            "destination.port": destination.get("port", ""),
            "destination.bytes": destination.get("bytes", 0),
            "destination.packets": destination.get("packets", 0),
            "initRTT": _source.get("initRTT", ""),
            "firstPacket": _source.get("firstPacket", 0),
            "lastPacket": _source.get("lastPacket", 0),
            "ipProtocol": _source.get("ipProtocol", 0),
            "protocolCnt": _source.get("protocolCnt", 0),
            "protocol": _source.get("protocol", []),
            "server.bytes": _source.get("server", {}).get("bytes", 0),
            "totDataBytes": _source.get("totDataBytes", 0),
            "network.packets": _source.get("network", {}).get("packets", 0),
            "network.bytes": _source.get("network", {}).get("bytes", 0),
            "length": _source.get("length", 0),
            "client.bytes": _source.get("client", {}).get("bytes", 0),
            "http.uri": uri,
            "http.uri_length_mean": round(np.nan_to_num(np.mean(uri_length)), 5),
            "http.uri_length_var": round(np.nan_to_num(np.var(uri_length)), 5),
            "http.uri_param_count_mean": round(np.nan_to_num(np.mean(uri_params)), 5),
            "http.uri_param_count_var": round(np.nan_to_num(np.var(uri_params)), 5),
            "http.uri_depth_mean": round(np.nan_to_num(np.mean(uri_depth)), 5),
            "http.uri_depth_var": round(np.nan_to_num(np.var(uri_depth)), 5),
            "http.uri_filename_length_mean": round(np.nan_to_num(np.mean(uri_filename_length)), 5),
            "http.uri_filename_length_var": round(np.nan_to_num(np.var(uri_filename_length)), 5),

            "http.response-content-type": http.get("response-content-type", []),
            "http.bodyMagicCnt": http.get("bodyMagicCnt", 0),
            "http.statuscodeCnt": http.get("statusCodeCnt", 0),
            "http.clientVersionCnt": http.get("clientVersionCnt", 0),
            "http.response-content-typeCnt": http.get("response-content-typeCnt", 0),
            "http.xffIpCnt": http.get("xffIpCnt", 0),
            "http.requestHeaderCnt": http.get("requestHeaderCnt", 0),
            "http.serverVersion": http.get("serverVersion", []),
            "http.serverVersionCnt": http.get("serverVersionCnt", 0),
            "http.responseHeaderCnt": http.get("responseHeaderCnt", 0),
            "http.xffIp": http.get("xffIp", []),
            "http.clientVersion": http.get("clientVersion", []),
            "http.uriTokens": http.get("uriTokens", ""),
            "http.useragentCnt": http.get("useragentCnt", 0),
            "http.statuscode": http.get("statusCode", []),
            "http.bodyMagic": http.get("bodyMagic", []),
            "http.request-content-type": http.get("request-content-type", []),
            "http.uriCnt": http.get("uriCnt", 0),

            "http.useragent": http.get("useragent", ""),
            "http.keyCnt": http.get("keyCnt", 0),
            # NOTE(review): source key "requestReferer" differs in style from
            # the hyphenated ES field names around it — verify it matches the
            # actual document schema.
            "http.request-referer": http.get("requestReferer", []),
            "http.request-refererCnt": http.get("requestRefererCnt", 0),
            "http.path": http.get("path", []),
            "http.hostCnt": http.get("hostCnt", 0),
            "http.response-server": http.get("response-server", []),
            "http.pathCnt": http.get("pathCnt", 0),
            "http.useragentTokens": http.get("useragentTokens", ""),
            "http.methodCnt": http.get("methodCnt", 0),
            "http.method": http.get("method", []),
            "http.method-GET": http.get("method-GET", 0),
            "http.method-POST": http.get("method-POST", 0),
            "http.key": http.get("key", []),
            "http.hostTokens": http.get("hostTokens", ""),
            "http.requestHeader": http.get("requestHeader", []),
            "http.responseHeader": http.get("responseHeader", []),

            "dns.ASN": dns.get("ASN", []),
            "dns.RIR": dns.get("RIR", []),
            "dns.GEO": dns.get("GEO", []),
            "dns.alpn": dns.get("https.alpn", []),
            "dns.alpnCnt": dns.get("https.alpnCnt", 0),
            "dns.ip": dns.get("ip", []),
            "dns.ipCnt": dns.get("ipCnt", 0),
            "dns.OpCode": dns.get("opcode", []),
            "dns.OpCodeCnt": dns.get("opcodeCnt", 0),
            "dns.Puny": dns.get("puny", []),
            # NOTE(review): "puntCnt" looks like a typo for "punyCnt".
            "dns.PunyCnt": dns.get("puntCnt", 0),
            "dns.QueryClass": dns.get("qc", []),
            "dns.QueryClassCnt": dns.get("qcCnt", 0),
            "dns.QueryType": dns.get("qt", []),
            "dns.QueryTypeCnt": dns.get("qtCnt", 0),
            "dns.status": dns.get("status", []),
            # NOTE(review): json.dumps on a plain count/list is unusual —
            # presumably to force string-typed columns; confirm.
            "dns.hostCnt": json.dumps(dns.get("hostCnt", 0)),
            "dns.host": json.dumps(dns.get("host", [])),
            "dns.statusCnt": dns.get("statusCnt", 0),

            "tls.cipher": tls.get("cipher", []),
            "tls.cipherCnt": tls.get("cipherCnt", 0),
            "tls.dstSessionId": tls.get("dstSessionId", []),
            "tls.ja3": tls.get("ja3", []),
            "tls.ja3Cnt": tls.get("ja3Cnt", 0),
            "tls.ja3s": tls.get("ja3s", []),
            "tls.ja3sCnt": tls.get("ja3sCnt", 0),
            "tls.ja4": tls.get("ja4", []),
            "tls.ja4Cnt": tls.get("ja4Cnt", 0),
            "tls.srcSessionId": tls.get("srcSessionId", []),
            "tls.version": tls.get("version", []),
            "tls.versionCnt": tls.get("versionCnt", 0),
            # NOTE(review): both ja4_r fields read "versionCnt" — almost
            # certainly a copy-paste slip for "ja4_r"/"ja4_rCnt"; left
            # unchanged pending confirmation.
            "tls.ja4_r": tls.get("versionCnt", 0),
            "tls.ja4_rCnt": tls.get("versionCnt", 0),
            "packetPos": json.dumps(_source.get("packetPos", [])),
            "traffic_type": item.get("traffic_type", ""),
            "PROTOCOL": item.get("PROTOCOL", ""),
            "DENY_METHOD": item.get("DENY_METHOD", ""),
            "THREAT_SUMMARY": item.get("THREAT_SUMMARY", ""),
            "SEVERITY": item.get("SEVERITY", ""),
        }))
    return res
|
368
|
+
|
369
|
+
|
370
|
+
def get_url_param_count(url):
    """Number of distinct query-string parameter names in *url*."""
    parsed = urlparse(url)
    return len(parse_qs(parsed.query))
|
374
|
+
|
375
|
+
|
376
|
+
def get_uri_filename_length(uri):
    """Length of the trailing ".ext" suffix of *uri* (dot included), or 0
    when the URI has no extension-like tail.

    NOTE(review): despite the name, this measures the matched dot plus
    extension (``group(0)``), not the filename — behavior preserved as-is.
    """
    tail = re.search(r'\.([^./?#]+)$', uri)
    return len(tail.group(0)) if tail else 0
|
@@ -3,8 +3,10 @@ setup.py
|
|
3
3
|
xbase_util/__init__.py
|
4
4
|
xbase_util/es_db_util.py
|
5
5
|
xbase_util/esreq.py
|
6
|
+
xbase_util/geo_util.py
|
6
7
|
xbase_util/handle_features_util.py
|
7
8
|
xbase_util/pcap_util.py
|
9
|
+
xbase_util/xbase_constant.py
|
8
10
|
xbase_util/xbase_util.py
|
9
11
|
xbase_util.egg-info/PKG-INFO
|
10
12
|
xbase_util.egg-info/SOURCES.txt
|
@@ -1,86 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import re
|
3
|
-
|
4
|
-
import execjs
|
5
|
-
import geoip2.database
|
6
|
-
|
7
|
-
current_dir = os.path.dirname(__file__)
|
8
|
-
parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
|
9
|
-
geo_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'GeoLite2-City.mmdb')
|
10
|
-
|
11
|
-
|
12
|
-
def parse_expression(expression):
|
13
|
-
if expression:
|
14
|
-
with open(parse_path, "r") as f:
|
15
|
-
ctx = execjs.compile(f.read())
|
16
|
-
return ctx.call("parse_exp", expression)
|
17
|
-
else:
|
18
|
-
return None
|
19
|
-
|
20
|
-
|
21
|
-
def geo_reader():
|
22
|
-
return geoip2.database.Reader(geo_path)
|
23
|
-
|
24
|
-
|
25
|
-
def split_samples(sample, per_subsection):
|
26
|
-
num_subsections = len(sample) // per_subsection
|
27
|
-
remainder = len(sample) % per_subsection
|
28
|
-
subsection_sizes = [per_subsection] * num_subsections
|
29
|
-
if remainder > 0:
|
30
|
-
subsection_sizes.append(remainder)
|
31
|
-
num_subsections += 1
|
32
|
-
return num_subsections, subsection_sizes
|
33
|
-
|
34
|
-
|
35
|
-
def split_process(subsection, process_count):
|
36
|
-
subsection_per_process = len(subsection) // process_count
|
37
|
-
remainder = len(subsection) % process_count
|
38
|
-
lengths = []
|
39
|
-
start = 0
|
40
|
-
for i in range(process_count):
|
41
|
-
end = start + subsection_per_process + (1 if i < remainder else 0)
|
42
|
-
lengths.append(end - start)
|
43
|
-
start = end
|
44
|
-
return lengths
|
45
|
-
|
46
|
-
|
47
|
-
def build_es_expression(size, start_time, end_time, arkime_expression):
|
48
|
-
expression = {"query": {"bool": {"filter": []}}}
|
49
|
-
try:
|
50
|
-
if size:
|
51
|
-
expression['size'] = size
|
52
|
-
if start_time:
|
53
|
-
expression['query']['bool']['filter'].append(
|
54
|
-
{"range": {"firstPacket": {"gte": round(start_time.timestamp() * 1000)}}})
|
55
|
-
if end_time:
|
56
|
-
expression['query']['bool']['filter'].append(
|
57
|
-
{"range": {"lastPacket": {"lte": round(end_time.timestamp() * 1000)}}})
|
58
|
-
arkime_2_es = parse_expression(arkime_expression)
|
59
|
-
if arkime_2_es:
|
60
|
-
expression['query']['bool']['filter'].append(arkime_2_es)
|
61
|
-
return expression
|
62
|
-
except Exception as e:
|
63
|
-
print(f"请安装nodejs{e}")
|
64
|
-
print(arkime_expression)
|
65
|
-
exit(1)
|
66
|
-
|
67
|
-
|
68
|
-
def get_uri_depth(url):
|
69
|
-
match = re.match(r'^[^?]*', url)
|
70
|
-
if match:
|
71
|
-
path = match.group(0)
|
72
|
-
# 去除协议和域名部分
|
73
|
-
path = re.sub(r'^https?://[^/]+', '', path)
|
74
|
-
segments = [segment for segment in path.split('/') if segment]
|
75
|
-
return len(segments)
|
76
|
-
return 0
|
77
|
-
|
78
|
-
|
79
|
-
def firstOrZero(param):
|
80
|
-
if type(param).__name__ == 'list':
|
81
|
-
if (len(param)) != 0:
|
82
|
-
return param[0]
|
83
|
-
else:
|
84
|
-
return 0
|
85
|
-
else:
|
86
|
-
return 0
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|