xbase-util 0.8.2__tar.gz → 0.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {xbase_util-0.8.2 → xbase_util-0.8.4}/PKG-INFO +1 -1
  2. {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.py +1 -1
  3. xbase_util-0.8.2/xbase_util/xbase_util.py → xbase_util-0.8.4/xbase_util/common_util.py +24 -25
  4. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/packet_util.py +12 -12
  5. xbase_util-0.8.4/xbase_util/pcap_util.py +463 -0
  6. xbase_util-0.8.4/xbase_util/test.py +40 -0
  7. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/PKG-INFO +1 -1
  8. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/SOURCES.txt +2 -2
  9. xbase_util-0.8.2/xbase_util/pcap_util.py +0 -860
  10. xbase_util-0.8.2/xbase_util/segment.py +0 -105
  11. {xbase_util-0.8.2 → xbase_util-0.8.4}/README.md +0 -0
  12. {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.cfg +0 -0
  13. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/__init__.py +0 -0
  14. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/add_column_util.py +0 -0
  15. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/dangerous_util.py +0 -0
  16. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/__init__.py +0 -0
  17. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/ConfigBean.py +0 -0
  18. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  19. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/FlowBean.py +0 -0
  20. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  21. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/__init__.py +0 -0
  22. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/ConfigDao.py +0 -0
  23. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  24. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/FlowDao.py +0 -0
  25. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  26. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/__init__.py +0 -0
  27. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/initsqlite3.py +0 -0
  28. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/es_db_util.py +0 -0
  29. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/esreq.py +0 -0
  30. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/geo_util.py +0 -0
  31. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/handle_features_util.py +0 -0
  32. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/xbase_constant.py +0 -0
  33. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/dependency_links.txt +0 -0
  34. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/not-zip-safe +0 -0
  35. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/top_level.txt +0 -0
  36. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  37. {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/arkimeparse.js +0 -0
{xbase_util-0.8.2 → xbase_util-0.8.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xbase_util
-Version: 0.8.2
+Version: 0.8.4
 Summary: 网络安全基础工具
 Home-page: https://gitee.com/jimonik/xbase_util.git
 Author: xyt
{xbase_util-0.8.2 → xbase_util-0.8.4}/setup.py
@@ -3,7 +3,7 @@ from distutils.core import setup
 from setuptools import find_packages
 
 setup(name="xbase_util",
-      version="0.8.2",
+      version="0.8.4",
       description="网络安全基础工具",
       long_description="包含提取,预测,训练的基础工具",
       author="xyt",
xbase_util-0.8.2/xbase_util/xbase_util.py → xbase_util-0.8.4/xbase_util/common_util.py
@@ -12,11 +12,20 @@ import numpy as np
 import tldextract
 from scapy.layers.dns import DNS
 
-from xbase_util.xbase_constant import dns_domain_list, parse_path
+from xbase_util.xbase_constant import parse_path, dns_domain_list
 
 
-def parse_chunked_body(data: bytes) -> bytes:
-    body = b""
+def filter_visible_chars(data):
+    """
+    过滤不可见字符,仅保留可打印的ASCII字符
+    :param data:
+    :return:
+    """
+    return ''.join(chr(b) for b in data if 32 <= b <= 126 or b in (9, 10, 13))
+
+
+def parse_chunked_body(data: bytes, need_un_gzip=False) -> bytes:
+    body = b''
     while True:
         chunk_size_end = data.find(b"\r\n")
         if chunk_size_end == -1:
@@ -29,10 +38,13 @@ def parse_chunked_body(data: bytes) -> bytes:
         chunk_end = chunk_start + chunk_size
         body += data[chunk_start:chunk_end]
         data = data[chunk_end + 2:]
-    try:
-        body = gzip.decompress(body)
-        return body
-    except gzip.BadGzipFile:
+    if need_un_gzip:
+        try:
+            return gzip.decompress(body)
+        except gzip.BadGzipFile:
+            print("解压错误")
+            return body
+    else:
         return body
 
 
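The reworked parse_chunked_body leaves gunzipping to the caller. A minimal usage sketch (the chunked payload is invented for illustration, and the zero-length terminating-chunk handling is assumed from the elided middle of the function):

    from xbase_util.common_util import parse_chunked_body

    raw = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n"  # two chunks plus the 0 terminator
    parse_chunked_body(raw)                     # -> b"hello world"
    # With need_un_gzip=True the reassembled body is additionally gunzipped;
    # a BadGzipFile is logged and the raw reassembled bytes are returned instead.
    parse_chunked_body(raw, need_un_gzip=True)  # -> b"hello world" (not gzip, falls back)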
@@ -79,20 +91,17 @@ def get_ua_duplicate_count(text_data):
     return sum(count)
 
 
-def get_res_status_code_list(text_data):
+def get_res_status_code_list(all_packets):
     value_res = []
-    res = []
     num_1 = 0
     num_2 = 0
     num_3 = 0
     num_4 = 0
     num_5 = 0
-
-    res.extend([item for item in text_data.splitlines() if item.startswith("HTTP/")])
-    for item in res:
-        m = re.search(r"\b(\d{3})\b", item)
-        if m:
-            value_res.append(int(m.group(0)))
+    for item in all_packets:
+        match = re.search(r'HTTP/\d\.\d (\d{3})', item['res_header'])
+        if match:
+            value_res.append(int(match.group(1)))
     for value in value_res:
         if 0 <= value < 200:
             num_1 = num_1 + 1
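get_res_status_code_list now reads reassembled request/response pairs rather than a raw text stream. A sketch of the expected input shape, assuming the five counters are returned as test.py later in this diff suggests (entries are hypothetical):

    all_packets = [
        {'res_header': 'HTTP/1.1 200 OK\r\nContent-Type: text/html'},  # counted in num_2
        {'res_header': 'HTTP/1.1 404 Not Found'},                      # counted in num_4
        {'res_header': ''},  # no response captured: no match, not counted
    ]
    num_1, num_2, num_3, num_4, num_5 = get_res_status_code_list(all_packets)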
@@ -182,16 +191,6 @@ def get_uri_depth(url):
     return 0
 
 
-def firstOrZero(param):
-    if type(param).__name__ == 'list':
-        if (len(param)) != 0:
-            return param[0]
-        else:
-            return 0
-    else:
-        return 0
-
-
 def get_statistic_fields(packets):
     length_ranges = {
         "0_19": (0, 19),
{xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/packet_util.py
@@ -1,9 +1,8 @@
 import copy
-import re
 
 from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
-    features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern, res_status_code_pattern
-from xbase_util.xbase_util import firstOrZero
+    features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern, \
+    res_status_code_pattern
 
 
 def content_type_is_plain(packet):
@@ -23,15 +22,6 @@ def content_type_is_plain(packet):
     return False
 
 
-def filter_visible_chars(data):
-    """
-    过滤不可见字符,仅保留可打印的ASCII字符
-    :param data:
-    :return:
-    """
-    return ''.join(chr(b) for b in data if 32 <= b <= 126 or b in (9, 10, 13))
-
-
 def get_all_columns(
         contains_packet_column=False,
         contains_src_dst_column=False,
@@ -62,6 +52,16 @@ def get_all_columns(
 # req_body_pattern = re.compile(
 #     r"(GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d[\s\S]*?(?=HTTP/\d\.\d)", re.DOTALL)
 
+def firstOrZero(param):
+    if type(param).__name__ == 'list':
+        if (len(param)) != 0:
+            return param[0]
+        else:
+            return 0
+    else:
+        return 0
+
+
 def get_header_value(header_set, value):
     result = [item for item in header_set if value in item]
     if len(result) != 0:
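firstOrZero is now a helper local to packet_util.py, which removes the last import from the old xbase_util module. Its behavior is unchanged:

    firstOrZero([7, 8])  # -> 7
    firstOrZero([])      # -> 0
    firstOrZero("7")     # -> 0 (non-list input)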
xbase_util-0.8.4/xbase_util/pcap_util.py (new file)
@@ -0,0 +1,463 @@
+import copy
+import gzip
+import math
+import os
+import struct
+import time
+import zlib
+from functools import cmp_to_key
+
+from Crypto.Cipher import AES
+from scapy.layers.inet import TCP, IP
+from scapy.packet import Raw
+from zstandard import ZstdDecompressor
+
+from xbase_util.common_util import parse_chunked_body, filter_visible_chars
+from xbase_util.xbase_constant import pattern_chuncked, pattern_gzip
+
+
+def fix_pos(pos, packetPosEncoding):
+    if pos is None or len(pos) == 0:
+        return
+    if packetPosEncoding == "gap0":
+        last = 0
+        lastgap = 0
+        for i, pos_item in enumerate(pos):
+            if pos[i] < 0:
+                last = 0
+            else:
+                if pos[i] == 0:
+                    pos[i] = last + lastgap
+                else:
+                    lastgap = pos[i]
+                    pos[i] += last
+                last = pos[i]
+
+
+def group_numbers(nums):
+    result = []
+    for num in nums:
+        if num < 0:
+            result.append([num])
+        elif result:
+            result[-1].append(num)
+    return result
+
+
+def decompress_streaming(compressed_data, session_id):
+    try:
+        decompressor = ZstdDecompressor()
+        with decompressor.stream_reader(compressed_data) as reader:
+            decompressed_data = reader.read()
+            return decompressed_data
+    except Exception as e:
+        print(f"解码错误:{e} {session_id}")
+        return bytearray()
+
+
+def readUInt32BE(buffer, offset):
+    return struct.unpack('>I', buffer[offset:offset + 4])[0]
+
+
+def readUInt32LE(buffer, offset):
+    return struct.unpack('<I', buffer[offset:offset + 4])[0]
+
+
+def writeUInt32BE(buffer, pos, value):
+    struct.pack_into('>I', buffer, pos, value)
+    return buffer
+
+
+def read_header(param_map, session_id):
+    shortHeader = None
+    headBuffer = os.read(param_map['fd'], 64)
+    if param_map['encoding'] == 'aes-256-ctr':
+        if 'iv' in param_map:
+            param_map['iv'][12:16] = struct.pack('>I', 0)
+            headBuffer = bytearray(
+                AES.new(param_map['encKey'], AES.MODE_CTR, nonce=param_map['iv']).decrypt(bytes(headBuffer)))
+        else:
+            print("读取头部信息失败,iv向量为空")
+    elif param_map['encoding'] == 'xor-2048':
+        for i in range(len(headBuffer)):
+            headBuffer[i] ^= param_map['encKey'][i % 256]
+    if param_map['uncompressedBits']:
+        if param_map['compression'] == 'gzip':
+            headBuffer = zlib.decompress(bytes(headBuffer), zlib.MAX_WBITS | 16)
+        elif param_map['compression'] == 'zstd':
+            headBuffer = decompress_streaming(headBuffer, session_id)
+    headBuffer = headBuffer[:24]
+    magic = struct.unpack('<I', headBuffer[:4])[0]
+    bigEndian = (magic == 0xd4c3b2a1 or magic == 0x4d3cb2a1)
+    nanosecond = (magic == 0xa1b23c4d or magic == 0x4d3cb2a1)
+    if not bigEndian and magic not in {0xa1b2c3d4, 0xa1b23c4d, 0xa1b2c3d5}:
+        raise ValueError("Corrupt PCAP header")
+    if magic == 0xa1b2c3d5:
+        shortHeader = readUInt32LE(headBuffer, 8)
+        headBuffer[0] = 0xd4  # Reset header to normal
+    if bigEndian:
+        linkType = readUInt32BE(headBuffer, 20)
+    else:
+        linkType = readUInt32LE(headBuffer, 20)
+    return headBuffer, shortHeader, bigEndian, linkType, nanosecond
+
+
+def create_decipher(pos, param_map):
+    writeUInt32BE(param_map['iv'], pos, 12)
+    return AES.new(param_map['encKey'], AES.MODE_CTR, nonce=param_map['iv'])
+
+
+def read_packet_internal(pos_arg, hp_len_arg, param_map, session_id):
+    pos = pos_arg
+    hp_len = hp_len_arg
+    if hp_len == -1:
+        if param_map['compression'] == "zstd":
+            hp_len = param_map['uncompressedBitsSize']
+        else:
+            hp_len = 2048
+    inside_offset = 0
+    if param_map['uncompressedBits']:
+        inside_offset = pos & param_map['uncompressedBitsSize'] - 1
+        pos = math.floor(pos / param_map['uncompressedBitsSize'])
+    pos_offset = 0
+    if param_map['encoding'] == 'aes-256-ctr':
+        pos_offset = pos % 16
+        pos = pos - pos_offset
+    elif param_map['encoding'] == 'xor-2048':
+        pos_offset = pos % 256
+        pos = pos - pos_offset
+
+    hp_len = 256 * math.ceil((hp_len + inside_offset + pos_offset) / 256)
+    buffer = bytearray(hp_len)
+    os.lseek(param_map['fd'], pos, os.SEEK_SET)
+    read_buffer = os.read(param_map['fd'], len(buffer))
+    if len(read_buffer) - pos_offset < 16:
+        return None
+    if param_map['encoding'] == 'aes-256-ctr':
+        decipher = create_decipher(pos // 16, param_map)
+        read_buffer = bytearray(decipher.decrypt(read_buffer))[pos_offset:]
+    elif param_map['encoding'] == 'xor-2048':
+        read_buffer = bytearray(b ^ param_map['encKey'][i % 256] for i, b in enumerate(read_buffer))[pos_offset:]
+    if param_map['uncompressedBits']:
+        try:
+            if param_map['compression'] == 'gzip':
+                read_buffer = zlib.decompress(read_buffer, zlib.MAX_WBITS | 16)
+            elif param_map['compression'] == 'zstd':
+                read_buffer = decompress_streaming(read_buffer, session_id)
+        except Exception as e:
+            print(f"PCAP uncompress issue: {pos} {len(buffer)} {read_buffer} {e}")
+            return None
+    if inside_offset:
+        read_buffer = read_buffer[inside_offset:]
+    header_len = 16 if param_map['shortHeader'] is None else 6
+    if len(read_buffer) < header_len:
+        if hp_len_arg == -1 and param_map['compression'] == 'zstd':
+            return read_packet_internal(pos_arg, param_map['uncompressedBitsSize'] * 2, param_map, session_id)
+        print(f"Not enough data {len(read_buffer)} for header {header_len}")
+        return None
+    packet_len = struct.unpack('>I' if param_map['bigEndian'] else '<I', read_buffer[8:12])[
+                     0] if param_map['shortHeader'] is None else \
+        struct.unpack('>H' if param_map['bigEndian'] else '<H', read_buffer[:2])[0]
+    if packet_len < 0 or packet_len > 0xffff:
+        return None
+    if header_len + packet_len <= len(read_buffer):
+        if param_map['shortHeader'] is not None:
+            t = struct.unpack('<I', read_buffer[2:6])[0]
+            sec = (t >> 20) + param_map['shortHeader']
+            usec = t & 0xfffff
+            new_buffer = bytearray(16 + packet_len)
+            struct.pack_into('<I', new_buffer, 0, sec)
+            struct.pack_into('<I', new_buffer, 4, usec)
+            struct.pack_into('<I', new_buffer, 8, packet_len)
+            struct.pack_into('<I', new_buffer, 12, packet_len)
+            new_buffer[16:] = read_buffer[6:packet_len + 6]
+            return new_buffer
+        return read_buffer[:header_len + packet_len]
+
+    if hp_len_arg != -1:
+        return None
+
+    return read_packet_internal(pos_arg, 16 + packet_len, param_map, session_id)
+
+
+def read_packet(pos, param_map, session_id):
+    if 'fd' not in param_map or not param_map['fd']:
+        time.sleep(0.01)
+        return read_packet(pos, param_map['fd'], session_id)
+    return read_packet_internal(pos, -1, param_map, session_id)
+
+
+def get_file_and_read_pos(session_id, file, pos_list):
+    filename = file['name']
+    if not os.path.isfile(filename):
+        print(f"文件不存在:{filename}")
+        return None
+    encoding = file.get('encoding', 'normal')
+    encKey = None
+    iv = None
+    compression = None
+    if 'dek' in file:
+        dek = bytes.fromhex(file['dek'])
+        encKey = AES.new(file['kek'].encode(), AES.MODE_CBC).decrypt(dek)
+
+    if 'uncompressedBits' in file:
+        uncompressedBits = file['uncompressedBits']
+        uncompressedBitsSize = 2 ** uncompressedBits
+        compression = 'gzip'
+    else:
+        uncompressedBits = None
+        uncompressedBitsSize = 0
+    if 'compression' in file:
+        compression = file['compression']
+
+    if 'iv' in file:
+        iv_ = bytes.fromhex(file['iv'])
+        iv = bytearray(16)
+        iv[:len(iv_)] = iv_
+    fd = os.open(filename, os.O_RDONLY)
+    param_map = {
+        "fd": fd,
+        "encoding": encoding,
+        "iv": iv,
+        "encKey": encKey,
+        "uncompressedBits": uncompressedBits,
+        "compression": compression,
+        "uncompressedBitsSize": uncompressedBitsSize
+    }
+    res = bytearray()
+    headBuffer, shortHeader, bigEndian, linkType, nanosecond = read_header(param_map, session_id)
+    res.extend(headBuffer)
+    param_map['shortHeader'] = shortHeader
+    param_map['bigEndian'] = bigEndian
+    # _________________________________
+    byte_array = bytearray(0xfffe)
+    next_packet = 0
+    b_offset = 0
+    packets = {}
+    # packet_objs = []
+    i = 0
+    for pos in pos_list:
+        packet_bytes = read_packet(pos, param_map, session_id)
+        # if reture_obj:
+        #     obj = decode_obj(packet_bytes, bigEndian, linkType, nanosecond, )
+        #     packet_objs.append(copy.deepcopy(obj))
+        if not packet_bytes:
+            continue
+        packets[i] = packet_bytes
+        while next_packet in packets:
+            buffer = packets[next_packet]
+
+            next_packet += 1
+            # del packets[next_packet]
+            next_packet = next_packet + 1
+            if b_offset + len(buffer) > len(byte_array):
+                res.extend(byte_array[:b_offset])
+                b_offset = 0
+                byte_array = bytearray(0xfffe)
+            byte_array[b_offset:b_offset + len(buffer)] = buffer
+            b_offset += len(buffer)
+        i = i + 1
+    os.close(fd)
+    res.extend(byte_array[:b_offset])
+    return res
+
+
+def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix):
+    packetPos = packet_pos
+    file = esdb.get_file_by_file_id(node=node, num=abs(packetPos[0]),
+                                    prefix=None if pcap_path_prefix == "origin" else pcap_path_prefix)
+    if file is None:
+        return None, None
+    fix_pos(packetPos, file['packetPosEncoding'])
+    pos_list = group_numbers(packetPos)[0]
+    pos_list.pop(0)
+    return get_file_and_read_pos(id, file, pos_list)
+
+
+def parse_body(data):
+    if data.find(b"\r\n\r\n") != -1:
+        res = data.split(b"\r\n\r\n", 1)
+        header = res[0]
+        body = res[1]
+    else:
+        header = data
+        body = b''
+    chunked_pattern = pattern_chuncked.search(header)
+    gzip_pattern = pattern_gzip.search(header)
+    need_gzip = gzip_pattern and b'gzip' in gzip_pattern.group()
+    if chunked_pattern and b'chunked' in chunked_pattern.group():
+        body = parse_chunked_body(body, need_un_gzip=need_gzip)
+    elif need_gzip:
+        try:
+            body = gzip.decompress(body)
+        except:
+            print("解压失败")
+            pass
+    result_body_str = filter_visible_chars(body)
+    return filter_visible_chars(header), result_body_str
+
+
+def reassemble_session_pcap(reassemble_tcp_res, skey):
+    my_map = {
+        'key': '',
+        'req_header': '',
+        'req_body': '',
+        'req_time': 0,
+        'req_size': 0,
+        'res_header': '',
+        'res_body': '',
+        'res_time': 0,
+        'res_size': 0,
+    }
+    packet_list = []
+    for index, packet in enumerate(reassemble_tcp_res):
+        header, body = parse_body(packet['data'])
+        if index == len(reassemble_tcp_res) - 1:
+            packet_list.append(copy.deepcopy(my_map))
+        if packet['key'] == skey:
+            if index != 0:
+                packet_list.append(copy.deepcopy(my_map))
+            my_map = {
+                'key': packet['key'],
+                'req_header': '',
+                'req_body': b'',
+                'req_time': 0,
+                'req_size': 0,
+                'res_header': '',
+                'res_body': b'',
+                'res_time': 0,
+                'res_size': 0,
+            }
+            my_map["req_header"] = header
+            my_map["req_body"] = body
+            my_map["req_time"] = packet['ts']
+            my_map["req_size"] = len(packet['data'])
+        else:
+            my_map["res_header"] = header
+            my_map["res_body"] = body
+            my_map["res_time"] = packet['ts']
+            my_map["res_size"] = len(packet['data'])
+    return packet_list
+
+
+def reassemble_tcp_pcap(p):
+    packets = [{'pkt': item} for item in p if TCP in item and Raw in item]
+    packets2 = []
+    info = {}
+    keys = []
+    for index, packet in enumerate(packets):
+        data = packet['pkt'][Raw].load
+        flags = packet['pkt'][TCP].flags
+        seq = packet['pkt'][TCP].seq
+        if len(data) == 0 or 'R' in flags or 'S' in flags:
+            continue
+        key = f"{packet['pkt'][IP].src}:{packet['pkt'][IP].sport}"
+        if key not in info.keys():
+            info[key] = {
+                "min": seq,
+                "max": seq,
+                "wrapseq": False,
+                "wrapack": False,
+            }
+            keys.append(key)
+        elif info[key]["min"] > seq:
+            info[key]['min'] = seq
+        elif info[key]["max"] < seq:
+            info[key]['max'] = seq
+        packets2.append(packet)
+    if len(keys) == 1:
+        key = f"{packets['pkt'][IP].dst}:{packets['pkt'][IP].dport}"
+        ack = packets['pkt'][TCP].ack
+        info[key] = {
+            "min": ack,
+            "max": ack,
+            "wrapseq": False,
+            "wrapack": False,
+        }
+        keys.append(key)
+    packets = packets2
+    if len(packets) == 0:
+        return []
+    needwrap = False
+    if info[keys[0]] and info[keys[0]]['max'] - info[keys[0]]['min'] > 0x7fffffff:
+        info[keys[0]]['wrapseq'] = True
+        info[keys[0]]['wrapack'] = True
+        needwrap = True
+    if info[keys[1]] and info[keys[1]]['max'] - info[keys[1]]['min'] > 0x7fffffff:
+        info[keys[1]]['wrapseq'] = True
+        info[keys[0]]['wrapack'] = True
+        needwrap = True
+    if needwrap:
+        for packet in packets:
+            key = f"{packet['ip']['addr1']}:{packet['tcp']['sport']}"
+            if info[key]['wrapseq'] and packet['tcp']['seq'] < 0x7fffffff:
+                packet['tcp']['seq'] += 0xffffffff
+            if info[key]['wrapack'] and packet['tcp']['ack'] < 0x7fffffff:
+                packet['tcp']['ack'] += 0xffffffff
+    clientKey = f"{packets[0]['pkt'][IP].src}:{packets[0]['pkt'][IP].sport}"
+
+    def compare_packets(a, b):
+        a_seq = a['pkt'][TCP].seq
+        b_seq = b['pkt'][TCP].seq
+        a_ack = a['pkt'][TCP].ack
+        b_ack = b['pkt'][TCP].ack
+        a_data = a['pkt'][Raw].load
+        b_data = b['pkt'][Raw].load
+        a_ip = a['pkt'][IP].src
+        a_port = a['pkt'][TCP].sport
+        b_port = b['pkt'][TCP].sport
+        b_ip = b['pkt'][IP].src
+        if a_ip == b_ip and a_port == b_port:
+            return a_seq - b_seq
+        if clientKey == f"{a_ip}:{a_port}":
+            return (a_seq + len(a_data) - 1) - b_ack
+        return a_ack - (b_seq + len(b_data) - 1)
+
+    packets.sort(key=cmp_to_key(compare_packets))
+    # del packets[num_packets:]
+    # Now divide up conversation
+    clientSeq = 0
+    hostSeq = 0
+    previous = 0
+    results = []
+    for i, item in enumerate(packets):
+        sip = item['pkt'][IP].src
+        sport = item['pkt'][IP].sport
+        seq = item['pkt'][TCP].seq
+        data = item['pkt'][Raw].load
+        pkey = f"{sip}:{sport}"
+        seq_datalen = seq + len(data)
+        if pkey == clientKey:
+            if clientSeq >= seq_datalen:
+                continue
+            clientSeq = seq_datalen
+        else:
+            if hostSeq >= seq_datalen:
+                continue
+            hostSeq = seq_datalen
+        if len(results) == 0 or pkey != results[len(results) - 1]['key']:
+            previous = seq
+            results.append({
+                'key': pkey,
+                'data': copy.deepcopy(data),
+                'ts': float(item['pkt'].time),
+                'pkt': item['pkt'],
+            })
+        elif seq - previous > 0xffff:
+            results.append(
+                {'key': '',
+                 'data': b'',
+                 'ts': float(item['pkt'].time),
+                 'pkt': item['pkt'],
+                 })
+            previous = seq
+            results.append({
+                'key': pkey,
+                'data': copy.deepcopy(data),
+                'ts': float(item['pkt'].time),
+                'pkt': item['pkt'],
+            })
+        else:
+            previous = seq
+            results[-1]['data'] += data
+    return results
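fix_pos and group_numbers mirror Arkime's "gap0" packet-position encoding: a negative value selects the pcap file, and each following value is a byte offset gap-encoded against the previous one, with 0 meaning "repeat the last gap". A worked example under that assumption (values invented):

    pos = [-1, 24, 100, 0, 150]
    fix_pos(pos, "gap0")   # decoded in place -> [-1, 24, 124, 224, 374]
    group_numbers(pos)     # -> [[-1, 24, 124, 224, 374]], one group per file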
xbase_util-0.8.4/xbase_util/test.py (new file)
@@ -0,0 +1,40 @@
+import re
+
+import numpy as np
+from scapy.packet import Raw
+from scapy.utils import rdpcap
+
+from xbase_util.common_util import get_res_status_code_list
+from xbase_util.pcap_util import reassemble_tcp_pcap, reassemble_session_pcap
+from xbase_util.xbase_constant import res_status_code_pattern
+
+if __name__ == '__main__':
+    packets_scapy = reassemble_tcp_pcap(rdpcap("gzip2.pcap"))
+    skey = '10.28.7.16:54398'
+    streams = b""
+    for pkt in packets_scapy:
+        if Raw in pkt:
+            streams += pkt[Raw].load
+    text_data = streams.decode('ascii', errors='ignore')
+    all_packets = reassemble_session_pcap(packets_scapy, skey=skey)
+    if len(all_packets) != 0:
+        all_req_size = [item['req_size'] for item in all_packets if item['key'] == skey]
+        all_res_size = [item['res_size'] for item in all_packets if item['key'] != skey]
+        num_1, num_2, num_3, num_4, num_5 = get_res_status_code_list(all_packets)
+        # 获取请求头参数数量
+        req_header_count_list = [req['req_header'].count(":") for req in all_packets]
+        # 请求的时间间隔
+        request_flattened_time = [item['req_time'] for item in all_packets]
+        request_time_diffs = [request_flattened_time[i + 1] - request_flattened_time[i] for i in
+                              range(len(request_flattened_time) - 1)]
+        request_mean_diff = round(np.nanmean(request_time_diffs), 5) or 0
+        request_variance_diff = round(np.nanvar(request_time_diffs), 5) or 0
+        # 响应的时间间隔
+        response_flattened_time = [item['res_time'] for item in all_packets]
+        response_time_diffs = [response_flattened_time[i + 1] - response_flattened_time[i] for i in
+                               range(len(response_flattened_time) - 1)]
+        response_mean_diff = round(np.nanmean(response_time_diffs), 5) or 0
+        response_variance_diff = round(np.nanvar(response_time_diffs), 5) or 0
+
+        time_period = [(abs(item['res_time'] - item['req_time'])) for item in
+                       all_packets if item['res_time'] != 0 and item['req_time'] != 0]
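The interval statistics above reduce to pairwise differences pushed through numpy; a small worked instance with made-up timestamps:

    import numpy as np

    req_times = [1.0, 1.2, 1.7]
    diffs = [req_times[i + 1] - req_times[i] for i in range(len(req_times) - 1)]  # [0.2, 0.5]
    round(np.nanmean(diffs), 5)  # 0.35
    round(np.nanvar(diffs), 5)   # 0.0225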
{xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xbase-util
-Version: 0.8.2
+Version: 0.8.4
 Summary: 网络安全基础工具
 Home-page: https://gitee.com/jimonik/xbase_util.git
 Author: xyt
{xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/SOURCES.txt
@@ -2,6 +2,7 @@ README.md
 setup.py
 xbase_util/__init__.py
 xbase_util/add_column_util.py
+xbase_util/common_util.py
 xbase_util/dangerous_util.py
 xbase_util/es_db_util.py
 xbase_util/esreq.py
@@ -9,9 +10,8 @@ xbase_util/geo_util.py
 xbase_util/handle_features_util.py
 xbase_util/packet_util.py
 xbase_util/pcap_util.py
-xbase_util/segment.py
+xbase_util/test.py
 xbase_util/xbase_constant.py
-xbase_util/xbase_util.py
 xbase_util.egg-info/PKG-INFO
 xbase_util.egg-info/SOURCES.txt
 xbase_util.egg-info/dependency_links.txt