xbase-util 0.8.2__tar.gz → 0.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xbase_util-0.8.2 → xbase_util-0.8.4}/PKG-INFO +1 -1
- {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.py +1 -1
- xbase_util-0.8.2/xbase_util/xbase_util.py → xbase_util-0.8.4/xbase_util/common_util.py +24 -25
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/packet_util.py +12 -12
- xbase_util-0.8.4/xbase_util/pcap_util.py +463 -0
- xbase_util-0.8.4/xbase_util/test.py +40 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/SOURCES.txt +2 -2
- xbase_util-0.8.2/xbase_util/pcap_util.py +0 -860
- xbase_util-0.8.2/xbase_util/segment.py +0 -105
- {xbase_util-0.8.2 → xbase_util-0.8.4}/README.md +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.cfg +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/add_column_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/dangerous_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/esreq.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/arkimeparse.js +0 -0
@@ -12,11 +12,20 @@ import numpy as np
|
|
12
12
|
import tldextract
|
13
13
|
from scapy.layers.dns import DNS
|
14
14
|
|
15
|
-
from xbase_util.xbase_constant import
|
15
|
+
from xbase_util.xbase_constant import parse_path, dns_domain_list
|
16
16
|
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def filter_visible_chars(data):
    """Strip non-printable bytes from *data*.

    Keeps printable ASCII (0x20-0x7e) plus tab, LF and CR; everything else
    is dropped.

    :param data: bytes/bytearray (any iterable of byte values)
    :return: the surviving characters as a str
    """
    kept = bytes(b for b in data if 32 <= b <= 126 or b in (9, 10, 13))
    return kept.decode('ascii')
|
25
|
+
|
26
|
+
|
27
|
+
def parse_chunked_body(data: bytes, need_un_gzip=False) -> bytes:
|
28
|
+
body = b''
|
20
29
|
while True:
|
21
30
|
chunk_size_end = data.find(b"\r\n")
|
22
31
|
if chunk_size_end == -1:
|
@@ -29,10 +38,13 @@ def parse_chunked_body(data: bytes) -> bytes:
|
|
29
38
|
chunk_end = chunk_start + chunk_size
|
30
39
|
body += data[chunk_start:chunk_end]
|
31
40
|
data = data[chunk_end + 2:]
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
41
|
+
if need_un_gzip:
|
42
|
+
try:
|
43
|
+
return gzip.decompress(body)
|
44
|
+
except gzip.BadGzipFile:
|
45
|
+
print("解压错误")
|
46
|
+
return body
|
47
|
+
else:
|
36
48
|
return body
|
37
49
|
|
38
50
|
|
@@ -79,20 +91,17 @@ def get_ua_duplicate_count(text_data):
|
|
79
91
|
return sum(count)
|
80
92
|
|
81
93
|
|
82
|
-
def get_res_status_code_list(
|
94
|
+
def get_res_status_code_list(all_packets):
|
83
95
|
value_res = []
|
84
|
-
res = []
|
85
96
|
num_1 = 0
|
86
97
|
num_2 = 0
|
87
98
|
num_3 = 0
|
88
99
|
num_4 = 0
|
89
100
|
num_5 = 0
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if m:
|
95
|
-
value_res.append(int(m.group(0)))
|
101
|
+
for item in all_packets:
|
102
|
+
match = re.search(r'HTTP/\d\.\d (\d{3})', item['res_header'])
|
103
|
+
if match:
|
104
|
+
value_res.append(int(match.group(1)))
|
96
105
|
for value in value_res:
|
97
106
|
if 0 <= value < 200:
|
98
107
|
num_1 = num_1 + 1
|
@@ -182,16 +191,6 @@ def get_uri_depth(url):
|
|
182
191
|
return 0
|
183
192
|
|
184
193
|
|
185
|
-
def firstOrZero(param):
|
186
|
-
if type(param).__name__ == 'list':
|
187
|
-
if (len(param)) != 0:
|
188
|
-
return param[0]
|
189
|
-
else:
|
190
|
-
return 0
|
191
|
-
else:
|
192
|
-
return 0
|
193
|
-
|
194
|
-
|
195
194
|
def get_statistic_fields(packets):
|
196
195
|
length_ranges = {
|
197
196
|
"0_19": (0, 19),
|
@@ -1,9 +1,8 @@
|
|
1
1
|
import copy
|
2
|
-
import re
|
3
2
|
|
4
3
|
from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
|
5
|
-
features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern,
|
6
|
-
|
4
|
+
features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern, \
|
5
|
+
res_status_code_pattern
|
7
6
|
|
8
7
|
|
9
8
|
def content_type_is_plain(packet):
|
@@ -23,15 +22,6 @@ def content_type_is_plain(packet):
|
|
23
22
|
return False
|
24
23
|
|
25
24
|
|
26
|
-
def filter_visible_chars(data):
|
27
|
-
"""
|
28
|
-
过滤不可见字符,仅保留可打印的ASCII字符
|
29
|
-
:param data:
|
30
|
-
:return:
|
31
|
-
"""
|
32
|
-
return ''.join(chr(b) for b in data if 32 <= b <= 126 or b in (9, 10, 13))
|
33
|
-
|
34
|
-
|
35
25
|
def get_all_columns(
|
36
26
|
contains_packet_column=False,
|
37
27
|
contains_src_dst_column=False,
|
@@ -62,6 +52,16 @@ def get_all_columns(
|
|
62
52
|
# req_body_pattern = re.compile(
|
63
53
|
# r"(GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d[\s\S]*?(?=HTTP/\d\.\d)", re.DOTALL)
|
64
54
|
|
55
|
+
def firstOrZero(param):
    """Return the first element of a list, or 0 when that is not possible.

    :param param: candidate value; only a non-empty list yields an element
    :return: ``param[0]`` if *param* is a non-empty list, else 0
    """
    # isinstance() is the idiomatic type check (was: type(param).__name__ ==
    # 'list'); an empty list is falsy, so "empty" and "not a list" both
    # collapse to the 0 fallback.
    if isinstance(param, list) and param:
        return param[0]
    return 0
|
63
|
+
|
64
|
+
|
65
65
|
def get_header_value(header_set, value):
|
66
66
|
result = [item for item in header_set if value in item]
|
67
67
|
if len(result) != 0:
|
@@ -0,0 +1,463 @@
|
|
1
|
+
import copy
|
2
|
+
import gzip
|
3
|
+
import math
|
4
|
+
import os
|
5
|
+
import struct
|
6
|
+
import time
|
7
|
+
import zlib
|
8
|
+
from functools import cmp_to_key
|
9
|
+
|
10
|
+
from Crypto.Cipher import AES
|
11
|
+
from scapy.layers.inet import TCP, IP
|
12
|
+
from scapy.packet import Raw
|
13
|
+
from zstandard import ZstdDecompressor
|
14
|
+
|
15
|
+
from xbase_util.common_util import parse_chunked_body, filter_visible_chars
|
16
|
+
from xbase_util.xbase_constant import pattern_chuncked, pattern_gzip
|
17
|
+
|
18
|
+
|
19
|
+
def fix_pos(pos, packetPosEncoding):
    """Decode an Arkime ``gap0`` packetPos list into absolute offsets, in place.

    In gap0 encoding a negative entry is a file-id marker (kept as-is, and it
    resets the running offset), a positive entry is a gap from the previous
    offset, and a 0 entry means "previous offset advanced by the last gap".

    :param pos: list of packet positions; mutated in place. May be None/empty.
    :param packetPosEncoding: encoding name; only "gap0" is rewritten
    :return: None (the list itself is modified)
    """
    if pos is None or len(pos) == 0:
        return
    if packetPosEncoding == "gap0":
        last = 0
        lastgap = 0
        # Index loop because entries are rewritten in place (the previous
        # `for i, pos_item in enumerate(pos)` left pos_item unused).
        for i in range(len(pos)):
            if pos[i] < 0:
                # file-id marker: restart absolute offsets for the next file
                last = 0
            else:
                if pos[i] == 0:
                    # repeat entry: previous position advanced by the last gap
                    pos[i] = last + lastgap
                else:
                    # explicit gap: remember it and convert to absolute
                    lastgap = pos[i]
                    pos[i] += last
                last = pos[i]
|
35
|
+
|
36
|
+
|
37
|
+
def group_numbers(nums):
    """Partition *nums* into groups opened by negative values.

    Every negative number starts a new group; subsequent non-negative numbers
    are appended to the most recent group. Non-negative numbers seen before
    any negative one are discarded.

    :param nums: iterable of numbers
    :return: list of lists, each starting with a negative number
    """
    groups = []
    for value in nums:
        if value < 0:
            groups.append([value])
            continue
        if groups:
            groups[-1].append(value)
    return groups
|
45
|
+
|
46
|
+
|
47
|
+
def decompress_streaming(compressed_data, session_id):
    """Decompress a zstd-compressed buffer via a streaming reader.

    Never raises: on any decoding failure the error and *session_id* are
    printed and an empty bytearray is returned.

    :param compressed_data: zstd-compressed bytes
    :param session_id: session identifier, used only in the error log
    :return: decompressed bytes, or an empty bytearray on failure
    """
    try:
        with ZstdDecompressor().stream_reader(compressed_data) as stream:
            return stream.read()
    except Exception as e:
        print(f"解码错误:{e} {session_id}")
        return bytearray()
|
56
|
+
|
57
|
+
|
58
|
+
def readUInt32BE(buffer, offset):
    """Read a big-endian unsigned 32-bit integer from *buffer* at *offset*."""
    (value,) = struct.unpack('>I', buffer[offset:offset + 4])
    return value
|
60
|
+
|
61
|
+
|
62
|
+
def readUInt32LE(buffer, offset):
    """Read a little-endian unsigned 32-bit integer from *buffer* at *offset*."""
    (value,) = struct.unpack('<I', buffer[offset:offset + 4])
    return value
|
64
|
+
|
65
|
+
|
66
|
+
def writeUInt32BE(buffer, pos, value):
    """Write *value* as a big-endian unsigned 32-bit integer into *buffer*
    at byte offset *pos* and return the (mutated) buffer.

    NOTE(review): argument order is (buffer, offset, value), the reverse of
    Node's Buffer.writeUInt32BE(value, offset) this port mirrors — verify
    every caller passes the offset second.
    """
    struct.pack_into('>I', buffer, pos, value)
    return buffer
|
69
|
+
|
70
|
+
|
71
|
+
def read_header(param_map, session_id):
    """Read and decode the 24-byte global PCAP header of a capture file.

    Reads 64 raw bytes from ``param_map['fd']``, reverses any at-rest
    encryption (aes-256-ctr or xor-2048) and compression (gzip or zstd),
    then parses the magic number to determine endianness, timestamp
    resolution and link type.

    :param param_map: dict with 'fd', 'encoding', 'encKey', 'iv',
        'uncompressedBits', 'compression' (see get_file_and_read_pos)
    :param session_id: session id, used only for downstream log messages
    :return: (headBuffer, shortHeader, bigEndian, linkType, nanosecond)
    :raises ValueError: if the magic number is not a known PCAP magic
    """
    shortHeader = None
    headBuffer = os.read(param_map['fd'], 64)
    if param_map['encoding'] == 'aes-256-ctr':
        if 'iv' in param_map:
            # zero the 4-byte block counter (iv[12:16]) before deciphering
            param_map['iv'][12:16] = struct.pack('>I', 0)
            headBuffer = bytearray(
                AES.new(param_map['encKey'], AES.MODE_CTR, nonce=param_map['iv']).decrypt(bytes(headBuffer)))
        else:
            print("读取头部信息失败,iv向量为空")
    elif param_map['encoding'] == 'xor-2048':
        # xor-2048: repeating 256-byte key stream
        for i in range(len(headBuffer)):
            headBuffer[i] ^= param_map['encKey'][i % 256]
    if param_map['uncompressedBits']:
        if param_map['compression'] == 'gzip':
            # MAX_WBITS | 16 selects gzip framing
            headBuffer = zlib.decompress(bytes(headBuffer), zlib.MAX_WBITS | 16)
        elif param_map['compression'] == 'zstd':
            headBuffer = decompress_streaming(headBuffer, session_id)
    headBuffer = headBuffer[:24]
    magic = struct.unpack('<I', headBuffer[:4])[0]
    # 0xd4c3b2a1 / 0x4d3cb2a1: file written big-endian relative to reader
    bigEndian = (magic == 0xd4c3b2a1 or magic == 0x4d3cb2a1)
    # 0xa1b23c4d / 0x4d3cb2a1: nanosecond-resolution timestamps
    nanosecond = (magic == 0xa1b23c4d or magic == 0x4d3cb2a1)
    if not bigEndian and magic not in {0xa1b2c3d4, 0xa1b23c4d, 0xa1b2c3d5}:
        raise ValueError("Corrupt PCAP header")
    if magic == 0xa1b2c3d5:
        # Arkime "short header" variant: base seconds stored at offset 8
        shortHeader = readUInt32LE(headBuffer, 8)
        headBuffer[0] = 0xd4  # Reset header to normal
    if bigEndian:
        linkType = readUInt32BE(headBuffer, 20)
    else:
        linkType = readUInt32LE(headBuffer, 20)
    return headBuffer, shortHeader, bigEndian, linkType, nanosecond
|
103
|
+
|
104
|
+
|
105
|
+
def create_decipher(pos, param_map):
    """Build an AES-256-CTR decipher seeded for AES block *pos*.

    The last 4 bytes of the 16-byte IV (iv[12:16]) carry the big-endian block
    counter — the same region read_header zeroes before decrypting the file
    header.

    :param pos: AES block index (byte offset // 16) to seed the counter with
    :param param_map: dict holding 'iv' (bytearray(16)) and 'encKey'
    :return: a pycryptodome CTR-mode cipher object
    """
    # BUG FIX: the arguments were swapped — writeUInt32BE(iv, pos, 12) wrote
    # the constant 12 at byte offset *pos*, overrunning the 16-byte IV for any
    # pos > 12. The intent (mirroring Node's iv.writeUInt32BE(pos, 12)) is to
    # store *pos* at offset 12.
    writeUInt32BE(param_map['iv'], 12, pos)
    # NOTE(review): pycryptodome CTR mode may reject a full 16-byte nonce;
    # confirm whether nonce=iv[:12] plus initial_value=pos is required here.
    return AES.new(param_map['encKey'], AES.MODE_CTR, nonce=param_map['iv'])
|
108
|
+
|
109
|
+
|
110
|
+
def read_packet_internal(pos_arg, hp_len_arg, param_map, session_id):
    """Read one pcap packet record from an (optionally encrypted/compressed)
    capture file.

    The position is translated from the logical packetPos value into a real
    file offset, the surrounding chunk is read, decrypted and decompressed,
    and the single record starting at the requested position is returned. If
    the guessed read length did not cover the whole record, the function
    retries itself once with the exact needed length.

    :param pos_arg: logical packet position (as produced by fix_pos)
    :param hp_len_arg: read length hint, or -1 to use a default guess
    :param param_map: per-file state dict (fd, encoding, encKey, iv,
        compression, uncompressedBits[Size], shortHeader, bigEndian)
    :param session_id: session id, used only for log messages
    :return: bytearray/bytes holding a 16-byte record header + payload,
        or None on failure
    """
    pos = pos_arg
    hp_len = hp_len_arg
    if hp_len == -1:
        # no length hint: read one uncompressed block (zstd) or 2 KiB
        if param_map['compression'] == "zstd":
            hp_len = param_map['uncompressedBitsSize']
        else:
            hp_len = 2048
    inside_offset = 0
    if param_map['uncompressedBits']:
        # packetPos packs (block number << bits | offset inside block);
        # note '&' binds tighter than '-' here: size - 1 is the mask.
        inside_offset = pos & param_map['uncompressedBitsSize'] - 1
        pos = math.floor(pos / param_map['uncompressedBitsSize'])
    pos_offset = 0
    if param_map['encoding'] == 'aes-256-ctr':
        # align to the 16-byte AES block boundary
        pos_offset = pos % 16
        pos = pos - pos_offset
    elif param_map['encoding'] == 'xor-2048':
        # align to the 256-byte xor key-stream boundary
        pos_offset = pos % 256
        pos = pos - pos_offset

    # round the read size up to a multiple of 256 covering both offsets
    hp_len = 256 * math.ceil((hp_len + inside_offset + pos_offset) / 256)
    buffer = bytearray(hp_len)
    os.lseek(param_map['fd'], pos, os.SEEK_SET)
    read_buffer = os.read(param_map['fd'], len(buffer))
    if len(read_buffer) - pos_offset < 16:
        return None
    if param_map['encoding'] == 'aes-256-ctr':
        decipher = create_decipher(pos // 16, param_map)
        read_buffer = bytearray(decipher.decrypt(read_buffer))[pos_offset:]
    elif param_map['encoding'] == 'xor-2048':
        read_buffer = bytearray(b ^ param_map['encKey'][i % 256] for i, b in enumerate(read_buffer))[pos_offset:]
    if param_map['uncompressedBits']:
        try:
            if param_map['compression'] == 'gzip':
                read_buffer = zlib.decompress(read_buffer, zlib.MAX_WBITS | 16)
            elif param_map['compression'] == 'zstd':
                read_buffer = decompress_streaming(read_buffer, session_id)
        except Exception as e:
            print(f"PCAP uncompress issue: {pos} {len(buffer)} {read_buffer} {e}")
            return None
    if inside_offset:
        read_buffer = read_buffer[inside_offset:]
    # classic pcap record header is 16 bytes; Arkime short-header is 6
    header_len = 16 if param_map['shortHeader'] is None else 6
    if len(read_buffer) < header_len:
        if hp_len_arg == -1 and param_map['compression'] == 'zstd':
            # record straddles a zstd block: retry with two blocks
            return read_packet_internal(pos_arg, param_map['uncompressedBitsSize'] * 2, param_map, session_id)
        print(f"Not enough data {len(read_buffer)} for header {header_len}")
        return None
    # captured length: bytes 8..11 of a full header, or the first 2 bytes of
    # a short header
    packet_len = struct.unpack('>I' if param_map['bigEndian'] else '<I', read_buffer[8:12])[
        0] if param_map['shortHeader'] is None else \
        struct.unpack('>H' if param_map['bigEndian'] else '<H', read_buffer[:2])[0]
    if packet_len < 0 or packet_len > 0xffff:
        return None
    if header_len + packet_len <= len(read_buffer):
        if param_map['shortHeader'] is not None:
            # expand the 6-byte short header into a standard 16-byte one:
            # the 4-byte field packs (seconds-delta << 20 | microseconds)
            t = struct.unpack('<I', read_buffer[2:6])[0]
            sec = (t >> 20) + param_map['shortHeader']
            usec = t & 0xfffff
            new_buffer = bytearray(16 + packet_len)
            struct.pack_into('<I', new_buffer, 0, sec)
            struct.pack_into('<I', new_buffer, 4, usec)
            struct.pack_into('<I', new_buffer, 8, packet_len)
            struct.pack_into('<I', new_buffer, 12, packet_len)
            new_buffer[16:] = read_buffer[6:packet_len + 6]
            return new_buffer
        return read_buffer[:header_len + packet_len]

    # a caller-supplied length that still did not cover the record: give up
    if hp_len_arg != -1:
        return None

    # retry once with the exact record size
    return read_packet_internal(pos_arg, 16 + packet_len, param_map, session_id)
|
181
|
+
|
182
|
+
|
183
|
+
def read_packet(pos, param_map, session_id):
    """Read one packet record at logical position *pos*, waiting briefly for
    the file descriptor to become available.

    :param pos: packet position (see fix_pos / read_packet_internal)
    :param param_map: per-file state dict; must eventually contain 'fd'
    :param session_id: session id, used only for log messages downstream
    :return: raw packet record bytes, or None on failure
    """
    if 'fd' not in param_map or not param_map['fd']:
        time.sleep(0.01)
        # BUG FIX: the retry used to pass param_map['fd'] (an int / missing
        # key) where the whole param_map dict is expected, so the recursive
        # call crashed with a TypeError. Pass the dict through unchanged.
        # NOTE(review): this still spins forever if 'fd' never appears —
        # consider a bounded retry count.
        return read_packet(pos, param_map, session_id)
    return read_packet_internal(pos, -1, param_map, session_id)
|
188
|
+
|
189
|
+
|
190
|
+
def get_file_and_read_pos(session_id, file, pos_list):
    """Extract the packets of one session from a capture file and return them
    as a single in-memory pcap (global header + packet records).

    Decryption/compression parameters are taken from the Arkime *file*
    document (dek/kek, iv, uncompressedBits, compression).

    :param session_id: session id, used only for log messages
    :param file: Arkime file document (dict) with at least 'name'
    :param pos_list: absolute packet positions inside the file
    :return: bytearray with pcap data, or None if the file does not exist
    """
    filename = file['name']
    if not os.path.isfile(filename):
        # NOTE(review): the message interpolates nothing — the filename was
        # presumably meant to appear here.
        print(f"文件不存在:(unknown)")
        return None
    encoding = file.get('encoding', 'normal')
    encKey = None
    iv = None
    compression = None
    if 'dek' in file:
        # data-encryption key is stored encrypted under the kek
        dek = bytes.fromhex(file['dek'])
        encKey = AES.new(file['kek'].encode(), AES.MODE_CBC).decrypt(dek)

    if 'uncompressedBits' in file:
        uncompressedBits = file['uncompressedBits']
        uncompressedBitsSize = 2 ** uncompressedBits
        compression = 'gzip'  # default; may be overridden below
    else:
        uncompressedBits = None
        uncompressedBitsSize = 0
    if 'compression' in file:
        compression = file['compression']

    if 'iv' in file:
        # right-pad the stored IV to the full 16 bytes
        iv_ = bytes.fromhex(file['iv'])
        iv = bytearray(16)
        iv[:len(iv_)] = iv_
    fd = os.open(filename, os.O_RDONLY)
    param_map = {
        "fd": fd,
        "encoding": encoding,
        "iv": iv,
        "encKey": encKey,
        "uncompressedBits": uncompressedBits,
        "compression": compression,
        "uncompressedBitsSize": uncompressedBitsSize
    }
    res = bytearray()
    headBuffer, shortHeader, bigEndian, linkType, nanosecond = read_header(param_map, session_id)
    res.extend(headBuffer)
    param_map['shortHeader'] = shortHeader
    param_map['bigEndian'] = bigEndian
    # _________________________________
    # accumulate packet records through a fixed-size staging buffer
    byte_array = bytearray(0xfffe)
    next_packet = 0
    b_offset = 0
    packets = {}
    # packet_objs = []
    i = 0
    for pos in pos_list:
        packet_bytes = read_packet(pos, param_map, session_id)
        # if reture_obj:
        #     obj = decode_obj(packet_bytes, bigEndian, linkType, nanosecond, )
        #     packet_objs.append(copy.deepcopy(obj))
        if not packet_bytes:
            continue
        packets[i] = packet_bytes
        # flush packets to the staging buffer in arrival order
        while next_packet in packets:
            buffer = packets[next_packet]

            next_packet += 1
            # del packets[next_packet]
            # NOTE(review): next_packet is incremented TWICE per iteration
            # (here and above), which skips every other entry; the
            # commented-out del suggests a single increment plus deletion was
            # intended — confirm against the Arkime original.
            next_packet = next_packet + 1
            if b_offset + len(buffer) > len(byte_array):
                res.extend(byte_array[:b_offset])
                b_offset = 0
                byte_array = bytearray(0xfffe)
            byte_array[b_offset:b_offset + len(buffer)] = buffer
            b_offset += len(buffer)
        i = i + 1
    os.close(fd)
    res.extend(byte_array[:b_offset])
    return res
|
263
|
+
|
264
|
+
|
265
|
+
def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix):
    """Resolve a session's packetPos list to its capture file and read the
    session's packets from disk.

    :param id: session id (passed through to get_file_and_read_pos)
    :param node: Arkime capture node name
    :param packet_pos: raw packetPos list; packet_pos[0] is -(file id)
    :param esdb: project ES accessor providing get_file_by_file_id
    :param pcap_path_prefix: "origin" to keep the stored path, otherwise a
        replacement path prefix
    :return: pcap bytes for the session, or (None, None) if the file document
        is missing.
        NOTE(review): the arity differs between the two returns — callers
        unpacking two values will fail on the success path; confirm intent.
    """
    packetPos = packet_pos
    file = esdb.get_file_by_file_id(node=node, num=abs(packetPos[0]),
                                    prefix=None if pcap_path_prefix == "origin" else pcap_path_prefix)
    if file is None:
        return None, None
    # rewrite gap0-encoded positions to absolute offsets (in place)
    fix_pos(packetPos, file['packetPosEncoding'])
    # keep only the first file's group and drop its leading file-id marker
    pos_list = group_numbers(packetPos)[0]
    pos_list.pop(0)
    return get_file_and_read_pos(id, file, pos_list)
|
275
|
+
|
276
|
+
|
277
|
+
def parse_body(data):
    """Split one raw HTTP message into printable header and body strings.

    The body is de-chunked (Transfer-Encoding: chunked) and/or gunzipped
    (Content-Encoding: gzip) based on the header, then both parts are
    stripped to printable characters.

    :param data: raw HTTP message bytes (header + optional body)
    :return: (header_str, body_str)
    """
    if data.find(b"\r\n\r\n") != -1:
        # header and body are separated by the first blank line
        res = data.split(b"\r\n\r\n", 1)
        header = res[0]
        body = res[1]
    else:
        header = data
        body = b''
    chunked_pattern = pattern_chuncked.search(header)
    gzip_pattern = pattern_gzip.search(header)
    need_gzip = gzip_pattern and b'gzip' in gzip_pattern.group()
    if chunked_pattern and b'chunked' in chunked_pattern.group():
        # chunked body: decode chunks first, gunzip inside if needed
        body = parse_chunked_body(body, need_un_gzip=need_gzip)
    elif need_gzip:
        try:
            body = gzip.decompress(body)
        except:
            # best-effort: keep the compressed bytes if decompression fails
            print("解压失败")
            pass
    result_body_str = filter_visible_chars(body)
    return filter_visible_chars(header), result_body_str
|
298
|
+
|
299
|
+
|
300
|
+
def reassemble_session_pcap(reassemble_tcp_res, skey):
    """Pair reassembled TCP stream chunks into request/response exchanges.

    Chunks whose key equals *skey* are treated as requests; all other chunks
    are treated as the matching responses. Each completed exchange is
    appended as a dict with req_/res_ header, body, time and size fields.

    :param reassemble_tcp_res: output of reassemble_tcp_pcap (list of dicts
        with 'key', 'data', 'ts')
    :param skey: "ip:port" string identifying the client (request) side
    :return: list of exchange dicts
    """
    my_map = {
        'key': '',
        'req_header': '',
        'req_body': '',
        'req_time': 0,
        'req_size': 0,
        'res_header': '',
        'res_body': '',
        'res_time': 0,
        'res_size': 0,
    }
    packet_list = []
    for index, packet in enumerate(reassemble_tcp_res):
        header, body = parse_body(packet['data'])
        if index == len(reassemble_tcp_res) - 1:
            # last chunk: flush the exchange being built.
            # NOTE(review): this append happens BEFORE the last chunk's own
            # fields are stored below, and a final request chunk can trigger
            # the index != 0 append as well (duplicate entry) — confirm.
            packet_list.append(copy.deepcopy(my_map))
        if packet['key'] == skey:
            # a new request starts a new exchange; flush the previous one
            if index != 0:
                packet_list.append(copy.deepcopy(my_map))
            my_map = {
                'key': packet['key'],
                'req_header': '',
                'req_body': b'',
                'req_time': 0,
                'req_size': 0,
                'res_header': '',
                'res_body': b'',
                'res_time': 0,
                'res_size': 0,
            }
            my_map["req_header"] = header
            my_map["req_body"] = body
            my_map["req_time"] = packet['ts']
            my_map["req_size"] = len(packet['data'])
        else:
            my_map["res_header"] = header
            my_map["res_body"] = body
            my_map["res_time"] = packet['ts']
            my_map["res_size"] = len(packet['data'])
    return packet_list
|
341
|
+
|
342
|
+
|
343
|
+
def reassemble_tcp_pcap(p):
    """Reassemble the TCP payload stream of one session from scapy packets.

    Mirrors Arkime's viewer-side reassembly: keep only data-bearing,
    non-SYN/RST segments, detect 32-bit sequence-number wraparound, order the
    segments with a seq/ack comparator, then coalesce contiguous data per
    direction, dropping retransmissions.

    :param p: iterable of scapy packets (only those with TCP and Raw are used)
    :return: list of dicts {'key': 'ip:port', 'data': bytes, 'ts': float,
        'pkt': scapy packet}; an entry with an empty key marks a > 0xffff
        sequence gap
    """
    packets = [{'pkt': item} for item in p if TCP in item and Raw in item]
    packets2 = []
    info = {}
    keys = []
    for index, packet in enumerate(packets):
        data = packet['pkt'][Raw].load
        flags = packet['pkt'][TCP].flags
        seq = packet['pkt'][TCP].seq
        # skip empty segments and RST/SYN packets
        if len(data) == 0 or 'R' in flags or 'S' in flags:
            continue
        key = f"{packet['pkt'][IP].src}:{packet['pkt'][IP].sport}"
        if key not in info.keys():
            info[key] = {
                "min": seq,
                "max": seq,
                "wrapseq": False,
                "wrapack": False,
            }
            keys.append(key)
        elif info[key]["min"] > seq:
            info[key]['min'] = seq
        elif info[key]["max"] < seq:
            info[key]['max'] = seq
        packets2.append(packet)
    if len(keys) == 1:
        # one-sided capture: synthesize the opposite direction's key from the
        # first kept packet, seeding min/max with its ack number.
        # BUG FIX: this block indexed the packet *list* with the string 'pkt'
        # (packets['pkt'][IP]...), which raised TypeError; use the first kept
        # packet instead.
        first = packets2[0]
        key = f"{first['pkt'][IP].dst}:{first['pkt'][IP].dport}"
        ack = first['pkt'][TCP].ack
        info[key] = {
            "min": ack,
            "max": ack,
            "wrapseq": False,
            "wrapack": False,
        }
        keys.append(key)
    packets = packets2
    if len(packets) == 0:
        return []
    needwrap = False
    if info[keys[0]] and info[keys[0]]['max'] - info[keys[0]]['min'] > 0x7fffffff:
        info[keys[0]]['wrapseq'] = True
        info[keys[0]]['wrapack'] = True
        needwrap = True
    # NOTE(review): this branch marks wrapack on keys[0] rather than keys[1];
    # kept as in the original — confirm against the upstream algorithm.
    if info[keys[1]] and info[keys[1]]['max'] - info[keys[1]]['min'] > 0x7fffffff:
        info[keys[1]]['wrapseq'] = True
        info[keys[0]]['wrapack'] = True
        needwrap = True
    if needwrap:
        # BUG FIX: this loop used dict-style access (packet['ip']['addr1'],
        # packet['tcp']['seq']) on scapy packets, which raised TypeError at
        # runtime; rewritten with scapy field access, same adjustment logic.
        for packet in packets:
            key = f"{packet['pkt'][IP].src}:{packet['pkt'][TCP].sport}"
            if info[key]['wrapseq'] and packet['pkt'][TCP].seq < 0x7fffffff:
                packet['pkt'][TCP].seq += 0xffffffff
            if info[key]['wrapack'] and packet['pkt'][TCP].ack < 0x7fffffff:
                packet['pkt'][TCP].ack += 0xffffffff
    clientKey = f"{packets[0]['pkt'][IP].src}:{packets[0]['pkt'][IP].sport}"

    def compare_packets(a, b):
        # Same direction: order by sequence number. Opposite directions:
        # order one side's data end against the peer's acknowledged position.
        a_seq = a['pkt'][TCP].seq
        b_seq = b['pkt'][TCP].seq
        a_ack = a['pkt'][TCP].ack
        b_ack = b['pkt'][TCP].ack
        a_data = a['pkt'][Raw].load
        b_data = b['pkt'][Raw].load
        a_ip = a['pkt'][IP].src
        a_port = a['pkt'][TCP].sport
        b_port = b['pkt'][TCP].sport
        b_ip = b['pkt'][IP].src
        if a_ip == b_ip and a_port == b_port:
            return a_seq - b_seq
        if clientKey == f"{a_ip}:{a_port}":
            return (a_seq + len(a_data) - 1) - b_ack
        return a_ack - (b_seq + len(b_data) - 1)

    packets.sort(key=cmp_to_key(compare_packets))
    # Now divide up conversation
    clientSeq = 0
    hostSeq = 0
    previous = 0
    results = []
    for i, item in enumerate(packets):
        sip = item['pkt'][IP].src
        sport = item['pkt'][IP].sport
        seq = item['pkt'][TCP].seq
        data = item['pkt'][Raw].load
        pkey = f"{sip}:{sport}"
        seq_datalen = seq + len(data)
        # drop retransmissions: skip data we have already consumed
        if pkey == clientKey:
            if clientSeq >= seq_datalen:
                continue
            clientSeq = seq_datalen
        else:
            if hostSeq >= seq_datalen:
                continue
            hostSeq = seq_datalen
        if len(results) == 0 or pkey != results[len(results) - 1]['key']:
            # direction change (or first segment): open a new chunk
            previous = seq
            results.append({
                'key': pkey,
                'data': copy.deepcopy(data),
                'ts': float(item['pkt'].time),
                'pkt': item['pkt'],
            })
        elif seq - previous > 0xffff:
            # large sequence gap: emit an empty marker, then a new chunk
            results.append(
                {'key': '',
                 'data': b'',
                 'ts': float(item['pkt'].time),
                 'pkt': item['pkt'],
                 })
            previous = seq
            results.append({
                'key': pkey,
                'data': copy.deepcopy(data),
                'ts': float(item['pkt'].time),
                'pkt': item['pkt'],
            })
        else:
            # same direction, contiguous enough: append to the current chunk
            previous = seq
            results[-1]['data'] += data
    return results
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from scapy.packet import Raw
|
5
|
+
from scapy.utils import rdpcap
|
6
|
+
|
7
|
+
from xbase_util.common_util import get_res_status_code_list
|
8
|
+
from xbase_util.pcap_util import reassemble_tcp_pcap, reassemble_session_pcap
|
9
|
+
from xbase_util.xbase_constant import res_status_code_pattern
|
10
|
+
|
11
|
+
if __name__ == '__main__':
    # Ad-hoc smoke test: reassemble a local capture and derive simple
    # request/response statistics for one client endpoint.
    packets_scapy = reassemble_tcp_pcap(rdpcap("gzip2.pcap"))
    skey = '10.28.7.16:54398'
    streams = b""
    # NOTE(review): reassemble_tcp_pcap returns dicts, so `Raw in pkt` tests
    # dict keys and is presumably always False here — confirm whether raw
    # scapy packets were intended.
    for pkt in packets_scapy:
        if Raw in pkt:
            streams += pkt[Raw].load
    text_data = streams.decode('ascii', errors='ignore')
    all_packets = reassemble_session_pcap(packets_scapy, skey=skey)
    if len(all_packets) != 0:
        all_req_size = [item['req_size'] for item in all_packets if item['key'] == skey]
        all_res_size = [item['res_size'] for item in all_packets if item['key'] != skey]
        num_1, num_2, num_3, num_4, num_5 = get_res_status_code_list(all_packets)
        # number of request-header parameters (":" occurrences per header)
        req_header_count_list = [req['req_header'].count(":") for req in all_packets]
        # inter-request time gaps
        request_flattened_time = [item['req_time'] for item in all_packets]
        request_time_diffs = [request_flattened_time[i + 1] - request_flattened_time[i] for i in
                              range(len(request_flattened_time) - 1)]
        request_mean_diff = round(np.nanmean(request_time_diffs), 5) or 0
        request_variance_diff = round(np.nanvar(request_time_diffs), 5) or 0
        # inter-response time gaps
        response_flattened_time = [item['res_time'] for item in all_packets]
        response_time_diffs = [response_flattened_time[i + 1] - response_flattened_time[i] for i in
                               range(len(response_flattened_time) - 1)]
        response_mean_diff = round(np.nanmean(response_time_diffs), 5) or 0
        response_variance_diff = round(np.nanvar(response_time_diffs), 5) or 0

        # per-exchange round-trip durations (only fully-paired exchanges)
        time_period = [(abs(item['res_time'] - item['req_time'])) for item in
                       all_packets if item['res_time'] != 0 and item['req_time'] != 0]
|
@@ -2,6 +2,7 @@ README.md
|
|
2
2
|
setup.py
|
3
3
|
xbase_util/__init__.py
|
4
4
|
xbase_util/add_column_util.py
|
5
|
+
xbase_util/common_util.py
|
5
6
|
xbase_util/dangerous_util.py
|
6
7
|
xbase_util/es_db_util.py
|
7
8
|
xbase_util/esreq.py
|
@@ -9,9 +10,8 @@ xbase_util/geo_util.py
|
|
9
10
|
xbase_util/handle_features_util.py
|
10
11
|
xbase_util/packet_util.py
|
11
12
|
xbase_util/pcap_util.py
|
12
|
-
xbase_util/
|
13
|
+
xbase_util/test.py
|
13
14
|
xbase_util/xbase_constant.py
|
14
|
-
xbase_util/xbase_util.py
|
15
15
|
xbase_util.egg-info/PKG-INFO
|
16
16
|
xbase_util.egg-info/SOURCES.txt
|
17
17
|
xbase_util.egg-info/dependency_links.txt
|