xbase-util 0.8.2__tar.gz → 0.8.4__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xbase_util-0.8.2 → xbase_util-0.8.4}/PKG-INFO +1 -1
- {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.py +1 -1
- xbase_util-0.8.2/xbase_util/xbase_util.py → xbase_util-0.8.4/xbase_util/common_util.py +24 -25
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/packet_util.py +12 -12
- xbase_util-0.8.4/xbase_util/pcap_util.py +463 -0
- xbase_util-0.8.4/xbase_util/test.py +40 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/SOURCES.txt +2 -2
- xbase_util-0.8.2/xbase_util/pcap_util.py +0 -860
- xbase_util-0.8.2/xbase_util/segment.py +0 -105
- {xbase_util-0.8.2 → xbase_util-0.8.4}/README.md +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/setup.cfg +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/add_column_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/dangerous_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/esreq.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.8.2 → xbase_util-0.8.4}/xbase_util_assets/arkimeparse.js +0 -0
@@ -12,11 +12,20 @@ import numpy as np
|
|
12
12
|
import tldextract
|
13
13
|
from scapy.layers.dns import DNS
|
14
14
|
|
15
|
-
from xbase_util.xbase_constant import
|
15
|
+
from xbase_util.xbase_constant import parse_path, dns_domain_list
|
16
16
|
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def filter_visible_chars(data):
    """
    Drop non-printable characters and return the remainder as text.

    Printable ASCII (codes 32-126) is kept, together with tab (9),
    line feed (10) and carriage return (13); every other value is discarded.

    :param data: bytes-like object (or any iterable of ints); a ``str`` is
                 also accepted and filtered by code point
    :return: ``str`` containing only the retained characters
    """
    if isinstance(data, str):
        # Iterating a str yields characters, not ints; compare code points.
        data = map(ord, data)
    return ''.join(chr(b) for b in data if 32 <= b <= 126 or b in (9, 10, 13))
|
25
|
+
|
26
|
+
|
27
|
+
def parse_chunked_body(data: bytes, need_un_gzip=False) -> bytes:
|
28
|
+
body = b''
|
20
29
|
while True:
|
21
30
|
chunk_size_end = data.find(b"\r\n")
|
22
31
|
if chunk_size_end == -1:
|
@@ -29,10 +38,13 @@ def parse_chunked_body(data: bytes) -> bytes:
|
|
29
38
|
chunk_end = chunk_start + chunk_size
|
30
39
|
body += data[chunk_start:chunk_end]
|
31
40
|
data = data[chunk_end + 2:]
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
41
|
+
if need_un_gzip:
|
42
|
+
try:
|
43
|
+
return gzip.decompress(body)
|
44
|
+
except gzip.BadGzipFile:
|
45
|
+
print("解压错误")
|
46
|
+
return body
|
47
|
+
else:
|
36
48
|
return body
|
37
49
|
|
38
50
|
|
@@ -79,20 +91,17 @@ def get_ua_duplicate_count(text_data):
|
|
79
91
|
return sum(count)
|
80
92
|
|
81
93
|
|
82
|
-
def get_res_status_code_list(
|
94
|
+
def get_res_status_code_list(all_packets):
|
83
95
|
value_res = []
|
84
|
-
res = []
|
85
96
|
num_1 = 0
|
86
97
|
num_2 = 0
|
87
98
|
num_3 = 0
|
88
99
|
num_4 = 0
|
89
100
|
num_5 = 0
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
if m:
|
95
|
-
value_res.append(int(m.group(0)))
|
101
|
+
for item in all_packets:
|
102
|
+
match = re.search(r'HTTP/\d\.\d (\d{3})', item['res_header'])
|
103
|
+
if match:
|
104
|
+
value_res.append(int(match.group(1)))
|
96
105
|
for value in value_res:
|
97
106
|
if 0 <= value < 200:
|
98
107
|
num_1 = num_1 + 1
|
@@ -182,16 +191,6 @@ def get_uri_depth(url):
|
|
182
191
|
return 0
|
183
192
|
|
184
193
|
|
185
|
-
def firstOrZero(param):
|
186
|
-
if type(param).__name__ == 'list':
|
187
|
-
if (len(param)) != 0:
|
188
|
-
return param[0]
|
189
|
-
else:
|
190
|
-
return 0
|
191
|
-
else:
|
192
|
-
return 0
|
193
|
-
|
194
|
-
|
195
194
|
def get_statistic_fields(packets):
|
196
195
|
length_ranges = {
|
197
196
|
"0_19": (0, 19),
|
@@ -1,9 +1,8 @@
|
|
1
1
|
import copy
|
2
|
-
import re
|
3
2
|
|
4
3
|
from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
|
5
|
-
features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern,
|
6
|
-
|
4
|
+
features_key, plain_body_columns, http_version_pattern, http_req_method_pattern, http_req_path_pattern, \
|
5
|
+
res_status_code_pattern
|
7
6
|
|
8
7
|
|
9
8
|
def content_type_is_plain(packet):
|
@@ -23,15 +22,6 @@ def content_type_is_plain(packet):
|
|
23
22
|
return False
|
24
23
|
|
25
24
|
|
26
|
-
def filter_visible_chars(data):
|
27
|
-
"""
|
28
|
-
过滤不可见字符,仅保留可打印的ASCII字符
|
29
|
-
:param data:
|
30
|
-
:return:
|
31
|
-
"""
|
32
|
-
return ''.join(chr(b) for b in data if 32 <= b <= 126 or b in (9, 10, 13))
|
33
|
-
|
34
|
-
|
35
25
|
def get_all_columns(
|
36
26
|
contains_packet_column=False,
|
37
27
|
contains_src_dst_column=False,
|
@@ -62,6 +52,16 @@ def get_all_columns(
|
|
62
52
|
# req_body_pattern = re.compile(
|
63
53
|
# r"(GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d[\s\S]*?(?=HTTP/\d\.\d)", re.DOTALL)
|
64
54
|
|
55
|
+
def firstOrZero(param):
    """
    Return the first element of a list, or 0 when there is none.

    :param param: any value
    :return: ``param[0]`` when ``param`` is a non-empty list; 0 for an empty
             list and for every non-list value (tuples and strings included)
    """
    if isinstance(param, list) and len(param) != 0:
        return param[0]
    return 0
|
63
|
+
|
64
|
+
|
65
65
|
def get_header_value(header_set, value):
|
66
66
|
result = [item for item in header_set if value in item]
|
67
67
|
if len(result) != 0:
|
@@ -0,0 +1,463 @@
|
|
1
|
+
import copy
|
2
|
+
import gzip
|
3
|
+
import math
|
4
|
+
import os
|
5
|
+
import struct
|
6
|
+
import time
|
7
|
+
import zlib
|
8
|
+
from functools import cmp_to_key
|
9
|
+
|
10
|
+
from Crypto.Cipher import AES
|
11
|
+
from scapy.layers.inet import TCP, IP
|
12
|
+
from scapy.packet import Raw
|
13
|
+
from zstandard import ZstdDecompressor
|
14
|
+
|
15
|
+
from xbase_util.common_util import parse_chunked_body, filter_visible_chars
|
16
|
+
from xbase_util.xbase_constant import pattern_chuncked, pattern_gzip
|
17
|
+
|
18
|
+
|
19
|
+
def fix_pos(pos, packetPosEncoding):
    """
    Expand a "gap0"-encoded packet position list into absolute values, in place.

    Negative entries are left untouched and reset the running position.
    A positive entry is a gap added to the previous absolute position; a zero
    entry repeats the most recent gap. Any other encoding leaves ``pos`` as is.

    :param pos: mutable list of ints (modified in place); may be None or empty
    :param packetPosEncoding: encoding name; only ``"gap0"`` triggers decoding
    :return: None
    """
    if pos is None or len(pos) == 0:
        return
    if packetPosEncoding != "gap0":
        return
    last = 0
    lastgap = 0
    for i, value in enumerate(pos):
        if value < 0:
            last = 0
            continue
        if value == 0:
            value = last + lastgap
        else:
            lastgap = value
            value += last
        pos[i] = value
        last = value
|
35
|
+
|
36
|
+
|
37
|
+
def group_numbers(nums):
    """
    Split a flat sequence into groups, each opened by a negative number.

    Every negative value starts a new group (and is that group's first
    element); non-negative values are appended to the most recent group.
    Non-negative values that appear before the first negative one are dropped.

    :param nums: iterable of numbers
    :return: list of lists
    """
    groups = []
    for num in nums:
        if num < 0:
            groups.append([num])
        elif groups:
            groups[-1].append(num)
    return groups
|
45
|
+
|
46
|
+
|
47
|
+
def decompress_streaming(compressed_data, session_id):
    """
    Decompress zstd data through a streaming reader.

    This is best-effort: any failure is printed together with the session id
    and an empty ``bytearray`` is returned instead of raising.

    :param compressed_data: source accepted by ``ZstdDecompressor.stream_reader``
    :param session_id: identifier used only in the error message
    :return: the decompressed data, or an empty ``bytearray`` on failure
    """
    try:
        decompressor = ZstdDecompressor()
        with decompressor.stream_reader(compressed_data) as reader:
            return reader.read()
    except Exception as e:
        print(f"解码错误:{e} {session_id}")
        return bytearray()
|
56
|
+
|
57
|
+
|
58
|
+
def readUInt32BE(buffer, offset):
    """
    Read an unsigned 32-bit big-endian integer from ``buffer`` at ``offset``.

    :param buffer: bytes-like object
    :param offset: byte offset of the first of the four bytes
    :return: int
    :raises struct.error: if fewer than four bytes are available at ``offset``
    """
    # unpack_from reads in place instead of copying a 4-byte slice first.
    return struct.unpack_from('>I', buffer, offset)[0]
|
60
|
+
|
61
|
+
|
62
|
+
def readUInt32LE(buffer, offset):
    """
    Read an unsigned 32-bit little-endian integer from ``buffer`` at ``offset``.

    :param buffer: bytes-like object
    :param offset: byte offset of the first of the four bytes
    :return: int
    :raises struct.error: if fewer than four bytes are available at ``offset``
    """
    # unpack_from reads in place instead of copying a 4-byte slice first.
    return struct.unpack_from('<I', buffer, offset)[0]
|
64
|
+
|
65
|
+
|
66
|
+
def writeUInt32BE(buffer, pos, value):
    """
    Write ``value`` as an unsigned 32-bit big-endian integer into ``buffer``.

    :param buffer: writable buffer (e.g. ``bytearray``), modified in place
    :param pos: byte offset at which the four bytes are written
    :param value: integer in the range 0..0xffffffff
    :return: the same ``buffer`` object
    :raises struct.error: if ``value`` is out of range or the buffer is too short
    """
    struct.pack_into('>I', buffer, pos, value)
    return buffer
|
69
|
+
|
70
|
+
|
71
|
+
def read_header(param_map, session_id):
    """
    Read and decode the 24-byte PCAP global header of an opened capture file.

    The first 64 bytes of the file are read, optionally decrypted
    (``aes-256-ctr`` or ``xor-2048``) and decompressed (``gzip`` or ``zstd``),
    then truncated to 24 bytes and inspected.

    :param param_map: dict with keys ``fd``, ``encoding``, ``iv``, ``encKey``,
                      ``uncompressedBits`` and ``compression``
    :param session_id: identifier forwarded to ``decompress_streaming`` for
                       its error message
    :return: tuple ``(headBuffer, shortHeader, bigEndian, linkType, nanosecond)``
             where ``headBuffer`` is a ``bytearray`` of at most 24 bytes and
             ``shortHeader`` is None unless the magic is 0xa1b2c3d5
    :raises ValueError: if the magic number is not a known PCAP magic
    """
    fd = param_map['fd']
    # The global header lives at the start of the file, whatever the current
    # descriptor offset is.
    os.lseek(fd, 0, os.SEEK_SET)
    # os.read returns immutable bytes; the xor branch and the short-header
    # magic reset below both modify the buffer in place.
    headBuffer = bytearray(os.read(fd, 64))
    shortHeader = None

    encoding = param_map['encoding']
    if encoding == 'aes-256-ctr':
        iv = param_map.get('iv')
        if iv is not None:
            iv[12:16] = struct.pack('>I', 0)
            # PyCryptodome rejects a nonce as long as the block: pass the
            # first 12 bytes as nonce and the last 4 as the initial counter.
            cipher = AES.new(param_map['encKey'], AES.MODE_CTR,
                             nonce=bytes(iv[:12]), initial_value=0)
            headBuffer = bytearray(cipher.decrypt(bytes(headBuffer)))
        else:
            print("读取头部信息失败,iv向量为空")
    elif encoding == 'xor-2048':
        key = param_map['encKey']
        for i in range(len(headBuffer)):
            headBuffer[i] ^= key[i % 256]

    if param_map['uncompressedBits']:
        if param_map['compression'] == 'gzip':
            # Only a 64-byte prefix of the stream is available, so use a
            # decompress object, which tolerates truncated input.
            inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            headBuffer = bytearray(inflater.decompress(bytes(headBuffer)))
        elif param_map['compression'] == 'zstd':
            headBuffer = bytearray(decompress_streaming(bytes(headBuffer), session_id))

    headBuffer = headBuffer[:24]
    magic = struct.unpack_from('<I', headBuffer, 0)[0]
    bigEndian = magic in (0xd4c3b2a1, 0x4d3cb2a1)
    nanosecond = magic in (0xa1b23c4d, 0x4d3cb2a1)
    if not bigEndian and magic not in {0xa1b2c3d4, 0xa1b23c4d, 0xa1b2c3d5}:
        raise ValueError("Corrupt PCAP header")
    if magic == 0xa1b2c3d5:
        shortHeader = struct.unpack_from('<I', headBuffer, 8)[0]
        headBuffer[0] = 0xd4  # Reset header to normal
    linkType = struct.unpack_from('>I' if bigEndian else '<I', headBuffer, 20)[0]
    return headBuffer, shortHeader, bigEndian, linkType, nanosecond
|
103
|
+
|
104
|
+
|
105
|
+
def create_decipher(pos, param_map):
    """
    Build an AES-256-CTR cipher positioned at a given 16-byte block index.

    The block index is stored big-endian in the last four bytes of
    ``param_map['iv']`` (modified in place), and the cipher is created with
    the first 12 IV bytes as nonce and ``pos`` as the initial counter.

    :param pos: index of the 16-byte block at which decryption starts
    :param param_map: dict providing ``iv`` (16-byte ``bytearray``) and ``encKey``
    :return: a PyCryptodome CTR cipher object
    """
    iv = param_map['iv']
    # Offset 12 receives the value ``pos`` (the arguments used to be swapped,
    # writing the constant 12 at offset ``pos``).
    writeUInt32BE(iv, 12, pos)
    # PyCryptodome rejects a nonce as long as the block, so the 16-byte IV
    # is split into a 12-byte nonce plus a 4-byte counter.
    return AES.new(param_map['encKey'], AES.MODE_CTR, nonce=bytes(iv[:12]), initial_value=pos)
|
108
|
+
|
109
|
+
|
110
|
+
def read_packet_internal(pos_arg, hp_len_arg, param_map, session_id):
    """
    Read one packet record (record header plus payload) from the capture file.

    A window of the file is read at ``pos_arg`` (aligned for the cipher in
    use), decrypted and decompressed as configured, and the packet length is
    taken from the record header. If the first window is too small, the
    function calls itself once more with a window sized for the packet.

    :param pos_arg: packet position; for compressed files the low
                    ``uncompressedBits`` bits are the offset inside the
                    uncompressed block and the remaining bits the file offset
    :param hp_len_arg: number of bytes to read, or -1 for the default window
    :param param_map: dict with ``fd``, ``encoding``, ``encKey``, ``iv``,
                      ``compression``, ``uncompressedBits``,
                      ``uncompressedBitsSize``, ``shortHeader``, ``bigEndian``
    :param session_id: identifier used only in error messages
    :return: bytes-like record with a 16-byte record header (short 6-byte
             headers are expanded to 16 bytes), or None when the packet
             cannot be read
    """
    pos = pos_arg
    hp_len = hp_len_arg
    if hp_len == -1:
        # Default window: one uncompressed block for zstd, otherwise 2 KiB.
        if param_map['compression'] == "zstd":
            hp_len = param_map['uncompressedBitsSize']
        else:
            hp_len = 2048

    inside_offset = 0
    if param_map['uncompressedBits']:
        block_size = param_map['uncompressedBitsSize']
        inside_offset = pos & (block_size - 1)
        # Integer division: float division loses precision on large offsets.
        pos = pos // block_size

    # Block ciphers can only start decrypting on a block boundary.
    pos_offset = 0
    if param_map['encoding'] == 'aes-256-ctr':
        pos_offset = pos % 16
        pos = pos - pos_offset
    elif param_map['encoding'] == 'xor-2048':
        pos_offset = pos % 256
        pos = pos - pos_offset

    hp_len = 256 * math.ceil((hp_len + inside_offset + pos_offset) / 256)
    os.lseek(param_map['fd'], pos, os.SEEK_SET)
    read_buffer = os.read(param_map['fd'], hp_len)
    if len(read_buffer) - pos_offset < 16:
        return None

    if param_map['encoding'] == 'aes-256-ctr':
        decipher = create_decipher(pos // 16, param_map)
        read_buffer = bytearray(decipher.decrypt(read_buffer))[pos_offset:]
    elif param_map['encoding'] == 'xor-2048':
        read_buffer = bytearray(b ^ param_map['encKey'][i % 256] for i, b in enumerate(read_buffer))[pos_offset:]

    if param_map['uncompressedBits']:
        try:
            if param_map['compression'] == 'gzip':
                # The window usually ends mid-stream; a decompress object
                # returns what is available instead of raising on truncation.
                inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
                read_buffer = inflater.decompress(bytes(read_buffer))
            elif param_map['compression'] == 'zstd':
                read_buffer = decompress_streaming(read_buffer, session_id)
        except Exception as e:
            print(f"PCAP uncompress issue: {pos} {hp_len} {read_buffer} {e}")
            return None
    if inside_offset:
        read_buffer = read_buffer[inside_offset:]

    short_header = param_map['shortHeader']
    header_len = 16 if short_header is None else 6
    if len(read_buffer) < header_len:
        if hp_len_arg == -1 and param_map['compression'] == 'zstd':
            return read_packet_internal(pos_arg, param_map['uncompressedBitsSize'] * 2, param_map, session_id)
        print(f"Not enough data {len(read_buffer)} for header {header_len}")
        return None

    order = '>' if param_map['bigEndian'] else '<'
    if short_header is None:
        packet_len = struct.unpack_from(order + 'I', read_buffer, 8)[0]
    else:
        packet_len = struct.unpack_from(order + 'H', read_buffer, 0)[0]
    if packet_len < 0 or packet_len > 0xffff:
        return None

    if header_len + packet_len <= len(read_buffer):
        if short_header is not None:
            # Expand the 6-byte short record header into a standard 16-byte
            # one: the high bits of ``t`` are seconds relative to the file's
            # base time, the low 20 bits are microseconds.
            t = struct.unpack_from('<I', read_buffer, 2)[0]
            sec = (t >> 20) + short_header
            usec = t & 0xfffff
            new_buffer = bytearray(16 + packet_len)
            struct.pack_into('<IIII', new_buffer, 0, sec, usec, packet_len, packet_len)
            new_buffer[16:] = read_buffer[6:packet_len + 6]
            return new_buffer
        return read_buffer[:header_len + packet_len]

    if hp_len_arg != -1:
        # Already retried with an explicit size; give up.
        return None

    return read_packet_internal(pos_arg, 16 + packet_len, param_map, session_id)
|
181
|
+
|
182
|
+
|
183
|
+
def read_packet(pos, param_map, session_id):
    """
    Read the packet record stored at ``pos`` using the default read window.

    If ``param_map`` has no file descriptor yet, the call waits briefly for
    one to appear and then gives up. The retry used to recurse with
    ``param_map['fd']`` in place of ``param_map``, which raised instead of
    waiting.

    :param pos: packet position inside the capture file
    :param param_map: reader state dict; must contain an open ``fd``
    :param session_id: identifier used only in error messages
    :return: the packet record, or None when no descriptor is available or
             the packet cannot be read
    """
    attempts = 5
    for _ in range(attempts):
        if param_map.get('fd') is not None:
            break
        time.sleep(0.01)
    else:
        return None
    return read_packet_internal(pos, -1, param_map, session_id)
|
188
|
+
|
189
|
+
|
190
|
+
def get_file_and_read_pos(session_id, file, pos_list):
    """
    Extract the packets at ``pos_list`` from a capture file into one PCAP blob.

    The result is the 24-byte global header followed by every packet record
    that could be read, in ``pos_list`` order. Unreadable packets are skipped.

    :param session_id: identifier used only in error messages
    :param file: file description dict; ``name`` is required, ``encoding``,
                 ``dek``/``kek``, ``iv``, ``uncompressedBits`` and
                 ``compression`` are optional
    :param pos_list: iterable of absolute packet positions
    :return: ``bytearray`` with the PCAP data, or None if the file is missing
    :raises ValueError: if the file does not start with a valid PCAP header
    """
    filename = file['name']
    if not os.path.isfile(filename):
        print(f"文件不存在:{filename}")
        return None

    encoding = file.get('encoding', 'normal')
    encKey = None
    if 'dek' in file:
        dek = bytes.fromhex(file['dek'])
        # NOTE(review): no IV is passed, so PyCryptodome picks a random one
        # and the first decrypted block is presumably garbage - confirm how
        # the key-encryption key and IV are meant to be derived.
        encKey = AES.new(file['kek'].encode(), AES.MODE_CBC).decrypt(dek)

    if 'uncompressedBits' in file:
        uncompressedBits = file['uncompressedBits']
        uncompressedBitsSize = 2 ** uncompressedBits
        compression = 'gzip'  # default when the file does not name a codec
    else:
        uncompressedBits = None
        uncompressedBitsSize = 0
        compression = None
    if 'compression' in file:
        compression = file['compression']

    iv = None
    if 'iv' in file:
        # Left-align the stored IV in a 16-byte buffer; the tail is the
        # block counter that the readers overwrite.
        iv_ = bytes.fromhex(file['iv'])
        iv = bytearray(16)
        iv[:len(iv_)] = iv_

    # O_BINARY only exists (and only matters) on Windows.
    fd = os.open(filename, os.O_RDONLY | getattr(os, 'O_BINARY', 0))
    try:
        param_map = {
            "fd": fd,
            "encoding": encoding,
            "iv": iv,
            "encKey": encKey,
            "uncompressedBits": uncompressedBits,
            "compression": compression,
            "uncompressedBitsSize": uncompressedBitsSize,
        }
        headBuffer, shortHeader, bigEndian, _link_type, _nanosecond = read_header(param_map, session_id)
        param_map['shortHeader'] = shortHeader
        param_map['bigEndian'] = bigEndian

        res = bytearray(headBuffer)
        # Reads are synchronous, so packets can be appended as they arrive.
        # The previous reorder buffer advanced its cursor twice per packet
        # and dropped every second one.
        for pos in pos_list:
            packet_bytes = read_packet(pos, param_map, session_id)
            if packet_bytes:
                res.extend(packet_bytes)
    finally:
        # Close the descriptor even when the header is corrupt.
        os.close(fd)
    return res
|
263
|
+
|
264
|
+
|
265
|
+
def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix):
    """
    Load the raw PCAP bytes of one session from the capture file on disk.

    The first entry of ``packet_pos`` is a negative file number; the file
    description is looked up through ``esdb`` and the positions of the first
    file group are read from it.

    NOTE(review): only the first file group is read, so packets of a session
    that continues in another capture file are not returned. The "not found"
    result is the tuple ``(None, None)`` while success returns a single value;
    kept as is for existing callers.

    :param id: session id, used only in error messages
    :param node: capture node name passed to ``esdb.get_file_by_file_id``
    :param packet_pos: list of packet positions; decoded in place by ``fix_pos``
    :param esdb: object exposing ``get_file_by_file_id(node=, num=, prefix=)``
    :param pcap_path_prefix: path prefix, or ``"origin"`` to use the stored path
    :return: the value of ``get_file_and_read_pos`` or ``(None, None)`` when
             there are no positions or the file is unknown
    """
    if not packet_pos:
        return None, None
    file = esdb.get_file_by_file_id(node=node, num=abs(packet_pos[0]),
                                    prefix=None if pcap_path_prefix == "origin" else pcap_path_prefix)
    if file is None:
        return None, None
    # The encoding field may be absent; fix_pos ignores anything but "gap0".
    fix_pos(packet_pos, file.get('packetPosEncoding'))
    groups = group_numbers(packet_pos)
    if not groups:
        return None, None
    pos_list = groups[0]
    pos_list.pop(0)  # drop the leading (negative) file number
    return get_file_and_read_pos(id, file, pos_list)
|
275
|
+
|
276
|
+
|
277
|
+
def parse_body(data):
    """
    Split a raw HTTP message into header and body text.

    The message is split at the first blank line. When the header matches
    ``pattern_chuncked`` the body is de-chunked (and gunzipped if the header
    also matches ``pattern_gzip``); otherwise a gzip body is decompressed
    directly. A body that fails to decompress is kept unchanged. Both parts
    are passed through ``filter_visible_chars``.

    :param data: raw message bytes
    :return: tuple ``(header_text, body_text)`` of ``str``
    """
    header, _sep, body = data.partition(b"\r\n\r\n")

    chunked_match = pattern_chuncked.search(header)
    gzip_match = pattern_gzip.search(header)
    need_gzip = bool(gzip_match and b'gzip' in gzip_match.group())

    if chunked_match and b'chunked' in chunked_match.group():
        body = parse_chunked_body(body, need_un_gzip=need_gzip)
    elif need_gzip:
        try:
            body = gzip.decompress(body)
        except (OSError, EOFError, zlib.error):
            # Truncated or not actually gzip: keep the raw body.
            print("解压失败")

    return filter_visible_chars(header), filter_visible_chars(body)
|
298
|
+
|
299
|
+
|
300
|
+
def reassemble_session_pcap(reassemble_tcp_res, skey):
    """
    Pair reassembled TCP segments into HTTP request/response exchanges.

    A segment whose ``key`` equals ``skey`` is a request and starts a new
    exchange; any other segment fills the response side of the current
    exchange. Segments that arrive before the first request end up in an
    exchange with an empty ``key``.

    The final exchange is appended after its last segment has been applied.
    It used to be appended one step early, which lost the last response.

    :param reassemble_tcp_res: list of dicts with ``key``, ``data`` (bytes)
                               and ``ts``, as returned by ``reassemble_tcp_pcap``
    :param skey: ``"ip:port"`` key of the requesting side
    :return: list of dicts with keys ``key``, ``req_header``, ``req_body``,
             ``req_time``, ``req_size``, ``res_header``, ``res_body``,
             ``res_time``, ``res_size``; header and body values are ``str``
    """

    def new_exchange(key=''):
        # Bodies default to '' so they have the same type as parsed bodies.
        return {
            'key': key,
            'req_header': '',
            'req_body': '',
            'req_time': 0,
            'req_size': 0,
            'res_header': '',
            'res_body': '',
            'res_time': 0,
            'res_size': 0,
        }

    packet_list = []
    current = new_exchange()
    for index, packet in enumerate(reassemble_tcp_res):
        header, body = parse_body(packet['data'])
        if packet['key'] == skey:
            if index != 0:
                # A new request closes the exchange collected so far.
                packet_list.append(current)
            current = new_exchange(packet['key'])
            current["req_header"] = header
            current["req_body"] = body
            current["req_time"] = packet['ts']
            current["req_size"] = len(packet['data'])
        else:
            current["res_header"] = header
            current["res_body"] = body
            current["res_time"] = packet['ts']
            current["res_size"] = len(packet['data'])
    if len(reassemble_tcp_res) != 0:
        packet_list.append(current)
    return packet_list
|
341
|
+
|
342
|
+
|
343
|
+
def reassemble_tcp_pcap(p):
    """
    Reassemble the TCP payloads of a capture into alternating direction runs.

    Packets without payload and SYN/RST packets are ignored. The rest are
    ordered by sequence and acknowledgement numbers (with 32-bit wrap-around
    compensation), retransmissions are skipped, and consecutive payloads from
    the same sender are concatenated. A jump of more than 0xffff in sequence
    number within one direction inserts an empty marker entry (``key == ''``)
    and starts a new entry.

    NOTE(review): continuation data is appended without regard to overlap, so
    partially overlapping retransmissions would duplicate bytes.

    :param p: iterable of scapy packets (e.g. the result of ``rdpcap``)
    :return: list of dicts with ``key`` (``"src_ip:src_port"``), ``data``
             (bytes), ``ts`` (float capture time) and ``pkt`` (first packet
             of the run)
    """
    # Only IPv4/TCP packets carrying a payload can be keyed and ordered here.
    candidates = [item for item in p if TCP in item and IP in item and Raw in item]

    packets = []
    info = {}
    keys = []
    for pkt in candidates:
        tcp = pkt[TCP]
        data = pkt[Raw].load
        flags = tcp.flags
        if len(data) == 0 or 'R' in flags or 'S' in flags:
            continue
        key = f"{pkt[IP].src}:{tcp.sport}"
        seq = tcp.seq
        if key not in info:
            info[key] = {"min": seq, "max": seq, "wrapseq": False, "wrapack": False}
            keys.append(key)
        elif info[key]["min"] > seq:
            info[key]["min"] = seq
        elif info[key]["max"] < seq:
            info[key]["max"] = seq
        # Keep seq/ack next to the packet so wrap compensation can adjust
        # them without touching the scapy object.
        packets.append({'pkt': pkt, 'key': key, 'seq': seq, 'ack': tcp.ack, 'data': data})

    if len(packets) == 0:
        return []

    if len(keys) == 1:
        # Only one side sent data: synthesise the peer from the first packet.
        first = candidates[0]
        key = f"{first[IP].dst}:{first[TCP].dport}"
        ack = first[TCP].ack
        info[key] = {"min": ack, "max": ack, "wrapseq": False, "wrapack": False}
        keys.append(key)

    # A sequence range wider than 2**31 means the counter wrapped; the peer's
    # acknowledgement numbers wrap with it.
    needwrap = False
    first_key, second_key = keys[0], keys[1]
    if info[first_key]['max'] - info[first_key]['min'] > 0x7fffffff:
        info[first_key]['wrapseq'] = True
        info[second_key]['wrapack'] = True
        needwrap = True
    if info[second_key]['max'] - info[second_key]['min'] > 0x7fffffff:
        info[second_key]['wrapseq'] = True
        info[first_key]['wrapack'] = True
        needwrap = True
    if needwrap:
        for entry in packets:
            state = info[entry['key']]
            if state['wrapseq'] and entry['seq'] < 0x7fffffff:
                entry['seq'] += 0xffffffff
            if state['wrapack'] and entry['ack'] < 0x7fffffff:
                entry['ack'] += 0xffffffff

    clientKey = packets[0]['key']

    def compare_packets(a, b):
        # Same direction: order by sequence number. Opposite directions:
        # order the data of one side against the acknowledgements of the other.
        if a['key'] == b['key']:
            return a['seq'] - b['seq']
        if clientKey == a['key']:
            return (a['seq'] + len(a['data']) - 1) - b['ack']
        return a['ack'] - (b['seq'] + len(b['data']) - 1)

    packets.sort(key=cmp_to_key(compare_packets))

    # Now divide up conversation
    clientSeq = 0
    hostSeq = 0
    previous = 0
    results = []
    for entry in packets:
        pkey = entry['key']
        seq = entry['seq']
        data = entry['data']
        seq_datalen = seq + len(data)
        # Skip data that was already delivered (retransmissions).
        if pkey == clientKey:
            if clientSeq >= seq_datalen:
                continue
            clientSeq = seq_datalen
        else:
            if hostSeq >= seq_datalen:
                continue
            hostSeq = seq_datalen

        ts = float(entry['pkt'].time)
        if len(results) == 0 or pkey != results[-1]['key']:
            previous = seq
            results.append({'key': pkey, 'data': data, 'ts': ts, 'pkt': entry['pkt']})
        elif seq - previous > 0xffff:
            # Larger than the maximum window: packets are missing in between.
            results.append({'key': '', 'data': b'', 'ts': ts, 'pkt': entry['pkt']})
            previous = seq
            results.append({'key': pkey, 'data': data, 'ts': ts, 'pkt': entry['pkt']})
        else:
            previous = seq
            results[-1]['data'] += data
    return results
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
from scapy.packet import Raw
|
5
|
+
from scapy.utils import rdpcap
|
6
|
+
|
7
|
+
from xbase_util.common_util import get_res_status_code_list
|
8
|
+
from xbase_util.pcap_util import reassemble_tcp_pcap, reassemble_session_pcap
|
9
|
+
from xbase_util.xbase_constant import res_status_code_pattern
|
10
|
+
|
11
|
+
if __name__ == '__main__':
    # Manual smoke run: reassemble one capture and derive per-session features.
    packets_scapy = reassemble_tcp_pcap(rdpcap("gzip2.pcap"))
    skey = '10.28.7.16:54398'
    # reassemble_tcp_pcap returns dicts, not scapy packets; the payload of
    # each run is under 'data'.
    streams = b"".join(item['data'] for item in packets_scapy)
    text_data = streams.decode('ascii', errors='ignore')
    all_packets = reassemble_session_pcap(packets_scapy, skey=skey)
    if len(all_packets) != 0:
        all_req_size = [item['req_size'] for item in all_packets if item['key'] == skey]
        # NOTE(review): exchanges normally carry key == skey, so this list is
        # presumably empty most of the time - confirm the intended filter.
        all_res_size = [item['res_size'] for item in all_packets if item['key'] != skey]
        num_1, num_2, num_3, num_4, num_5 = get_res_status_code_list(all_packets)
        # Number of header parameters per request
        req_header_count_list = [req['req_header'].count(":") for req in all_packets]
        # Intervals between consecutive requests
        request_flattened_time = [item['req_time'] for item in all_packets]
        request_time_diffs = [later - earlier for earlier, later in
                              zip(request_flattened_time, request_flattened_time[1:])]
        # nanmean/nanvar of an empty list is NaN, and NaN is truthy, so the
        # fallback to 0 has to be decided before calling them.
        request_mean_diff = round(np.nanmean(request_time_diffs), 5) if request_time_diffs else 0
        request_variance_diff = round(np.nanvar(request_time_diffs), 5) if request_time_diffs else 0
        # Intervals between consecutive responses
        response_flattened_time = [item['res_time'] for item in all_packets]
        response_time_diffs = [later - earlier for earlier, later in
                               zip(response_flattened_time, response_flattened_time[1:])]
        response_mean_diff = round(np.nanmean(response_time_diffs), 5) if response_time_diffs else 0
        response_variance_diff = round(np.nanvar(response_time_diffs), 5) if response_time_diffs else 0

        # Request-to-response latency for exchanges that have both timestamps
        time_period = [(abs(item['res_time'] - item['req_time'])) for item in
                       all_packets if item['res_time'] != 0 and item['req_time'] != 0]
|
@@ -2,6 +2,7 @@ README.md
|
|
2
2
|
setup.py
|
3
3
|
xbase_util/__init__.py
|
4
4
|
xbase_util/add_column_util.py
|
5
|
+
xbase_util/common_util.py
|
5
6
|
xbase_util/dangerous_util.py
|
6
7
|
xbase_util/es_db_util.py
|
7
8
|
xbase_util/esreq.py
|
@@ -9,9 +10,8 @@ xbase_util/geo_util.py
|
|
9
10
|
xbase_util/handle_features_util.py
|
10
11
|
xbase_util/packet_util.py
|
11
12
|
xbase_util/pcap_util.py
|
12
|
-
xbase_util/
|
13
|
+
xbase_util/test.py
|
13
14
|
xbase_util/xbase_constant.py
|
14
|
-
xbase_util/xbase_util.py
|
15
15
|
xbase_util.egg-info/PKG-INFO
|
16
16
|
xbase_util.egg-info/SOURCES.txt
|
17
17
|
xbase_util.egg-info/dependency_links.txt
|