xbase-util 0.6.5__tar.gz → 0.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xbase_util-0.6.5 → xbase_util-0.6.7}/PKG-INFO +1 -1
- {xbase_util-0.6.5 → xbase_util-0.6.7}/setup.py +1 -1
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/packet_util.py +16 -29
- xbase_util-0.6.7/xbase_util/segment.py +163 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util.egg-info/PKG-INFO +1 -1
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util.egg-info/SOURCES.txt +1 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/README.md +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/setup.cfg +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/__init__.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/add_column_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/dangerous_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/__init__.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/bean/ConfigBean.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/bean/FlowBean.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/bean/__init__.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/dao/ConfigDao.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/dao/FlowDao.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/dao/__init__.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/db/initsqlite3.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/es_db_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/esreq.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/geo_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/handle_features_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/pcap_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/xbase_constant.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util/xbase_util.py +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util.egg-info/dependency_links.txt +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util.egg-info/not-zip-safe +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util.egg-info/top_level.txt +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
- {xbase_util-0.6.5 → xbase_util-0.6.7}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,7 +1,7 @@
|
|
1
|
+
import copy
|
1
2
|
import re
|
2
|
-
import traceback
|
3
3
|
|
4
|
-
from scapy.layers.inet import TCP
|
4
|
+
from scapy.layers.inet import TCP
|
5
5
|
from scapy.packet import Raw
|
6
6
|
|
7
7
|
from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
|
@@ -96,14 +96,6 @@ def get_all_packets_by_reg(packets):
|
|
96
96
|
return packet_list
|
97
97
|
|
98
98
|
|
99
|
-
def get_body(packet):
|
100
|
-
try:
|
101
|
-
return "".join([item for item in packet.split("\r\n\r\n") if "HTTP/" not in item])
|
102
|
-
except Exception:
|
103
|
-
traceback.print_exc()
|
104
|
-
return ""
|
105
|
-
|
106
|
-
|
107
99
|
def get_header_value(header_set, value):
|
108
100
|
result = [item for item in header_set if value in item]
|
109
101
|
if len(result) != 0:
|
@@ -112,23 +104,20 @@ def get_header_value(header_set, value):
|
|
112
104
|
return ""
|
113
105
|
|
114
106
|
|
115
|
-
def get_detail_by_package(
|
107
|
+
def get_detail_by_package(publicField, req_header, req_body, res_header, res_body):
|
116
108
|
"""
|
117
109
|
通过pcap的数量分离session并完善相关字段
|
118
|
-
:param packets_from_pcap: 通过PcAp解析出的包
|
119
110
|
:param publicField: 原始的session单条数据
|
111
|
+
:param req_header:请求头
|
112
|
+
:param req_body:请求体
|
113
|
+
:param res_header:响应头
|
114
|
+
:param res_body:响应体
|
120
115
|
:return: 完整的单条数据
|
121
116
|
"""
|
122
|
-
res_field =
|
123
|
-
if use_regx:
|
124
|
-
req = packets_from_pcap['req_body']
|
125
|
-
res = packets_from_pcap['res_body']
|
126
|
-
else:
|
127
|
-
res = packets_from_pcap["response"]
|
128
|
-
req = packets_from_pcap["request"]
|
117
|
+
res_field = copy.deepcopy(publicField)
|
129
118
|
res_field["initRTT"] = firstOrZero(res_field.get("initRTT", 0))
|
130
119
|
res_field["length"] = firstOrZero(res_field.get("length", 0))
|
131
|
-
request_lines =
|
120
|
+
request_lines = req_header.strip().split("\n")
|
132
121
|
http_request_lines = [item for item in request_lines if "HTTP" in item]
|
133
122
|
if len(http_request_lines) != 0:
|
134
123
|
first_line = http_request_lines[0].split(" ")
|
@@ -144,15 +133,13 @@ def get_detail_by_package(packets_from_pcap, publicField, use_regx):
|
|
144
133
|
value="Content-Type")
|
145
134
|
res_field['http.hostTokens'] = get_header_value(header_set=request_lines, value="Host")
|
146
135
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
response_lines = res.strip().split("\n")
|
136
|
+
res_field['plain_body_src'] = ""
|
137
|
+
res_field['plain_body_dst'] = ""
|
138
|
+
if content_type_is_plain(req_header):
|
139
|
+
res_field['plain_body_src'] = req_body
|
140
|
+
if content_type_is_plain(res_header):
|
141
|
+
res_field['plain_body_dst'] = res_body
|
142
|
+
response_lines = res_body.strip().split("\n")
|
156
143
|
http_response_lines = [item for item in response_lines if "HTTP" in item]
|
157
144
|
if len(http_response_lines) != 0:
|
158
145
|
first_line = http_response_lines[0].strip().split(" ")
|
@@ -0,0 +1,163 @@
|
|
1
|
+
import copy
|
2
|
+
import re
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
from scapy.all import *
|
6
|
+
from scapy.layers.inet import TCP
|
7
|
+
|
8
|
+
REQUEST_LINE_RE = re.compile(rb"^(GET|POST|PUT|DELETE|OPTIONS|HEAD|PATCH)\s[^\r\n]+\r\n", re.MULTILINE)
|
9
|
+
RESPONSE_LINE_RE = re.compile(rb"^HTTP/\d\.\d\s+\d{3}\s?[^\r\n]*", re.IGNORECASE)
|
10
|
+
|
11
|
+
|
12
|
+
def read_packets(packets):
    """Group a sequence of TCP packets into runs of payload bytes.

    Walks ``packets`` in order and uses each packet's TCP ``seq``/``ack``
    numbers, compared with the previous packet, to decide whether its payload
    continues the run being collected or starts a new one.

    :param packets: iterable of scapy packets; each must carry a TCP layer
    :return: list of dicts ``{'data': bytes, 'pkts': [packet copies]}``
    """
    segments = []
    pending_data = b''
    pending_pkts = []
    expected_seq = -1  # previous packet's seq + payload length
    prev_ack = -1

    for pkt in packets:
        payload = pkt[Raw].load if Raw in pkt else b''
        ack = pkt[TCP].ack
        seq = pkt[TCP].seq

        if seq == expected_seq:
            # Directly follows the previous packet: same run.
            pending_data += payload
            pending_pkts.append(pkt)
        elif seq == prev_ack:
            # seq equals the previous packet's ack: the collected run is
            # finished, so flush it before starting a new one.
            if pending_data:
                if REQUEST_LINE_RE.match(pending_data) or RESPONSE_LINE_RE.match(pending_data):
                    # bytes are immutable, so only the packet list needs copying.
                    segments.append({'data': pending_data, 'pkts': copy.deepcopy(pending_pkts)})
                elif segments:
                    # Not a new request/response: append to the previous entry.
                    # With no previous entry (no request found yet) the run
                    # is discarded.
                    segments[-1]['pkts'].extend(copy.deepcopy(pending_pkts))
                    segments[-1]['data'] += pending_data
            pending_data = payload
            pending_pkts = [pkt]
        elif payload:
            # Sequence numbers do not line up, but the packet carries data:
            # keep it with the current run.
            pending_data += payload
            pending_pkts.append(pkt)

        prev_ack = ack
        expected_seq = seq + len(payload)

    # Whatever is still pending after the last packet becomes its own entry.
    if pending_data:
        segments.append({'data': pending_data, 'pkts': copy.deepcopy(pending_pkts)})
    return segments
|
52
|
+
|
53
|
+
|
54
|
+
# Header probes, compiled once at import time instead of on every call.
_CHUNKED_HEADER_RE = re.compile(rb"Transfer-Encoding:\s*chunked", re.IGNORECASE)
_GZIP_HEADER_RE = re.compile(rb"Content-Encoding:\s*gzip", re.IGNORECASE)


def _decode_chunked(body):
    """Decode an HTTP/1.1 chunked body by byte count.

    :param body: raw bytes following the header block
    :return: ``(payload, complete)``; ``payload`` is everything decoded before
             the stream ended or became malformed, ``complete`` is True only
             when the zero-size terminating chunk was reached
    """
    payload = bytearray()
    pos = 0
    while True:
        eol = body.find(b"\r\n", pos)
        if eol == -1:
            return bytes(payload), False
        # A chunk-size line may carry extensions after ';'.
        size_token = body[pos:eol].split(b";", 1)[0].strip()
        try:
            size = int(size_token, 16)
        except ValueError:
            return bytes(payload), False
        if size < 0:
            return bytes(payload), False
        if size == 0:
            return bytes(payload), True
        start = eol + 2
        end = start + size
        # Take exactly `size` bytes so CRLF or hex-looking text inside the
        # chunk data is never mistaken for framing.
        payload += body[start:end]
        if end > len(body):
            return bytes(payload), False  # body cut off in the middle of a chunk
        pos = end + 2 if body.startswith(b"\r\n", end) else end


def parse_req_or_res(data, pkts):
    """Split one raw HTTP message into header text, body text and packet times.

    The body is de-chunked when the header declares
    ``Transfer-Encoding: chunked`` (matched case-insensitively), or when no
    such header is present but the body ends in ``0\\r\\n`` and decodes as a
    complete chunked stream. It is gunzipped when the header declares
    ``Content-Encoding: gzip``; if decompression fails the undecompressed
    bytes are decoded instead.

    :param data: raw bytes of the request or response
    :param pkts: packets that carried the message; each must expose ``.time``
    :return: ``(header_str, body_str, [float(pkt.time), ...])``; text is
             decoded as UTF-8 with undecodable bytes replaced
    """
    # Imported here so the function does not rely on these names leaking in
    # through a wildcard import elsewhere in the module.
    import gzip
    import zlib

    header, _, body = data.partition(b"\r\n\r\n")

    if _CHUNKED_HEADER_RE.search(header):
        # Declared chunked: keep whatever could be decoded, even if truncated.
        result_body, _ = _decode_chunked(body)
    elif body.endswith(b"0\r\n"):
        # Chunked was not declared but the tail looks like a final chunk.
        # Only trust the guess if the whole body decodes cleanly; otherwise
        # an ordinary body that happens to end in "0\r\n" would be destroyed.
        decoded, complete = _decode_chunked(body)
        result_body = decoded if complete else body
    else:
        result_body = body

    if _GZIP_HEADER_RE.search(header):
        try:
            decompressed = gzip.decompress(result_body)
        except (OSError, EOFError, zlib.error):
            # Not valid gzip, or truncated: fall back to the raw bytes.
            result_body_str = result_body.decode("utf-8", errors="replace")
        else:
            text = decompressed.decode("utf-8", errors="replace")
            stripped_lines = (line.strip() for line in text.splitlines())
            result_body_str = "\n".join(line for line in stripped_lines if line != "")
    else:
        result_body_str = result_body.decode("utf-8", errors="replace")

    return header.decode("utf-8", errors="replace"), result_body_str, [float(pkt.time) for pkt in pkts]
|
111
|
+
|
112
|
+
|
113
|
+
def get_all_packets_by_segment(packets):
    """Pair each HTTP request found in ``packets`` with its response.

    Segments come from ``read_packets``. A response is matched to a request
    when the TCP ``seq`` of the response's first packet equals the TCP ``ack``
    of the request's last packet; if several responses qualify, the first one
    in capture order is used.

    :param packets: iterable of scapy packets passed to ``read_packets``
    :return: one dict per request with keys ``req_header``, ``req_body``,
             ``req_time``, ``req_packets``, ``res_header``, ``res_body``,
             ``res_time`` and ``res_packets``; the ``res_*`` values are
             empty / 0 when no response matched
    """
    segments = read_packets(packets)

    # First response segment seen for each starting sequence number.
    responses_by_seq = {}
    for segment in segments:
        if RESPONSE_LINE_RE.match(segment['data']):
            responses_by_seq.setdefault(segment['pkts'][0][TCP].seq, segment)

    packet_list = []
    for segment in segments:
        if not REQUEST_LINE_RE.match(segment['data']):
            continue
        req_pkts = segment['pkts']
        matched = responses_by_seq.get(req_pkts[-1][TCP].ack)
        if matched is not None:
            res_header, res_body, res_times = parse_req_or_res(matched['data'], matched['pkts'])
            res_count = len(matched['pkts'])
        else:
            # No response for this request.
            res_header, res_body, res_times, res_count = '', '', [], 0
        req_header, req_body, req_times = parse_req_or_res(segment['data'], req_pkts)
        packet_list.append({
            "req_header": req_header,
            "req_body": req_body,
            "req_time": req_times,
            "req_packets": len(req_pkts),
            "res_header": res_header,
            "res_body": res_body,
            "res_time": res_times,
            "res_packets": res_count,
        })
    return packet_list
|
156
|
+
|
157
|
+
|
158
|
+
# if __name__ == '__main__':
|
159
|
+
# all_packets = get_all_packets_by_segment(rdpcap("../out/3post.pcap"))
|
160
|
+
# res=[
|
161
|
+
# get_detail_by_package({}, package['req_header'], package['req_body'], package['res_header'],
|
162
|
+
# package['req_body']) for package in all_packets]
|
163
|
+
# print(res)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|