xbase-util 0.6.6__tar.gz → 0.6.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {xbase_util-0.6.6 → xbase_util-0.6.8}/PKG-INFO +1 -1
  2. {xbase_util-0.6.6 → xbase_util-0.6.8}/setup.py +1 -1
  3. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/packet_util.py +24 -23
  4. xbase_util-0.6.8/xbase_util/segment.py +163 -0
  5. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util.egg-info/PKG-INFO +1 -1
  6. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util.egg-info/SOURCES.txt +1 -0
  7. {xbase_util-0.6.6 → xbase_util-0.6.8}/README.md +0 -0
  8. {xbase_util-0.6.6 → xbase_util-0.6.8}/setup.cfg +0 -0
  9. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/add_column_util.py +0 -0
  11. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/dangerous_util.py +0 -0
  12. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/__init__.py +0 -0
  13. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/bean/ConfigBean.py +0 -0
  14. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  15. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/bean/FlowBean.py +0 -0
  16. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  17. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/bean/__init__.py +0 -0
  18. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/dao/ConfigDao.py +0 -0
  19. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  20. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/dao/FlowDao.py +0 -0
  21. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  22. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/dao/__init__.py +0 -0
  23. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/db/initsqlite3.py +0 -0
  24. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/es_db_util.py +0 -0
  25. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/esreq.py +0 -0
  26. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/geo_util.py +0 -0
  27. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/handle_features_util.py +0 -0
  28. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/pcap_util.py +0 -0
  29. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/xbase_constant.py +0 -0
  30. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util/xbase_util.py +0 -0
  31. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util.egg-info/dependency_links.txt +0 -0
  32. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util.egg-info/not-zip-safe +0 -0
  33. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util.egg-info/top_level.txt +0 -0
  34. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  35. {xbase_util-0.6.6 → xbase_util-0.6.8}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.6.6
3
+ Version: 0.6.8
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.6.6",
6
+ version="0.6.8",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -1,7 +1,7 @@
1
+ import copy
1
2
  import re
2
- import traceback
3
3
 
4
- from scapy.layers.inet import TCP, IP
4
+ from scapy.layers.inet import TCP
5
5
  from scapy.packet import Raw
6
6
 
7
7
  from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
@@ -65,6 +65,11 @@ res_pattern = re.compile(r"HTTP/\d\.\d \d{3}.*", re.DOTALL)
65
65
  req_body_pattern = re.compile(
66
66
  r"(GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d[\s\S]*?(?=HTTP/\d\.\d)", re.DOTALL)
67
67
 
68
+ http_version = re.compile(r"HTTP\/(\d\.\d)")
69
+ http_req_method = re.compile(r"(GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d")
70
+ http_req_path = re.compile(r"(?:GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH)\s+(\/[^\s]*)\s+HTTP\/\d\.\d")
71
+ res_status_code_pattern = re.compile(r"HTTP\/\d\.\d\s+(\d{3})\s+.*")
72
+
68
73
 
69
74
  def get_all_packets_by_reg(packets):
70
75
  http_Req_Raw = {}
@@ -114,20 +119,18 @@ def get_detail_by_package(publicField, req_header, req_body, res_header, res_bod
114
119
  :param res_body:响应体
115
120
  :return: 完整的单条数据
116
121
  """
117
- res_field = publicField.copy()
122
+ res_field = copy.deepcopy(publicField)
118
123
  res_field["initRTT"] = firstOrZero(res_field.get("initRTT", 0))
119
124
  res_field["length"] = firstOrZero(res_field.get("length", 0))
120
- request_lines = req_header.strip().split("\n")
121
- http_request_lines = [item for item in request_lines if "HTTP" in item]
122
- if len(http_request_lines) != 0:
123
- first_line = http_request_lines[0].split(" ")
124
- res_field['http.clientVersion'] = str(first_line[2]).replace("\n", "").replace("\r", "")
125
- res_field['http.path'] = first_line[1]
126
- res_field['http.method'] = first_line[0]
127
- else:
128
- res_field['http.clientVersion'] = ''
129
- res_field['http.path'] = ''
130
- res_field['http.method'] = ''
125
+
126
+ http_version_res = http_version.findall(req_header)
127
+ res_field['http.clientVersion'] = http_version_res[0] if len(http_version_res) > 0 else ""
128
+ http_method = http_req_method.findall(req_header)
129
+ http_path = http_req_path.findall(req_header)
130
+ res_field['http.clientVersion'] = http_version_res[0] if len(http_version_res) > 0 else ""
131
+ res_field['http.method'] = http_method[0] if len(http_method) > 0 else ""
132
+ res_field['http.path'] = http_path[0] if len(http_path) > 0 else ""
133
+ request_lines = req_header.splitlines()
131
134
  res_field['http.request-referer'] = get_header_value(header_set=request_lines, value="Referer")
132
135
  res_field['http.request-content-type'] = get_header_value(header_set=request_lines,
133
136
  value="Content-Type")
@@ -139,15 +142,13 @@ def get_detail_by_package(publicField, req_header, req_body, res_header, res_bod
139
142
  res_field['plain_body_src'] = req_body
140
143
  if content_type_is_plain(res_header):
141
144
  res_field['plain_body_dst'] = res_body
142
- response_lines = res_body.strip().split("\n")
143
- http_response_lines = [item for item in response_lines if "HTTP" in item]
144
- if len(http_response_lines) != 0:
145
- first_line = http_response_lines[0].strip().split(" ")
146
- res_field['http.statuscode'] = first_line[1]
147
- res_field['http.serverVersion'] = first_line[0].split("/")[1]
148
- else:
149
- res_field['http.statuscode'] = ""
150
- res_field['http.serverVersion'] = ""
145
+
146
+ http_server_version_res = http_version.findall(res_header)
147
+ res_field['http.serverVersion'] = http_server_version_res[0] if len(http_server_version_res) > 0 else ""
148
+
149
+ status_code = res_status_code_pattern.findall(res_header)
150
+ res_field['http.statuscode'] = status_code[0] if len(status_code) > 0 else ""
151
+ response_lines = res_header.striplines()
151
152
  res_field['http.response-server'] = get_header_value(header_set=response_lines, value="Server")
152
153
  res_field['http.response-content-type'] = get_header_value(header_set=response_lines,
153
154
  value="Content-Type")
@@ -0,0 +1,163 @@
1
+ import copy
2
+ import re
3
+
4
+ import numpy as np
5
+ from scapy.all import *
6
+ from scapy.layers.inet import TCP
7
+
8
# Matches an HTTP request line ("<METHOD> <rest of line>\r\n") at the start of a payload.
REQUEST_LINE_RE = re.compile(rb"^(GET|POST|PUT|DELETE|OPTIONS|HEAD|PATCH)\s[^\r\n]+\r\n", re.MULTILINE)
# Matches an HTTP status line ("HTTP/x.y NNN ...") at the start of a payload.
RESPONSE_LINE_RE = re.compile(rb"^HTTP/\d\.\d\s+\d{3}\s?[^\r\n]*", re.IGNORECASE)


def _flush_segment(packet_list, data, pkts, keep_orphan):
    """Store one buffered payload in *packet_list* (mutated in place).

    :param packet_list: list of ``{'data': bytes, 'pkts': list}`` entries built so far
    :param data: concatenated payload bytes of the buffered packets
    :param pkts: the buffered packets themselves
    :param keep_orphan: when True, a payload that neither starts a message nor
        has a previous entry to join is stored as its own entry instead of
        being dropped
    """
    if data == b'':
        return
    starts_message = REQUEST_LINE_RE.match(data) or RESPONSE_LINE_RE.match(data)
    if starts_message or (keep_orphan and not packet_list):
        # Packets are deep-copied so the stored entry is isolated from the caller's objects.
        packet_list.append({'data': data, 'pkts': copy.deepcopy(pkts)})
    elif packet_list:
        # No new request/response line: this is continuation data of the previous message.
        packet_list[-1]['pkts'].extend(copy.deepcopy(pkts))
        packet_list[-1]['data'] += data


def read_packets(packets):
    """Group packets into HTTP-message-sized payloads using TCP seq/ack numbers.

    Packets are consumed in the given order. A packet whose ``seq`` equals the
    previous packet's ``seq + payload length`` extends the current buffer; a
    packet whose ``seq`` equals the previous packet's ``ack`` flushes the
    buffer and starts a new one; any other packet is buffered only when it
    carries payload.

    A flushed buffer becomes a new entry when it begins with an HTTP request or
    status line; otherwise it is merged into the previous entry. The buffer
    left over at the end of the capture is flushed with the same rule, so the
    trailing bytes of the last message stay attached to that message. If there
    is no previous entry at that point, the leftover buffer is kept as its own
    entry.

    :param packets: iterable of scapy packets, each containing a TCP layer
    :return: list of dicts ``{'data': bytes, 'pkts': list of packet copies}``
    """
    last_seq_len = -1
    last_ack = -1
    packet_list = []
    tmp_data = b''
    tmp_packets = []
    for pkt in packets:
        data = pkt[Raw].load if Raw in pkt else b''
        ack = pkt[TCP].ack
        seq = pkt[TCP].seq
        if seq == last_seq_len:
            # Continues directly after the previous packet's payload.
            tmp_data += data
            tmp_packets.append(pkt)
        elif seq == last_ack:
            # seq matches what the previous packet acknowledged: close the current buffer.
            _flush_segment(packet_list, tmp_data, tmp_packets, keep_orphan=False)
            tmp_data = data
            tmp_packets = [pkt]
        elif len(data) > 0:
            # Unexpected sequence number, but the packet carries payload: keep it.
            tmp_data += data
            tmp_packets.append(pkt)
        last_ack = ack
        last_seq_len = seq + len(data)
    _flush_segment(packet_list, tmp_data, tmp_packets, keep_orphan=True)
    return packet_list
52
+
53
+
54
_CHUNKED_HEADER_RE = re.compile(rb"Transfer-Encoding:\s*chunked", re.IGNORECASE)
_GZIP_HEADER_RE = re.compile(rb"Content-Encoding:\s*gzip", re.IGNORECASE)
_HEX_DIGITS = frozenset(b"0123456789abcdefABCDEF")


def _decode_chunked(body):
    """Decode an HTTP chunked body by honoring each declared chunk size.

    :param body: raw bytes following the header block
    :return: ``(decoded, complete)``; ``complete`` is True only when every
        size line was valid hex, every chunk was fully present, and the
        zero-size terminating chunk was reached
    """
    pieces = []
    pos = 0
    while True:
        # Skip the CRLF that ends the previous chunk (and any stray blank lines).
        while body.startswith(b"\r\n", pos):
            pos += 2
        line_end = body.find(b"\r\n", pos)
        if line_end == -1:
            return b"".join(pieces), False
        # A size line may carry ";extension" parameters after the hex size.
        size_token = body[pos:line_end].split(b";", 1)[0].strip()
        if not size_token or not _HEX_DIGITS.issuperset(size_token):
            return b"".join(pieces), False
        size = int(size_token, 16)
        if size == 0:
            return b"".join(pieces), True
        start = line_end + 2
        chunk = body[start:start + size]
        pieces.append(chunk)
        if len(chunk) < size:
            return b"".join(pieces), False
        pos = start + size


def _decode_chunked_lenient(body):
    """Best-effort line scan for chunked bodies that are not well-formed.

    Every non-empty CRLF-separated line that parses as hex is taken as a chunk
    size; every other line is kept as data while the last size seen is
    positive. Scanning stops at a zero size.
    """
    pieces = []
    expected = 0
    for line in body.split(b"\r\n"):
        if line == b'':
            continue
        try:
            expected = int(line, 16)
        except ValueError:
            if expected > 0:
                pieces.append(line)
            continue
        if expected == 0:
            break
    return b"".join(pieces)


def parse_req_or_res(data, pkts):
    """Split one reassembled HTTP message into header text, body text and packet times.

    The header/body boundary is the first blank line (``\\r\\n\\r\\n``); without
    one the whole payload is the header. Body handling:

    * ``Transfer-Encoding: chunked`` declared: chunks are decoded by their
      declared sizes, so chunk data that looks like hex or contains CRLF is
      preserved. If the stream is not well-formed (e.g. a truncated capture),
      a lenient line-based scan is used instead.
    * Not declared, but the body ends like a chunked stream: it is decoded as
      chunked only when it parses cleanly; otherwise the body is returned
      untouched, so ordinary payloads that merely end in ``0\\r\\n`` are kept.
    * ``Content-Encoding: gzip`` declared: the body is decompressed, and each
      line is stripped with blank lines dropped. On decompression failure the
      raw bytes are decoded as-is.

    :param data: raw bytes of the message
    :param pkts: packets that carried the message; each must expose ``.time``
    :return: ``(header_str, body_str, [float(pkt.time), ...])``, text decoded
        as UTF-8 with replacement of invalid bytes
    """
    # Imported locally so the function does not depend on a star import for gzip.
    import gzip
    import zlib

    header, _, body = data.partition(b"\r\n\r\n")

    if _CHUNKED_HEADER_RE.search(header):
        decoded, complete = _decode_chunked(body)
        result_body = decoded if complete else _decode_chunked_lenient(body)
    elif body.endswith((b"0\r\n", b"0\r\n\r\n")):
        # Chunked was not declared; only trust the guess if the body parses cleanly.
        decoded, complete = _decode_chunked(body)
        result_body = decoded if complete else body
    else:
        result_body = body

    if _GZIP_HEADER_RE.search(header):
        try:
            text = gzip.decompress(result_body).decode("utf-8", errors="replace")
        except (OSError, EOFError, zlib.error):
            result_body_str = result_body.decode("utf-8", errors="replace")
        else:
            result_body_str = "\n".join(
                line.strip() for line in text.splitlines() if line.strip() != "")
    else:
        result_body_str = result_body.decode("utf-8", errors="replace")
    return header.decode("utf-8", errors="replace"), result_body_str, [float(pkt.time) for pkt in pkts]
111
+
112
+
113
def get_all_packets_by_segment(packets):
    """Pair each HTTP request found in *packets* with its response, if any.

    The capture is first grouped by ``read_packets``. A response is matched to
    a request when the TCP ``seq`` of the response's first packet equals the
    ``ack`` of the request's last packet; the first such response wins.

    :param packets: iterable of scapy packets with a TCP layer
    :return: list of dicts with ``req_header``, ``req_body``, ``req_time``,
        ``req_packets``, ``res_header``, ``res_body``, ``res_time`` and
        ``res_packets``; the response fields are empty / 0 when no response
        was matched
    """
    segments = read_packets(packets)
    requests = [seg for seg in segments if REQUEST_LINE_RE.match(seg['data'])]
    responses = [
        {'first_seq': seg['pkts'][0][TCP].seq, 'pkts': seg['pkts'], 'data': seg['data']}
        for seg in segments
        if RESPONSE_LINE_RE.match(seg['data'])
    ]

    paired = []
    for req in requests:
        expected_seq = req['pkts'][-1][TCP].ack
        matched = next((res for res in responses if res['first_seq'] == expected_seq), None)
        if matched is None:
            # No response seen for this request.
            res_header, res_body, res_times, res_count = '', '', [], 0
        else:
            res_header, res_body, res_times = parse_req_or_res(matched['data'], matched['pkts'])
            res_count = len(matched['pkts'])
        req_header, req_body, req_times = parse_req_or_res(req['data'], req['pkts'])
        paired.append({
            "req_header": req_header,
            "req_body": req_body,
            "req_time": req_times,
            "req_packets": len(req['pkts']),
            "res_header": res_header,
            "res_body": res_body,
            "res_time": res_times,
            "res_packets": res_count,
        })
    return paired
156
+
157
+
158
+ # if __name__ == '__main__':
159
+ # all_packets = get_all_packets_by_segment(rdpcap("../out/3post.pcap"))
160
+ # res=[
161
+ # get_detail_by_package({}, package['req_header'], package['req_body'], package['res_header'],
162
+ # package['req_body']) for package in all_packets]
163
+ # print(res)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.6.6
3
+ Version: 0.6.8
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -9,6 +9,7 @@ xbase_util/geo_util.py
9
9
  xbase_util/handle_features_util.py
10
10
  xbase_util/packet_util.py
11
11
  xbase_util/pcap_util.py
12
+ xbase_util/segment.py
12
13
  xbase_util/xbase_constant.py
13
14
  xbase_util/xbase_util.py
14
15
  xbase_util.egg-info/PKG-INFO
File without changes
File without changes