xbase-util 0.8.0__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {xbase_util-0.8.0 → xbase_util-0.8.2}/PKG-INFO +1 -1
  2. {xbase_util-0.8.0 → xbase_util-0.8.2}/setup.py +1 -1
  3. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/pcap_util.py +9 -6
  4. xbase_util-0.8.2/xbase_util/segment.py +105 -0
  5. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/xbase_util.py +23 -1
  6. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util.egg-info/SOURCES.txt +1 -1
  8. xbase_util-0.8.0/test/test.py +0 -38
  9. {xbase_util-0.8.0 → xbase_util-0.8.2}/README.md +0 -0
  10. {xbase_util-0.8.0 → xbase_util-0.8.2}/setup.cfg +0 -0
  11. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/__init__.py +0 -0
  12. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/add_column_util.py +0 -0
  13. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/dangerous_util.py +0 -0
  14. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/__init__.py +0 -0
  15. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/bean/ConfigBean.py +0 -0
  16. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  17. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/bean/FlowBean.py +0 -0
  18. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  19. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/bean/__init__.py +0 -0
  20. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/dao/ConfigDao.py +0 -0
  21. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  22. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/dao/FlowDao.py +0 -0
  23. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  24. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/dao/__init__.py +0 -0
  25. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/db/initsqlite3.py +0 -0
  26. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/es_db_util.py +0 -0
  27. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/esreq.py +0 -0
  28. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/geo_util.py +0 -0
  29. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/handle_features_util.py +0 -0
  30. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/packet_util.py +0 -0
  31. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util/xbase_constant.py +0 -0
  32. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util.egg-info/dependency_links.txt +0 -0
  33. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util.egg-info/not-zip-safe +0 -0
  34. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util.egg-info/top_level.txt +0 -0
  35. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  36. {xbase_util-0.8.0 → xbase_util-0.8.2}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.8.0",
6
+ version="0.8.2",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -56,6 +56,8 @@ def decompress_streaming(compressed_data, session_id):
56
56
  return bytearray()
57
57
 
58
58
 
59
+
60
+
59
61
  def reassemble_tcp(packets, num_packets=1000):
60
62
  packets2 = []
61
63
  info = {}
@@ -121,7 +123,7 @@ def reassemble_tcp(packets, num_packets=1000):
121
123
  return a['tcp']['ack'] - (b['tcp']['seq'] + len(b['tcp']['data']) - 1)
122
124
 
123
125
  packets.sort(key=cmp_to_key(compare_packets))
124
- del packets[num_packets:]
126
+ # del packets[num_packets:]
125
127
  # Now divide up conversation
126
128
  clientSeq = 0
127
129
  hostSeq = 0
@@ -771,7 +773,7 @@ def decode_obj(buffer, bigEndian, linkType, nanosecond):
771
773
  return obj
772
774
 
773
775
 
774
- def get_file_and_read_pos(session_id, file, pos_list):
776
+ def get_file_and_read_pos(session_id, file, pos_list,reture_obj=True):
775
777
  filename = file['name']
776
778
  if not os.path.isfile(filename):
777
779
  print(f"文件不存在:{filename}")
@@ -822,8 +824,9 @@ def get_file_and_read_pos(session_id, file, pos_list):
822
824
  i = 0
823
825
  for pos in pos_list:
824
826
  packet_bytes = read_packet(pos, param_map, session_id)
825
- obj = decode_obj(packet_bytes, bigEndian, linkType, nanosecond, )
826
- packet_objs.append(copy.deepcopy(obj))
827
+ if reture_obj:
828
+ obj = decode_obj(packet_bytes, bigEndian, linkType, nanosecond, )
829
+ packet_objs.append(copy.deepcopy(obj))
827
830
  if not packet_bytes:
828
831
  continue
829
832
  packets[i] = packet_bytes
@@ -845,7 +848,7 @@ def get_file_and_read_pos(session_id, file, pos_list):
845
848
  return res, packet_objs
846
849
 
847
850
 
848
- def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix):
851
+ def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix,reture_obj=True):
849
852
  packetPos = packet_pos
850
853
  file = esdb.get_file_by_file_id(node=node, num=abs(packetPos[0]),
851
854
  prefix=None if pcap_path_prefix == "origin" else pcap_path_prefix)
@@ -854,4 +857,4 @@ def process_session_id_disk_simple(id, node, packet_pos, esdb, pcap_path_prefix)
854
857
  fix_pos(packetPos, file['packetPosEncoding'])
855
858
  pos_list = group_numbers(packetPos)[0]
856
859
  pos_list.pop(0)
857
- return get_file_and_read_pos(id, file, pos_list)
860
+ return get_file_and_read_pos(id, file, pos_list,reture_obj)
@@ -0,0 +1,105 @@
1
+ from scapy.all import *
2
+ from scapy.layers.inet import TCP
3
+
4
+ from xbase_util.packet_util import filter_visible_chars
5
+ from xbase_util.xbase_util import parse_chunked_body
6
+
7
+ REQUEST_LINE_RE = re.compile(rb"^(GET|POST|PUT|DELETE|OPTIONS|HEAD|PATCH)\s[^\r\n]+\r\n", re.MULTILINE)
8
+ RESPONSE_LINE_RE = re.compile(rb"^HTTP/\d\.\d\s+\d{3}\s?[^\r\n]*", re.IGNORECASE)
9
+
10
+
11
+ def read_packets(packets):
12
+ last_seq_len = -1
13
+ last_ack = -1
14
+ packet_list = []
15
+ tmp_data = b''
16
+ tmp_packets = []
17
+ for index, pkt in enumerate(packets):
18
+ data = pkt[Raw].load if Raw in pkt else b''
19
+ ack = pkt[TCP].ack
20
+ seq = pkt[TCP].seq
21
+ if seq == last_seq_len:
22
+ # print(f"检测到连续包 数据长度:{len(data)} + seq:{seq}={len(data) + seq} ack:{ack}")
23
+ tmp_data += data
24
+ tmp_packets.append(pkt)
25
+ elif seq == last_ack:
26
+ if tmp_data != b'':
27
+ if REQUEST_LINE_RE.match(tmp_data) or RESPONSE_LINE_RE.match(tmp_data):
28
+ packet_list.append({'data': copy.deepcopy(tmp_data), 'pkts': copy.deepcopy(tmp_packets)})
29
+ else:
30
+ # print("没有新的请求或者响应,就把数据加到上一个里面")
31
+ if len(packet_list) > 0:
32
+ # 之前找到过有请求,可以添加到之前的数据,否则说明一开始就没找到请求
33
+ packet_list[-1]['pkts'].extend(copy.deepcopy(tmp_packets))
34
+ packet_list[-1]['data'] += tmp_data
35
+
36
+ tmp_data = data
37
+ tmp_packets = [pkt]
38
+ # print(f"顺序正确 数据长度:{len(data)} + seq:{seq}={len(data) + seq} ack:{ack}")
39
+ else:
40
+ # print(f"顺序错误 数据长度:{len(data)} + seq:{seq}={len(data) + seq} ack:{ack}")
41
+ if len(data) > 0:
42
+ # 但是有数据
43
+ tmp_data += data
44
+ tmp_packets.append(pkt)
45
+ last_ack = ack
46
+ last_seq_len = seq + len(data)
47
+ if tmp_data != b'':
48
+ packet_list.append({'data': copy.deepcopy(tmp_data), 'pkts': copy.deepcopy(tmp_packets)})
49
+ tmp_packets.clear()
50
+ return packet_list
51
+
52
+
53
+ def parse_req_or_res(data, pkts):
54
+ if data.find(b"\r\n\r\n") != -1:
55
+ res = data.split(b"\r\n\r\n", 1)
56
+ header = res[0]
57
+ body = res[1]
58
+ else:
59
+ header = data
60
+ body = b''
61
+ body = parse_chunked_body(body)
62
+ result_body_str = filter_visible_chars(body)
63
+ return filter_visible_chars(header), result_body_str, [float(pkt.time) for pkt in pkts]
64
+
65
+
66
+ def get_all_packets_by_segment(packets):
67
+ res = read_packets(packets)
68
+ request_packets = [item for item in res if REQUEST_LINE_RE.match(item['data'])]
69
+ response_packets = [
70
+ {'first_seq': item['pkts'][0][TCP].seq, 'pkts': item['pkts'], 'first_ack': item['pkts'][0][TCP].ack,
71
+ 'data': item['data']} for item in
72
+ res if RESPONSE_LINE_RE.match(item['data'])]
73
+ packet_list = []
74
+ for request in request_packets:
75
+ pkt_list = request['pkts']
76
+ last_pkt = pkt_list[-1]
77
+ ack = last_pkt[TCP].ack
78
+ response = [item for item in response_packets if item['first_seq'] == ack]
79
+ if len(response) > 0:
80
+ res_header, res_body, res_times = parse_req_or_res(response[0]['data'], response[0]['pkts'])
81
+ req_header, req_body, req_times = parse_req_or_res(request['data'], request['pkts'])
82
+ packet_list.append({
83
+ "req_header": req_header,
84
+ "req_body": req_body,
85
+ "req_time": req_times,
86
+ "req_packets": len(request['pkts']),
87
+ "res_header": res_header,
88
+ "res_body": res_body,
89
+ "res_time": res_times,
90
+ "res_packets": len(response[0]['pkts']),
91
+ })
92
+ else:
93
+ # print("没响应")
94
+ req_header, req_body, req_times = parse_req_or_res(request['data'], request['pkts'])
95
+ packet_list.append({
96
+ "req_header": req_header,
97
+ "req_body": req_body,
98
+ "req_time": req_times,
99
+ "req_packets": len(request['pkts']),
100
+ "res_header": '',
101
+ "res_body": '',
102
+ "res_time": [],
103
+ "res_packets": 0,
104
+ })
105
+ return packet_list
@@ -1,3 +1,4 @@
1
+ import gzip
1
2
  import json
2
3
  import logging
3
4
  import os
@@ -11,7 +12,28 @@ import numpy as np
11
12
  import tldextract
12
13
  from scapy.layers.dns import DNS
13
14
 
14
- from xbase_util.xbase_constant import parse_path, dns_domain_list
15
+ from xbase_util.xbase_constant import dns_domain_list, parse_path
16
+
17
+
18
+ def parse_chunked_body(data: bytes) -> bytes:
19
+ body = b""
20
+ while True:
21
+ chunk_size_end = data.find(b"\r\n")
22
+ if chunk_size_end == -1:
23
+ break
24
+ chunk_size_hex = data[:chunk_size_end]
25
+ chunk_size = int(chunk_size_hex, 16)
26
+ if chunk_size == 0:
27
+ break
28
+ chunk_start = chunk_size_end + 2
29
+ chunk_end = chunk_start + chunk_size
30
+ body += data[chunk_start:chunk_end]
31
+ data = data[chunk_end + 2:]
32
+ try:
33
+ body = gzip.decompress(body)
34
+ return body
35
+ except gzip.BadGzipFile:
36
+ return body
15
37
 
16
38
 
17
39
  def process_origin_pos(originPos):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -1,6 +1,5 @@
1
1
  README.md
2
2
  setup.py
3
- test/test.py
4
3
  xbase_util/__init__.py
5
4
  xbase_util/add_column_util.py
6
5
  xbase_util/dangerous_util.py
@@ -10,6 +9,7 @@ xbase_util/geo_util.py
10
9
  xbase_util/handle_features_util.py
11
10
  xbase_util/packet_util.py
12
11
  xbase_util/pcap_util.py
12
+ xbase_util/segment.py
13
13
  xbase_util/xbase_constant.py
14
14
  xbase_util/xbase_util.py
15
15
  xbase_util.egg-info/PKG-INFO
@@ -1,38 +0,0 @@
1
- import copy
2
- import gzip
3
- import pickle
4
- import re
5
- import traceback
6
-
7
- from requests import session
8
-
9
- from xbase_util.packet_util import filter_visible_chars
10
- from xbase_util.pcap_util import reassemble_tcp, reassemble_session
11
-
12
- if __name__ == '__main__':
13
- # req = EsReq("http://127.0.0.1:9200")
14
- # exp=build_es_expression(size="1",
15
- # start_time=None,
16
- # end_time=None,
17
- # arkime_expression='id == 250106-lKoC7T_SwbNAe4xDQQx7KTOd')
18
- # session=req.search(body=exp,index="arkime_sessions3-*").json()['hits']['hits'][0]
19
- # packetPos=session['_source']['packetPos']
20
- # stream,packet_objs=process_session_id_disk_simple(id=session['_id'], node=session['_source']['node'],
21
- # packet_pos=packetPos, esdb=EsDb(req, multiprocessing.Manager()),
22
- # pcap_path_prefix="origin")
23
- #
24
- # with open('stream.pkl', 'wb') as f:
25
- # pickle.dump(stream, f)
26
- # with open('packet_objs.pkl', 'wb') as f:
27
- # pickle.dump(packet_objs, f)
28
-
29
- with open('stream.pkl', 'rb') as f:
30
- stream = pickle.load(f)
31
- with open('packet_objs.pkl', 'rb') as f:
32
- packet_objs = pickle.load(f)
33
- skey = f"10.28.7.16:54398"
34
- reassemble_tcp_res = reassemble_tcp(packet_objs, skey)
35
- all_packets = reassemble_session(reassemble_tcp_res, skey,session_id="emm")
36
- time_period = [( abs(item['res_time']-item['req_time'])) for item in
37
- all_packets if item['res_time'] != 0 and item['req_time'] != 0]
38
- print(all_packets)
File without changes
File without changes