xbase-util 0.4.1__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {xbase_util-0.4.1 → xbase_util-0.4.3}/PKG-INFO +1 -1
  2. {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.py +1 -1
  3. xbase_util-0.4.3/xbase_util/packet_util.py +171 -0
  4. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/pcap_util.py +0 -1
  5. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_constant.py +79 -16
  6. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/SOURCES.txt +1 -0
  8. {xbase_util-0.4.1 → xbase_util-0.4.3}/README.md +0 -0
  9. {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.cfg +0 -0
  10. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/__init__.py +0 -0
  11. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/add_column_util.py +0 -0
  12. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/__init__.py +0 -0
  13. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/ConfigBean.py +0 -0
  14. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  15. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/FlowBean.py +0 -0
  16. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  17. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/__init__.py +0 -0
  18. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/ConfigDao.py +0 -0
  19. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  20. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/FlowDao.py +0 -0
  21. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  22. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/__init__.py +0 -0
  23. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/initsqlite3.py +0 -0
  24. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/es_db_util.py +0 -0
  25. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/esreq.py +0 -0
  26. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/geo_util.py +0 -0
  27. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/handle_features_util.py +0 -0
  28. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_util.py +0 -0
  29. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/dependency_links.txt +0 -0
  30. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/not-zip-safe +0 -0
  31. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/top_level.txt +0 -0
  32. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  33. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.4.1",
6
+ version="0.4.3",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -0,0 +1,171 @@
1
+ import re
2
+
3
+ from scapy.layers.inet import TCP
4
+
5
+ from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
6
+ features_key, plain_body_columns
7
+ from xbase_util.xbase_util import firstOrZero
8
+
9
+
10
def content_type_is_plain(packet):
    """
    Decide whether a raw HTTP message declares a plain (readable) Content-Type.

    The whole message is normalised before it is inspected: ``-`` becomes
    ``_``, spaces are removed and the text is lower-cased. The first header
    line whose name is exactly ``content_type`` decides the result.

    :param packet: text of a single HTTP message (headers and optional body)
    :return: True if the first Content-Type header value is listed in
             ``plain_content_type_columns``; False if it is not listed or no
             Content-Type header is present
    """
    if ":" not in packet:
        return False
    normalized = packet.replace("-", "_").replace(" ", "").lower()
    for line in normalized.split("\n"):
        # Split on the FIRST colon only, so values that themselves contain a
        # colon (e.g. ...;action="urn:sendcommand") are kept intact.
        name, separator, value = line.partition(":")
        # Require an exact header-name match so that headers such as
        # X-Content-Type-Options, or body text that merely mentions
        # "content_type", cannot decide the outcome.
        if not separator or name.strip() != "content_type":
            continue
        return value.replace("\r", "").strip() in plain_content_type_columns
    return False
25
+
26
+
27
def filter_visible_chars(data):
    """
    Turn a sequence of byte values into text, dropping non-visible bytes.

    Printable ASCII (codes 32..126) is kept, together with tab, line feed
    and carriage return; every other byte value is discarded.

    :param data: iterable of integer byte values, e.g. a ``bytes`` object
    :return: string made of the retained characters
    """
    kept_chars = []
    for code in data:
        is_printable = 32 <= code <= 126
        if is_printable or code in (9, 10, 13):  # 9/10/13 = tab, LF, CR
            kept_chars.append(chr(code))
    return ''.join(kept_chars)
34
+
35
+
36
def get_all_columns(
        contains_packet_column=False,
        contains_src_dst_column=False,
        contains_statistic_column=False,
        contains_features_column=False,
        contains_plain_body_column=False,
        contains_pcap_flow_text=False
):
    """
    Build the list of column names for the selected column groups.

    Groups are appended in a fixed order: packet, src/dst header, statistic,
    features, plain body, and finally the pcap flow text column. A new list
    is returned on every call; the module-level constants are not modified.

    :param contains_packet_column: include ``packetKeyname``
    :param contains_src_dst_column: include ``src_dst_header``
    :param contains_statistic_column: include ``statisticHeader``
    :param contains_features_column: include ``features_key``
    :param contains_plain_body_column: include ``plain_body_columns``
    :param contains_pcap_flow_text: include the pcap flow text column
    :return: list of column names
    """
    result_columns = []
    if contains_packet_column:
        result_columns.extend(packetKeyname)
    if contains_src_dst_column:
        result_columns.extend(src_dst_header)
    if contains_statistic_column:
        result_columns.extend(statisticHeader)
    if contains_features_column:
        result_columns.extend(features_key)
    if contains_plain_body_column:
        result_columns.extend(plain_body_columns)
    if contains_pcap_flow_text:
        # The flag value itself (True) used to be appended here, which put a
        # boolean into a list of column names.
        # NOTE(review): "pcap_flow_text" is the name implied by the parameter;
        # confirm it matches the column the consumers expect.
        result_columns.append("pcap_flow_text")
    return result_columns
58
+
59
+
60
# Request line of an HTTP/1.x message. The method group is non-capturing so
# that re.split() does not inject the captured method into its result list.
_HTTP_REQUEST_LINE = r"(?:GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d"
_HTTP_REQUEST_LINE_RE = re.compile(_HTTP_REQUEST_LINE)
# Zero-width split point located right before every request line.
_HTTP_REQUEST_START_RE = re.compile(f"(?={_HTTP_REQUEST_LINE})")
# Request line plus headers, up to and including the first blank line.
_HTTP_REQUEST_HEAD_RE = re.compile(_HTTP_REQUEST_LINE + r"[\s\S]*?\r\n\r\n")
# Status line and everything after it within the same chunk.
_HTTP_RESPONSE_RE = re.compile(r"HTTP/\d\.\d \d{3}[\s\S]*")


def get_all_packets_by_regx(packets):
    """
    Split the TCP payload of a capture into HTTP request/response pairs.

    The payloads of all TCP packets are concatenated in iteration order,
    reduced to visible characters with ``filter_visible_chars`` and cut in
    front of every HTTP request line. Each chunk that contains a request
    line yields one entry.

    The split has no upper bound. Previously ``re.M`` was passed positionally
    to ``re.split``, where it was interpreted as ``maxsplit`` (value 8), so
    only the first eight requests of a stream were separated.

    :param packets: iterable of scapy packets
    :return: list of ``{"req": str, "res": str}`` dicts; ``res`` is ``""``
             when the chunk contains no status line, and ``req`` is ``""``
             when a response exists but the request has no terminating
             blank line
    """
    # join() avoids the quadratic cost of repeated bytes concatenation.
    stream = b"".join(bytes(pkt[TCP].payload) for pkt in packets if TCP in pkt)
    text = filter_visible_chars(stream)

    all_packets = []
    for chunk in _HTTP_REQUEST_START_RE.split(text):
        if _HTTP_REQUEST_LINE_RE.search(chunk) is None:
            # Leading data that precedes the first request line.
            continue
        response_match = _HTTP_RESPONSE_RE.search(chunk)
        if response_match is None:
            # No response data: the whole chunk is the request.
            all_packets.append({"req": chunk, "res": ""})
            continue
        # Response data present: the request is cut at its first blank line.
        # NOTE(review): this drops any request body that sits between the
        # request headers and the response; confirm that is intended.
        head_match = _HTTP_REQUEST_HEAD_RE.search(chunk)
        request_text = head_match.group(0) if head_match else ""
        all_packets.append({"req": request_text, "res": response_match.group(0)})
    return all_packets
95
+
96
+
97
def get_body(param, is_src):
    """
    Return the body part of a raw HTTP message.

    The body is everything after the first blank line (``\\r\\n\\r\\n``),
    with surrounding whitespace stripped. A message without a blank line has
    no body and yields ``""`` instead of raising ``IndexError``. Blank lines
    inside the body no longer cut it short.

    :param param: raw HTTP message text with CRLF line endings
    :param is_src: not used by this function; kept so existing callers that
                   pass it keep working
    :return: stripped body text, or ``""`` when there is none
    """
    _, _, body = param.partition("\r\n\r\n")
    return body.strip()
100
+
101
+
102
def get_header_value(header_set, value):
    """
    Return the value of the first header line named ``value``.

    A line matches only when the text before its first colon equals the
    requested name, compared case-insensitively and ignoring surrounding
    whitespace. Lines that merely contain the name somewhere (a request line
    such as ``GET /Host HTTP/1.1``, or ``X-Content-Type-Options`` when asking
    for ``Content-Type``) are not matched.

    :param header_set: iterable of header lines, e.g. ``"Host: example.com"``
    :param value: header name to look up, e.g. ``"Host"``
    :return: the header value with surrounding whitespace removed, or ``""``
             when no line carries that header
    """
    wanted = value.strip().lower()
    for line in header_set:
        # Split on the first colon only so values such as URLs stay intact.
        name, separator, content = line.partition(":")
        if separator and name.strip().lower() == wanted:
            return content.strip()
    return ""
108
+
109
+
110
def _collect_simple_headers(lines):
    """Map normalised header names to space-stripped values for ``name: value`` lines."""
    headers = {}
    for line in lines:
        parts = line.replace("\r", "").split(":")
        # Only lines with exactly one colon are used; lines whose value
        # contains a colon (URLs, host:port) are skipped.
        # NOTE(review): confirm that skipping such headers is intended.
        if len(parts) != 2:
            continue
        key = parts[0].replace(" ", "").replace("-", "_").lower()
        # The first occurrence of a repeated header wins.
        headers.setdefault(key, parts[1].replace(" ", ""))
    return headers


def get_detail_by_package(packets_from_pcap, publicField, use_regx):
    """
    Complete one session record with fields parsed from an HTTP exchange.

    :param packets_from_pcap: dict holding the request and response text;
                              keys ``req``/``res`` when ``use_regx`` is true,
                              ``request``/``response`` otherwise
    :param publicField: the original session record; it is copied, not
                        modified
    :param use_regx: selects the key names above and, when true, also fills
                     ``plain_body_src``/``plain_body_dst``
    :return: a new dict with the HTTP fields and the ``src_*``/``dst_*``
             header columns filled in
    """
    res_field = publicField.copy()
    if use_regx:
        req = packets_from_pcap['req']
        res = packets_from_pcap['res']
    else:
        res = packets_from_pcap["response"]
        req = packets_from_pcap["request"]
    res_field["initRTT"] = firstOrZero(res_field.get("initRTT", 0))
    res_field["length"] = firstOrZero(res_field.get("length", 0))

    request_lines = req.strip().split("\n")
    request_line = next((line for line in request_lines if "HTTP" in line), "")
    request_parts = request_line.split(" ")
    if len(request_parts) >= 3:
        res_field['http.clientVersion'] = str(request_parts[2]).replace("\n", "").replace("\r", "")
        res_field['http.path'] = request_parts[1]
        res_field['http.method'] = request_parts[0]
    else:
        # No request line, or a malformed one: leave the fields empty
        # instead of raising IndexError.
        res_field['http.clientVersion'] = ''
        res_field['http.path'] = ''
        res_field['http.method'] = ''
    res_field['http.request-referer'] = get_header_value(header_set=request_lines, value="Referer")
    res_field['http.request-content-type'] = get_header_value(header_set=request_lines,
                                                              value="Content-Type")
    res_field['http.hostTokens'] = get_header_value(header_set=request_lines, value="Host")

    if use_regx:
        res_field['plain_body_src'] = ""
        res_field['plain_body_dst'] = ""
        if content_type_is_plain(req):
            res_field['plain_body_src'] = get_body(req, is_src=True)
        if content_type_is_plain(res):
            res_field['plain_body_dst'] = get_body(res, is_src=False)

    response_lines = res.strip().split("\n")
    status_line = next((line for line in response_lines if "HTTP" in line), "")
    status_parts = status_line.strip().split(" ")
    version_parts = status_parts[0].split("/")
    if len(status_parts) >= 2 and len(version_parts) >= 2:
        res_field['http.statuscode'] = status_parts[1]
        res_field['http.serverVersion'] = version_parts[1]
    else:
        # No status line, or a malformed one: leave the fields empty
        # instead of raising IndexError.
        res_field['http.statuscode'] = ""
        res_field['http.serverVersion'] = ""
    res_field['http.response-server'] = get_header_value(header_set=response_lines, value="Server")
    res_field['http.response-content-type'] = get_header_value(header_set=response_lines,
                                                               value="Content-Type")

    # Fill the src_*/dst_* header columns. Iterating a set of all lines made
    # the result depend on hash ordering whenever a header appeared in both
    # the request and the response. Now src_* prefers the request value and
    # dst_* the response value; a header seen on one side only still fills
    # both columns, as before.
    request_headers = _collect_simple_headers(request_lines)
    response_headers = _collect_simple_headers(response_lines)
    known_columns = set(src_dst_header)
    for key in {**request_headers, **response_headers}:
        src_column = f"src_{key}"
        dst_column = f"dst_{key}"
        if src_column in known_columns:
            res_field[src_column] = request_headers.get(key, response_headers.get(key))
        if dst_column in known_columns:
            res_field[dst_column] = response_headers.get(key, request_headers.get(key))
    return res_field
@@ -4,7 +4,6 @@ import struct
4
4
  import time
5
5
  import zlib
6
6
  from datetime import datetime
7
-
8
7
  from Crypto.Cipher import AES
9
8
  from zstandard import ZstdDecompressor
10
9
 
@@ -206,19 +206,82 @@ features_key = [
206
206
  'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
207
207
  'UserAgent_is_unknown']
208
208
# Pre-compiled, case-insensitive regular expressions, keyed by a short
# category name:
#   sql            - SQL keywords, comment markers and common probe functions
#   xss            - script/iframe tags, javascript: URLs, inline handlers
#   cmd            - sensitive file paths, shell separators, command names
#   path           - directory traversal sequences, plain and URL-encoded
#   redirect       - redirect-style parameter names
#   danger         - URL-encoded special characters and a few fixed markers
#   suspicious_ext - file extensions treated as suspicious
# NOTE(review): in "sql" the surrounding \b anchors also apply to the
# non-word alternatives ("--", "#", " or "), which only match when a word
# character sits directly next to them; confirm that is intended.
regex_patterns = {
    "sql": re.compile(
        r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
        re.IGNORECASE),
    "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
    "cmd": re.compile(
        r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
        re.IGNORECASE),
    "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
    "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
    "danger": re.compile(
        r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
        re.IGNORECASE),
    "suspicious_ext": re.compile(
        r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
        re.IGNORECASE)
}
225
# Content-Type values treated as visible (plain) content.
# Entries are written in a normalised spelling: lower-case, no spaces and
# "_" in place of "-". Values looked up here must be normalised the same way.
# Some entries are repeated, and some carry misspelled parameters
# ("charse=", "chartset="); presumably these mirror values observed in real
# traffic - verify before cleaning them up.
plain_content_type_columns = ['text/json;charset=gbk',
                              'application/xml;charset=gbk', 'application/xml;charset=utf_8', 'application/tlt_notify',
                              'application/json;charset=gbk', 'text/xml;charset=utf_8', 'application/json',
                              'text/csv;charset=utf_8', 'application/json;charse=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:dopricetaxseparated"',
                              'text/xml;charset=gbk', 'text/xml', 'application/x_cm_json;charset=utf_8',
                              'application/xml;tz=utc', 'text/xml;charset="utf_8"', 'application/x_java_archive',
                              'application/msword', 'image/png', 'application/xml',
                              'application/x_stapler_method_invocation;charset=utf_8', 'text/plain;charset=iso_8859_1',
                              'application/x_www_form_urlencoded;charset=utf_8', 'text/plain;charset=gbk',
                              'application/octet_stream;charset=utf_8', 'application/x_tika_ooxml',
                              'application/soap+xml;charset=utf_8;action="urn:sendcommand"', 'application/dns_message',
                              'application/json;charset=utf_8', 'application/vnd.docker.distribution.manifest.v2+json',
                              'application/vnd.elasticsearch+json;compatible_with=8', 'off/ping', 'text/plain',
                              'application/x_git_upload_pack_request', 'application/json;charset=gbk',
                              'text/html;charset=iso_8859_1', 'text/http;charset=utf_8',
                              'application/soap+xml;charset=gbk', 'text/html',
                              'application/vnd.openxmlformats_officedocument.spreadsheetml.sheet',
                              'application/x_www_form_urlencoded;charset=gbk', 'text/plain;charset=utf_8',
                              'text/html;charset=gbk', 'application/soap+xml;charset=gbk;',
                              'application/x_www_form_urlencoded', 'application/x_ndjson', 'text/xml;charset=gbk',
                              'application/json;chartset=utf_8',
                              'application/soap+xml;charset=utf_8;action="urn:getcostbyruleengine"',
                              'application/json_rpc', 'text/json;charset=utf_8', 'application/json;charset=utf8',
                              'application/xml;charset=utf_8', 'application/x_www_form_urlencoded;charset=gbk',
                              'application/soap+xml;charset=utf_8;', 'application/merge_patch+json',
                              'application/json;', 'text/xml;charset="utf_16le"', 'text/html;charset=utf_8']
253
# Column names of the "packet" column group.
# The list mixes dotted field names (tcpflags.*, source.*, destination.*,
# http.*, dns.*, tls.*), geo-location columns derived from the source and
# destination IP, and derived statistic/label columns.
# NOTE(review): presumably the dotted names mirror the fields of the
# upstream session record - confirm against the producer.
packetKeyname = ['id', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg', 'tcpflags.psh',
                 'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port', 'source.packets',
                 'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets', 'initRTT',
                 'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
                 'network.packets', 'network.bytes', 'length', 'client.bytes', 'http.uri',
                 'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt', 'http.clientVersionCnt',
                 'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt', 'http.serverVersion',
                 'http.responseHeaderCnt', 'http.xffIp', 'http.clientVersion', 'http.uriTokens',
                 'http.request-refererCnt', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic', 'http.methodCnt',
                 'http.request-content-type', 'http.uriCnt', 'http.serverVersionCnt', 'http.useragent', 'http.keyCnt',
                 'http.request-referer', 'http.path', 'http.hostCnt', 'http.response-server', 'http.pathCnt',
                 'http.useragentTokens', 'http.method-GET', 'http.method', 'http.key', 'http.hostTokens',
                 'http.requestHeader', 'http.responseHeader', 'http.method-POST', 'dns.ASN', 'dns.RIR', 'dns.GEO',
                 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.host', 'dns.ipCnt', 'dns.OpCode', 'dns.OpCodeCnt',
                 'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType', 'dns.QueryTypeCnt',
                 'dns.status', 'dns.statusCnt', 'tls.cipher', 'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3',
                 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version',
                 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt', 'packetPos', 'source.ip_Country_IsoCode',
                 'source.ip_Country_Name', 'source.ip_Country_SpecificName',
                 'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
                 'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
                 'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
                 'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name',
                 'destination.ip_City_PostalCode', 'destination.ip_Location_Latitude',
                 'destination.ip_Location_Longitude', 'http.uri_length_mean', 'http.uri_length_var',
                 "http.uri_param_count_mean", "http.uri_param_count_var", "http.uri_depth_mean", "http.uri_depth_var",
                 "http.uri_filename_length_mean", "http.uri_filename_length_var", "dns_domain_length_mean",
                 "dns_domain_length_var", "traffic_type", "PROTOCOL", "DENY_METHOD", "THREAT_SUMMARY", "SEVERITY",
                 "dns_domain_length", "dns_domain_suffix", "dns_domain", "dns_domain_suffix_length", "dns_base_domain",
                 "dns_base_domain_length", "req_res_period_mean", "req_res_period_var", "status_code_1x_count",
                 "status_code_2x_count", "status_code_3x_count", "status_code_4x_count", "status_code_5x_count",
                 "req_bytes_percentage", "res_bytes_percentage", "cookie_end_with_semicolon_count",
                 "ua_duplicate_count"]
286
# Column names of the "plain body" column group: one column for the source
# side and one for the destination side.
# NOTE(review): presumably src is the request body and dst the response
# body - confirm against the code that fills these columns.
plain_body_columns = ["plain_body_src",
                      "plain_body_dst"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -6,6 +6,7 @@ xbase_util/es_db_util.py
6
6
  xbase_util/esreq.py
7
7
  xbase_util/geo_util.py
8
8
  xbase_util/handle_features_util.py
9
+ xbase_util/packet_util.py
9
10
  xbase_util/pcap_util.py
10
11
  xbase_util/xbase_constant.py
11
12
  xbase_util/xbase_util.py
File without changes
File without changes