xbase-util 0.4.1__tar.gz → 0.4.3__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. {xbase_util-0.4.1 → xbase_util-0.4.3}/PKG-INFO +1 -1
  2. {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.py +1 -1
  3. xbase_util-0.4.3/xbase_util/packet_util.py +171 -0
  4. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/pcap_util.py +0 -1
  5. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_constant.py +79 -16
  6. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/SOURCES.txt +1 -0
  8. {xbase_util-0.4.1 → xbase_util-0.4.3}/README.md +0 -0
  9. {xbase_util-0.4.1 → xbase_util-0.4.3}/setup.cfg +0 -0
  10. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/__init__.py +0 -0
  11. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/add_column_util.py +0 -0
  12. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/__init__.py +0 -0
  13. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/ConfigBean.py +0 -0
  14. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  15. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/FlowBean.py +0 -0
  16. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  17. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/bean/__init__.py +0 -0
  18. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/ConfigDao.py +0 -0
  19. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  20. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/FlowDao.py +0 -0
  21. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  22. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/dao/__init__.py +0 -0
  23. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/db/initsqlite3.py +0 -0
  24. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/es_db_util.py +0 -0
  25. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/esreq.py +0 -0
  26. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/geo_util.py +0 -0
  27. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/handle_features_util.py +0 -0
  28. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util/xbase_util.py +0 -0
  29. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/dependency_links.txt +0 -0
  30. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/not-zip-safe +0 -0
  31. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util.egg-info/top_level.txt +0 -0
  32. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  33. {xbase_util-0.4.1 → xbase_util-0.4.3}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.4.1",
6
+ version="0.4.3",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -0,0 +1,171 @@
1
+ import re
2
+
3
+ from scapy.layers.inet import TCP
4
+
5
+ from xbase_util.xbase_constant import plain_content_type_columns, packetKeyname, src_dst_header, statisticHeader, \
6
+ features_key, plain_body_columns
7
+ from xbase_util.xbase_util import firstOrZero
8
+
9
+
10
def content_type_is_plain(packet):
    """
    Report whether an HTTP message declares a plain (human-readable) content type.

    The message is normalised before matching: dashes become underscores,
    spaces are removed and the text is lower-cased, so the value can be
    compared directly with the entries of ``plain_content_type_columns``.

    :param packet: one HTTP message (headers plus optional body) as text
    :return: True when the first Content-Type header found carries a value
        listed in ``plain_content_type_columns``; False otherwise, including
        when no such header exists
    """
    if ":" not in packet:
        return False
    normalized = packet.replace("-", "_").replace(" ", "").lower()
    for line in normalized.split("\n"):
        # Split on the first colon only, so a value that itself contains a
        # colon (for example action="urn:...") is kept whole.
        name, colon, value = line.partition(":")
        # Require the exact header name: a substring test would also stop on
        # headers such as X-Content-Type-Options and answer for the wrong line.
        if colon and name.strip() == "content_type":
            return value.replace("\r", "").strip() in plain_content_type_columns
    return False
25
+
26
+
27
def filter_visible_chars(data):
    """
    Drop every byte that is not printable ASCII, tab, line feed or carriage return.

    :param data: iterable of integer byte values (for example a ``bytes`` object)
    :return: str built from the characters that were kept, in their original order
    """
    whitespace_codes = (9, 10, 13)  # tab, LF, CR
    kept = []
    for code in data:
        if code in whitespace_codes or 32 <= code <= 126:
            kept.append(chr(code))
    return ''.join(kept)
34
+
35
+
36
def get_all_columns(
        contains_packet_column=False,
        contains_src_dst_column=False,
        contains_statistic_column=False,
        contains_features_column=False,
        contains_plain_body_column=False,
        contains_pcap_flow_text=False
):
    """
    Build the list of column names for the requested column groups.

    Groups are appended in a fixed order: packet, src/dst header, statistic,
    features, plain body, pcap flow text. A new list is returned on every
    call, so the module-level column lists are never modified.

    :param contains_packet_column: include ``packetKeyname``
    :param contains_src_dst_column: include ``src_dst_header``
    :param contains_statistic_column: include ``statisticHeader``
    :param contains_features_column: include ``features_key``
    :param contains_plain_body_column: include ``plain_body_columns``
    :param contains_pcap_flow_text: include the pcap flow text column; a
        string value is used as the column name itself
    :return: list of column names
    """
    result_columns = []
    if contains_packet_column:
        result_columns.extend(packetKeyname)
    if contains_src_dst_column:
        result_columns.extend(src_dst_header)
    if contains_statistic_column:
        result_columns.extend(statisticHeader)
    if contains_features_column:
        result_columns.extend(features_key)
    if contains_plain_body_column:
        result_columns.extend(plain_body_columns)
    if contains_pcap_flow_text:
        if isinstance(contains_pcap_flow_text, str):
            # A caller that passed the column name directly keeps getting it.
            result_columns.append(contains_pcap_flow_text)
        else:
            # Previously the flag value itself (True) was appended, which is
            # not a column name.
            # NOTE(review): the name is assumed from the flag name - confirm
            # it matches the column produced upstream.
            result_columns.append("pcap_flow_text")
    return result_columns
58
+
59
+
60
def get_all_packets_by_regx(packets):
    """
    Rebuild HTTP request/response pairs from the TCP payloads of a capture.

    The payload of every packet that has a TCP layer is concatenated in
    iteration order, reduced to printable text with ``filter_visible_chars``
    and then cut in front of every HTTP request line.

    :param packets: iterable of scapy packets
    :return: list of ``{"req": str, "res": str}`` dicts, one per request line
        found. ``res`` is "" when no status line follows the request; in that
        case ``req`` is the whole chunk. Otherwise ``req`` is the text from the
        request line up to the first blank line.
    """
    # join once instead of repeated bytes += (quadratic on large captures)
    stream = b"".join(bytes(pkt[TCP].payload) for pkt in packets if TCP in pkt)
    text = filter_visible_chars(stream)
    # Non-capturing group: a capturing group would make re.split() insert the
    # bare method names into its result.
    request_line = r"(?:GET|POST|HEAD|PUT|DELETE|OPTIONS|PATCH) \/[^\s]* HTTP\/\d\.\d"
    # The third positional argument of re.split() is maxsplit, not flags.
    # Passing re.M there capped the split at 8 boundaries, so later requests
    # were lumped into a single chunk. The pattern has no ^/$ anchors, so no
    # flag is needed at all.
    chunks = re.split(f"(?={request_line})", text)
    all_packets = []
    for chunk in chunks:
        if re.search(request_line, chunk) is None:
            continue
        response_match = re.search(r"HTTP/\d\.\d \d{3}[\s\S]*", chunk)
        response_text = response_match.group(0) if response_match else ""
        if response_text == "":
            # no response data: the whole chunk is the request
            request_text = chunk
        else:
            # response present: the request is the header section only
            # NOTE(review): this drops a request body whenever a response
            # exists - confirm that is intended.
            request_match = re.search(request_line + r"[\s\S]*?\r\n\r\n", chunk)
            request_text = request_match.group(0) if request_match else ""
        all_packets.append({"req": request_text, "res": response_text})
    return all_packets
95
+
96
+
97
def get_body(param, is_src):
    """
    Return the body of an HTTP message, i.e. the text after the first blank line.

    :param param: one HTTP message (headers plus optional body) as text
    :param is_src: unused; kept so existing callers keep working
    :return: the body with surrounding whitespace stripped, or "" when the
        message has no blank-line separator or the body is empty
    """
    # partition() never raises when the separator is missing, and it keeps
    # the whole remainder even if the body itself contains a blank line.
    _, _, body = param.partition("\r\n\r\n")
    return body.strip()
100
+
101
+
102
def get_header_value(header_set, value):
    """
    Look up the value of a header in a list of raw header lines.

    The header name is compared exactly and case-insensitively, so "Host"
    does not match "X-Forwarded-Host" or a start line that merely contains
    the word.

    :param header_set: iterable of raw lines such as "Host: example.com\\r"
    :param value: header name to look for, e.g. "Content-Type"
    :return: the stripped value of the first matching header, or "" when the
        header is absent
    """
    wanted = value.strip().lower()
    for line in header_set:
        # split on the first colon only so values containing ':' stay whole
        name, colon, content = line.partition(":")
        if colon and name.strip().lower() == wanted:
            return content.strip()
    return ""
108
+
109
+
110
def _collect_headers(lines):
    """Map normalised header names (no spaces, '-' -> '_', lower-case) to values with spaces removed."""
    found = {}
    for line in lines:
        # first colon only, so values containing ':' (host:port, URLs, dates)
        # are no longer silently dropped
        name, colon, content = line.replace("\r", "").partition(":")
        if colon:
            found[name.replace(" ", "").replace("-", "_").lower()] = content.replace(" ", "")
    return found


def get_detail_by_package(packets_from_pcap, publicField, use_regx):
    """
    Complete one session record with fields parsed from a request/response pair.

    :param packets_from_pcap: dict holding the pair as text, under the keys
        "req"/"res" when ``use_regx`` is true and "request"/"response" otherwise
    :param publicField: the original session record; it is copied, not modified
    :param use_regx: selects the key names above and enables the
        ``plain_body_src`` / ``plain_body_dst`` fields
    :return: the completed copy of the record
    """
    res_field = publicField.copy()
    if use_regx:
        req = packets_from_pcap['req']
        res = packets_from_pcap['res']
    else:
        res = packets_from_pcap["response"]
        req = packets_from_pcap["request"]
    res_field["initRTT"] = firstOrZero(res_field.get("initRTT", 0))
    res_field["length"] = firstOrZero(res_field.get("length", 0))

    request_lines = req.strip().split("\n")
    request_start = next((line for line in request_lines if "HTTP" in line), "").split(" ")
    # A start line with fewer than three tokens used to raise IndexError;
    # it is now treated like a missing start line.
    if len(request_start) >= 3:
        res_field['http.clientVersion'] = str(request_start[2]).replace("\n", "").replace("\r", "")
        res_field['http.path'] = request_start[1]
        res_field['http.method'] = request_start[0]
    else:
        res_field['http.clientVersion'] = ''
        res_field['http.path'] = ''
        res_field['http.method'] = ''
    res_field['http.request-referer'] = get_header_value(header_set=request_lines, value="Referer")
    res_field['http.request-content-type'] = get_header_value(header_set=request_lines,
                                                              value="Content-Type")
    res_field['http.hostTokens'] = get_header_value(header_set=request_lines, value="Host")

    if use_regx:
        res_field['plain_body_src'] = ""
        res_field['plain_body_dst'] = ""
        if content_type_is_plain(req):
            res_field['plain_body_src'] = get_body(req, is_src=True)
        if content_type_is_plain(res):
            res_field['plain_body_dst'] = get_body(res, is_src=False)

    response_lines = res.strip().split("\n")
    response_start = next((line for line in response_lines if "HTTP" in line), "").strip().split(" ")
    version_parts = response_start[0].split("/")
    # Same guard as for the request: malformed status lines give "" instead
    # of raising IndexError.
    res_field['http.statuscode'] = response_start[1] if len(response_start) >= 2 else ""
    res_field['http.serverVersion'] = version_parts[1] if len(version_parts) >= 2 else ""
    res_field['http.response-server'] = get_header_value(header_set=response_lines, value="Server")
    res_field['http.response-content-type'] = get_header_value(header_set=response_lines,
                                                               value="Content-Type")

    request_headers = _collect_headers(request_lines)
    response_headers = _collect_headers(response_lines)
    # The lines used to be merged through set(), whose iteration order changes
    # between runs, so when both sides carried the same header the stored
    # value was arbitrary. Now src_ prefers the request value and dst_ the
    # response value; a header present on one side only still fills both.
    # NOTE(review): confirm src_ = request side and dst_ = response side.
    for key in dict.fromkeys([*request_headers, *response_headers]):
        if f"src_{key}" in src_dst_header:
            res_field[f"src_{key}"] = request_headers.get(key, response_headers.get(key))
        if f"dst_{key}" in src_dst_header:
            res_field[f"dst_{key}"] = response_headers.get(key, request_headers.get(key))
    return res_field
@@ -4,7 +4,6 @@ import struct
4
4
  import time
5
5
  import zlib
6
6
  from datetime import datetime
7
-
8
7
  from Crypto.Cipher import AES
9
8
  from zstandard import ZstdDecompressor
10
9
 
@@ -206,19 +206,82 @@ features_key = [
206
206
  'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
207
207
  'UserAgent_is_unknown']
208
208
  regex_patterns = {
209
- "sql": re.compile(
210
- r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
211
- re.IGNORECASE),
212
- "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
213
- "cmd": re.compile(
214
- r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
215
- re.IGNORECASE),
216
- "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
217
- "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
218
- "danger": re.compile(
219
- r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
220
- re.IGNORECASE),
221
- "suspicious_ext": re.compile(
222
- r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
223
- re.IGNORECASE)
224
- }
209
+ "sql": re.compile(
210
+ r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
211
+ re.IGNORECASE),
212
+ "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
213
+ "cmd": re.compile(
214
+ r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
215
+ re.IGNORECASE),
216
+ "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
217
+ "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
218
+ "danger": re.compile(
219
+ r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
220
+ re.IGNORECASE),
221
+ "suspicious_ext": re.compile(
222
+ r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
223
+ re.IGNORECASE)
224
+ }
225
+ # 可见的content-type值
226
+ plain_content_type_columns = ['text/json;charset=gbk',
227
+ 'application/xml;charset=gbk', 'application/xml;charset=utf_8', 'application/tlt_notify',
228
+ 'application/json;charset=gbk', 'text/xml;charset=utf_8', 'application/json',
229
+ 'text/csv;charset=utf_8', 'application/json;charse=utf_8',
230
+ 'application/soap+xml;charset=utf_8;action="urn:dopricetaxseparated"',
231
+ 'text/xml;charset=gbk', 'text/xml', 'application/x_cm_json;charset=utf_8',
232
+ 'application/xml;tz=utc', 'text/xml;charset="utf_8"', 'application/x_java_archive',
233
+ 'application/msword', 'image/png', 'application/xml',
234
+ 'application/x_stapler_method_invocation;charset=utf_8', 'text/plain;charset=iso_8859_1',
235
+ 'application/x_www_form_urlencoded;charset=utf_8', 'text/plain;charset=gbk',
236
+ 'application/octet_stream;charset=utf_8', 'application/x_tika_ooxml',
237
+ 'application/soap+xml;charset=utf_8;action="urn:sendcommand"', 'application/dns_message',
238
+ 'application/json;charset=utf_8', 'application/vnd.docker.distribution.manifest.v2+json',
239
+ 'application/vnd.elasticsearch+json;compatible_with=8', 'off/ping', 'text/plain',
240
+ 'application/x_git_upload_pack_request', 'application/json;charset=gbk',
241
+ 'text/html;charset=iso_8859_1', 'text/http;charset=utf_8',
242
+ 'application/soap+xml;charset=gbk', 'text/html',
243
+ 'application/vnd.openxmlformats_officedocument.spreadsheetml.sheet',
244
+ 'application/x_www_form_urlencoded;charset=gbk', 'text/plain;charset=utf_8',
245
+ 'text/html;charset=gbk', 'application/soap+xml;charset=gbk;',
246
+ 'application/x_www_form_urlencoded', 'application/x_ndjson', 'text/xml;charset=gbk',
247
+ 'application/json;chartset=utf_8',
248
+ 'application/soap+xml;charset=utf_8;action="urn:getcostbyruleengine"',
249
+ 'application/json_rpc', 'text/json;charset=utf_8', 'application/json;charset=utf8',
250
+ 'application/xml;charset=utf_8', 'application/x_www_form_urlencoded;charset=gbk',
251
+ 'application/soap+xml;charset=utf_8;', 'application/merge_patch+json',
252
+ 'application/json;', 'text/xml;charset="utf_16le"', 'text/html;charset=utf_8']
253
+ packetKeyname = ['id', 'segmentCnt', 'tcpflags.rst', 'tcpflags.ack', 'tcpflags.syn', 'tcpflags.urg', 'tcpflags.psh',
254
+ 'tcpflags.syn-ack', 'tcpflags.fin', 'source.ip', 'destination.ip', 'source.port', 'source.packets',
255
+ 'source.bytes', 'destination.port', 'destination.bytes', 'destination.packets', 'initRTT',
256
+ 'firstPacket', 'lastPacket', 'ipProtocol', 'protocolCnt', 'protocol', 'server.bytes', 'totDataBytes',
257
+ 'network.packets', 'network.bytes', 'length', 'client.bytes', 'http.uri',
258
+ 'http.response-content-type', 'http.bodyMagicCnt', 'http.statuscodeCnt', 'http.clientVersionCnt',
259
+ 'http.response-content-typeCnt', 'http.xffIpCnt', 'http.requestHeaderCnt', 'http.serverVersion',
260
+ 'http.responseHeaderCnt', 'http.xffIp', 'http.clientVersion', 'http.uriTokens',
261
+ 'http.request-refererCnt', 'http.useragentCnt', 'http.statuscode', 'http.bodyMagic', 'http.methodCnt',
262
+ 'http.request-content-type', 'http.uriCnt', 'http.serverVersionCnt', 'http.useragent', 'http.keyCnt',
263
+ 'http.request-referer', 'http.path', 'http.hostCnt', 'http.response-server', 'http.pathCnt',
264
+ 'http.useragentTokens', 'http.method-GET', 'http.method', 'http.key', 'http.hostTokens',
265
+ 'http.requestHeader', 'http.responseHeader', 'http.method-POST', 'dns.ASN', 'dns.RIR', 'dns.GEO',
266
+ 'dns.alpn', 'dns.alpnCnt', 'dns.ip', 'dns.host', 'dns.ipCnt', 'dns.OpCode', 'dns.OpCodeCnt',
267
+ 'dns.Puny', 'dns.PunyCnt', 'dns.QueryClass', 'dns.QueryClassCnt', 'dns.QueryType', 'dns.QueryTypeCnt',
268
+ 'dns.status', 'dns.statusCnt', 'tls.cipher', 'tls.cipherCnt', 'tls.dstSessionId', 'tls.ja3',
269
+ 'tls.ja3Cnt', 'tls.ja3s', 'tls.ja3sCnt', 'tls.ja4', 'tls.ja4Cnt', 'tls.srcSessionId', 'tls.version',
270
+ 'tls.versionCnt', 'tls.ja4_r', 'tls.ja4_rCnt', 'packetPos', 'source.ip_Country_IsoCode',
271
+ 'source.ip_Country_Name', 'source.ip_Country_SpecificName',
272
+ 'source.ip_Country_SpecificIsoCode', 'source.ip_City_Name', 'source.ip_City_PostalCode',
273
+ 'source.ip_Location_Latitude', 'source.ip_Location_Longitude', 'destination.ip_Country_IsoCode',
274
+ 'destination.ip_Country_Name', 'destination.ip_Country_SpecificName',
275
+ 'destination.ip_Country_SpecificIsoCode', 'destination.ip_City_Name',
276
+ 'destination.ip_City_PostalCode', 'destination.ip_Location_Latitude',
277
+ 'destination.ip_Location_Longitude', 'http.uri_length_mean', 'http.uri_length_var',
278
+ "http.uri_param_count_mean", "http.uri_param_count_var", "http.uri_depth_mean", "http.uri_depth_var",
279
+ "http.uri_filename_length_mean", "http.uri_filename_length_var", "dns_domain_length_mean",
280
+ "dns_domain_length_var", "traffic_type", "PROTOCOL", "DENY_METHOD", "THREAT_SUMMARY", "SEVERITY",
281
+ "dns_domain_length", "dns_domain_suffix", "dns_domain", "dns_domain_suffix_length", "dns_base_domain",
282
+ "dns_base_domain_length", "req_res_period_mean", "req_res_period_var", "status_code_1x_count",
283
+ "status_code_2x_count", "status_code_3x_count", "status_code_4x_count", "status_code_5x_count",
284
+ "req_bytes_percentage", "res_bytes_percentage", "cookie_end_with_semicolon_count",
285
+ "ua_duplicate_count"]
286
+ plain_body_columns = ["plain_body_src",
287
+ "plain_body_dst"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.4.1
3
+ Version: 0.4.3
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -6,6 +6,7 @@ xbase_util/es_db_util.py
6
6
  xbase_util/esreq.py
7
7
  xbase_util/geo_util.py
8
8
  xbase_util/handle_features_util.py
9
+ xbase_util/packet_util.py
9
10
  xbase_util/pcap_util.py
10
11
  xbase_util/xbase_constant.py
11
12
  xbase_util/xbase_util.py
File without changes
File without changes