xbase-util 0.3.9__tar.gz → 0.4.1__tar.gz

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (32)
  1. {xbase_util-0.3.9 → xbase_util-0.4.1}/PKG-INFO +1 -1
  2. {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.py +1 -1
  3. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/add_column_util.py +33 -17
  4. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/handle_features_util.py +18 -32
  5. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_constant.py +19 -1
  6. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.3.9 → xbase_util-0.4.1}/README.md +0 -0
  8. {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.cfg +0 -0
  9. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/__init__.py +0 -0
  11. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/ConfigBean.py +0 -0
  12. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  13. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/FlowBean.py +0 -0
  14. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  15. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/__init__.py +0 -0
  16. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/ConfigDao.py +0 -0
  17. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  18. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/FlowDao.py +0 -0
  19. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  20. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/__init__.py +0 -0
  21. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/initsqlite3.py +0 -0
  22. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/es_db_util.py +0 -0
  23. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/esreq.py +0 -0
  24. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/geo_util.py +0 -0
  25. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/pcap_util.py +0 -0
  26. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_util.py +0 -0
  27. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/SOURCES.txt +0 -0
  28. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/dependency_links.txt +0 -0
  29. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/not-zip-safe +0 -0
  30. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/top_level.txt +0 -0
  31. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  32. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.3.9
3
+ Version: 0.4.1
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.3.9",
6
+ version="0.4.1",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -128,24 +128,40 @@ def parse_list(x):
128
128
  return x
129
129
 
130
130
 
131
- def handle_dns(origin_list, isDataFrame=False):
132
- print("handle_dnslist")
131
+ def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
133
132
  if not isDataFrame:
134
133
  origin_list = pd.DataFrame(origin_list)
135
- origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
136
- origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
137
- lambda x: any(is_long_domain(domain) for domain in x))
138
- origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
139
- lambda x: any(has_random_characters(domain) for domain in x))
140
- origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
141
- lambda x: any(has_special_characters(domain) for domain in x))
142
- origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
143
- lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
- origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
145
- lambda x: any(is_danger_domain(domain) for domain in x))
146
- origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
147
- lambda x: any(is_danger_subdomain(domain) for domain in x))
148
- origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
149
- lambda x: any(has_uncommon_tld(domain) for domain in x))
134
+ if use_tqdm:
135
+ origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
136
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
137
+ lambda x: any(is_long_domain(domain) for domain in x))
138
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
139
+ lambda x: any(has_random_characters(domain) for domain in x))
140
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
141
+ lambda x: any(has_special_characters(domain) for domain in x))
142
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
143
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
145
+ lambda x: any(is_danger_domain(domain) for domain in x))
146
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
147
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
148
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
149
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
+ else:
151
+ origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
152
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
153
+ lambda x: any(is_long_domain(domain) for domain in x))
154
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
155
+ lambda x: any(has_random_characters(domain) for domain in x))
156
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
157
+ lambda x: any(has_special_characters(domain) for domain in x))
158
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
159
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
160
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
161
+ lambda x: any(is_danger_domain(domain) for domain in x))
162
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
163
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
164
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
165
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
166
  origin_list.drop(columns=['dnslist'], inplace=True)
151
167
  return origin_list
@@ -4,32 +4,11 @@ import traceback
4
4
  from urllib.parse import unquote
5
5
 
6
6
  import pandas as pd
7
- from tqdm import tqdm
8
-
9
-
10
-
11
- def handle_uri(data):
12
- tqdm.pandas()
13
- print(f"处理URI:{len(data)}")
14
- # 定义正则表达式,确保精确匹配各种攻击特征
15
- regex_patterns = {
16
- "sql": re.compile(
17
- r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
18
- re.IGNORECASE),
19
- "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
20
- "cmd": re.compile(
21
- r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
22
- re.IGNORECASE),
23
- "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
24
- "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
25
- "danger": re.compile(
26
- r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
27
- re.IGNORECASE),
28
- "suspicious_ext": re.compile(
29
- r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
30
- re.IGNORECASE)
31
- }
32
7
 
8
+ from xbase_util.xbase_constant import regex_patterns
9
+
10
+
11
+ def handle_uri(data, use_tqdm=True):
33
12
  # 定义多层解码函数,确保完全解码 URI
34
13
  def fully_decode_uri(uri):
35
14
  try:
@@ -55,7 +34,6 @@ def handle_uri(data):
55
34
  traceback.print_exc()
56
35
  exit(0)
57
36
 
58
-
59
37
  # 初始化统计变量
60
38
  param_count = 0
61
39
  path_depth = 0
@@ -97,14 +75,16 @@ def handle_uri(data):
97
75
  result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
98
76
 
99
77
  return result
100
- feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
78
+
79
+ if use_tqdm:
80
+ feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
81
+ else:
82
+ feature_data = data.apply(process_row, axis=1, result_type="expand")
101
83
  data = pd.concat([data, feature_data], axis=1)
102
84
  return data
103
85
 
104
86
 
105
- def handle_ua(data):
106
- tqdm.pandas()
107
- print("处理UA")
87
+ def handle_ua(data, use_tqdm=True):
108
88
  data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
109
89
  # 处理换行符及多余空格
110
90
  data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
@@ -157,8 +137,14 @@ def handle_ua(data):
157
137
  data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
158
138
  flags=re.IGNORECASE).fillna("Unknown")
159
139
  # 统计 User-Agent 中的特殊字符个数
160
- data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
161
- lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
140
+
141
+ if use_tqdm:
142
+ data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
143
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
144
+ else:
145
+ data['UserAgent_special_char_count'] = data['http.useragent'].apply(
146
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
147
+
162
148
  # 更新 UserAgent_is_unknown 的计算逻辑
163
149
  data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
164
150
  axis=1).fillna("Unknown")
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
 
3
4
  current_dir = os.path.dirname(__file__)
4
5
  parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
@@ -203,4 +204,21 @@ features_key = [
203
204
  'URI_FEATURES_EXTRA_param_length_max', 'UserAgent_is_attack', 'UserAgent_is_enterprise', 'UserAgent_browser',
204
205
  'UserAgent_browser_version', 'UserAgent_os', 'UserAgent_os_version', 'UserAgent_device_type',
205
206
  'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
206
- 'UserAgent_is_unknown']
207
+ 'UserAgent_is_unknown']
208
+ regex_patterns = {
209
+ "sql": re.compile(
210
+ r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
211
+ re.IGNORECASE),
212
+ "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
213
+ "cmd": re.compile(
214
+ r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
215
+ re.IGNORECASE),
216
+ "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
217
+ "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
218
+ "danger": re.compile(
219
+ r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
220
+ re.IGNORECASE),
221
+ "suspicious_ext": re.compile(
222
+ r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
223
+ re.IGNORECASE)
224
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.3.9
3
+ Version: 0.4.1
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
File without changes
File without changes