xbase-util 0.3.9__tar.gz → 0.4.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32)
  1. {xbase_util-0.3.9 → xbase_util-0.4.1}/PKG-INFO +1 -1
  2. {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.py +1 -1
  3. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/add_column_util.py +33 -17
  4. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/handle_features_util.py +18 -32
  5. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_constant.py +19 -1
  6. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.3.9 → xbase_util-0.4.1}/README.md +0 -0
  8. {xbase_util-0.3.9 → xbase_util-0.4.1}/setup.cfg +0 -0
  9. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/__init__.py +0 -0
  11. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/ConfigBean.py +0 -0
  12. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  13. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/FlowBean.py +0 -0
  14. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  15. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/bean/__init__.py +0 -0
  16. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/ConfigDao.py +0 -0
  17. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  18. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/FlowDao.py +0 -0
  19. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  20. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/dao/__init__.py +0 -0
  21. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/db/initsqlite3.py +0 -0
  22. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/es_db_util.py +0 -0
  23. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/esreq.py +0 -0
  24. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/geo_util.py +0 -0
  25. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/pcap_util.py +0 -0
  26. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util/xbase_util.py +0 -0
  27. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/SOURCES.txt +0 -0
  28. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/dependency_links.txt +0 -0
  29. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/not-zip-safe +0 -0
  30. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util.egg-info/top_level.txt +0 -0
  31. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  32. {xbase_util-0.3.9 → xbase_util-0.4.1}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.3.9
3
+ Version: 0.4.1
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.3.9",
6
+ version="0.4.1",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -128,24 +128,40 @@ def parse_list(x):
128
128
  return x
129
129
 
130
130
 
131
- def handle_dns(origin_list, isDataFrame=False):
132
- print("handle_dnslist")
131
+ def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
133
132
  if not isDataFrame:
134
133
  origin_list = pd.DataFrame(origin_list)
135
- origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
136
- origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
137
- lambda x: any(is_long_domain(domain) for domain in x))
138
- origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
139
- lambda x: any(has_random_characters(domain) for domain in x))
140
- origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
141
- lambda x: any(has_special_characters(domain) for domain in x))
142
- origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
143
- lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
- origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
145
- lambda x: any(is_danger_domain(domain) for domain in x))
146
- origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
147
- lambda x: any(is_danger_subdomain(domain) for domain in x))
148
- origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
149
- lambda x: any(has_uncommon_tld(domain) for domain in x))
134
+ if use_tqdm:
135
+ origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
136
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
137
+ lambda x: any(is_long_domain(domain) for domain in x))
138
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
139
+ lambda x: any(has_random_characters(domain) for domain in x))
140
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
141
+ lambda x: any(has_special_characters(domain) for domain in x))
142
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
143
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
145
+ lambda x: any(is_danger_domain(domain) for domain in x))
146
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
147
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
148
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
149
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
+ else:
151
+ origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
152
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
153
+ lambda x: any(is_long_domain(domain) for domain in x))
154
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
155
+ lambda x: any(has_random_characters(domain) for domain in x))
156
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
157
+ lambda x: any(has_special_characters(domain) for domain in x))
158
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
159
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
160
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
161
+ lambda x: any(is_danger_domain(domain) for domain in x))
162
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
163
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
164
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
165
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
166
  origin_list.drop(columns=['dnslist'], inplace=True)
151
167
  return origin_list
@@ -4,32 +4,11 @@ import traceback
4
4
  from urllib.parse import unquote
5
5
 
6
6
  import pandas as pd
7
- from tqdm import tqdm
8
-
9
-
10
-
11
- def handle_uri(data):
12
- tqdm.pandas()
13
- print(f"处理URI:{len(data)}")
14
- # 定义正则表达式,确保精确匹配各种攻击特征
15
- regex_patterns = {
16
- "sql": re.compile(
17
- r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
18
- re.IGNORECASE),
19
- "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
20
- "cmd": re.compile(
21
- r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
22
- re.IGNORECASE),
23
- "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
24
- "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
25
- "danger": re.compile(
26
- r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
27
- re.IGNORECASE),
28
- "suspicious_ext": re.compile(
29
- r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
30
- re.IGNORECASE)
31
- }
32
7
 
8
+ from xbase_util.xbase_constant import regex_patterns
9
+
10
+
11
+ def handle_uri(data, use_tqdm=True):
33
12
  # 定义多层解码函数,确保完全解码 URI
34
13
  def fully_decode_uri(uri):
35
14
  try:
@@ -55,7 +34,6 @@ def handle_uri(data):
55
34
  traceback.print_exc()
56
35
  exit(0)
57
36
 
58
-
59
37
  # 初始化统计变量
60
38
  param_count = 0
61
39
  path_depth = 0
@@ -97,14 +75,16 @@ def handle_uri(data):
97
75
  result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
98
76
 
99
77
  return result
100
- feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
78
+
79
+ if use_tqdm:
80
+ feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
81
+ else:
82
+ feature_data = data.apply(process_row, axis=1, result_type="expand")
101
83
  data = pd.concat([data, feature_data], axis=1)
102
84
  return data
103
85
 
104
86
 
105
- def handle_ua(data):
106
- tqdm.pandas()
107
- print("处理UA")
87
+ def handle_ua(data, use_tqdm=True):
108
88
  data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
109
89
  # 处理换行符及多余空格
110
90
  data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
@@ -157,8 +137,14 @@ def handle_ua(data):
157
137
  data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
158
138
  flags=re.IGNORECASE).fillna("Unknown")
159
139
  # 统计 User-Agent 中的特殊字符个数
160
- data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
161
- lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
140
+
141
+ if use_tqdm:
142
+ data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
143
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
144
+ else:
145
+ data['UserAgent_special_char_count'] = data['http.useragent'].apply(
146
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
147
+
162
148
  # 更新 UserAgent_is_unknown 的计算逻辑
163
149
  data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
164
150
  axis=1).fillna("Unknown")
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
 
3
4
  current_dir = os.path.dirname(__file__)
4
5
  parse_path = os.path.join(current_dir, '..', 'xbase_util_assets', 'arkimeparse.js')
@@ -203,4 +204,21 @@ features_key = [
203
204
  'URI_FEATURES_EXTRA_param_length_max', 'UserAgent_is_attack', 'UserAgent_is_enterprise', 'UserAgent_browser',
204
205
  'UserAgent_browser_version', 'UserAgent_os', 'UserAgent_os_version', 'UserAgent_device_type',
205
206
  'UserAgent_platform', 'UserAgent_is_bot', 'UserAgent_language', 'UserAgent_special_char_count',
206
- 'UserAgent_is_unknown']
207
+ 'UserAgent_is_unknown']
208
+ regex_patterns = {
209
+ "sql": re.compile(
210
+ r"\b(select|union|insert|update|delete|drop|--|#| or |' or '|information_schema|database\(\)|version\(\))\b",
211
+ re.IGNORECASE),
212
+ "xss": re.compile(r"(<script\b|javascript:|onload=|onclick=|<iframe\b|src=)", re.IGNORECASE),
213
+ "cmd": re.compile(
214
+ r"(/etc/passwd\b|/etc/shadow\b|;|&&|\||\$\(.+\)|\bcurl\b|\bwget\b|\bexec\b|\bsystem\b|cmd=|proc/self/environ)",
215
+ re.IGNORECASE),
216
+ "path": re.compile(r"(\.\./|\.\.%2f|\.\.%5c|\.\.\\|\.\.;|%2f%2e%2e%2f)", re.IGNORECASE),
217
+ "redirect": re.compile(r"(redirect=|url=|next=|redirect_uri=|redirect:|RedirectTo=)", re.IGNORECASE),
218
+ "danger": re.compile(
219
+ r"(%3C|%3E|%27|%22|%00|%2F|%5C|%3B|%7C|%28|%29|%20|%3D|%3A|%3F|%26|%23|%2B|%25|file://|<foo|xmlns:|/etc/passwd|windows/win\.ini)",
220
+ re.IGNORECASE),
221
+ "suspicious_ext": re.compile(
222
+ r"\.(exe|sh|py|pl|bak|php5|jspx|bat|cmd|pif|js|vbs|vbe|sct|ini|inf|tmp|swp|jar|java|class|ps1)\b",
223
+ re.IGNORECASE)
224
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.3.9
3
+ Version: 0.4.1
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
File without changes
File without changes