xbase-util 0.3.8__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {xbase_util-0.3.8 → xbase_util-0.4.0}/PKG-INFO +1 -1
  2. {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.py +1 -1
  3. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/add_column_util.py +33 -17
  4. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/handle_features_util.py +14 -9
  5. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_util.py +9 -10
  6. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.3.8 → xbase_util-0.4.0}/README.md +0 -0
  8. {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.cfg +0 -0
  9. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/__init__.py +0 -0
  11. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/ConfigBean.py +0 -0
  12. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  13. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/FlowBean.py +0 -0
  14. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  15. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/__init__.py +0 -0
  16. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/ConfigDao.py +0 -0
  17. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  18. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/FlowDao.py +0 -0
  19. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  20. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/__init__.py +0 -0
  21. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/initsqlite3.py +0 -0
  22. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/es_db_util.py +0 -0
  23. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/esreq.py +0 -0
  24. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/geo_util.py +0 -0
  25. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/pcap_util.py +0 -0
  26. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_constant.py +0 -0
  27. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/SOURCES.txt +0 -0
  28. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/dependency_links.txt +0 -0
  29. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/not-zip-safe +0 -0
  30. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/top_level.txt +0 -0
  31. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  32. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.3.8",
6
+ version="0.4.0",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -128,24 +128,40 @@ def parse_list(x):
128
128
  return x
129
129
 
130
130
 
131
- def handle_dns(origin_list, isDataFrame=False):
132
- print("handle_dnslist")
131
+ def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
133
132
  if not isDataFrame:
134
133
  origin_list = pd.DataFrame(origin_list)
135
- origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
136
- origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
137
- lambda x: any(is_long_domain(domain) for domain in x))
138
- origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
139
- lambda x: any(has_random_characters(domain) for domain in x))
140
- origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
141
- lambda x: any(has_special_characters(domain) for domain in x))
142
- origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
143
- lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
- origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
145
- lambda x: any(is_danger_domain(domain) for domain in x))
146
- origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
147
- lambda x: any(is_danger_subdomain(domain) for domain in x))
148
- origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
149
- lambda x: any(has_uncommon_tld(domain) for domain in x))
134
+ if use_tqdm:
135
+ origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
136
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
137
+ lambda x: any(is_long_domain(domain) for domain in x))
138
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
139
+ lambda x: any(has_random_characters(domain) for domain in x))
140
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
141
+ lambda x: any(has_special_characters(domain) for domain in x))
142
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
143
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
145
+ lambda x: any(is_danger_domain(domain) for domain in x))
146
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
147
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
148
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
149
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
+ else:
151
+ origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
152
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
153
+ lambda x: any(is_long_domain(domain) for domain in x))
154
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
155
+ lambda x: any(has_random_characters(domain) for domain in x))
156
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
157
+ lambda x: any(has_special_characters(domain) for domain in x))
158
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
159
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
160
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
161
+ lambda x: any(is_danger_domain(domain) for domain in x))
162
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
163
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
164
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
165
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
166
  origin_list.drop(columns=['dnslist'], inplace=True)
151
167
  return origin_list
@@ -8,9 +8,7 @@ from tqdm import tqdm
8
8
 
9
9
 
10
10
 
11
- def handle_uri(data):
12
- tqdm.pandas()
13
- print(f"处理URI:{len(data)}")
11
+ def handle_uri(data,use_tqdm=True):
14
12
  # 定义正则表达式,确保精确匹配各种攻击特征
15
13
  regex_patterns = {
16
14
  "sql": re.compile(
@@ -97,14 +95,15 @@ def handle_uri(data):
97
95
  result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
98
96
 
99
97
  return result
100
- feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
98
+ if use_tqdm:
99
+ feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
100
+ else:
101
+ feature_data = data.apply(process_row, axis=1, result_type="expand")
101
102
  data = pd.concat([data, feature_data], axis=1)
102
103
  return data
103
104
 
104
105
 
105
- def handle_ua(data):
106
- tqdm.pandas()
107
- print("处理UA")
106
+ def handle_ua(data,use_tqdm=True):
108
107
  data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
109
108
  # 处理换行符及多余空格
110
109
  data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
@@ -157,8 +156,14 @@ def handle_ua(data):
157
156
  data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
158
157
  flags=re.IGNORECASE).fillna("Unknown")
159
158
  # 统计 User-Agent 中的特殊字符个数
160
- data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
161
- lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
159
+
160
+ if use_tqdm:
161
+ data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
162
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
163
+ else:
164
+ data['UserAgent_special_char_count'] = data['http.useragent'].apply(
165
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
166
+
162
167
  # 更新 UserAgent_is_unknown 的计算逻辑
163
168
  data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
164
169
  axis=1).fillna("Unknown")
@@ -402,16 +402,15 @@ def get_uri_filename_length(uri):
402
402
  return 0
403
403
 
404
404
 
405
- def get_dns_domain_suffix(domain, dns_lock):
406
- with dns_lock:
407
- try:
408
- for tmp_suffix in dns_domain_list:
409
- if tmp_suffix in domain:
410
- return tmp_suffix
411
- extracted = tldextract.extract(domain)
412
- return extracted.suffix
413
- except Exception as e:
414
- return ""
405
+ def get_dns_domain_suffix(domain):
406
+ try:
407
+ for tmp_suffix in dns_domain_list:
408
+ if tmp_suffix in domain:
409
+ return tmp_suffix
410
+ extracted = tldextract.extract(domain)
411
+ return extracted.suffix
412
+ except Exception as e:
413
+ return ""
415
414
 
416
415
 
417
416
  def check_path(file_path: str):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
File without changes
File without changes