xbase-util 0.3.8__tar.gz → 0.4.0__tar.gz

Sign up to get free protection for your applications and access to all features.
Files changed (32)
  1. {xbase_util-0.3.8 → xbase_util-0.4.0}/PKG-INFO +1 -1
  2. {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.py +1 -1
  3. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/add_column_util.py +33 -17
  4. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/handle_features_util.py +14 -9
  5. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_util.py +9 -10
  6. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/PKG-INFO +1 -1
  7. {xbase_util-0.3.8 → xbase_util-0.4.0}/README.md +0 -0
  8. {xbase_util-0.3.8 → xbase_util-0.4.0}/setup.cfg +0 -0
  9. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/__init__.py +0 -0
  11. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/ConfigBean.py +0 -0
  12. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  13. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/FlowBean.py +0 -0
  14. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  15. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/bean/__init__.py +0 -0
  16. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/ConfigDao.py +0 -0
  17. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  18. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/FlowDao.py +0 -0
  19. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  20. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/dao/__init__.py +0 -0
  21. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/db/initsqlite3.py +0 -0
  22. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/es_db_util.py +0 -0
  23. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/esreq.py +0 -0
  24. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/geo_util.py +0 -0
  25. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/pcap_util.py +0 -0
  26. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util/xbase_constant.py +0 -0
  27. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/SOURCES.txt +0 -0
  28. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/dependency_links.txt +0 -0
  29. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/not-zip-safe +0 -0
  30. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util.egg-info/top_level.txt +0 -0
  31. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  32. {xbase_util-0.3.8 → xbase_util-0.4.0}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.3.8",
6
+ version="0.4.0",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -128,24 +128,40 @@ def parse_list(x):
128
128
  return x
129
129
 
130
130
 
131
- def handle_dns(origin_list, isDataFrame=False):
132
- print("handle_dnslist")
131
+ def handle_dns(origin_list, isDataFrame=False,use_tqdm=False):
133
132
  if not isDataFrame:
134
133
  origin_list = pd.DataFrame(origin_list)
135
- origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
136
- origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
137
- lambda x: any(is_long_domain(domain) for domain in x))
138
- origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
139
- lambda x: any(has_random_characters(domain) for domain in x))
140
- origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
141
- lambda x: any(has_special_characters(domain) for domain in x))
142
- origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
143
- lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
- origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
145
- lambda x: any(is_danger_domain(domain) for domain in x))
146
- origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
147
- lambda x: any(is_danger_subdomain(domain) for domain in x))
148
- origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
149
- lambda x: any(has_uncommon_tld(domain) for domain in x))
134
+ if use_tqdm:
135
+ origin_list["dnslist"] = origin_list['dns.host'].progress_apply(parse_list)
136
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].progress_apply(
137
+ lambda x: any(is_long_domain(domain) for domain in x))
138
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].progress_apply(
139
+ lambda x: any(has_random_characters(domain) for domain in x))
140
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].progress_apply(
141
+ lambda x: any(has_special_characters(domain) for domain in x))
142
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].progress_apply(
143
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].progress_apply(
145
+ lambda x: any(is_danger_domain(domain) for domain in x))
146
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].progress_apply(
147
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
148
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].progress_apply(
149
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
+ else:
151
+ origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
152
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
153
+ lambda x: any(is_long_domain(domain) for domain in x))
154
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
155
+ lambda x: any(has_random_characters(domain) for domain in x))
156
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
157
+ lambda x: any(has_special_characters(domain) for domain in x))
158
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
159
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
160
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
161
+ lambda x: any(is_danger_domain(domain) for domain in x))
162
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
163
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
164
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
165
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
166
  origin_list.drop(columns=['dnslist'], inplace=True)
151
167
  return origin_list
@@ -8,9 +8,7 @@ from tqdm import tqdm
8
8
 
9
9
 
10
10
 
11
- def handle_uri(data):
12
- tqdm.pandas()
13
- print(f"处理URI:{len(data)}")
11
+ def handle_uri(data,use_tqdm=True):
14
12
  # 定义正则表达式,确保精确匹配各种攻击特征
15
13
  regex_patterns = {
16
14
  "sql": re.compile(
@@ -97,14 +95,15 @@ def handle_uri(data):
97
95
  result[f"URI_FEATURES_EXTRA_contains_{key}"] = value
98
96
 
99
97
  return result
100
- feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
98
+ if use_tqdm:
99
+ feature_data = data.progress_apply(process_row, axis=1, result_type="expand")
100
+ else:
101
+ feature_data = data.apply(process_row, axis=1, result_type="expand")
101
102
  data = pd.concat([data, feature_data], axis=1)
102
103
  return data
103
104
 
104
105
 
105
- def handle_ua(data):
106
- tqdm.pandas()
107
- print("处理UA")
106
+ def handle_ua(data,use_tqdm=True):
108
107
  data['http.useragent'] = data['http.useragent'].fillna('').astype(str)
109
108
  # 处理换行符及多余空格
110
109
  data['http.useragent'] = data['http.useragent'].str.replace(r'\s+', ' ', regex=True)
@@ -157,8 +156,14 @@ def handle_ua(data):
157
156
  data['UserAgent_language'] = data['http.useragent'].str.extract(r'\b([a-z]{2}-[A-Z]{2})\b', expand=False,
158
157
  flags=re.IGNORECASE).fillna("Unknown")
159
158
  # 统计 User-Agent 中的特殊字符个数
160
- data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
161
- lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
159
+
160
+ if use_tqdm:
161
+ data['UserAgent_special_char_count'] = data['http.useragent'].progress_apply(
162
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
163
+ else:
164
+ data['UserAgent_special_char_count'] = data['http.useragent'].apply(
165
+ lambda x: len(re.findall(r'[!@#$%^&*\'=:|{}]', x, flags=re.IGNORECASE)))
166
+
162
167
  # 更新 UserAgent_is_unknown 的计算逻辑
163
168
  data['UserAgent_is_unknown'] = data[['UserAgent_browser', 'UserAgent_os', 'UserAgent_platform']].isna().any(
164
169
  axis=1).fillna("Unknown")
@@ -402,16 +402,15 @@ def get_uri_filename_length(uri):
402
402
  return 0
403
403
 
404
404
 
405
- def get_dns_domain_suffix(domain, dns_lock):
406
- with dns_lock:
407
- try:
408
- for tmp_suffix in dns_domain_list:
409
- if tmp_suffix in domain:
410
- return tmp_suffix
411
- extracted = tldextract.extract(domain)
412
- return extracted.suffix
413
- except Exception as e:
414
- return ""
405
+ def get_dns_domain_suffix(domain):
406
+ try:
407
+ for tmp_suffix in dns_domain_list:
408
+ if tmp_suffix in domain:
409
+ return tmp_suffix
410
+ extracted = tldextract.extract(domain)
411
+ return extracted.suffix
412
+ except Exception as e:
413
+ return ""
415
414
 
416
415
 
417
416
  def check_path(file_path: str):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.3.8
3
+ Version: 0.4.0
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
File without changes
File without changes