xbase-util 0.3.3__tar.gz → 0.3.5__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. {xbase_util-0.3.3 → xbase_util-0.3.5}/PKG-INFO +1 -1
  2. {xbase_util-0.3.3 → xbase_util-0.3.5}/setup.py +2 -2
  3. xbase_util-0.3.5/xbase_util/add_column_util.py +151 -0
  4. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/xbase_util.py +1 -0
  5. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util.egg-info/PKG-INFO +1 -1
  6. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util.egg-info/SOURCES.txt +1 -0
  7. {xbase_util-0.3.3 → xbase_util-0.3.5}/README.md +0 -0
  8. {xbase_util-0.3.3 → xbase_util-0.3.5}/setup.cfg +0 -0
  9. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/__init__.py +0 -0
  10. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/__init__.py +0 -0
  11. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/bean/ConfigBean.py +0 -0
  12. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/bean/CurrentConfigBean.py +0 -0
  13. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/bean/FlowBean.py +0 -0
  14. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/bean/TaskTemplateBean.py +0 -0
  15. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/bean/__init__.py +0 -0
  16. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/dao/ConfigDao.py +0 -0
  17. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/dao/CurrentConfigDao.py +0 -0
  18. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/dao/FlowDao.py +0 -0
  19. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/dao/TaskTemplateDao.py +0 -0
  20. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/dao/__init__.py +0 -0
  21. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/db/initsqlite3.py +0 -0
  22. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/es_db_util.py +0 -0
  23. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/esreq.py +0 -0
  24. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/geo_util.py +0 -0
  25. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/handle_features_util.py +0 -0
  26. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/pcap_util.py +0 -0
  27. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util/xbase_constant.py +0 -0
  28. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util.egg-info/dependency_links.txt +0 -0
  29. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util.egg-info/not-zip-safe +0 -0
  30. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util.egg-info/top_level.txt +0 -0
  31. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util_assets/GeoLite2-City.mmdb +0 -0
  32. {xbase_util-0.3.3 → xbase_util-0.3.5}/xbase_util_assets/arkimeparse.js +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase_util
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -3,7 +3,7 @@ from distutils.core import setup
3
3
  from setuptools import find_packages
4
4
 
5
5
  setup(name="xbase_util",
6
- version="0.3.3",
6
+ version="0.3.5",
7
7
  description="网络安全基础工具",
8
8
  long_description="包含提取,预测,训练的基础工具",
9
9
  author="xyt",
@@ -15,6 +15,6 @@ setup(name="xbase_util",
15
15
  ],
16
16
  zip_safe=False,
17
17
  package_data={
18
- 'xbase_util': ['../xbase_util_assets/*'],
18
+ 'xbase_util': ['../xbase_util_assets/*']
19
19
  },
20
20
  include_package_data=True)
@@ -0,0 +1,151 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+ import re
5
+
6
+ from nltk.corpus import words
7
+ from nltk.stem import WordNetLemmatizer
8
+ from tldextract import tldextract
9
+
10
+ # 初始化 lemmatizer
11
+ lemmatizer = WordNetLemmatizer()
12
+ os.environ["TLDEXTRACT_DISABLE_UPDATE"] = "1" # 禁用更新
13
+
14
+ # 下载词库(仅需执行一次)
15
+ # nltk.download('wordnet')
16
+ # 构建词库集合
17
+ common_tlds = {
18
+ # 常见的 gTLD
19
+ ".com", ".org", ".net", ".info", ".biz", ".edu", ".gov", ".mil",
20
+ # 新兴的 gTLD
21
+ ".app", ".blog", ".shop", ".tech", ".xyz", ".online", ".me", ".co", ".tv",
22
+ # 其他 gTLD
23
+ ".name", ".pro", ".mobi", ".aero", ".coop", ".museum",
24
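+ # Widely used country-code TLDs (ccTLDs): China, US, UK, Germany, Japan, France, Canada, India, Brazil, Russia, Australia, Korea, Italy, Spain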
25
+ ".cn", ".us", ".uk", ".de", ".jp", ".fr", ".ca", ".in", ".br", ".ru", ".au", ".kr", ".it", ".es",
26
+ # 其他常见的 ccTLD
27
+ ".ar", ".mx", ".ch", ".nl", ".se", ".pl", ".no", ".fi", ".be", ".dk", ".at",
28
+ # 国际化域名 (IDN)
29
+ ".中国", ".한국", ".рф", ".印度"
30
+ }
31
+ word_set = set(words.words()) # 将词库转换为集合,提高查找速度
32
+ word_set.update(
33
+ ["baidu", "qq", "ali", "souhu", "douyin", "jd", "tencent", "taobao", "tianmao", "dewu", "sougou", "anmeng", "weibo",
34
+ "douyu", "huya", "bilibili", "csnd", "zhihu", "huawei", "xiaomi", "vivo", "oppo", "qihu", "yahu", "fanke",
35
+ "xunfei"])
36
+
37
+
38
+ def is_meaningful_word(word):
39
+ """判断单词是否在词库中"""
40
+ return int(lemmatizer.lemmatize(word.lower(), pos='n') in word_set)
41
+
42
+
43
+ def is_meaningful_phrase(phrase):
44
+ """判断是否是有意义的短语(分词后每个词都必须有意义)"""
45
+ words_in_phrase = phrase.split('.')
46
+ return all(is_meaningful_word(word) for word in words_in_phrase)
47
+
48
+
49
+ def is_danger_subdomain(uri):
50
+ """提取并处理子域名"""
51
+ ext = tldextract.extract(uri)
52
+
53
+ subdomain = ext.subdomain.replace("www.", "")
54
+ if subdomain:
55
+ subdomain_parts = subdomain.split('.')
56
+ # filtered_parts = [part for part in subdomain_parts if part not in common_prefixes]
57
+ # print(filtered_parts)
58
+ meaningful_parts = [part for part in subdomain_parts if is_meaningful_word(part)]
59
+ # print(meaningful_parts)
60
+ if meaningful_parts:
61
+ return 0
62
+ else:
63
+ return 1
64
+ return 0
65
+
66
+
67
+ def is_danger_domain(uri):
68
+ """提取主域名并判断是否有意义"""
69
+ ext = tldextract.extract(uri)
70
+ domain = ext.domain
71
+ if is_meaningful_word(domain):
72
+ return 0
73
+ return 1
74
+
75
+
76
+ # 判断域名是否过长
77
+ def is_long_domain(uri):
78
+ ext = tldextract.extract(uri)
79
+ domain = ext.domain
80
+ subdomain = ext.subdomain
81
+ if subdomain:
82
+ subdomain_parts = subdomain.split(".")
83
+ target = 1 if any(len(part) > 10 for part in subdomain_parts) else 0
84
+ else:
85
+ target = 0
86
+ return int(len(domain) > 10 or target)
87
+
88
+
89
+ def has_uncommon_tld(domain):
90
+ """判断域名是否使用了非常规TLD"""
91
+ ext = tldextract.extract(domain)
92
+ return int(ext.suffix not in common_tlds)
93
+
94
+
95
+ # 判断域名是否包含随机字符(简单示例:检查是否包含非字母数字字符)
96
+ def has_random_characters(domain):
97
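+ # Ordinary host names are expected to contain only letters, digits, hyphens, underscores and dots; any other character is flagged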
98
+ return int(bool(re.search(r'[^a-zA-Z0-9-_.]', domain)))
99
+
100
+
101
+ # 判断域名是否包含特殊字符(例如汉字或表情符号)
102
+ def has_special_characters(domain):
103
+ # 汉字或特殊字符的 Unicode 范围
104
+ return int(bool(re.search(r'[\u4e00-\u9fff\U0001F600-\U0001F64F]', domain)))
105
+
106
+
107
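+ # Check whether the name has an unusually large number of subdomains (flagged when the whole name has more than 3 dot-separated labels)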
108
+ def has_large_number_of_subdomains(uri):
109
+ if tldextract.extract(uri).subdomain:
110
+ subdomains_list = uri.split('.')
111
+ # More than 3 dot-separated labels in the whole name (not only the subdomain part) is treated as potentially anomalous
112
+ return int(len(subdomains_list) > 3)
113
+ else:
114
+ return 0
115
+
116
+
117
+ def parse_list(x):
118
+ if isinstance(x, str):
119
+ if x == "[]":
120
+ x = []
121
+ else:
122
+ x = f"{x}".replace("\"", "").replace("[", "").replace("]", "").split(",")
123
+ elif isinstance(x, list):
124
+ x = [f"{item}" for item in x]
125
+ else:
126
+ print(f"unknown:{x} {type(x)}")
127
+ x = []
128
+ return x
129
+
130
+
131
+ def handle_dns(origin_list, isDataFrame=False):
132
+ print("handle_dnslist")
133
+ if not isDataFrame:
134
+ origin_list = pd.DataFrame(origin_list)
135
+ origin_list["dnslist"] = origin_list['dns.host'].apply(parse_list)
136
+ origin_list['dns_host_is_long_domain'] = origin_list['dnslist'].apply(
137
+ lambda x: any(is_long_domain(domain) for domain in x))
138
+ origin_list['dns_host_is_random_characters'] = origin_list['dnslist'].apply(
139
+ lambda x: any(has_random_characters(domain) for domain in x))
140
+ origin_list['dns_host_is_special_characters'] = origin_list['dnslist'].apply(
141
+ lambda x: any(has_special_characters(domain) for domain in x))
142
+ origin_list['dns_host_is_large_subdomains'] = origin_list['dnslist'].apply(
143
+ lambda x: any(has_large_number_of_subdomains(domain) for domain in x))
144
+ origin_list['dns_host_is_danger_domain'] = origin_list['dnslist'].apply(
145
+ lambda x: any(is_danger_domain(domain) for domain in x))
146
+ origin_list['dns_host_is_danger_subdomain'] = origin_list['dnslist'].apply(
147
+ lambda x: any(is_danger_subdomain(domain) for domain in x))
148
+ origin_list['dns_host_is_uncommon_tld'] = origin_list['dnslist'].apply(
149
+ lambda x: any(has_uncommon_tld(domain) for domain in x))
150
+ origin_list.drop(columns=['dnslist'], inplace=True)
151
+ return origin_list
@@ -324,6 +324,7 @@ def extract_session_fields(origin_list, geoUtil):
324
324
  "http.request-refererCnt": http.get("requestRefererCnt", 0),
325
325
  "http.path": http.get("path", []),
326
326
  "http.hostCnt": http.get("hostCnt", 0),
327
+ "http.host": http.get("host", []),
327
328
  "http.response-server": http.get("response-server", []),
328
329
  "http.pathCnt": http.get("pathCnt", 0),
329
330
  "http.useragentTokens": http.get("useragentTokens", ""),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xbase-util
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: 网络安全基础工具
5
5
  Home-page: https://gitee.com/jimonik/xbase_util.git
6
6
  Author: xyt
@@ -1,6 +1,7 @@
1
1
  README.md
2
2
  setup.py
3
3
  xbase_util/__init__.py
4
+ xbase_util/add_column_util.py
4
5
  xbase_util/es_db_util.py
5
6
  xbase_util/esreq.py
6
7
  xbase_util/geo_util.py
File without changes
File without changes