xtn-tools-pro 1.0.0.0.5__py3-none-any.whl → 1.0.0.0.7__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- xtn_tools_pro/db/MongoDB.py +2 -2
- xtn_tools_pro/proxy/XiaoXiangProxy.py +1 -1
- xtn_tools_pro/proxy/proxy.py +189 -0
- xtn_tools_pro/tools.py +4 -157
- xtn_tools_pro/utils/crypto.py +104 -0
- xtn_tools_pro/utils/file_utils.py +42 -0
- xtn_tools_pro/utils/helpers.py +126 -0
- xtn_tools_pro/utils/log.py +1 -2
- xtn_tools_pro/utils/sql.py +1 -1
- xtn_tools_pro/utils/time_utils.py +143 -0
- {xtn_tools_pro-1.0.0.0.5.dist-info → xtn_tools_pro-1.0.0.0.7.dist-info}/METADATA +1 -1
- xtn_tools_pro-1.0.0.0.7.dist-info/RECORD +24 -0
- xtn_tools_pro-1.0.0.0.5.dist-info/RECORD +0 -19
- {xtn_tools_pro-1.0.0.0.5.dist-info → xtn_tools_pro-1.0.0.0.7.dist-info}/LICENSE +0 -0
- {xtn_tools_pro-1.0.0.0.5.dist-info → xtn_tools_pro-1.0.0.0.7.dist-info}/WHEEL +0 -0
- {xtn_tools_pro-1.0.0.0.5.dist-info → xtn_tools_pro-1.0.0.0.7.dist-info}/top_level.txt +0 -0
xtn_tools_pro/db/MongoDB.py
CHANGED
@@ -8,11 +8,11 @@
|
|
8
8
|
# --------------------------------------------------------------------------------------------------
|
9
9
|
# 2024/4/17 xiatn V00.01.000 新建
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
11
|
-
from xtn_tools_pro.tools_time import *
|
12
11
|
from urllib import parse
|
12
|
+
from typing import List, Dict, Optional
|
13
|
+
from xtn_tools_pro.utils.time_utils import *
|
13
14
|
from pymongo import MongoClient as _MongoClient
|
14
15
|
from pymongo.database import Database as _Database
|
15
|
-
from typing import List, Dict, Optional
|
16
16
|
from pymongo.collection import Collection as _Collection
|
17
17
|
from pymongo.errors import DuplicateKeyError, BulkWriteError
|
18
18
|
|
@@ -10,7 +10,7 @@
|
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
11
11
|
import requests, time, random
|
12
12
|
from xtn_tools_pro.db.RedisDB import RedisDBPro
|
13
|
-
from xtn_tools_pro.
|
13
|
+
from xtn_tools_pro.utils.time_utils import get_time_now_timestamp, get_time_now_day59_timestamp
|
14
14
|
|
15
15
|
import warnings
|
16
16
|
from urllib3.exceptions import InsecureRequestWarning
|
@@ -0,0 +1,189 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 小象代理专用
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import requests, time, random
|
12
|
+
from xtn_tools_pro.db.RedisDB import RedisDBPro
|
13
|
+
from xtn_tools_pro.utils.time_utils import get_time_now_timestamp, get_time_now_day59_timestamp
|
14
|
+
|
15
|
+
import warnings
|
16
|
+
from urllib3.exceptions import InsecureRequestWarning
|
17
|
+
|
18
|
+
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
19
|
+
|
20
|
+
|
21
|
+
class ProxyPool:
    """Redis-backed pool of XiaoXiang proxies.

    Every proxy is stored as a Redis key shaped like
    ``<redis_proxy_name>:http://<ip>:<port>:<fetch_timestamp>`` whose value is
    a usage counter.  ``run`` keeps the pool maintained until 23:59:59 of the
    day the instance was created; ``get_proxy`` hands out a random proxy and
    ``set_proxy_error`` lets a crawler flag one as unusable.
    """

    # Counter value a crawler writes to flag a proxy as unusable.
    __ERROR_MARK = 999999

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="", usage_cnt=100, usage_time=100, *args,
                 **kwargs):
        """
        :param ip: Redis host
        :param port: Redis port
        :param db: Redis database index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param usage_cnt: maximum number of uses per proxy (a count, used during maintenance)
        :param usage_time: maximum lifetime of a proxy in seconds (used during maintenance)
        :param kwargs: optional ``XiaoXiangProxyAPI`` - URL of the proxy-fetching API
        """
        self.__redis_pool = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redisProxyName = redis_proxy_name

        # The API URL used to be read without ever being assigned, which raised
        # AttributeError on every fetch attempt.  It is now taken from an
        # optional keyword and defaults to "" (meaning "not configured").
        self.__XiaoXiangProxyAPI = kwargs.get("XiaoXiangProxyAPI", "")

        # Timestamp of today's 23:59:59; ``run`` stops once it is reached.
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time

    def __log(self, text):
        """
        Emit a log line (currently just printed to stdout).
        :param text: message to log
        :return:
        """
        print(text)

    def __check_proxy(self):
        """
        Maintain the pool by deleting proxies that should no longer be used.
        A proxy is deleted when: 1. it is older than ``usage_time`` seconds;
        2. its usage counter reached ``usage_cnt``; 3. a crawler marked it
        with the counter value 999999.
        :return:
        """
        proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
        for proxy_val in proxy_val_list:
            # The last key segment is the timestamp at which the proxy was fetched.
            time_out = proxy_val.split(":")[-1]
            # Usage counter as stored in Redis (not yet converted to int).
            raw_count = self.__redis_pool.get(proxy_val)
            now = get_time_now_timestamp(is_time_10=True)
            if int(time_out) + self.__usage_time < now:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                        format(proxy_val=proxy_val, del_state=del_state,
                               time_out=time_out, py_time=now))
                continue
            if raw_count is None:
                # The key disappeared between listing and reading; nothing to check.
                continue
            # Convert once so both comparisons below are int-vs-int; the old
            # code compared the raw Redis value against 999999 directly.
            proxy_val_count = int(raw_count)
            if proxy_val_count >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(
                        proxy_val=proxy_val,
                        text="当前代理被爬虫标记为不可用" if proxy_val_count >= self.__ERROR_MARK else "当前代理已超过使用次数",
                        del_state=del_state))

    def __get_proxy_length(self):
        """
        Count the proxies currently stored in the pool.
        :return: number of matching Redis keys
        """
        proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
        return len(proxy_val_list)

    def __incr_proxy(self, proxy_val):
        """
        Increment the usage counter of a proxy key.
        :param proxy_val: Redis key of the proxy
        :return: whatever the Redis wrapper's ``incr`` returns
        """
        proxy_val_con = self.__redis_pool.incr(proxy_val)
        return proxy_val_con

    def __get_api_proxy(self):
        """
        Fetch proxies from the XiaoXiang API and store them in Redis.
        Example responses:
            {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
            {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
        :return: True once proxies were stored, False when no API URL is configured
        """
        while True:
            self.__check_proxy()
            if not self.__XiaoXiangProxyAPI:
                # Without a URL every request would fail; give up instead of spinning.
                self.__log("未配置小象代理API地址,无法获取代理")
                return False
            try:
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                payload = response.json()  # parse the body once
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                proxy_data_list = payload.get("data", [])
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                for data in proxy_data_list:
                    ip = "http://{ip}".format(ip=data.get("ip"))
                    port = data.get("port")
                    time_out = get_time_now_timestamp(is_time_10=True)
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip=ip,
                        port=port,
                        timeOut=time_out,
                    )
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
                                                                                   proxy_key_con=proxy_key_con))
                return True  # fetched successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                # Back off before retrying so a persistent failure does not busy-loop.
                time.sleep(2)

    def run(self):
        """
        Maintenance loop: check the pool and fetch new proxies once per second
        until the 23:59:59 timestamp captured at construction time is reached.
        :return:
        """
        try:
            while True:
                # Drop expired / exhausted / flagged proxies.
                self.__check_proxy()
                # Fetch fresh proxies from the API.
                self.__get_api_proxy()
                time.sleep(1)
                # Stop once the end-of-day timestamp has passed.
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """
        Pick a random proxy from the pool, waiting (2 s polls) until one exists.
        :return: the key with its first and last ``:``-separated segments removed
                 (``http://<ip>:<port>`` for keys written by this class), or
                 None if an error occurred
        """
        try:
            while True:
                proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
                if proxy_val_list:
                    proxy_val = random.choice(proxy_val_list)
                    # Strip the leading prefix segment and the trailing timestamp segment.
                    proxy_v = ":".join(str(proxy_val).split(":")[1:-1])
                    self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                    return proxy_v
                else:
                    self.__log("暂无代理,等待中")
                    time.sleep(2)
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))

    def set_proxy_error(self, proxy_v):
        """
        Called by a crawler to flag a proxy as unusable by writing 999999.
        :param proxy_v: Redis key to write the marker to
        :return:
        """
        # NOTE(review): get_proxy() returns the key without prefix/timestamp, so
        # passing that value here writes a different key than the pooled one -
        # confirm what callers are expected to pass.
        try:
            self.__redis_pool.set(proxy_v, "999999")
            self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=proxy_v))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
|
183
|
+
|
184
|
+
|
185
|
+
if __name__ == '__main__':
|
186
|
+
# p = ProxyPool(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk", XiaoXiangProxyAppKey="1101384562594172928",
|
187
|
+
# XiaoXiangProxyAppSecret="PJ0QBWML")
|
188
|
+
# print(p.get_proxy())
|
189
|
+
pass
|
xtn_tools_pro/tools.py
CHANGED
@@ -8,169 +8,16 @@
|
|
8
8
|
# --------------------------------------------------------------------------------------------------
|
9
9
|
# 2024/4/17 xiatn V00.01.000 新建
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
11
|
-
import hashlib, json, math,re
|
12
|
-
from pprint import pformat
|
13
|
-
from urllib.parse import urlencode
|
14
11
|
|
15
12
|
|
16
|
-
def
|
17
|
-
"""
|
18
|
-
获取文本的md5值 32位
|
19
|
-
:param s: 文本
|
20
|
-
:param is_upper: 是否转大写 默认False
|
21
|
-
:return:
|
22
|
-
"""
|
23
|
-
# s.encode()#变成bytes类型才能加密
|
24
|
-
m = hashlib.md5(s.encode()) # 长度是32
|
25
|
-
if is_upper:
|
26
|
-
return m.hexdigest().upper()
|
27
|
-
return m.hexdigest()
|
28
|
-
|
29
|
-
|
30
|
-
def get_md5_16(s, is_upper=False):
|
31
|
-
"""
|
32
|
-
获取文本的md5值 16位
|
33
|
-
:param s: 文本
|
34
|
-
:param is_upper: 是否转大写 默认False
|
35
|
-
:return:
|
36
|
-
"""
|
37
|
-
result = get_md5_32(s, is_upper)
|
38
|
-
return result[8:24]
|
39
|
-
|
40
|
-
|
41
|
-
def get_binary_content_md5_32(content, is_upper=False):
|
42
|
-
"""
|
43
|
-
二进制内容md5 例如图片
|
44
|
-
:param content: 二进制内容
|
45
|
-
:param is_upper: 是否转大写 默认False
|
46
|
-
:return:
|
47
|
-
"""
|
48
|
-
md5_hash = hashlib.md5(content)
|
49
|
-
md5_hexdigest = md5_hash.hexdigest()
|
50
|
-
if is_upper:
|
51
|
-
return md5_hexdigest.upper()
|
52
|
-
return md5_hexdigest
|
53
|
-
|
54
|
-
|
55
|
-
def get_binary_content_md5_16(content, is_upper=False):
|
56
|
-
"""
|
57
|
-
二进制内容md5 例如图片
|
58
|
-
:param content: 二进制内容
|
59
|
-
:param is_upper: 是否转大写 默认False
|
60
|
-
:return:
|
61
|
-
"""
|
62
|
-
result = get_binary_content_md5_32(content, is_upper)
|
63
|
-
return result[8:24]
|
64
|
-
|
65
|
-
|
66
|
-
def get_file_md5_32(file_path, is_upper=False):
|
67
|
-
"""
|
68
|
-
获取文件md5值
|
69
|
-
:param file_path: 文件路径
|
70
|
-
:param is_upper: 是否转大写 默认False
|
71
|
-
:return:
|
72
|
-
"""
|
73
|
-
with open(file_path, 'rb') as file:
|
74
|
-
data = file.read()
|
75
|
-
md5_hash = hashlib.md5(data).hexdigest()
|
76
|
-
if is_upper:
|
77
|
-
return md5_hash.upper()
|
78
|
-
return md5_hash
|
79
|
-
|
80
|
-
|
81
|
-
def get_file_md5_16(file_path, is_upper=False):
|
82
|
-
"""
|
83
|
-
获取文件md5值
|
84
|
-
:param file_path: 文件路径
|
85
|
-
:param is_upper: 是否转大写 默认False
|
86
|
-
:return:
|
87
|
-
"""
|
88
|
-
result = get_file_md5_32(file_path, is_upper)
|
89
|
-
return result[8:24]
|
90
|
-
|
91
|
-
|
92
|
-
def get_str_to_json(str_json):
|
93
|
-
"""
|
94
|
-
字符串类型的json格式 转 json
|
95
|
-
:param str_json: 字符串json
|
96
|
-
:return:
|
97
|
-
"""
|
98
|
-
try:
|
99
|
-
new_str_json = str_json.replace("'", '"'). \
|
100
|
-
replace("None", "null").replace("True", "true"). \
|
101
|
-
replace("False", "false")
|
102
|
-
return json.loads(new_str_json)
|
103
|
-
except Exception as e:
|
104
|
-
return {}
|
105
|
-
|
106
|
-
|
107
|
-
def get_build_url_with_params(url, params):
|
108
|
-
"""
|
109
|
-
传入url和params拼接完整的url ->效果 https://wwww.xxxx.com/?xxx1=1&xxx2=2
|
110
|
-
:param url:
|
111
|
-
:param params:
|
112
|
-
:return:
|
113
|
-
"""
|
114
|
-
encoded_params = urlencode(params)
|
115
|
-
full_url = url + "?" + encoded_params
|
116
|
-
return full_url
|
117
|
-
|
118
|
-
|
119
|
-
def get_calculate_total_page(total, limit):
|
120
|
-
"""
|
121
|
-
根据total和limit计算出一共有多少页
|
122
|
-
:param total:
|
123
|
-
:param limit:
|
124
|
-
:return:
|
125
|
-
"""
|
126
|
-
if limit <= 0:
|
127
|
-
return 0
|
128
|
-
# 根据总条数和limit计算总页数
|
129
|
-
total_pages = math.ceil(total / limit)
|
130
|
-
return total_pages
|
131
|
-
|
132
|
-
def list_to_strtuple(datas):
|
133
|
-
"""
|
134
|
-
列表转字符串
|
135
|
-
:param datas: datas: [1, 2]
|
136
|
-
:return: (1, 2) 字符串类型
|
137
|
-
"""
|
138
|
-
data_str = str(tuple(datas))
|
139
|
-
data_str = re.sub(",\)$", ")", data_str)
|
140
|
-
return data_str
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
def dumps_json(data,indent=4,sort_keys=False):
|
13
|
+
def split_image(img):
|
146
14
|
"""
|
147
|
-
|
148
|
-
:param
|
149
|
-
:param indent: 每一级嵌套都使用4个空格进行缩进
|
150
|
-
:param sort_keys: 是否排序
|
15
|
+
切割图片
|
16
|
+
:param img:
|
151
17
|
:return:
|
152
18
|
"""
|
153
|
-
try:
|
154
|
-
if isinstance(data, str):
|
155
|
-
data = get_str_to_json(data)
|
156
|
-
|
157
|
-
data = json.dumps(
|
158
|
-
data,
|
159
|
-
ensure_ascii=False,
|
160
|
-
indent=indent,
|
161
|
-
skipkeys=True,
|
162
|
-
sort_keys=sort_keys,
|
163
|
-
default=str,
|
164
|
-
)
|
165
|
-
|
166
|
-
except Exception as e:
|
167
|
-
data = pformat(data)
|
168
|
-
|
169
|
-
return data
|
170
|
-
|
171
|
-
|
172
|
-
def split_image(img):
|
173
19
|
pass
|
174
20
|
|
21
|
+
|
175
22
|
if __name__ == '__main__':
|
176
23
|
pass
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 加解密、编解码
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/5/13 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import hashlib
|
12
|
+
|
13
|
+
|
14
|
+
def get_md5_32(s: str, is_upper=False):
    """
    Return the 32-character hexadecimal MD5 digest of a text string.
    :param s: text to hash; it is encoded with ``str.encode()`` before hashing
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: 32-character hex digest
    """
    digest = hashlib.md5(s.encode()).hexdigest()
    return digest.upper() if is_upper else digest
|
26
|
+
|
27
|
+
|
28
|
+
def get_md5_16(s: str, is_upper=False):
    """
    Return the 16-character MD5 of a text string.
    :param s: text to hash
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: characters 8..23 (the middle 16) of the 32-character digest
    """
    return get_md5_32(s, is_upper)[8:24]
|
37
|
+
|
38
|
+
|
39
|
+
def get_binary_content_md5_32(content, is_upper=False):
    """
    Return the 32-character hexadecimal MD5 digest of binary content (e.g. an image).
    :param content: bytes-like data to hash
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: 32-character hex digest
    """
    digest = hashlib.md5(content).hexdigest()
    if not is_upper:
        return digest
    return digest.upper()
|
51
|
+
|
52
|
+
|
53
|
+
def get_binary_content_md5_16(content, is_upper=False):
    """
    Return the 16-character MD5 of binary content (e.g. an image).
    :param content: bytes-like data to hash
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: characters 8..23 (the middle 16) of the 32-character digest
    """
    return get_binary_content_md5_32(content, is_upper)[8:24]
|
62
|
+
|
63
|
+
|
64
|
+
def get_file_md5_32(file_path, is_upper=False):
    """
    Return the 32-character hexadecimal MD5 digest of a file's contents.

    The file is hashed in fixed-size chunks so that large files do not have
    to be loaded into memory in one piece.
    :param file_path: path of the file to hash
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: 32-character hex digest
    """
    chunk_size = 1024 * 1024  # 1 MiB per read
    md5_hash = hashlib.md5()
    with open(file_path, 'rb') as file:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: file.read(chunk_size), b""):
            md5_hash.update(chunk)
    digest = md5_hash.hexdigest()
    return digest.upper() if is_upper else digest
|
77
|
+
|
78
|
+
|
79
|
+
def get_file_md5_16(file_path, is_upper=False):
    """
    Return the 16-character MD5 of a file's contents.
    :param file_path: path of the file to hash
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: characters 8..23 (the middle 16) of the 32-character digest
    """
    return get_file_md5_32(file_path, is_upper)[8:24]
|
88
|
+
|
89
|
+
|
90
|
+
def get_sha1(s: str, is_upper=False):
    """
    Return the hexadecimal SHA-1 digest of a text string.
    :param s: text to hash; it is encoded with ``str.encode()`` before hashing
    :param is_upper: return the digest in upper case when truthy (default False)
    :return: 40-character hex digest
    """
    digest = hashlib.sha1(s.encode()).hexdigest()
    return digest.upper() if is_upper else digest
|
102
|
+
|
103
|
+
if __name__ == '__main__':
|
104
|
+
print(get_sha1("111"))
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 文件
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/5/13 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import os
|
12
|
+
import re
|
13
|
+
|
14
|
+
|
15
|
+
def get_file_extension(file_name):
    """
    Return the extension (suffix) of a file name.
    :param file_name: file name or path
    :return: the extension including its leading dot, or "" when there is none
    """
    return os.path.splitext(file_name)[1]
|
23
|
+
|
24
|
+
|
25
|
+
def get_file_check_filename(file_name):
    """
    Turn an arbitrary string into a usable file name.

    Removes the characters ``\\ / : * ? " < > |`` and limits the result to 255
    characters.  Typically used by crawlers that build file names from
    scraped titles.  When the name has to be shortened, the stem is cut and
    the extension is kept.
    :param file_name: desired file name
    :return: sanitized file name, at most 255 characters long
    """
    max_length = 255  # common per-name limit imposed by operating systems

    # The previous class "[\/:*?...]" only escaped the slash and therefore
    # never matched a backslash; "\\\\" in the raw pattern matches it.
    sanitized_filename = re.sub(r'[\\/:*?"<>|]', '', file_name)
    if len(sanitized_filename) <= max_length:
        return sanitized_filename

    stem, file_extension = os.path.splitext(sanitized_filename)
    if len(file_extension) >= max_length:
        # Degenerate case: the "extension" alone fills the limit.
        return sanitized_filename[:max_length]
    return stem[:max_length - len(file_extension)] + file_extension
|
37
|
+
|
38
|
+
|
39
|
+
if __name__ == '__main__':
|
40
|
+
pass
|
41
|
+
print(get_file_extension('file/2024-04-19/BOSCH GEX 125-1A/125-1AE砂磨机操作说明书:[1]_jingyan.txt'))
|
42
|
+
print(get_file_check_filename('file/2024-04-19/BOSCH GEX 125-1A/125-1AE砂磨机操作说明书:[1]_jingyan.txt'))
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 杂七杂八
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/5/13 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import re
|
12
|
+
import uuid
|
13
|
+
import math
|
14
|
+
import json
|
15
|
+
from uuid import UUID
|
16
|
+
from pprint import pformat
|
17
|
+
from urllib.parse import urlencode
|
18
|
+
|
19
|
+
|
20
|
+
def get_uuid(version=4, namespace: UUID = uuid.NAMESPACE_DNS, name=""):
    """
    Generate a UUID.
    :param version: UUID version
        1: built by ``uuid.uuid1()``
        3: name-based, ``uuid.uuid3(namespace, name)`` (MD5)
        4: random, ``uuid.uuid4()``
        5: name-based, ``uuid.uuid5(namespace, name)`` (SHA-1)
        any other value falls back to version 4
    :param namespace: namespace for versions 3 and 5, e.g. uuid.NAMESPACE_DNS,
        uuid.NAMESPACE_URL, uuid.NAMESPACE_OID, uuid.NAMESPACE_X500
    :param name: name for versions 3 and 5
    :return: a ``uuid.UUID`` object (use ``str()``, ``.hex``, ``.int`` or ``.bytes`` as needed)
    """
    if version in (3, 5):
        # Both name-based versions take the same (namespace, name) arguments.
        build = uuid.uuid3 if version == 3 else uuid.uuid5
        return build(namespace, name)
    if version == 1:
        return uuid.uuid1()
    return uuid.uuid4()
|
46
|
+
|
47
|
+
|
48
|
+
def get_str_to_json(str_json):
    """
    Parse a JSON-like string into Python data.

    Three strategies are tried in order, so that well-formed input is never
    altered before parsing:
      1. strict JSON (``json.loads``);
      2. a Python literal such as ``"{'a': None}"`` (``ast.literal_eval``);
      3. the legacy textual rewrite (single quotes -> double quotes,
         None/True/False -> null/true/false) for mixed notations.
    :param str_json: JSON or Python-literal text
    :return: the parsed value, or ``{}`` when nothing can parse the input
    """
    import ast  # local import: only this function needs it

    json_types = (dict, list, str, int, float, bool, type(None))

    # 1) Strict JSON first - the blind replace() used previously corrupted
    #    valid documents whose strings contain "'", "None", "True" or "False".
    try:
        return json.loads(str_json)
    except (TypeError, ValueError):
        pass

    # 2) Python repr-style text; only accept results JSON could also express.
    try:
        parsed = ast.literal_eval(str_json)
        if isinstance(parsed, json_types):
            return parsed
    except (TypeError, ValueError, SyntaxError, MemoryError, RecursionError):
        pass

    # 3) Legacy fallback, kept for inputs that mix both notations.
    try:
        new_str_json = str_json.replace("'", '"'). \
            replace("None", "null").replace("True", "true"). \
            replace("False", "false")
        return json.loads(new_str_json)
    except (AttributeError, TypeError, ValueError):
        return {}
|
61
|
+
|
62
|
+
|
63
|
+
def list_to_strtuple(datas):
    """
    Render a list as the text of a tuple.
    :param datas: iterable of values, e.g. [1, 2]
    :return: string such as "(1, 2)"; a single element gives "(1)" rather
             than "(1,)" because the trailing comma is removed
    """
    data_str = str(tuple(datas))
    # Raw string: the previous ",\)$" relied on an invalid escape sequence,
    # which newer Python versions warn about.
    data_str = re.sub(r",\)$", ")", data_str)
    return data_str
|
72
|
+
|
73
|
+
|
74
|
+
def dumps_json(data, indent=4, sort_keys=False):
    """
    Format data as a printable JSON string.
    :param data: value to format; a string is first parsed with ``get_str_to_json``
    :param indent: indentation per nesting level (default 4 spaces)
    :param sort_keys: whether to sort dictionary keys
    :return: JSON text (non-ASCII kept as-is, unserializable values passed
             through ``str``); if formatting raises, ``pprint.pformat`` output instead
    """
    try:
        if isinstance(data, str):
            data = get_str_to_json(data)
        return json.dumps(
            data,
            ensure_ascii=False,
            indent=indent,
            skipkeys=True,
            sort_keys=sort_keys,
            default=str,
        )
    except Exception:
        # Fall back to a plain pretty-print of whatever we have at this point.
        return pformat(data)
|
99
|
+
|
100
|
+
|
101
|
+
def get_calculate_total_page(total, limit):
    """
    Compute how many pages are needed for ``total`` items at ``limit`` per page.
    :param total: total number of items
    :param limit: page size
    :return: number of pages, rounded up; 0 when ``limit`` is not positive
    """
    if limit <= 0:
        return 0
    if isinstance(total, int) and isinstance(limit, int):
        # Exact integer ceiling division; going through float division
        # loses precision once the numbers exceed what a float can represent.
        return -(-total // limit)
    return math.ceil(total / limit)
|
113
|
+
|
114
|
+
def get_build_url_with_params(url, params):
    """
    Append URL-encoded query parameters to a URL,
    e.g. https://wwww.xxxx.com/?xxx1=1&xxx2=2
    :param url: base URL, with or without an existing query string
    :param params: mapping (or sequence of pairs) accepted by ``urlencode``
    :return: the full URL; ``url`` unchanged when ``params`` is empty
    """
    if not params:
        # Nothing to append - avoid producing a dangling "?".
        return url

    encoded_params = urlencode(params)
    if "?" not in url:
        separator = "?"
    elif url.endswith(("?", "&")):
        # The URL already ends with a separator.
        separator = ""
    else:
        # Extend the existing query string instead of adding a second "?".
        separator = "&"
    return url + separator + encoded_params
|
124
|
+
|
125
|
+
if __name__ == '__main__':
|
126
|
+
print(get_uuid(4))
|
xtn_tools_pro/utils/log.py
CHANGED
@@ -10,11 +10,10 @@
|
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
11
11
|
import os
|
12
12
|
import sys
|
13
|
-
import time
|
14
13
|
import inspect
|
15
14
|
import logging
|
16
|
-
from xtn_tools_pro.tools_time import get_time_timestamp_to_datestr
|
17
15
|
from logging.handlers import BaseRotatingHandler
|
16
|
+
from xtn_tools_pro.utils.time_utils import get_time_timestamp_to_datestr
|
18
17
|
|
19
18
|
|
20
19
|
class RotatingFileHandler(BaseRotatingHandler):
|
xtn_tools_pro/utils/sql.py
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
# 2024/5/12 xiatn V00.01.000 新建
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
11
11
|
import datetime
|
12
|
-
from xtn_tools_pro.
|
12
|
+
from xtn_tools_pro.utils.helpers import list_to_strtuple, dumps_json
|
13
13
|
|
14
14
|
|
15
15
|
def format_sql_value(value):
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 时间
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/5/13 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import time
|
12
|
+
import datetime
|
13
|
+
|
14
|
+
|
15
|
+
def get_time_now_timestamp(is_time_10=False, is_time_13=False):
    """
    Return the current Unix timestamp.
    :param is_time_10: return whole seconds as an int (10 digits); takes precedence
    :param is_time_13: return whole milliseconds as an int (13 digits)
    :return: int when either flag is set, otherwise the float from ``time.time()``
    """
    now = time.time()
    if is_time_10:
        return int(now)
    if is_time_13:
        return int(now * 1000)
    return now
|
30
|
+
|
31
|
+
|
32
|
+
def get_time_now_day0_timestamp(is_time_13=False):
    """
    Return the timestamp of today's 00:00:00 (local time).
    :param is_time_13: return milliseconds (13 digits) instead of seconds (10 digits)
    :return: int timestamp
    """
    midnight = time.mktime(datetime.date.today().timetuple())
    return int(midnight * 1000) if is_time_13 else int(midnight)
|
43
|
+
|
44
|
+
|
45
|
+
def get_time_now_day59_timestamp(is_time_13=False):
    """
    Return the timestamp of today's 23:59:59 (local time).
    :param is_time_13: return a 13-digit millisecond timestamp instead of 10-digit seconds
    :return: int timestamp
    """
    # Today's date with the clock moved to the last second of the day.
    end_of_day = datetime.datetime.now().replace(hour=23, minute=59, second=59)
    timestamp = time.mktime(end_of_day.timetuple())
    if not is_time_13:
        return int(timestamp)
    return get_time_10_to_13_timestamp(timestamp)
|
62
|
+
|
63
|
+
|
64
|
+
def get_time_x_day_timestamp(x, is_time_13=False):
    """
    Return the 00:00:00 timestamp of the day ``x`` days away from today.
    :param x: 0: today; 1: one day later; -1: one day earlier
    :param is_time_13: return a 13-digit millisecond timestamp instead of 10-digit seconds
    :return: int timestamp
    """
    # A positive offset moves forward and a negative one backward, so a single
    # addition covers today, future and past days alike.
    target_day = datetime.datetime.now() + datetime.timedelta(days=x)
    date_string = target_day.strftime("%Y-%m-%d")
    return get_time_datestr_to_timestamp(date_string=date_string, is_time_13=is_time_13)
|
82
|
+
|
83
|
+
|
84
|
+
def get_time_datestr_to_timestamp(date_string, date_format="%Y-%m-%d", is_time_13=False):
    """
    Convert a formatted date string into a timestamp.  ``date_string`` and
    ``date_format`` must match; the default format is %Y-%m-%d, e.g. 2024-04-18.
    :param date_string: date text, e.g. 2024-04-18
    :param date_format: ``strptime`` format describing ``date_string``
    :param is_time_13: return a 13-digit millisecond timestamp instead of 10-digit seconds
    :return: int timestamp
    """
    seconds = datetime.datetime.strptime(date_string, date_format).timestamp()
    if not is_time_13:
        return int(seconds)
    return get_time_10_to_13_timestamp(seconds)
|
98
|
+
|
99
|
+
|
100
|
+
def get_time_10_to_13_timestamp(timestamp):
    """
    Convert a 10-digit (seconds) timestamp to a 13-digit (milliseconds) one.
    :param timestamp: value accepted by ``int()``; any fractional part is dropped
    :return: the value times 1000 when it has exactly 10 digits, otherwise the
             integer value unchanged
    """
    seconds = int(timestamp)
    if len(str(seconds)) != 10:
        return seconds
    return seconds * 1000
|
110
|
+
|
111
|
+
|
112
|
+
def get_time_13_to_10_timestamp(timestamp):
    """
    Convert a 13-digit (milliseconds) timestamp to a 10-digit (seconds) one.
    :param timestamp: value accepted by ``int()``; any fractional part is dropped
    :return: the value floor-divided by 1000 when it has exactly 13 digits,
             otherwise the integer value unchanged
    """
    millis = int(timestamp)
    if len(str(millis)) != 13:
        return millis
    return millis // 1000
|
122
|
+
|
123
|
+
|
124
|
+
def get_time_timestamp_to_datestr(format='%Y-%m-%d %H:%M:%S', now_time=0):
    """
    Format a timestamp as a local-time date string; accepts both 10-digit
    and 13-digit timestamps.
    :param format: ``strftime`` format, e.g. %Y-%m-%d %H:%M:%S, %Y-%m-%d, %Y/%m/%d, %H:%M:%S ...
    :param now_time: timestamp to format; the default 0 means "now"
    :return: formatted date string
    """
    stamp = now_time if now_time else get_time_now_timestamp()
    # Normalize millisecond timestamps to seconds before formatting.
    seconds = get_time_13_to_10_timestamp(stamp)
    return time.strftime(format, time.localtime(seconds))
|
137
|
+
|
138
|
+
|
139
|
+
if __name__ == '__main__':
|
140
|
+
pass
|
141
|
+
print(get_time_timestamp_to_datestr())
|
142
|
+
print(get_time_timestamp_to_datestr(format="%H:%M:%S", now_time=get_time_now_timestamp(is_time_10=True)))
|
143
|
+
print(get_time_timestamp_to_datestr(now_time=get_time_now_timestamp(is_time_13=True)))
|
@@ -0,0 +1,24 @@
|
|
1
|
+
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
+
xtn_tools_pro/tools.py,sha256=KmOOrSnjZNJ3u9vFkMO3FX0DxtlW3fnI5fL4e_raCRs,542
|
3
|
+
xtn_tools_pro/tools_flie.py,sha256=-FID31G8AnV2u2djRO3Ae3Ei9ld9bSBWgzwqhYzZ5JQ,1517
|
4
|
+
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
+
xtn_tools_pro/db/MongoDB.py,sha256=zQLfZ-mhZWCFgY3dPF3dRAzAR3C8hrXO_yLZw9cUUUY,5573
|
6
|
+
xtn_tools_pro/db/MysqlDB.py,sha256=SBJrcjbZdxmtTKPGwl57NthPhs4uX8J3P6o_rK01O4k,13373
|
7
|
+
xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6053
|
8
|
+
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
9
|
+
xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=6jzGgN2t2zLPIKbSgN8seixwDLY4IjoZvB26f8yTUME,9848
|
10
|
+
xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
|
11
|
+
xtn_tools_pro/proxy/proxy.py,sha256=No6E1pFY5yx2F4976pXPrLtq-QEVp79KupzcufjSN58,8703
|
12
|
+
xtn_tools_pro/utils/__init__.py,sha256=I1_n_NP23F2lBqlF4EOlnOdLYxM8M4pbn63UhJN1hRE,418
|
13
|
+
xtn_tools_pro/utils/crypto.py,sha256=qOsVpQo_2yoPZ71Vcr-W_YGyDwi6iNJNFxR9MnnhG4s,2770
|
14
|
+
xtn_tools_pro/utils/file_utils.py,sha256=4Bmb1ISxqQC-7doPXDyY2o-H0m4upj_-hlLrO36sKJg,1509
|
15
|
+
xtn_tools_pro/utils/helpers.py,sha256=VzevgDk0tZAxFmuFlMBIaJ3QyiFxp5Qg-mLPgE83jRI,3773
|
16
|
+
xtn_tools_pro/utils/log.py,sha256=8CRMiY7Q9LWGJLhfL0YfpANBwLets9f2qWfMOe5PpNM,8139
|
17
|
+
xtn_tools_pro/utils/retry.py,sha256=0wjHsR5DBBKpv4naMfxiky8kprrZes4WURIfFQ4H708,1657
|
18
|
+
xtn_tools_pro/utils/sql.py,sha256=EAKzbkZP7Q09j15Gm6o0_uq0qgQmcCQT6EAawbpp4v0,6263
|
19
|
+
xtn_tools_pro/utils/time_utils.py,sha256=TUtzG61PeVYXhaQd6pBrXAdlz7tBispNIRQRcGhE2No,4859
|
20
|
+
xtn_tools_pro-1.0.0.0.7.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
xtn_tools_pro-1.0.0.0.7.dist-info/METADATA,sha256=epiyUW1nMeu4dZ000DVFujSlMFf1uIv8wO-LnRAAXMQ,431
|
22
|
+
xtn_tools_pro-1.0.0.0.7.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
23
|
+
xtn_tools_pro-1.0.0.0.7.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
24
|
+
xtn_tools_pro-1.0.0.0.7.dist-info/RECORD,,
|
@@ -1,19 +0,0 @@
|
|
1
|
-
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
-
xtn_tools_pro/tools.py,sha256=xOqTL7jrJvmsPZBiUizKe7yWkmqvEbt-WGylGswNcQ8,4457
|
3
|
-
xtn_tools_pro/tools_flie.py,sha256=-FID31G8AnV2u2djRO3Ae3Ei9ld9bSBWgzwqhYzZ5JQ,1517
|
4
|
-
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
-
xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
6
|
-
xtn_tools_pro/db/MysqlDB.py,sha256=SBJrcjbZdxmtTKPGwl57NthPhs4uX8J3P6o_rK01O4k,13373
|
7
|
-
xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6053
|
8
|
-
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
9
|
-
xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=xrEBJZ6Cjuh6IBZVB17oXHwByaeKcWVUSSWSg17tujE,9842
|
10
|
-
xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
|
11
|
-
xtn_tools_pro/utils/__init__.py,sha256=I1_n_NP23F2lBqlF4EOlnOdLYxM8M4pbn63UhJN1hRE,418
|
12
|
-
xtn_tools_pro/utils/log.py,sha256=bSuPsGKVBbGLT5hmIcfRi9TeuJApALcPa1qGMrCHk24,8146
|
13
|
-
xtn_tools_pro/utils/retry.py,sha256=0wjHsR5DBBKpv4naMfxiky8kprrZes4WURIfFQ4H708,1657
|
14
|
-
xtn_tools_pro/utils/sql.py,sha256=YSDx2BxkQk-QA6RGBiImR9bbT4WyKDiCYU8Q4ZdaMG0,6255
|
15
|
-
xtn_tools_pro-1.0.0.0.5.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
xtn_tools_pro-1.0.0.0.5.dist-info/METADATA,sha256=JAHmDIpLXqWb8Y5SoEso1ss7gh09BfnMEhmUIBc_D6c,431
|
17
|
-
xtn_tools_pro-1.0.0.0.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
18
|
-
xtn_tools_pro-1.0.0.0.5.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
19
|
-
xtn_tools_pro-1.0.0.0.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|