xtn-tools-pro 1.0.0.0.2__tar.gz → 1.0.0.0.4__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {xtn-tools-pro-1.0.0.0.2/xtn_tools_pro.egg-info → xtn-tools-pro-1.0.0.0.4}/PKG-INFO +1 -1
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/setup.py +3 -2
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/db/RedisDB.py +47 -0
- xtn-tools-pro-1.0.0.0.4/xtn_tools_pro/proxy/XiaoXiangProxy.py +207 -0
- xtn-tools-pro-1.0.0.0.4/xtn_tools_pro/proxy/__init__.py +10 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/tools.py +32 -7
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4/xtn_tools_pro.egg-info}/PKG-INFO +1 -1
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro.egg-info/SOURCES.txt +3 -1
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro.egg-info/requires.txt +1 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/LICENSE +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/README.md +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/setup.cfg +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/__init__.py +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/db/MongoDB.py +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/db/__init__.py +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/tools_flie.py +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro/tools_time.py +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro.egg-info/dependency_links.txt +0 -0
- {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro.egg-info/top_level.txt +0 -0
@@ -15,7 +15,7 @@ with open("README.md", "r") as f:
|
|
15
15
|
|
16
16
|
setuptools.setup(
|
17
17
|
name="xtn-tools-pro", # 模块名称
|
18
|
-
version="1.0.0.0.2", # 版本
|
18
|
+
version="1.0.0.0.4", # 版本
|
19
19
|
author="xtn", # 作者
|
20
20
|
author_email="czw011122@163.com", # 作者邮箱
|
21
21
|
description="xtn 开发工具", # 模块简介
|
@@ -29,7 +29,8 @@ setuptools.setup(
|
|
29
29
|
# 依赖模块
|
30
30
|
install_requires=[
|
31
31
|
"pymongo",
|
32
|
-
"redis"
|
32
|
+
"redis",
|
33
|
+
"requests"
|
33
34
|
],
|
34
35
|
python_requires='>=3',
|
35
36
|
)
|
@@ -27,6 +27,9 @@ class RedisDBPro:
|
|
27
27
|
self.__redis = None
|
28
28
|
self.get_connect()
|
29
29
|
|
30
|
+
def __del__(self):
    """
    Close the underlying Redis client when this wrapper is garbage collected.

    The client attribute is initialised to None before connecting, and may be
    missing entirely if construction failed early, so both cases are tolerated.
    :return:
    """
    try:
        client = self.__redis
    except AttributeError:
        # __init__ did not get far enough to create the attribute.
        return
    if client is None:
        return
    try:
        client.close()
    except Exception:
        # Finalizers may run during interpreter shutdown; a failed close must
        # not surface as an "Exception ignored in __del__" traceback.
        pass
|
32
|
+
|
30
33
|
@classmethod
|
31
34
|
def from_url(cls, url):
|
32
35
|
"""
|
@@ -110,9 +113,53 @@ class RedisDBPro:
|
|
110
113
|
else:
|
111
114
|
return self._redis.sadd(table, values)
|
112
115
|
|
116
|
+
def incr(self, key, amount=1):
    """
    Increment the integer value stored at a key.

    :param key: key whose value is incremented
    :param amount: step to add; defaults to 1, which matches the previous behaviour
    :return: whatever the underlying client's ``incr`` returns
    """
    return self._redis.incr(key, amount)
|
123
|
+
|
124
|
+
def get_all_key(self, path):
    """
    Collect every key produced by the client's ``scan_iter`` for a pattern.

    Typical patterns: ``test*`` for keys starting with "test",
    ``*test*`` for keys containing "test".
    :param path: match pattern handed to ``scan_iter``
    :return: list of the keys yielded by the scan
    """
    key_iterator = self._redis.scan_iter(path)
    return [*key_iterator]
|
132
|
+
|
133
|
+
def get(self, table):
    """
    Read the value stored at a key.

    :param table: key to read
    :return: whatever the underlying client's ``get`` returns
    """
    stored_value = self._redis.get(table)
    return stored_value
|
135
|
+
|
136
|
+
def set(self, table, value, **kwargs):
    """
    Set a string value at a key.

    Extra keyword arguments are forwarded unchanged to the client's ``set``.
    The options below follow the redis-py ``Redis.set`` signature; confirm
    them against the installed redis-py version:

    :param table: key to write
    :param value: value to store
    :param kwargs: ex (optional): expiry in seconds, e.g. ex=10 expires after 10 seconds
    :param kwargs: px (optional): expiry in milliseconds, e.g. px=10000 expires after 10 seconds
    :param kwargs: nx (optional): if True, only set the key when it does not exist
    :param kwargs: xx (optional): if True, only set the key when it already exists
    :param kwargs: keepttl (optional): if True, keep the key's existing time to live
    :param kwargs: exat (optional): expiry as a UNIX timestamp in seconds
    :param kwargs: pxat (optional): expiry as a UNIX timestamp in milliseconds
    :return: whatever the underlying client's ``set`` returns
    """
    # NOTE: earlier documentation listed ``kepp_ttl`` and ``replace``; neither
    # is a redis-py ``set`` keyword, and passing them raises TypeError there.
    return self._redis.set(table, value, **kwargs)
|
153
|
+
|
154
|
+
def delete(self, table):
    """
    Delete a key.

    :param table: key to delete
    :return: whatever the underlying client's ``delete`` returns
    """
    delete_result = self._redis.delete(table)
    return delete_result
|
156
|
+
|
113
157
|
|
114
158
|
if __name__ == '__main__':
    # Manual smoke test against a local Redis instance.
    # r = RedisDBPro(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk")
    r = RedisDBPro.from_url('redis://:xtn-kk@127.0.0.1:6379/0')
    # status = r.sadd("test_redis_pro", [1, 2, 3, 4, 5, "6", "7"])
    # print(status)
    # print(r.get_all_key("*http*"))
    delete_status = r.delete("test_redis_pro")
    print(delete_status)
|
@@ -0,0 +1,207 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 小象代理专用
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import requests, time, random
|
12
|
+
from xtn_tools_pro.db.RedisDB import RedisDBPro
|
13
|
+
from xtn_tools_pro.tools_time import get_time_now_timestamp, get_time_now_day59_timestamp
|
14
|
+
|
15
|
+
import warnings
|
16
|
+
from urllib3.exceptions import InsecureRequestWarning
|
17
|
+
|
18
|
+
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
19
|
+
|
20
|
+
|
21
|
+
class ProxyPool:
    """
    Redis-backed pool of XiaoXiang proxies.

    Each proxy is stored under the key ``{redis_proxy_name}:http://{ip}:{port}:{created_ts}``
    whose value is an integer usage counter. ``run`` keeps the pool filled and
    pruned; ``get_proxy`` / ``set_proxy_error`` are the crawler-facing API.
    """

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="XiaoXiangProxy",
                 XiaoXiangProxyAppKey=None, XiaoXiangProxyAppSecret=None, usage_cnt=100, usage_time=100):
        """
        :param ip: Redis host
        :param port: Redis port
        :param db: Redis database index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param XiaoXiangProxyAppKey: XiaoXiang application id (appKey)
        :param XiaoXiangProxyAppSecret: XiaoXiang application secret (appSecret)
        :param usage_cnt: maximum usage count per proxy (a count, not seconds)
        :param usage_time: maximum lifetime per proxy, in seconds
        :raises ValueError: if the app key or the app secret is empty
        """
        if not XiaoXiangProxyAppSecret or not XiaoXiangProxyAppKey:
            # ValueError is a subclass of Exception, so existing handlers still match.
            raise ValueError("应用密码或应用id 不能为空")

        self.__redis_pool = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redisProxyName = redis_proxy_name
        self.__XiaoXiangProxyAPI = "https://api.xiaoxiangdaili.com/ip/get?appKey={appKey}&appSecret={appSecret}&cnt=&wt=json".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
        self.__XiaoXiangAutoBinding = "https://api.xiaoxiangdaili.com/app/bindIp?appKey={appKey}&appSecret={appSecret}&i=1".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)

        # Deadline used by run() to stop its loop; presumably today's 23:59
        # timestamp, judging by the helper's name and run()'s log message.
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time

    @staticmethod
    def _to_text(value):
        """Return a Redis key as str, decoding it when the client hands back bytes."""
        if isinstance(value, (bytes, bytearray)):
            return bytes(value).decode("utf-8")
        return str(value)

    def __log(self, text):
        """
        Emit a log line (currently printed to stdout).

        :param text: message to log
        :return:
        """
        print(text)

    def __check_proxy(self):
        """
        Prune proxies that should no longer be used.

        A proxy is deleted when: 1. it has lived longer than ``usage_time``
        seconds; 2. its counter reached ``usage_cnt``; 3. a crawler marked it
        with the sentinel counter 999999.
        :return:
        """
        now = get_time_now_timestamp(is_time_10=True)
        for proxy_val in self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)):
            proxy_key = self._to_text(proxy_val)
            # The creation timestamp is the last ':'-separated field of the key.
            time_out = proxy_key.split(":")[-1]
            raw_count = self.__redis_pool.get(proxy_val)
            try:
                created_at = int(time_out)
                # None means the key vanished between the scan and this read.
                use_count = None if raw_count is None else int(raw_count)
            except (TypeError, ValueError):
                self.__log("代理键格式异常,已跳过:{proxy_val}".format(proxy_val=proxy_key))
                continue

            if created_at + self.__usage_time < now:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                    format(proxy_val=proxy_key, del_state=del_state,
                           time_out=time_out, py_time=now))
            elif use_count is not None and use_count >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(proxy_val)
                # Compare the parsed int: the raw Redis value is str/bytes and
                # cannot be ordered against 999999 on Python 3.
                reason = "当前代理被爬虫标记为不可用" if use_count >= 999999 else "当前代理已超过使用次数"
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(proxy_val=proxy_key,
                                                                       text=reason,
                                                                       del_state=del_state))

    def __get_proxy_length(self):
        """
        Count the proxies currently stored in the pool.

        :return: number of keys matching the pool prefix
        """
        return len(list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName))))

    def __incr_proxy(self, proxy_val):
        """
        Increment the usage counter of a proxy key (creating it on first use).

        :param proxy_val: full proxy key
        :return: counter value after the increment
        """
        return self.__redis_pool.incr(proxy_val)

    def __get_api_proxy(self):
        """
        Fetch proxies from the XiaoXiang API and store them in Redis.

        Loops until a non-empty batch has been stored. Sample responses:
            {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
            {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
        :return: True once at least one proxy batch was stored
        """
        while True:
            self.__check_proxy()
            try:
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                payload = response.json()
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                # "data" is null on failure, so normalise it to an empty list.
                proxy_data_list = payload.get("data") or []
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                for data in proxy_data_list:
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip="http://{ip}".format(ip=data.get("ip")),
                        port=data.get("port"),
                        timeOut=get_time_now_timestamp(is_time_10=True),
                    )
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
                                                                              proxy_key_con=proxy_key_con))
                return True  # fetched successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                # Back off like every other failure path; without this a
                # network outage turns the loop into a busy spin.
                time.sleep(2)

    def run(self):
        """
        Maintain the pool: bind the caller's IP, then prune and refill in a
        loop until the deadline computed at construction time has passed.

        Any exception ends the loop and is logged rather than raised.
        :return:
        """
        try:
            # Bind this machine's IP with the provider before requesting proxies.
            response = requests.get(url=self.__XiaoXiangAutoBinding, verify=False, timeout=3)
            self.__log(response.text)
            while True:
                self.__check_proxy()
                self.__get_api_proxy()
                time.sleep(1)
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """
        Pick a random proxy from the pool, waiting until one is available.

        :return: proxy as ``http://ip:port``; None if an error occurred (the error is logged)
        """
        # NOTE(review): the usage counter is only incremented when a proxy is
        # stored, never here, so the usage_cnt limit is only reached through
        # set_proxy_error's 999999 marker - confirm whether that is intended.
        try:
            while True:
                proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
                if not proxy_val_list:
                    self.__log("暂无代理,等待中")
                    time.sleep(2)
                    continue
                proxy_key = self._to_text(random.choice(proxy_val_list))
                # Drop the leading pool name and the trailing timestamp.
                proxy_v = ":".join(proxy_key.split(":")[1:-1])
                self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                return proxy_v
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))
            return None

    def set_proxy_error(self, proxy_v):
        """
        Mark a proxy as unusable by setting its counter to 999999.

        :param proxy_v: proxy as returned by ``get_proxy`` (``http://ip:port``),
                        or a full pool key
        :return:
        """
        try:
            prefix = "{}:".format(self.__redisProxyName)
            proxy_text = self._to_text(proxy_v)
            if proxy_text.startswith(prefix):
                target_keys = [proxy_v]
            else:
                # get_proxy() strips the pool name and timestamp, so map the
                # bare proxy back to its stored key(s). Writing to the bare
                # value would create a stray key the pool never inspects.
                target_keys = list(self.__redis_pool.get_all_key(
                    "{prefix}{proxy}:*".format(prefix=prefix, proxy=proxy_text)))
            if not target_keys:
                self.__log("未找到需要标记的代理 {proxy_v}".format(proxy_v=proxy_text))
                return
            for target_key in target_keys:
                self.__redis_pool.set(target_key, "999999")
                self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=self._to_text(target_key)))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
|
202
|
+
|
203
|
+
|
204
|
+
if __name__ == '__main__':
    import os

    # Manual smoke test. Provider credentials come from the environment so
    # that real secrets are not committed to source control; ProxyPool raises
    # if either XIAOXIANG_APP_KEY or XIAOXIANG_APP_SECRET is unset.
    p = ProxyPool(ip="127.0.0.1", port=6379, db=0,
                  user_pass=os.environ.get("XTN_REDIS_PASSWORD", "xtn-kk"),
                  XiaoXiangProxyAppKey=os.environ.get("XIAOXIANG_APP_KEY"),
                  XiaoXiangProxyAppSecret=os.environ.get("XIAOXIANG_APP_SECRET"))
    print(p.get_proxy())
|
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 程序说明xxxxxxxxxxxxxxxxxxx
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
@@ -37,25 +37,54 @@ def get_md5_16(s, is_upper=False):
|
|
37
37
|
return result[8:24]
|
38
38
|
|
39
39
|
|
40
|
-
def get_file_md5_32(file_path):
|
40
|
+
def get_binary_content_md5_32(content, is_upper=False):
    """
    Compute the 32-character MD5 hex digest of binary content (e.g. an image).

    :param content: binary content to hash
    :param is_upper: return the digest in upper case; defaults to False
    :return: hex digest string
    """
    digest = hashlib.md5(content).hexdigest()
    return digest.upper() if is_upper else digest
|
52
|
+
|
53
|
+
|
54
|
+
def get_binary_content_md5_16(content, is_upper=False):
    """
    Compute the 16-character MD5 of binary content (e.g. an image).

    This is the middle 16 characters (indexes 8 to 23) of the 32-character digest.
    :param content: binary content to hash
    :param is_upper: return the digest in upper case; defaults to False
    :return: 16-character hex string
    """
    return get_binary_content_md5_32(content, is_upper)[8:24]
|
63
|
+
|
64
|
+
|
65
|
+
def get_file_md5_32(file_path, is_upper=False):
    """
    Compute the 32-character MD5 hex digest of a file.

    The file is hashed in fixed-size chunks, so memory use stays constant
    regardless of file size.
    :param file_path: path of the file to hash
    :param is_upper: return the digest in upper case; defaults to False
    :return: hex digest string
    """
    md5_hash = hashlib.md5()
    with open(file_path, 'rb') as file:
        # iter() with a sentinel stops when read() returns b"" at end of file.
        for chunk in iter(lambda: file.read(1024 * 1024), b""):
            md5_hash.update(chunk)
    digest = md5_hash.hexdigest()
    if is_upper:
        return digest.upper()
    return digest
|
50
78
|
|
51
79
|
|
52
|
-
def get_file_md5_16(file_path):
|
80
|
+
def get_file_md5_16(file_path, is_upper=False):
    """
    Compute the 16-character MD5 of a file.

    This is the middle 16 characters (indexes 8 to 23) of the 32-character digest.
    :param file_path: path of the file to hash
    :param is_upper: return the digest in upper case; defaults to False
    :return: 16-character hex string
    """
    return get_file_md5_32(file_path, is_upper)[8:24]
|
60
89
|
|
61
90
|
|
@@ -100,9 +129,5 @@ def get_calculate_total_page(total, limit):
|
|
100
129
|
return total_pages
|
101
130
|
|
102
131
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
132
|
if __name__ == '__main__':
|
108
133
|
pass
|
@@ -12,4 +12,6 @@ xtn_tools_pro.egg-info/requires.txt
|
|
12
12
|
xtn_tools_pro.egg-info/top_level.txt
|
13
13
|
xtn_tools_pro/db/MongoDB.py
|
14
14
|
xtn_tools_pro/db/RedisDB.py
|
15
|
-
xtn_tools_pro/db/__init__.py
|
15
|
+
xtn_tools_pro/db/__init__.py
|
16
|
+
xtn_tools_pro/proxy/XiaoXiangProxy.py
|
17
|
+
xtn_tools_pro/proxy/__init__.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.4}/xtn_tools_pro.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|