xtn-tools-pro 1.0.0.0.1__py3-none-any.whl → 1.0.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xtn_tools_pro/db/RedisDB.py +47 -0
- xtn_tools_pro/proxy/XiaoXiangProxy.py +207 -0
- {xtnkk_tools → xtn_tools_pro/proxy}/__init__.py +2 -2
- xtn_tools_pro/tools_flie.py +1 -1
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/METADATA +2 -1
- xtn_tools_pro-1.0.0.0.3.dist-info/RECORD +14 -0
- xtn_tools_pro-1.0.0.0.1.dist-info/RECORD +0 -25
- xtnkk_tools/MongoDB.py +0 -137
- xtnkk_tools/db/MongoDB.py +0 -138
- xtnkk_tools/db/__init__.py +0 -10
- xtnkk_tools/tools.py +0 -106
- xtnkk_tools/tools_time.py +0 -142
- xtnkk_tools/update.py +0 -23
- xtnkk_tools_pro/__init__.py +0 -10
- xtnkk_tools_pro/db/MongoDB.py +0 -138
- xtnkk_tools_pro/db/RedisDB.py +0 -118
- xtnkk_tools_pro/db/__init__.py +0 -10
- xtnkk_tools_pro/tools.py +0 -106
- xtnkk_tools_pro/tools_time.py +0 -142
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/LICENSE +0 -0
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/WHEEL +0 -0
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/top_level.txt +0 -0
xtn_tools_pro/db/RedisDB.py
CHANGED
@@ -27,6 +27,9 @@ class RedisDBPro:
|
|
27
27
|
self.__redis = None
|
28
28
|
self.get_connect()
|
29
29
|
|
30
|
+
def __del__(self):
    """
    Close the Redis client when this wrapper is garbage-collected.

    ``__init__`` stores ``None`` in the private client attribute before it
    calls ``get_connect()``, and Python still runs ``__del__`` on objects
    whose ``__init__`` raised.  The client can therefore be ``None`` (or the
    attribute can be missing altogether), so it is looked up defensively.
    """
    # Name-mangled spelling of ``self.__redis`` inside ``RedisDBPro``; using
    # getattr avoids an AttributeError on half-initialised instances.
    client = getattr(self, "_RedisDBPro__redis", None)
    if client is None:
        return
    try:
        client.close()
    except Exception:
        # A finalizer must never raise: errors here (for example during
        # interpreter shutdown) cannot be handled by any caller.
        pass
|
32
|
+
|
30
33
|
@classmethod
|
31
34
|
def from_url(cls, url):
|
32
35
|
"""
|
@@ -110,9 +113,53 @@ class RedisDBPro:
|
|
110
113
|
else:
|
111
114
|
return self._redis.sadd(table, values)
|
112
115
|
|
116
|
+
def incr(self, key):
    """
    Increment the value stored under ``key``.

    :param key: name of the key to increment
    :return: whatever the underlying client's ``incr`` call returns
    """
    new_value = self._redis.incr(key)
    return new_value
|
123
|
+
|
124
|
+
def get_all_key(self, path):
    """
    Collect every key whose name matches a pattern.

    Common patterns: ``test*`` for keys that start with "test" and
    ``*test*`` for keys that contain "test".

    :param path: match pattern handed to the client's ``scan_iter``
    :return: list with all keys produced by the scan
    """
    return [key for key in self._redis.scan_iter(path)]
|
132
|
+
|
133
|
+
def get(self, table):
    """
    Read the value stored under a key.

    :param table: name of the key to read
    :return: whatever the underlying client's ``get`` call returns
    """
    stored = self._redis.get(table)
    return stored
|
135
|
+
|
136
|
+
def set(self, table, value, **kwargs):
    """
    Store a string value under a key.

    Extra keyword arguments are forwarded untouched to the underlying
    client's ``set`` call.  Assuming that client is redis-py (the package
    declares ``redis`` as a dependency), the supported options are:

    :param table: name of the key
    :param value: value to store
    :param kwargs: ``ex`` - expire time in seconds, e.g. ``ex=10``
    :param kwargs: ``px`` - expire time in milliseconds, e.g. ``px=10000``
    :param kwargs: ``nx`` - if True, only set the key when it does not exist
    :param kwargs: ``xx`` - if True, only set the key when it already exists
    :param kwargs: ``keepttl`` - if True, keep the TTL already attached to the key
    :param kwargs: ``get`` - if True, return the previous value of the key
    :param kwargs: ``exat`` - expire at a UNIX timestamp given in seconds
    :param kwargs: ``pxat`` - expire at a UNIX timestamp given in milliseconds
    :return: whatever the underlying client's ``set`` call returns

    NOTE: earlier revisions of this docstring listed ``kepp_ttl`` and
    ``replace``; neither is a keyword accepted by redis-py's ``set`` and
    passing them raises ``TypeError``.
    """
    return self._redis.set(table, value, **kwargs)
|
153
|
+
|
154
|
+
def delete(self, table):
    """
    Remove a key.

    :param table: name of the key to delete
    :return: whatever the underlying client's ``delete`` call returns
    """
    outcome = self._redis.delete(table)
    return outcome
|
156
|
+
|
113
157
|
|
114
158
|
if __name__ == '__main__':
    # Manual smoke test against a local Redis instance: connect through a
    # redis:// URL and delete one test key, printing the client's reply.
    # Alternative construction kept for reference:
    #   RedisDBPro(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk")
    redis_client = RedisDBPro.from_url('redis://:xtn-kk@127.0.0.1:6379/0')
    # Other calls that were tried here:
    #   redis_client.sadd("test_redis_pro", [1, 2, 3, 4, 5, "6", "7"])
    #   redis_client.get_all_key("*http*")
    print(redis_client.delete("test_redis_pro"))
|
@@ -0,0 +1,207 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 小象代理专用
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import requests, time, random
|
12
|
+
from xtn_tools_pro.db.RedisDB import RedisDBPro
|
13
|
+
from xtn_tools_pro.tools_time import get_time_now_timestamp, get_time_now_day59_timestamp
|
14
|
+
|
15
|
+
import warnings
|
16
|
+
from urllib3.exceptions import InsecureRequestWarning
|
17
|
+
|
18
|
+
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
19
|
+
|
20
|
+
|
21
|
+
class ProxyPool:
    """
    Redis-backed pool of XiaoXiang (xiaoxiangdaili.com) HTTP proxies.

    Every proxy is one Redis key shaped like
    ``{redis_proxy_name}:http://{ip}:{port}:{fetch_timestamp}``.  The key is
    created through ``RedisDBPro.incr`` and its value is treated as a usage
    counter; the value ``999999`` marks a proxy as unusable.

    ``run`` maintains the pool, ``get_proxy`` hands out a random proxy and
    ``set_proxy_error`` flags a proxy so the maintenance pass removes it.
    """

    # Counter value written by ``set_proxy_error`` to flag a broken proxy.
    _ERROR_MARK = 999999
    # Seconds to wait before asking the vendor API again after a failure.
    _RETRY_DELAY = 2

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="XiaoXiangProxy",
                 XiaoXiangProxyAppKey=None, XiaoXiangProxyAppSecret=None, usage_cnt=100, usage_time=100):
        """
        Connect to Redis and prepare the vendor API URLs.

        :param ip: Redis host
        :param port: Redis port
        :param db: Redis database index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param XiaoXiangProxyAppKey: XiaoXiang application id (appKey)
        :param XiaoXiangProxyAppSecret: XiaoXiang application secret (appSecret)
        :param usage_cnt: counter value at which a proxy is removed (a count, not seconds)
        :param usage_time: maximum age of a proxy in seconds before it is removed
        :raises Exception: when the app key or the app secret is empty
        """
        if not XiaoXiangProxyAppSecret or not XiaoXiangProxyAppKey:
            raise Exception("应用密码或应用id 不能为空")

        self.__redis_pool = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redisProxyName = redis_proxy_name
        self.__XiaoXiangProxyAPI = "https://api.xiaoxiangdaili.com/ip/get?appKey={appKey}&appSecret={appSecret}&cnt=&wt=json".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
        self.__XiaoXiangAutoBinding = "https://api.xiaoxiangdaili.com/app/bindIp?appKey={appKey}&appSecret={appSecret}&i=1".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)

        # Timestamp returned by get_time_now_day59_timestamp(); ``run`` stops
        # once the current time reaches it (the log message there says 23:59).
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time

    def __log(self, text):
        """
        Emit one log line (currently printed to stdout).

        :param text: message to log
        """
        print(text)

    @staticmethod
    def __to_text(value):
        """Return ``value`` as ``str``, decoding ``bytes`` replies as UTF-8."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return str(value)

    def __pool_keys(self):
        """Return the list of Redis keys that start with the pool prefix."""
        return list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))

    def __check_proxy(self):
        """
        Maintenance pass: delete proxies that should no longer be used.

        A proxy is deleted when (1) it is older than ``usage_time`` seconds,
        (2) its counter reached ``usage_cnt``, or (3) its counter was set to
        999999 by ``set_proxy_error``.  Keys that cannot be parsed, or whose
        counter disappeared between the scan and the read, are skipped so a
        single odd key cannot abort the whole pass.
        """
        for proxy_key in self.__pool_keys():
            key_text = self.__to_text(proxy_key)
            # The last ":"-separated field of the key is the fetch timestamp.
            time_out = key_text.split(":")[-1]
            try:
                fetched_at = int(time_out)
            except ValueError:
                self.__log("跳过无法解析时间戳的代理key:{proxy_val}".format(proxy_val=key_text))
                continue

            now = get_time_now_timestamp(is_time_10=True)
            if fetched_at + self.__usage_time < now:
                del_state = self.__redis_pool.delete(proxy_key)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                    format(proxy_val=key_text, del_state=del_state,
                           time_out=time_out, py_time=now))
                continue

            raw_count = self.__redis_pool.get(proxy_key)
            if raw_count is None:
                # The key vanished between the scan and this read.
                continue
            try:
                # Redis replies are str/bytes; convert once so every
                # comparison below is int-to-int.
                used = int(raw_count)
            except (TypeError, ValueError):
                self.__log("跳过计数无效的代理key:{proxy_val}".format(proxy_val=key_text))
                continue

            flagged = used >= self._ERROR_MARK
            if flagged or used >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(proxy_key)
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(
                        proxy_val=key_text,
                        text="当前代理被爬虫标记为不可用" if flagged else "当前代理已超过使用次数",
                        del_state=del_state))

    def __get_proxy_length(self):
        """
        Count the proxies currently stored under the pool prefix.

        :return: number of matching keys
        """
        return len(self.__pool_keys())

    def __incr_proxy(self, proxy_val):
        """
        Increment the counter of one proxy key (creating the key if needed).

        :param proxy_val: full Redis key of the proxy
        :return: value returned by ``RedisDBPro.incr``
        """
        return self.__redis_pool.incr(proxy_val)

    def __get_api_proxy(self):
        """
        Fetch proxies from the XiaoXiang API and store them in Redis.

        Example responses:
            {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
            {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}

        Retries until one response delivers a non-empty ``data`` list, waiting
        between attempts (also after an exception, so a persistent network
        error does not turn into a busy loop).

        :return: True once a batch of proxies has been stored
        """
        while True:
            self.__check_proxy()
            try:
                # NOTE(review): TLS verification is disabled while the app
                # secret travels in the query string - confirm this is intended.
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue

                payload = response.json()  # parsed once, reused below
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue

                proxy_data_list = payload.get("data", [])
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue

                for data in proxy_data_list:
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip="http://{ip}".format(ip=data.get("ip")),
                        port=data.get("port"),
                        timeOut=get_time_now_timestamp(is_time_10=True),
                    )
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(
                        proxy_key=proxy_key, proxy_key_con=proxy_key_con))
                return True  # fetched successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                time.sleep(self._RETRY_DELAY)

    def run(self):
        """
        Maintenance loop: call the bind-IP endpoint once, then repeatedly
        clean the pool and fetch fresh proxies until the end-of-day
        timestamp captured in ``__init__`` is reached.

        Any exception ends the loop after being logged.
        """
        try:
            # Register the caller's IP with the vendor (bindIp endpoint).
            response = requests.get(url=self.__XiaoXiangAutoBinding, verify=False, timeout=3)
            self.__log(response.text)
            while True:
                self.__check_proxy()
                self.__get_api_proxy()
                time.sleep(1)
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """
        Pick a random proxy from the pool, waiting while the pool is empty.

        :return: proxy in the form ``http://{ip}:{port}`` (the key without
                 its prefix and timestamp); None if an exception was logged
        """
        try:
            while True:
                proxy_val_list = self.__pool_keys()
                if proxy_val_list:
                    proxy_val = self.__to_text(random.choice(proxy_val_list))
                    # Drop the leading prefix field and the trailing timestamp.
                    proxy_v = ":".join(proxy_val.split(":")[1:-1])
                    self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                    return proxy_v
                self.__log("暂无代理,等待中")
                time.sleep(2)
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))

    def set_proxy_error(self, proxy_v):
        """
        Flag a proxy as unusable by setting its counter to 999999.

        ``get_proxy`` returns ``http://{ip}:{port}`` while the Redis keys also
        carry the pool prefix and the fetch timestamp, so the matching pool
        keys are looked up first.  Writing to ``proxy_v`` directly would
        create a key the maintenance pass never scans.  A full pool key
        (starting with the prefix) is also accepted.

        :param proxy_v: proxy as returned by ``get_proxy``, or a full pool key
        """
        try:
            proxy_text = self.__to_text(proxy_v)
            prefix = "{}:".format(self.__redisProxyName)
            if proxy_text.startswith(prefix):
                target_keys = [proxy_text]
            else:
                target_keys = list(self.__redis_pool.get_all_key(
                    "{prefix}{proxy}:*".format(prefix=prefix, proxy=proxy_text)))

            if not target_keys:
                self.__log("代理池中未找到代理 {proxy_v},无法标记".format(proxy_v=proxy_text))
                return

            for target_key in target_keys:
                self.__redis_pool.set(target_key, str(self._ERROR_MARK))
                self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=self.__to_text(target_key)))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
|
202
|
+
|
203
|
+
|
204
|
+
if __name__ == '__main__':
    import os

    # Manual smoke test.  Credentials come from the environment so that a
    # real Redis password or XiaoXiang appKey/appSecret never ships inside
    # the published package.  ProxyPool raises if the key or secret is unset.
    p = ProxyPool(ip="127.0.0.1", port=6379, db=0,
                  user_pass=os.environ.get("REDIS_PASSWORD", ""),
                  XiaoXiangProxyAppKey=os.environ.get("XIAOXIANG_APP_KEY"),
                  XiaoXiangProxyAppSecret=os.environ.get("XIAOXIANG_APP_SECRET"))
    print(p.get_proxy())
|
@@ -2,9 +2,9 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
4
|
# 说明:
|
5
|
-
#
|
5
|
+
# 程序说明xxxxxxxxxxxxxxxxxxx
|
6
6
|
# History:
|
7
7
|
# Date Author Version Modification
|
8
8
|
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
xtn_tools_pro/tools_flie.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: xtn-tools-pro
|
3
|
-
Version: 1.0.0.0.
|
3
|
+
Version: 1.0.0.0.3
|
4
4
|
Summary: xtn 开发工具
|
5
5
|
Author: xtn
|
6
6
|
Author-email: czw011122@163.com
|
@@ -10,5 +10,6 @@ Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: pymongo
|
12
12
|
Requires-Dist: redis
|
13
|
+
Requires-Dist: requests
|
13
14
|
|
14
15
|
xtnkk-tools
|
@@ -0,0 +1,14 @@
|
|
1
|
+
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
+
xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
|
3
|
+
xtn_tools_pro/tools_flie.py,sha256=B_P3J_R-nRLt_IFutnOVrBRGf6_SZ_cXoIoeaT9B7tk,1512
|
4
|
+
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
+
xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
6
|
+
xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6053
|
7
|
+
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
8
|
+
xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=xrEBJZ6Cjuh6IBZVB17oXHwByaeKcWVUSSWSg17tujE,9842
|
9
|
+
xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
|
10
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/METADATA,sha256=VG4Bs3muLvi85SaWcP3Tw3q4TFfafKDtZhbQ-x9CpRw,358
|
12
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
13
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
14
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
-
xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
|
3
|
-
xtn_tools_pro/tools_flie.py,sha256=KuA1sowK31LuOfQMPRsQYPgxr8y7491caepRFYOwqG0,1507
|
4
|
-
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
-
xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
6
|
-
xtn_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
|
7
|
-
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
8
|
-
xtnkk_tools/MongoDB.py,sha256=2mwln6JPfu5N1N8Hbh6KvN6sED-KPTrOteCBHVFjvwM,5497
|
9
|
-
xtnkk_tools/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
10
|
-
xtnkk_tools/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
|
11
|
-
xtnkk_tools/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
|
12
|
-
xtnkk_tools/update.py,sha256=VygnKO9dXo02JyUEkpbJoBE6BceYARZEn-O1i6AO6E0,911
|
13
|
-
xtnkk_tools/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
14
|
-
xtnkk_tools/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
15
|
-
xtnkk_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
16
|
-
xtnkk_tools_pro/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
|
17
|
-
xtnkk_tools_pro/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
|
18
|
-
xtnkk_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
19
|
-
xtnkk_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
|
20
|
-
xtnkk_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
21
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/METADATA,sha256=cGqGoLCT0OxwP7KI82OKR1JUF9d9XnNk_Xg4DCgOSvU,333
|
23
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
24
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
25
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/RECORD,,
|
xtnkk_tools/MongoDB.py
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# MongoDBPro
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/17 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|
11
|
-
from xtn_tools_pro.tools_time import *
|
12
|
-
from urllib import parse
|
13
|
-
from typing import List, Dict, Optional
|
14
|
-
from pymongo import MongoClient
|
15
|
-
from pymongo.database import Database
|
16
|
-
from pymongo.collection import Collection
|
17
|
-
from pymongo.errors import DuplicateKeyError, BulkWriteError
|
18
|
-
|
19
|
-
|
20
|
-
class MongoDBPro:
    """Small convenience wrapper around a ``pymongo.MongoClient``."""

    def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
        """
        Create the client and select the working database.

        :param ip: MongoDB host (used when ``url`` is not given)
        :param port: MongoDB port (used when ``url`` is not given)
        :param db: database name; also used as ``authSource`` in host/port mode
        :param user_name: user name (host/port mode)
        :param user_pass: password (host/port mode)
        :param url: connection URL; when given it takes precedence
        :param kwargs: forwarded to ``MongoClient`` in URL mode only
        """
        if url:
            self.client = MongoClient(url, **kwargs)
        else:
            self.client = MongoClient(host=ip,
                                      port=port,
                                      username=user_name,
                                      password=user_pass,
                                      authSource=db)

        self.db = self.get_database(db)

    @classmethod
    def from_url(cls, url, **kwargs):
        """
        Build an instance from a ``mongodb://`` URL.

        :raises Exception: when the URL scheme is not ``mongodb``
        """
        url_parsed = parse.urlparse(url)
        # Scheme of the URL, e.g. "mongodb".
        db_type = url_parsed.scheme.strip()
        if db_type != "mongodb":
            raise Exception(
                "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
                    url
                ))
        return cls(url=url, **kwargs)

    def get_database(self, database, **kwargs) -> Database:
        """Return the database object for the given database name."""
        return self.client.get_database(database, **kwargs)

    def get_collection(self, coll_name, **kwargs) -> Collection:
        """Return the collection object for the given collection name."""
        return self.db.get_collection(coll_name, **kwargs)

    def run_command(self, command: Dict):
        """
        Run a raw database command on the selected database.

        Reference: https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
        """
        return self.db.command(command)

    # The return annotation used to be ``List[Dict]``, but this function
    # contains ``yield`` and therefore returns a generator, so the misleading
    # annotation was dropped.
    def find(self, coll_name: str, condition: Optional[Dict] = None,
             limit: int = 0, **kwargs):
        """
        Lazily yield the documents matching ``condition``.

        This is a generator: the ``find`` command is sent when iteration
        starts and further batches are requested with ``getMore``.

        :param coll_name: collection name
        :param condition: query filter, e.g. {"name": "John"} or {"_id": "xxxxx"}
        :param limit: value of the command's ``limit`` field
        :param kwargs: extra fields merged into the ``find`` command;
                       ``batchSize`` is also reused for ``getMore`` (default 100)
        """
        condition = {} if condition is None else condition
        command = {"find": coll_name, "filter": condition, "limit": limit}
        command.update(kwargs)
        result = self.run_command(command)
        cursor = result["cursor"]
        cursor_id = cursor["id"]
        while True:
            # The first reply carries "firstBatch", later ones "nextBatch".
            for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
                yield document
            if cursor_id == 0:
                # A cursor id of 0 means there is nothing left to fetch.
                break
            result = self.run_command(
                {
                    "getMore": cursor_id,  # fetch the next batch for this cursor id
                    "collection": coll_name,
                    "batchSize": kwargs.get("batchSize", 100),
                }
            )
            cursor = result["cursor"]
            cursor_id = cursor["id"]

    def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
                     is_add_create_time=False,
                     is_add_create_time_field_name="create_dt"):
        """
        Insert a single document.

        Note that ``data`` is modified in place when ``is_add_create_time``
        is True (the timestamp field is written into the caller's dict).

        :param coll_name: collection name
        :param data: document to insert
        :param insert_ignore: when True, a duplicate-key conflict returns 0 instead of raising
        :param is_add_create_time: when True, add a creation timestamp field
        :param is_add_create_time_field_name: name of that field (default ``create_dt``)
        :return: number of inserted documents (1, or 0 for an ignored duplicate)
        :raises DuplicateKeyError: on a key conflict when ``insert_ignore`` is False
        """
        if is_add_create_time:
            data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
        collection = self.get_collection(coll_name)
        try:
            collection.insert_one(data)
        except DuplicateKeyError:
            if not insert_ignore:
                raise  # bare raise keeps the original traceback
            return 0
        return 1

    def find_id_is_exist(self, coll_name, _id):
        """
        Check whether a document with the given ``_id`` exists.

        :param coll_name: collection name
        :param _id: id to look up
        :return: True when a document was found, otherwise False
        """
        # One match is enough, so ask for a single document.
        for _ in self.find(coll_name, {"_id": _id}, limit=1):
            return True
        return False
|
131
|
-
|
132
|
-
|
133
|
-
if __name__ == '__main__':
    # Manual check against a local MongoDB: report whether two ids exist
    # in the "test" collection.  To seed the collection first:
    #   mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
    mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
    for doc_id in ("1", "11"):
        print(mongo_db.find_id_is_exist("test", doc_id))
|
xtnkk_tools/db/MongoDB.py
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# MongoDBPro
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/17 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|
11
|
-
from xtn_tools_pro.tools_time import *
|
12
|
-
from urllib import parse
|
13
|
-
from pymongo import MongoClient as _MongoClient
|
14
|
-
from pymongo.database import Database as _Database
|
15
|
-
from typing import List, Dict, Optional
|
16
|
-
from pymongo.collection import Collection as _Collection
|
17
|
-
from pymongo.errors import DuplicateKeyError, BulkWriteError
|
18
|
-
|
19
|
-
|
20
|
-
class MongoDBPro:
    """Small convenience wrapper around a ``pymongo.MongoClient``."""

    def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
        """
        Create the client and select the working database.

        :param ip: MongoDB host (used when ``url`` is not given)
        :param port: MongoDB port (used when ``url`` is not given)
        :param db: database name; also used as ``authSource`` in host/port mode
        :param user_name: user name (host/port mode)
        :param user_pass: password (host/port mode)
        :param url: connection URL; when given it takes precedence
        :param kwargs: forwarded to the client in URL mode only
        """
        if url:
            self.client = _MongoClient(url, **kwargs)
        else:
            self.client = _MongoClient(host=ip,
                                       port=port,
                                       username=user_name,
                                       password=user_pass,
                                       authSource=db)

        self.db = self.get_database(db)

    @classmethod
    def from_url(cls, url, **kwargs):
        """
        Build an instance from a ``mongodb://`` URL.

        :raises Exception: when the URL scheme is not ``mongodb``
        """
        url_parsed = parse.urlparse(url)
        # Scheme of the URL, e.g. "mongodb".
        db_type = url_parsed.scheme.strip()
        if db_type != "mongodb":
            raise Exception(
                "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
                    url
                ))
        return cls(url=url, **kwargs)

    def get_database(self, database, **kwargs) -> _Database:
        """Return the database object for the given database name."""
        return self.client.get_database(database, **kwargs)

    def get_collection(self, coll_name, **kwargs) -> _Collection:
        """Return the collection object for the given collection name."""
        return self.db.get_collection(coll_name, **kwargs)

    def run_command(self, command: Dict):
        """
        Run a raw database command on the selected database.

        Reference: https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
        """
        return self.db.command(command)

    # The return annotation used to be ``List[Dict]``, but this function
    # contains ``yield`` and therefore returns a generator, so the misleading
    # annotation was dropped.
    def find(self, coll_name: str, condition: Optional[Dict] = None,
             limit: int = 0, **kwargs):
        """
        Lazily yield the documents matching ``condition``.

        This is a generator: the ``find`` command is sent when iteration
        starts and further batches are requested with ``getMore``.

        :param coll_name: collection name
        :param condition: query filter, e.g. {"name": "John"} or {"_id": "xxxxx"}
        :param limit: value of the command's ``limit`` field
        :param kwargs: extra fields merged into the ``find`` command;
                       ``batchSize`` is also reused for ``getMore`` (default 100)
        """
        condition = {} if condition is None else condition
        command = {"find": coll_name, "filter": condition, "limit": limit}
        command.update(kwargs)
        result = self.run_command(command)
        cursor = result["cursor"]
        cursor_id = cursor["id"]
        while True:
            # The first reply carries "firstBatch", later ones "nextBatch".
            for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
                yield document
            if cursor_id == 0:
                # A cursor id of 0 means there is nothing left to fetch.
                break
            result = self.run_command(
                {
                    "getMore": cursor_id,  # fetch the next batch for this cursor id
                    "collection": coll_name,
                    "batchSize": kwargs.get("batchSize", 100),
                }
            )
            cursor = result["cursor"]
            cursor_id = cursor["id"]

    def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
                     is_add_create_time=False,
                     is_add_create_time_field_name="create_dt"):
        """
        Insert a single document.

        Note that ``data`` is modified in place when ``is_add_create_time``
        is True (the timestamp field is written into the caller's dict).

        :param coll_name: collection name
        :param data: document to insert
        :param insert_ignore: when True, a duplicate-key conflict returns 0 instead of raising
        :param is_add_create_time: when True, add a creation timestamp field
        :param is_add_create_time_field_name: name of that field (default ``create_dt``)
        :return: number of inserted documents (1, or 0 for an ignored duplicate)
        :raises DuplicateKeyError: on a key conflict when ``insert_ignore`` is False
        """
        if is_add_create_time:
            data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
        collection = self.get_collection(coll_name)
        try:
            collection.insert_one(data)
        except DuplicateKeyError:
            if not insert_ignore:
                raise  # bare raise keeps the original traceback
            return 0
        return 1

    def find_id_is_exist(self, coll_name, _id):
        """
        Check whether a document with the given ``_id`` exists.

        :param coll_name: collection name
        :param _id: id to look up
        :return: True when a document was found, otherwise False
        """
        # One match is enough, so ask for a single document.
        for _ in self.find(coll_name, {"_id": _id}, limit=1):
            return True
        return False
|
131
|
-
|
132
|
-
|
133
|
-
if __name__ == "__main__":
    # Nothing is executed when this module is run directly.  Example usage,
    # kept for reference only:
    #   mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
    #   mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
    #   print(mongo_db.find_id_is_exist("test", "1"))
    #   print(mongo_db.find_id_is_exist("test", "11"))
    pass
|
xtnkk_tools/db/__init__.py
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# 程序说明xxxxxxxxxxxxxxxxxxx
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/18 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|