xtn-tools-pro 1.0.0.0.1__py3-none-any.whl → 1.0.0.0.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- xtn_tools_pro/db/RedisDB.py +47 -0
- xtn_tools_pro/proxy/XiaoXiangProxy.py +207 -0
- {xtnkk_tools → xtn_tools_pro/proxy}/__init__.py +2 -2
- xtn_tools_pro/tools_flie.py +1 -1
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/METADATA +2 -1
- xtn_tools_pro-1.0.0.0.3.dist-info/RECORD +14 -0
- xtn_tools_pro-1.0.0.0.1.dist-info/RECORD +0 -25
- xtnkk_tools/MongoDB.py +0 -137
- xtnkk_tools/db/MongoDB.py +0 -138
- xtnkk_tools/db/__init__.py +0 -10
- xtnkk_tools/tools.py +0 -106
- xtnkk_tools/tools_time.py +0 -142
- xtnkk_tools/update.py +0 -23
- xtnkk_tools_pro/__init__.py +0 -10
- xtnkk_tools_pro/db/MongoDB.py +0 -138
- xtnkk_tools_pro/db/RedisDB.py +0 -118
- xtnkk_tools_pro/db/__init__.py +0 -10
- xtnkk_tools_pro/tools.py +0 -106
- xtnkk_tools_pro/tools_time.py +0 -142
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/LICENSE +0 -0
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/WHEEL +0 -0
- {xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/top_level.txt +0 -0
xtn_tools_pro/db/RedisDB.py
CHANGED
@@ -27,6 +27,9 @@ class RedisDBPro:
|
|
27
27
|
self.__redis = None
|
28
28
|
self.get_connect()
|
29
29
|
|
30
|
+
def __del__(self):
|
31
|
+
self.__redis.close()
|
32
|
+
|
30
33
|
@classmethod
|
31
34
|
def from_url(cls, url):
|
32
35
|
"""
|
@@ -110,9 +113,53 @@ class RedisDBPro:
|
|
110
113
|
else:
|
111
114
|
return self._redis.sadd(table, values)
|
112
115
|
|
116
|
+
def incr(self, key):
|
117
|
+
"""
|
118
|
+
对一个键的值进行自增操作
|
119
|
+
:param key: 需要自增的key
|
120
|
+
:return:
|
121
|
+
"""
|
122
|
+
return self._redis.incr(key)
|
123
|
+
|
124
|
+
def get_all_key(self, path):
|
125
|
+
"""
|
126
|
+
获取所有的key
|
127
|
+
常用的path:前缀为test的key test*,中间为test的key *test*
|
128
|
+
:param path:
|
129
|
+
:return:
|
130
|
+
"""
|
131
|
+
return list(self._redis.scan_iter(path))
|
132
|
+
|
133
|
+
def get(self, table):
|
134
|
+
return self._redis.get(table)
|
135
|
+
|
136
|
+
def set(self, table, value, **kwargs):
|
137
|
+
"""
|
138
|
+
字符串 set
|
139
|
+
:param table: 表
|
140
|
+
:param value: 值
|
141
|
+
:param kwargs: 参数解释为chatgpt提供
|
142
|
+
:param kwargs: ex(可选):设置键的过期时间,以秒为单位。例如,ex=10表示键将在10秒后过期
|
143
|
+
:param kwargs: px(可选):设置键的过期时间,以毫秒为单位。例如,px=10000表示键将在10秒后过期
|
144
|
+
:param kwargs: nx(可选):如果设置为True,则只有在键不存在时才设置键的值
|
145
|
+
:param kwargs: xx(可选):如果设置为True,则只有在键已存在时才设置键的值
|
146
|
+
:param kwargs: kepp_ttl(可选):如果设置为True,则保留键的过期时间。仅当键已存在且设置了过期时间时才有效
|
147
|
+
:param kwargs: exat(可选):设置键的过期时间,以UNIX时间戳表示。
|
148
|
+
:param kwargs: pxat(可选):设置键的过期时间,以毫秒级的UNIX时间戳表示。
|
149
|
+
:param kwargs: replace(可选):如果设置为True,则无论键是否存在,都会设置键的值。
|
150
|
+
:return:
|
151
|
+
"""
|
152
|
+
return self._redis.set(table, value, **kwargs)
|
153
|
+
|
154
|
+
def delete(self, table):
|
155
|
+
return self._redis.delete(table)
|
156
|
+
|
113
157
|
|
114
158
|
if __name__ == '__main__':
|
115
159
|
pass
|
116
160
|
# r = RedisDBPro(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk")
|
161
|
+
r = RedisDBPro.from_url('redis://:xtn-kk@127.0.0.1:6379/0')
|
117
162
|
# status = r.sadd("test_redis_pro", [1, 2, 3, 4, 5, "6", "7"])
|
118
163
|
# print(status)
|
164
|
+
# print(r.get_all_key("*http*"))
|
165
|
+
print(r.delete("test_redis_pro"))
|
xtn_tools_pro/proxy/XiaoXiangProxy.py
ADDED
@@ -0,0 +1,207 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 小象代理专用
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
import requests, time, random
|
12
|
+
from xtn_tools_pro.db.RedisDB import RedisDBPro
|
13
|
+
from xtn_tools_pro.tools_time import get_time_now_timestamp, get_time_now_day59_timestamp
|
14
|
+
|
15
|
+
import warnings
|
16
|
+
from urllib3.exceptions import InsecureRequestWarning
|
17
|
+
|
18
|
+
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
19
|
+
|
20
|
+
|
21
|
+
class ProxyPool:
|
22
|
+
def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="XiaoXiangProxy",
|
23
|
+
XiaoXiangProxyAppKey=None, XiaoXiangProxyAppSecret=None, usage_cnt=100, usage_time=100):
|
24
|
+
"""
|
25
|
+
小象代理专用
|
26
|
+
:param ip: redis 数据库 ip
|
27
|
+
:param port: redis 数据库 端口
|
28
|
+
:param db: redis 数据库 db
|
29
|
+
:param user_pass: redis 数据库 密码
|
30
|
+
:param redis_proxy_name: redis 数据库 用于存储代理的key
|
31
|
+
:param XiaoXiangProxyAppKey: 小象代理 应用id appKey
|
32
|
+
:param XiaoXiangProxyAppSecret: 小象代理 应用密码 appSecret
|
33
|
+
:param usage_cnt: 每个代理最长使用次数 单位秒 维护代理时用
|
34
|
+
:param usage_time: 每个代理最长使用时间 单位秒 维护代理时用
|
35
|
+
:param is_log: 是否记录日志
|
36
|
+
"""
|
37
|
+
if not XiaoXiangProxyAppSecret or not XiaoXiangProxyAppKey:
|
38
|
+
raise Exception("应用密码或应用id 不能为空")
|
39
|
+
|
40
|
+
r = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
|
41
|
+
self.__redis_pool = r
|
42
|
+
self.__redisProxyName = redis_proxy_name
|
43
|
+
self.__XiaoXiangProxyAPI = "https://api.xiaoxiangdaili.com/ip/get?appKey={appKey}&appSecret={appSecret}&cnt=&wt=json".format(
|
44
|
+
appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
|
45
|
+
self.__XiaoXiangAutoBinding = "https://api.xiaoxiangdaili.com/app/bindIp?appKey={appKey}&appSecret={appSecret}&i=1".format(
|
46
|
+
appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
|
47
|
+
|
48
|
+
# 获取当天0点时间戳
|
49
|
+
self.__now_day59_timestamp = get_time_now_day59_timestamp()
|
50
|
+
self.__usage_cnt = usage_cnt
|
51
|
+
self.__usage_time = usage_time
|
52
|
+
|
53
|
+
# if is_log:
|
54
|
+
# # 日志
|
55
|
+
# nowDate = str(datetime.datetime.now().strftime('%Y_%m_%d'))
|
56
|
+
# logger.add(loggerPath.format(t=nowDate))
|
57
|
+
|
58
|
+
def __log(self, text):
|
59
|
+
"""
|
60
|
+
记录日志
|
61
|
+
:param text:
|
62
|
+
:return:
|
63
|
+
"""
|
64
|
+
print(text)
|
65
|
+
|
66
|
+
def __check_proxy(self):
|
67
|
+
"""
|
68
|
+
维护检查代理,删除无用代理
|
69
|
+
删除标准:1.代理使用超过xx次;2.使用时间超过xx秒;3.被爬虫标记使用次数为 999999 会被删除
|
70
|
+
:return:
|
71
|
+
"""
|
72
|
+
proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
|
73
|
+
for proxy_val in proxy_val_list:
|
74
|
+
# 获取时间
|
75
|
+
time_out = proxy_val.split(":")[-1]
|
76
|
+
# 获取使用次数
|
77
|
+
proxy_val_count = self.__redis_pool.get(proxy_val)
|
78
|
+
if int(time_out) + self.__usage_time < get_time_now_timestamp(is_time_10=True):
|
79
|
+
del_state = self.__redis_pool.delete(proxy_val)
|
80
|
+
self.__log(
|
81
|
+
"当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
|
82
|
+
format(proxy_val=proxy_val, del_state=del_state,
|
83
|
+
time_out=time_out, py_time=get_time_now_timestamp(is_time_10=True)))
|
84
|
+
elif int(proxy_val_count) >= self.__usage_cnt:
|
85
|
+
del_state = self.__redis_pool.delete(proxy_val)
|
86
|
+
self.__log(
|
87
|
+
"当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(proxy_val=proxy_val,
|
88
|
+
text="当前代理被爬虫标记为不可用" if proxy_val_count >= 999999 else "当前代理已超过使用时间",
|
89
|
+
del_state=del_state))
|
90
|
+
|
91
|
+
def __get_proxy_length(self):
|
92
|
+
"""
|
93
|
+
获取代理数
|
94
|
+
:return:
|
95
|
+
"""
|
96
|
+
proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
|
97
|
+
return len(proxy_val_list)
|
98
|
+
|
99
|
+
def __incr_proxy(self, proxy_val):
|
100
|
+
"""
|
101
|
+
自增代理使用次数
|
102
|
+
:param proxy_val: 代理
|
103
|
+
:return:
|
104
|
+
"""
|
105
|
+
proxy_val_con = self.__redis_pool.incr(proxy_val)
|
106
|
+
return proxy_val_con
|
107
|
+
|
108
|
+
def __get_api_proxy(self):
|
109
|
+
"""
|
110
|
+
通过接口获取小象代理,并存储至数据库
|
111
|
+
响应:
|
112
|
+
{"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
|
113
|
+
{"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
|
114
|
+
:return:
|
115
|
+
"""
|
116
|
+
while True:
|
117
|
+
self.__check_proxy()
|
118
|
+
try:
|
119
|
+
response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
|
120
|
+
if response.status_code == 200:
|
121
|
+
if response.json().get("msg") == "请求过于频繁":
|
122
|
+
self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
|
123
|
+
time.sleep(2)
|
124
|
+
continue
|
125
|
+
# 获取data
|
126
|
+
proxy_data_list = response.json().get("data", [])
|
127
|
+
if not proxy_data_list:
|
128
|
+
self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
|
129
|
+
time.sleep(2)
|
130
|
+
continue
|
131
|
+
else:
|
132
|
+
for data in proxy_data_list:
|
133
|
+
ip = "http://{ip}".format(ip=data.get("ip"))
|
134
|
+
port = data.get("port")
|
135
|
+
time_out = get_time_now_timestamp(is_time_10=True)
|
136
|
+
proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
|
137
|
+
redis_proxy_name=self.__redisProxyName,
|
138
|
+
ip=ip,
|
139
|
+
port=port,
|
140
|
+
timeOut=time_out,
|
141
|
+
)
|
142
|
+
proxy_key_con = self.__incr_proxy(proxy_key)
|
143
|
+
self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
|
144
|
+
proxy_key_con=proxy_key_con))
|
145
|
+
return True # 获取成功
|
146
|
+
else:
|
147
|
+
self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
|
148
|
+
time.sleep(2)
|
149
|
+
continue
|
150
|
+
except Exception as e:
|
151
|
+
self.__log("获取小象代理报错:{e}".format(e=e))
|
152
|
+
|
153
|
+
def run(self):
|
154
|
+
try:
|
155
|
+
# 手动绑定终端IP
|
156
|
+
response = requests.get(url=self.__XiaoXiangAutoBinding, verify=False, timeout=3)
|
157
|
+
self.__log(response.text)
|
158
|
+
while True:
|
159
|
+
# 检查代理
|
160
|
+
self.__check_proxy()
|
161
|
+
# 获取小象代理
|
162
|
+
self.__get_api_proxy()
|
163
|
+
time.sleep(1)
|
164
|
+
# 判断时间是否超过当前23:59分时间戳
|
165
|
+
if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
|
166
|
+
self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
|
167
|
+
break
|
168
|
+
except Exception as eee:
|
169
|
+
self.__log("程序异常报错:{eee}".format(eee=eee))
|
170
|
+
# self.__del__()
|
171
|
+
|
172
|
+
def get_proxy(self):
|
173
|
+
"""
|
174
|
+
从代理池中获取代理
|
175
|
+
:return:
|
176
|
+
"""
|
177
|
+
try:
|
178
|
+
while True:
|
179
|
+
proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
|
180
|
+
if proxy_val_list:
|
181
|
+
proxy_val = random.choice(proxy_val_list)
|
182
|
+
proxy_v = ":".join(str(proxy_val).split(":")[1:-1])
|
183
|
+
self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
|
184
|
+
return proxy_v
|
185
|
+
else:
|
186
|
+
self.__log("暂无代理,等待中")
|
187
|
+
time.sleep(2)
|
188
|
+
except Exception as e:
|
189
|
+
self.__log("从代理池中获取代理:{e}".format(e=e))
|
190
|
+
|
191
|
+
def set_proxy_error(self, proxy_v):
|
192
|
+
"""
|
193
|
+
爬虫手动传入代理,设置为 999999 不可用
|
194
|
+
:param proxyV:
|
195
|
+
:return:
|
196
|
+
"""
|
197
|
+
try:
|
198
|
+
self.__redis_pool.set(proxy_v, "999999")
|
199
|
+
self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=proxy_v))
|
200
|
+
except Exception as e:
|
201
|
+
self.__log("爬虫手动传入代理:{e}".format(e=e))
|
202
|
+
|
203
|
+
|
204
|
+
if __name__ == '__main__':
|
205
|
+
p = ProxyPool(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk", XiaoXiangProxyAppKey="1101384562594172928",
|
206
|
+
XiaoXiangProxyAppSecret="PJ0QBWML")
|
207
|
+
print(p.get_proxy())
|
{xtnkk_tools → xtn_tools_pro/proxy}/__init__.py
CHANGED
@@ -2,9 +2,9 @@
|
|
2
2
|
# -*- coding: utf-8 -*-
|
3
3
|
|
4
4
|
# 说明:
|
5
|
-
#
|
5
|
+
# 程序说明xxxxxxxxxxxxxxxxxxx
|
6
6
|
# History:
|
7
7
|
# Date Author Version Modification
|
8
8
|
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/
|
9
|
+
# 2024/4/27 xiatn V00.01.000 新建
|
10
10
|
# --------------------------------------------------------------------------------------------------
|
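xtn_tools_pro/tools_flie.py
CHANGED
[The +1 -1 diff body for this file is missing from this extract; the hunks that follow belong to METADATA.]
{xtn_tools_pro-1.0.0.0.1.dist-info → xtn_tools_pro-1.0.0.0.3.dist-info}/METADATA
CHANGED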
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: xtn-tools-pro
|
3
|
-
Version: 1.0.0.0.1
|
3
|
+
Version: 1.0.0.0.3
|
4
4
|
Summary: xtn 开发工具
|
5
5
|
Author: xtn
|
6
6
|
Author-email: czw011122@163.com
|
@@ -10,5 +10,6 @@ Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: pymongo
|
12
12
|
Requires-Dist: redis
|
13
|
+
Requires-Dist: requests
|
13
14
|
|
14
15
|
xtnkk-tools
|
xtn_tools_pro-1.0.0.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
+
xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
|
3
|
+
xtn_tools_pro/tools_flie.py,sha256=B_P3J_R-nRLt_IFutnOVrBRGf6_SZ_cXoIoeaT9B7tk,1512
|
4
|
+
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
+
xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
6
|
+
xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6053
|
7
|
+
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
8
|
+
xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=xrEBJZ6Cjuh6IBZVB17oXHwByaeKcWVUSSWSg17tujE,9842
|
9
|
+
xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
|
10
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/METADATA,sha256=VG4Bs3muLvi85SaWcP3Tw3q4TFfafKDtZhbQ-x9CpRw,358
|
12
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
13
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
14
|
+
xtn_tools_pro-1.0.0.0.3.dist-info/RECORD,,
|
xtn_tools_pro-1.0.0.0.1.dist-info/RECORD
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
2
|
-
xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
|
3
|
-
xtn_tools_pro/tools_flie.py,sha256=KuA1sowK31LuOfQMPRsQYPgxr8y7491caepRFYOwqG0,1507
|
4
|
-
xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
|
5
|
-
xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
6
|
-
xtn_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
|
7
|
-
xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
8
|
-
xtnkk_tools/MongoDB.py,sha256=2mwln6JPfu5N1N8Hbh6KvN6sED-KPTrOteCBHVFjvwM,5497
|
9
|
-
xtnkk_tools/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
10
|
-
xtnkk_tools/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
|
11
|
-
xtnkk_tools/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
|
12
|
-
xtnkk_tools/update.py,sha256=VygnKO9dXo02JyUEkpbJoBE6BceYARZEn-O1i6AO6E0,911
|
13
|
-
xtnkk_tools/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
14
|
-
xtnkk_tools/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
15
|
-
xtnkk_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
|
16
|
-
xtnkk_tools_pro/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
|
17
|
-
xtnkk_tools_pro/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
|
18
|
-
xtnkk_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
|
19
|
-
xtnkk_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
|
20
|
-
xtnkk_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
|
21
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/METADATA,sha256=cGqGoLCT0OxwP7KI82OKR1JUF9d9XnNk_Xg4DCgOSvU,333
|
23
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
24
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
25
|
-
xtn_tools_pro-1.0.0.0.1.dist-info/RECORD,,
|
xtnkk_tools/MongoDB.py
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# MongoDBPro
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/17 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|
11
|
-
from xtn_tools_pro.tools_time import *
|
12
|
-
from urllib import parse
|
13
|
-
from typing import List, Dict, Optional
|
14
|
-
from pymongo import MongoClient
|
15
|
-
from pymongo.database import Database
|
16
|
-
from pymongo.collection import Collection
|
17
|
-
from pymongo.errors import DuplicateKeyError, BulkWriteError
|
18
|
-
|
19
|
-
|
20
|
-
class MongoDBPro:
|
21
|
-
def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
|
22
|
-
if url:
|
23
|
-
self.client = MongoClient(url, **kwargs)
|
24
|
-
else:
|
25
|
-
self.client = MongoClient(host=ip,
|
26
|
-
port=port,
|
27
|
-
username=user_name,
|
28
|
-
password=user_pass,
|
29
|
-
authSource=db)
|
30
|
-
|
31
|
-
self.db = self.get_database(db)
|
32
|
-
|
33
|
-
@classmethod
|
34
|
-
def from_url(cls, url, **kwargs):
|
35
|
-
url_parsed = parse.urlparse(url)
|
36
|
-
# 获取 URL的协议
|
37
|
-
db_type = url_parsed.scheme.strip()
|
38
|
-
if db_type != "mongodb":
|
39
|
-
raise Exception(
|
40
|
-
"url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
|
41
|
-
url
|
42
|
-
))
|
43
|
-
return cls(url=url, **kwargs)
|
44
|
-
|
45
|
-
def get_database(self, database, **kwargs) -> Database:
|
46
|
-
"""
|
47
|
-
根据db名获取数据库对象
|
48
|
-
"""
|
49
|
-
return self.client.get_database(database, **kwargs)
|
50
|
-
|
51
|
-
def get_collection(self, coll_name, **kwargs) -> Collection:
|
52
|
-
"""
|
53
|
-
根据集合名获取集合对象
|
54
|
-
"""
|
55
|
-
return self.db.get_collection(coll_name, **kwargs)
|
56
|
-
|
57
|
-
def run_command(self, command: Dict):
|
58
|
-
"""
|
59
|
-
参考文档 https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
|
60
|
-
"""
|
61
|
-
return self.db.command(command)
|
62
|
-
|
63
|
-
def find(self, coll_name: str, condition: Optional[Dict] = None,
|
64
|
-
limit: int = 0, **kwargs) -> List[Dict]:
|
65
|
-
"""
|
66
|
-
find
|
67
|
-
coll_name:集合名称
|
68
|
-
condition:查询条件 例如:{"name": "John"}、{"_id": "xxxxx"}
|
69
|
-
"""
|
70
|
-
condition = {} if condition is None else condition
|
71
|
-
command = {"find": coll_name, "filter": condition, "limit": limit}
|
72
|
-
command.update(kwargs)
|
73
|
-
result = self.run_command(command)
|
74
|
-
cursor = result["cursor"]
|
75
|
-
cursor_id = cursor["id"]
|
76
|
-
while True:
|
77
|
-
for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
|
78
|
-
# 处理数据
|
79
|
-
yield document
|
80
|
-
if cursor_id == 0:
|
81
|
-
# 游标已经完全遍历,没有剩余的结果可供获取
|
82
|
-
# 游标的生命周期已经结束,例如在查询会话结束后。
|
83
|
-
# 游标被显式地关闭,例如使用 db.killCursor() 命令关闭游标。
|
84
|
-
break
|
85
|
-
result = self.run_command(
|
86
|
-
{
|
87
|
-
"getMore": cursor_id, # 类似于mongo命令行中的it命令,通过索引id用于获取下一批结果
|
88
|
-
"collection": coll_name,
|
89
|
-
"batchSize": kwargs.get("batchSize", 100),
|
90
|
-
}
|
91
|
-
)
|
92
|
-
# 覆盖原来的参数
|
93
|
-
cursor = result["cursor"]
|
94
|
-
cursor_id = cursor["id"]
|
95
|
-
# print("下一批获取")
|
96
|
-
|
97
|
-
def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
|
98
|
-
is_add_create_time=False,
|
99
|
-
is_add_create_time_field_name="create_dt"):
|
100
|
-
"""
|
101
|
-
添加单条数据
|
102
|
-
coll_name: 集合名
|
103
|
-
data: 单条数据
|
104
|
-
insert_ignore: 索引冲突是否忽略 默认False
|
105
|
-
is_add_create_time: 是否在数据中添加一个创建数据10时间戳字段 默认False不创建
|
106
|
-
is_add_create_time_field_name: 自定义创建数据时间戳字段名:默认:create_dt
|
107
|
-
Returns: 插入成功的行数
|
108
|
-
"""
|
109
|
-
if is_add_create_time:
|
110
|
-
data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
|
111
|
-
collection = self.get_collection(coll_name)
|
112
|
-
try:
|
113
|
-
collection.insert_one(data)
|
114
|
-
except DuplicateKeyError as e:
|
115
|
-
if not insert_ignore:
|
116
|
-
raise e
|
117
|
-
return 0
|
118
|
-
return 1
|
119
|
-
|
120
|
-
def find_id_is_exist(self, coll_name, _id):
|
121
|
-
"""
|
122
|
-
根据id查询id是否存在
|
123
|
-
:param _id:id
|
124
|
-
:return: 存在返回True 否则False
|
125
|
-
"""
|
126
|
-
condition = {"_id": _id}
|
127
|
-
status = list(self.find(coll_name, condition))
|
128
|
-
if status:
|
129
|
-
return True
|
130
|
-
return False
|
131
|
-
|
132
|
-
|
133
|
-
if __name__ == '__main__':
|
134
|
-
mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
|
135
|
-
# mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
|
136
|
-
print(mongo_db.find_id_is_exist("test", "1"))
|
137
|
-
print(mongo_db.find_id_is_exist("test", "11"))
|
xtnkk_tools/db/MongoDB.py
DELETED
@@ -1,138 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# MongoDBPro
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/17 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|
11
|
-
from xtn_tools_pro.tools_time import *
|
12
|
-
from urllib import parse
|
13
|
-
from pymongo import MongoClient as _MongoClient
|
14
|
-
from pymongo.database import Database as _Database
|
15
|
-
from typing import List, Dict, Optional
|
16
|
-
from pymongo.collection import Collection as _Collection
|
17
|
-
from pymongo.errors import DuplicateKeyError, BulkWriteError
|
18
|
-
|
19
|
-
|
20
|
-
class MongoDBPro:
|
21
|
-
def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
|
22
|
-
if url:
|
23
|
-
self.client = _MongoClient(url, **kwargs)
|
24
|
-
else:
|
25
|
-
self.client = _MongoClient(host=ip,
|
26
|
-
port=port,
|
27
|
-
username=user_name,
|
28
|
-
password=user_pass,
|
29
|
-
authSource=db)
|
30
|
-
|
31
|
-
self.db = self.get_database(db)
|
32
|
-
|
33
|
-
@classmethod
|
34
|
-
def from_url(cls, url, **kwargs):
|
35
|
-
url_parsed = parse.urlparse(url)
|
36
|
-
# 获取 URL的协议
|
37
|
-
db_type = url_parsed.scheme.strip()
|
38
|
-
if db_type != "mongodb":
|
39
|
-
raise Exception(
|
40
|
-
"url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
|
41
|
-
url
|
42
|
-
))
|
43
|
-
return cls(url=url, **kwargs)
|
44
|
-
|
45
|
-
def get_database(self, database, **kwargs) -> _Database:
|
46
|
-
"""
|
47
|
-
根据db名获取数据库对象
|
48
|
-
"""
|
49
|
-
return self.client.get_database(database, **kwargs)
|
50
|
-
|
51
|
-
def get_collection(self, coll_name, **kwargs) -> _Collection:
|
52
|
-
"""
|
53
|
-
根据集合名获取集合对象
|
54
|
-
"""
|
55
|
-
return self.db.get_collection(coll_name, **kwargs)
|
56
|
-
|
57
|
-
def run_command(self, command: Dict):
|
58
|
-
"""
|
59
|
-
参考文档 https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
|
60
|
-
"""
|
61
|
-
return self.db.command(command)
|
62
|
-
|
63
|
-
def find(self, coll_name: str, condition: Optional[Dict] = None,
|
64
|
-
limit: int = 0, **kwargs) -> List[Dict]:
|
65
|
-
"""
|
66
|
-
find
|
67
|
-
coll_name:集合名称
|
68
|
-
condition:查询条件 例如:{"name": "John"}、{"_id": "xxxxx"}
|
69
|
-
"""
|
70
|
-
condition = {} if condition is None else condition
|
71
|
-
command = {"find": coll_name, "filter": condition, "limit": limit}
|
72
|
-
command.update(kwargs)
|
73
|
-
result = self.run_command(command)
|
74
|
-
cursor = result["cursor"]
|
75
|
-
cursor_id = cursor["id"]
|
76
|
-
while True:
|
77
|
-
for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
|
78
|
-
# 处理数据
|
79
|
-
yield document
|
80
|
-
if cursor_id == 0:
|
81
|
-
# 游标已经完全遍历,没有剩余的结果可供获取
|
82
|
-
# 游标的生命周期已经结束,例如在查询会话结束后。
|
83
|
-
# 游标被显式地关闭,例如使用 db.killCursor() 命令关闭游标。
|
84
|
-
break
|
85
|
-
result = self.run_command(
|
86
|
-
{
|
87
|
-
"getMore": cursor_id, # 类似于mongo命令行中的it命令,通过索引id用于获取下一批结果
|
88
|
-
"collection": coll_name,
|
89
|
-
"batchSize": kwargs.get("batchSize", 100),
|
90
|
-
}
|
91
|
-
)
|
92
|
-
# 覆盖原来的参数
|
93
|
-
cursor = result["cursor"]
|
94
|
-
cursor_id = cursor["id"]
|
95
|
-
# print("下一批获取")
|
96
|
-
|
97
|
-
def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
|
98
|
-
is_add_create_time=False,
|
99
|
-
is_add_create_time_field_name="create_dt"):
|
100
|
-
"""
|
101
|
-
添加单条数据
|
102
|
-
coll_name: 集合名
|
103
|
-
data: 单条数据
|
104
|
-
insert_ignore: 索引冲突是否忽略 默认False
|
105
|
-
is_add_create_time: 是否在数据中添加一个创建数据10时间戳字段 默认False不创建
|
106
|
-
is_add_create_time_field_name: 自定义创建数据时间戳字段名:默认:create_dt
|
107
|
-
Returns: 插入成功的行数
|
108
|
-
"""
|
109
|
-
if is_add_create_time:
|
110
|
-
data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
|
111
|
-
collection = self.get_collection(coll_name)
|
112
|
-
try:
|
113
|
-
collection.insert_one(data)
|
114
|
-
except DuplicateKeyError as e:
|
115
|
-
if not insert_ignore:
|
116
|
-
raise e
|
117
|
-
return 0
|
118
|
-
return 1
|
119
|
-
|
120
|
-
def find_id_is_exist(self, coll_name, _id):
|
121
|
-
"""
|
122
|
-
根据id查询id是否存在
|
123
|
-
:param _id:id
|
124
|
-
:return: 存在返回True 否则False
|
125
|
-
"""
|
126
|
-
condition = {"_id": _id}
|
127
|
-
status = list(self.find(coll_name, condition))
|
128
|
-
if status:
|
129
|
-
return True
|
130
|
-
return False
|
131
|
-
|
132
|
-
|
133
|
-
if __name__ == '__main__':
|
134
|
-
pass
|
135
|
-
# mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
|
136
|
-
# # mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
|
137
|
-
# print(mongo_db.find_id_is_exist("test", "1"))
|
138
|
-
# print(mongo_db.find_id_is_exist("test", "11"))
|
xtnkk_tools/db/__init__.py
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
# 说明:
|
5
|
-
# 程序说明xxxxxxxxxxxxxxxxxxx
|
6
|
-
# History:
|
7
|
-
# Date Author Version Modification
|
8
|
-
# --------------------------------------------------------------------------------------------------
|
9
|
-
# 2024/4/18 xiatn V00.01.000 新建
|
10
|
-
# --------------------------------------------------------------------------------------------------
|