xtn-tools-pro 1.0.0.0.1__py3-none-any.whl → 1.0.0.0.3__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -27,6 +27,9 @@ class RedisDBPro:
27
27
  self.__redis = None
28
28
  self.get_connect()
29
29
 
30
+ def __del__(self):
31
+ self.__redis.close()
32
+
30
33
  @classmethod
31
34
  def from_url(cls, url):
32
35
  """
@@ -110,9 +113,53 @@ class RedisDBPro:
110
113
  else:
111
114
  return self._redis.sadd(table, values)
112
115
 
116
+ def incr(self, key):
117
+ """
118
+ 对一个键的值进行自增操作
119
+ :param key: 需要自增的key
120
+ :return:
121
+ """
122
+ return self._redis.incr(key)
123
+
124
+ def get_all_key(self, path):
125
+ """
126
+ 获取所有的key
127
+ 常用的path:前缀为test的key test*,中间为test的key *test*
128
+ :param path:
129
+ :return:
130
+ """
131
+ return list(self._redis.scan_iter(path))
132
+
133
+ def get(self, table):
134
+ return self._redis.get(table)
135
+
136
+ def set(self, table, value, **kwargs):
137
+ """
138
+ 字符串 set
139
+ :param table: 表
140
+ :param value: 值
141
+ :param kwargs: 参数解释为chatgpt提供
142
+ :param kwargs: ex(可选):设置键的过期时间,以秒为单位。例如,ex=10表示键将在10秒后过期
143
+ :param kwargs: px(可选):设置键的过期时间,以毫秒为单位。例如,px=10000表示键将在10秒后过期
144
+ :param kwargs: nx(可选):如果设置为True,则只有在键不存在时才设置键的值
145
+ :param kwargs: xx(可选):如果设置为True,则只有在键已存在时才设置键的值
146
+ :param kwargs: keepttl(可选):如果设置为True,则保留键的过期时间。仅当键已存在且设置了过期时间时才有效
147
+ :param kwargs: exat(可选):设置键的过期时间,以UNIX时间戳表示。
148
+ :param kwargs: pxat(可选):设置键的过期时间,以毫秒级的UNIX时间戳表示。
149
+ :param kwargs: replace:注意 redis-py 的 set() 并不接受该参数(传入会抛出 TypeError);无论键是否存在都会设置键的值本就是 set 的默认行为
150
+ :return:
151
+ """
152
+ return self._redis.set(table, value, **kwargs)
153
+
154
+ def delete(self, table):
155
+ return self._redis.delete(table)
156
+
113
157
 
114
158
  if __name__ == '__main__':
115
159
  pass
116
160
  # r = RedisDBPro(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk")
161
+ r = RedisDBPro.from_url('redis://:xtn-kk@127.0.0.1:6379/0')
117
162
  # status = r.sadd("test_redis_pro", [1, 2, 3, 4, 5, "6", "7"])
118
163
  # print(status)
164
+ # print(r.get_all_key("*http*"))
165
+ print(r.delete("test_redis_pro"))
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 小象代理专用
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
11
+ import requests, time, random
12
+ from xtn_tools_pro.db.RedisDB import RedisDBPro
13
+ from xtn_tools_pro.tools_time import get_time_now_timestamp, get_time_now_day59_timestamp
14
+
15
+ import warnings
16
+ from urllib3.exceptions import InsecureRequestWarning
17
+
18
+ warnings.filterwarnings("ignore", category=InsecureRequestWarning)
19
+
20
+
21
+ class ProxyPool:
22
+ def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="XiaoXiangProxy",
23
+ XiaoXiangProxyAppKey=None, XiaoXiangProxyAppSecret=None, usage_cnt=100, usage_time=100):
24
+ """
25
+ 小象代理专用
26
+ :param ip: redis 数据库 ip
27
+ :param port: redis 数据库 端口
28
+ :param db: redis 数据库 db
29
+ :param user_pass: redis 数据库 密码
30
+ :param redis_proxy_name: redis 数据库 用于存储代理的key
31
+ :param XiaoXiangProxyAppKey: 小象代理 应用id appKey
32
+ :param XiaoXiangProxyAppSecret: 小象代理 应用密码 appSecret
33
+ :param usage_cnt: 每个代理最多使用次数 单位次 维护代理时用
34
+ :param usage_time: 每个代理最长使用时间 单位秒 维护代理时用
35
+ :param is_log: 是否记录日志(当前未实现:构造函数暂不接受该参数,相关日志代码已被注释)
36
+ """
37
+ if not XiaoXiangProxyAppSecret or not XiaoXiangProxyAppKey:
38
+ raise Exception("应用密码或应用id 不能为空")
39
+
40
+ r = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
41
+ self.__redis_pool = r
42
+ self.__redisProxyName = redis_proxy_name
43
+ self.__XiaoXiangProxyAPI = "https://api.xiaoxiangdaili.com/ip/get?appKey={appKey}&appSecret={appSecret}&cnt=&wt=json".format(
44
+ appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
45
+ self.__XiaoXiangAutoBinding = "https://api.xiaoxiangdaili.com/app/bindIp?appKey={appKey}&appSecret={appSecret}&i=1".format(
46
+ appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
47
+
48
+ # 获取当天23:59的时间戳(run 中用于判断是否结束循环)
49
+ self.__now_day59_timestamp = get_time_now_day59_timestamp()
50
+ self.__usage_cnt = usage_cnt
51
+ self.__usage_time = usage_time
52
+
53
+ # if is_log:
54
+ # # 日志
55
+ # nowDate = str(datetime.datetime.now().strftime('%Y_%m_%d'))
56
+ # logger.add(loggerPath.format(t=nowDate))
57
+
58
+ def __log(self, text):
59
+ """
60
+ 记录日志
61
+ :param text:
62
+ :return:
63
+ """
64
+ print(text)
65
+
66
+ def __check_proxy(self):
67
+ """
68
+ 维护检查代理,删除无用代理
69
+ 删除标准:1.代理使用超过xx次;2.使用时间超过xx秒;3.被爬虫标记使用次数为 999999 会被删除
70
+ :return:
71
+ """
72
+ proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
73
+ for proxy_val in proxy_val_list:
74
+ # 获取时间
75
+ time_out = proxy_val.split(":")[-1]
76
+ # 获取使用次数
77
+ proxy_val_count = self.__redis_pool.get(proxy_val)
78
+ if int(time_out) + self.__usage_time < get_time_now_timestamp(is_time_10=True):
79
+ del_state = self.__redis_pool.delete(proxy_val)
80
+ self.__log(
81
+ "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
82
+ format(proxy_val=proxy_val, del_state=del_state,
83
+ time_out=time_out, py_time=get_time_now_timestamp(is_time_10=True)))
84
+ elif int(proxy_val_count) >= self.__usage_cnt:
85
+ del_state = self.__redis_pool.delete(proxy_val)
86
+ self.__log(
87
+ "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(proxy_val=proxy_val,
88
+ text="当前代理被爬虫标记为不可用" if proxy_val_count >= 999999 else "当前代理已超过使用时间",
89
+ del_state=del_state))
90
+
91
+ def __get_proxy_length(self):
92
+ """
93
+ 获取代理数
94
+ :return:
95
+ """
96
+ proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
97
+ return len(proxy_val_list)
98
+
99
+ def __incr_proxy(self, proxy_val):
100
+ """
101
+ 自增代理使用次数
102
+ :param proxy_val: 代理
103
+ :return:
104
+ """
105
+ proxy_val_con = self.__redis_pool.incr(proxy_val)
106
+ return proxy_val_con
107
+
108
+ def __get_api_proxy(self):
109
+ """
110
+ 通过接口获取小象代理,并存储至数据库
111
+ 响应:
112
+ {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
113
+ {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
114
+ :return:
115
+ """
116
+ while True:
117
+ self.__check_proxy()
118
+ try:
119
+ response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
120
+ if response.status_code == 200:
121
+ if response.json().get("msg") == "请求过于频繁":
122
+ self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
123
+ time.sleep(2)
124
+ continue
125
+ # 获取data
126
+ proxy_data_list = response.json().get("data", [])
127
+ if not proxy_data_list:
128
+ self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
129
+ time.sleep(2)
130
+ continue
131
+ else:
132
+ for data in proxy_data_list:
133
+ ip = "http://{ip}".format(ip=data.get("ip"))
134
+ port = data.get("port")
135
+ time_out = get_time_now_timestamp(is_time_10=True)
136
+ proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
137
+ redis_proxy_name=self.__redisProxyName,
138
+ ip=ip,
139
+ port=port,
140
+ timeOut=time_out,
141
+ )
142
+ proxy_key_con = self.__incr_proxy(proxy_key)
143
+ self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
144
+ proxy_key_con=proxy_key_con))
145
+ return True # 获取成功
146
+ else:
147
+ self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
148
+ time.sleep(2)
149
+ continue
150
+ except Exception as e:
151
+ self.__log("获取小象代理报错:{e}".format(e=e))
152
+
153
+ def run(self):
154
+ try:
155
+ # 手动绑定终端IP
156
+ response = requests.get(url=self.__XiaoXiangAutoBinding, verify=False, timeout=3)
157
+ self.__log(response.text)
158
+ while True:
159
+ # 检查代理
160
+ self.__check_proxy()
161
+ # 获取小象代理
162
+ self.__get_api_proxy()
163
+ time.sleep(1)
164
+ # 判断时间是否超过当前23:59分时间戳
165
+ if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
166
+ self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
167
+ break
168
+ except Exception as eee:
169
+ self.__log("程序异常报错:{eee}".format(eee=eee))
170
+ # self.__del__()
171
+
172
+ def get_proxy(self):
173
+ """
174
+ 从代理池中获取代理
175
+ :return:
176
+ """
177
+ try:
178
+ while True:
179
+ proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
180
+ if proxy_val_list:
181
+ proxy_val = random.choice(proxy_val_list)
182
+ proxy_v = ":".join(str(proxy_val).split(":")[1:-1])
183
+ self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
184
+ return proxy_v
185
+ else:
186
+ self.__log("暂无代理,等待中")
187
+ time.sleep(2)
188
+ except Exception as e:
189
+ self.__log("从代理池中获取代理:{e}".format(e=e))
190
+
191
+ def set_proxy_error(self, proxy_v):
192
+ """
193
+ 爬虫手动传入代理,设置为 999999 不可用
194
+ :param proxy_v: 需要标记为不可用的代理在 redis 中对应的 key
195
+ :return:
196
+ """
197
+ try:
198
+ self.__redis_pool.set(proxy_v, "999999")
199
+ self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=proxy_v))
200
+ except Exception as e:
201
+ self.__log("爬虫手动传入代理:{e}".format(e=e))
202
+
203
+
204
+ if __name__ == '__main__':
205
+ p = ProxyPool(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk", XiaoXiangProxyAppKey="1101384562594172928",
206
+ XiaoXiangProxyAppSecret="PJ0QBWML")
207
+ print(p.get_proxy())
@@ -2,9 +2,9 @@
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
4
  # 说明:
5
- # __init__
5
+ # 程序说明xxxxxxxxxxxxxxxxxxx
6
6
  # History:
7
7
  # Date Author Version Modification
8
8
  # --------------------------------------------------------------------------------------------------
9
- # 2024/4/17 xiatn V00.01.000 新建
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
10
  # --------------------------------------------------------------------------------------------------
@@ -21,7 +21,7 @@ def get_file_extension(file_name):
21
21
  return file_extension
22
22
 
23
23
 
24
- def get_check_filename(file_name):
24
+ def get_file_check_filename(file_name):
25
25
  """
26
26
  传入文件名返回一个合法的文件名 会替换掉一些特殊符号 常用于爬虫写文件时文件名中带有特殊符号的情况...
27
27
  :param file_name: 文件名
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.1
3
+ Version: 1.0.0.0.3
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -10,5 +10,6 @@ Description-Content-Type: text/markdown
10
10
  License-File: LICENSE
11
11
  Requires-Dist: pymongo
12
12
  Requires-Dist: redis
13
+ Requires-Dist: requests
13
14
 
14
15
  xtnkk-tools
@@ -0,0 +1,14 @@
1
+ xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
2
+ xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
3
+ xtn_tools_pro/tools_flie.py,sha256=B_P3J_R-nRLt_IFutnOVrBRGf6_SZ_cXoIoeaT9B7tk,1512
4
+ xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
5
+ xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
6
+ xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6053
7
+ xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
8
+ xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=xrEBJZ6Cjuh6IBZVB17oXHwByaeKcWVUSSWSg17tujE,9842
9
+ xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
10
+ xtn_tools_pro-1.0.0.0.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ xtn_tools_pro-1.0.0.0.3.dist-info/METADATA,sha256=VG4Bs3muLvi85SaWcP3Tw3q4TFfafKDtZhbQ-x9CpRw,358
12
+ xtn_tools_pro-1.0.0.0.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
13
+ xtn_tools_pro-1.0.0.0.3.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
14
+ xtn_tools_pro-1.0.0.0.3.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- xtn_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
2
- xtn_tools_pro/tools.py,sha256=e9KSPqaFBIptBGvexShCcn0nZmUQ5omlVwXgEfWZf5Y,2630
3
- xtn_tools_pro/tools_flie.py,sha256=KuA1sowK31LuOfQMPRsQYPgxr8y7491caepRFYOwqG0,1507
4
- xtn_tools_pro/tools_time.py,sha256=DMjsw9h4E_mrPsanPA8CEhpUE1AA6Z2FU4OJqJKZc1k,4867
5
- xtn_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
6
- xtn_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
7
- xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
8
- xtnkk_tools/MongoDB.py,sha256=2mwln6JPfu5N1N8Hbh6KvN6sED-KPTrOteCBHVFjvwM,5497
9
- xtnkk_tools/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
10
- xtnkk_tools/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
11
- xtnkk_tools/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
12
- xtnkk_tools/update.py,sha256=VygnKO9dXo02JyUEkpbJoBE6BceYARZEn-O1i6AO6E0,911
13
- xtnkk_tools/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
14
- xtnkk_tools/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
15
- xtnkk_tools_pro/__init__.py,sha256=26Tf9j2wj88M1Ldg3b1DJ40KyGgN9ZmQdBLuV453388,395
16
- xtnkk_tools_pro/tools.py,sha256=KYoTds_c7XZBL9yLeoKksHz39QPh02DNQupRKJWx_II,2626
17
- xtnkk_tools_pro/tools_time.py,sha256=n4-T2tNSHnsh-X89IbjahCmoiDcmjZTKJlWyqGOmJQY,4877
18
- xtnkk_tools_pro/db/MongoDB.py,sha256=_GiX1MHNl9CtI-uLDgY_NmMSvRJei-mtKq3Hhe6ly1E,5567
19
- xtnkk_tools_pro/db/RedisDB.py,sha256=qMffCNIHa3o7KD_yVQlsj3OupsNXMsDPRi03migwSu0,4003
20
- xtnkk_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
21
- xtn_tools_pro-1.0.0.0.1.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- xtn_tools_pro-1.0.0.0.1.dist-info/METADATA,sha256=cGqGoLCT0OxwP7KI82OKR1JUF9d9XnNk_Xg4DCgOSvU,333
23
- xtn_tools_pro-1.0.0.0.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
24
- xtn_tools_pro-1.0.0.0.1.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
25
- xtn_tools_pro-1.0.0.0.1.dist-info/RECORD,,
xtnkk_tools/MongoDB.py DELETED
@@ -1,137 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- # 说明:
5
- # MongoDBPro
6
- # History:
7
- # Date Author Version Modification
8
- # --------------------------------------------------------------------------------------------------
9
- # 2024/4/17 xiatn V00.01.000 新建
10
- # --------------------------------------------------------------------------------------------------
11
- from xtn_tools_pro.tools_time import *
12
- from urllib import parse
13
- from typing import List, Dict, Optional
14
- from pymongo import MongoClient
15
- from pymongo.database import Database
16
- from pymongo.collection import Collection
17
- from pymongo.errors import DuplicateKeyError, BulkWriteError
18
-
19
-
20
- class MongoDBPro:
21
- def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
22
- if url:
23
- self.client = MongoClient(url, **kwargs)
24
- else:
25
- self.client = MongoClient(host=ip,
26
- port=port,
27
- username=user_name,
28
- password=user_pass,
29
- authSource=db)
30
-
31
- self.db = self.get_database(db)
32
-
33
- @classmethod
34
- def from_url(cls, url, **kwargs):
35
- url_parsed = parse.urlparse(url)
36
- # 获取 URL的协议
37
- db_type = url_parsed.scheme.strip()
38
- if db_type != "mongodb":
39
- raise Exception(
40
- "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
41
- url
42
- ))
43
- return cls(url=url, **kwargs)
44
-
45
- def get_database(self, database, **kwargs) -> Database:
46
- """
47
- 根据db名获取数据库对象
48
- """
49
- return self.client.get_database(database, **kwargs)
50
-
51
- def get_collection(self, coll_name, **kwargs) -> Collection:
52
- """
53
- 根据集合名获取集合对象
54
- """
55
- return self.db.get_collection(coll_name, **kwargs)
56
-
57
- def run_command(self, command: Dict):
58
- """
59
- 参考文档 https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
60
- """
61
- return self.db.command(command)
62
-
63
- def find(self, coll_name: str, condition: Optional[Dict] = None,
64
- limit: int = 0, **kwargs) -> List[Dict]:
65
- """
66
- find
67
- coll_name:集合名称
68
- condition:查询条件 例如:{"name": "John"}、{"_id": "xxxxx"}
69
- """
70
- condition = {} if condition is None else condition
71
- command = {"find": coll_name, "filter": condition, "limit": limit}
72
- command.update(kwargs)
73
- result = self.run_command(command)
74
- cursor = result["cursor"]
75
- cursor_id = cursor["id"]
76
- while True:
77
- for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
78
- # 处理数据
79
- yield document
80
- if cursor_id == 0:
81
- # 游标已经完全遍历,没有剩余的结果可供获取
82
- # 游标的生命周期已经结束,例如在查询会话结束后。
83
- # 游标被显式地关闭,例如使用 db.killCursor() 命令关闭游标。
84
- break
85
- result = self.run_command(
86
- {
87
- "getMore": cursor_id, # 类似于mongo命令行中的it命令,通过索引id用于获取下一批结果
88
- "collection": coll_name,
89
- "batchSize": kwargs.get("batchSize", 100),
90
- }
91
- )
92
- # 覆盖原来的参数
93
- cursor = result["cursor"]
94
- cursor_id = cursor["id"]
95
- # print("下一批获取")
96
-
97
- def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
98
- is_add_create_time=False,
99
- is_add_create_time_field_name="create_dt"):
100
- """
101
- 添加单条数据
102
- coll_name: 集合名
103
- data: 单条数据
104
- insert_ignore: 索引冲突是否忽略 默认False
105
- is_add_create_time: 是否在数据中添加一个创建数据10时间戳字段 默认False不创建
106
- is_add_create_time_field_name: 自定义创建数据时间戳字段名:默认:create_dt
107
- Returns: 插入成功的行数
108
- """
109
- if is_add_create_time:
110
- data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
111
- collection = self.get_collection(coll_name)
112
- try:
113
- collection.insert_one(data)
114
- except DuplicateKeyError as e:
115
- if not insert_ignore:
116
- raise e
117
- return 0
118
- return 1
119
-
120
- def find_id_is_exist(self, coll_name, _id):
121
- """
122
- 根据id查询id是否存在
123
- :param _id:id
124
- :return: 存在返回True 否则False
125
- """
126
- condition = {"_id": _id}
127
- status = list(self.find(coll_name, condition))
128
- if status:
129
- return True
130
- return False
131
-
132
-
133
- if __name__ == '__main__':
134
- mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
135
- # mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
136
- print(mongo_db.find_id_is_exist("test", "1"))
137
- print(mongo_db.find_id_is_exist("test", "11"))
xtnkk_tools/db/MongoDB.py DELETED
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- # 说明:
5
- # MongoDBPro
6
- # History:
7
- # Date Author Version Modification
8
- # --------------------------------------------------------------------------------------------------
9
- # 2024/4/17 xiatn V00.01.000 新建
10
- # --------------------------------------------------------------------------------------------------
11
- from xtn_tools_pro.tools_time import *
12
- from urllib import parse
13
- from pymongo import MongoClient as _MongoClient
14
- from pymongo.database import Database as _Database
15
- from typing import List, Dict, Optional
16
- from pymongo.collection import Collection as _Collection
17
- from pymongo.errors import DuplicateKeyError, BulkWriteError
18
-
19
-
20
- class MongoDBPro:
21
- def __init__(self, ip=None, port=None, db=None, user_name=None, user_pass=None, url=None, **kwargs):
22
- if url:
23
- self.client = _MongoClient(url, **kwargs)
24
- else:
25
- self.client = _MongoClient(host=ip,
26
- port=port,
27
- username=user_name,
28
- password=user_pass,
29
- authSource=db)
30
-
31
- self.db = self.get_database(db)
32
-
33
- @classmethod
34
- def from_url(cls, url, **kwargs):
35
- url_parsed = parse.urlparse(url)
36
- # 获取 URL的协议
37
- db_type = url_parsed.scheme.strip()
38
- if db_type != "mongodb":
39
- raise Exception(
40
- "url error, expect mongodb://[username:password@]host1[:port1][,host2[:port2],...[,hostN[:portN]]][/[database][?options]], but get {}".format(
41
- url
42
- ))
43
- return cls(url=url, **kwargs)
44
-
45
- def get_database(self, database, **kwargs) -> _Database:
46
- """
47
- 根据db名获取数据库对象
48
- """
49
- return self.client.get_database(database, **kwargs)
50
-
51
- def get_collection(self, coll_name, **kwargs) -> _Collection:
52
- """
53
- 根据集合名获取集合对象
54
- """
55
- return self.db.get_collection(coll_name, **kwargs)
56
-
57
- def run_command(self, command: Dict):
58
- """
59
- 参考文档 https://www.geek-book.com/src/docs/mongodb/mongodb/docs.mongodb.com/manual/reference/command/index.html
60
- """
61
- return self.db.command(command)
62
-
63
- def find(self, coll_name: str, condition: Optional[Dict] = None,
64
- limit: int = 0, **kwargs) -> List[Dict]:
65
- """
66
- find
67
- coll_name:集合名称
68
- condition:查询条件 例如:{"name": "John"}、{"_id": "xxxxx"}
69
- """
70
- condition = {} if condition is None else condition
71
- command = {"find": coll_name, "filter": condition, "limit": limit}
72
- command.update(kwargs)
73
- result = self.run_command(command)
74
- cursor = result["cursor"]
75
- cursor_id = cursor["id"]
76
- while True:
77
- for document in cursor.get("nextBatch", cursor.get("firstBatch", [])):
78
- # 处理数据
79
- yield document
80
- if cursor_id == 0:
81
- # 游标已经完全遍历,没有剩余的结果可供获取
82
- # 游标的生命周期已经结束,例如在查询会话结束后。
83
- # 游标被显式地关闭,例如使用 db.killCursor() 命令关闭游标。
84
- break
85
- result = self.run_command(
86
- {
87
- "getMore": cursor_id, # 类似于mongo命令行中的it命令,通过索引id用于获取下一批结果
88
- "collection": coll_name,
89
- "batchSize": kwargs.get("batchSize", 100),
90
- }
91
- )
92
- # 覆盖原来的参数
93
- cursor = result["cursor"]
94
- cursor_id = cursor["id"]
95
- # print("下一批获取")
96
-
97
- def add_data_one(self, coll_name: str, data: Dict, insert_ignore=False,
98
- is_add_create_time=False,
99
- is_add_create_time_field_name="create_dt"):
100
- """
101
- 添加单条数据
102
- coll_name: 集合名
103
- data: 单条数据
104
- insert_ignore: 索引冲突是否忽略 默认False
105
- is_add_create_time: 是否在数据中添加一个创建数据10时间戳字段 默认False不创建
106
- is_add_create_time_field_name: 自定义创建数据时间戳字段名:默认:create_dt
107
- Returns: 插入成功的行数
108
- """
109
- if is_add_create_time:
110
- data[is_add_create_time_field_name] = get_time_now_timestamp(is_time_10=True)
111
- collection = self.get_collection(coll_name)
112
- try:
113
- collection.insert_one(data)
114
- except DuplicateKeyError as e:
115
- if not insert_ignore:
116
- raise e
117
- return 0
118
- return 1
119
-
120
- def find_id_is_exist(self, coll_name, _id):
121
- """
122
- 根据id查询id是否存在
123
- :param _id:id
124
- :return: 存在返回True 否则False
125
- """
126
- condition = {"_id": _id}
127
- status = list(self.find(coll_name, condition))
128
- if status:
129
- return True
130
- return False
131
-
132
-
133
- if __name__ == '__main__':
134
- pass
135
- # mongo_db = MongoDBPro("127.0.0.1", 27017, "spider_pro")
136
- # # mongo_db.add_data_one("test", {"_id": "1", "data": "aaa"})
137
- # print(mongo_db.find_id_is_exist("test", "1"))
138
- # print(mongo_db.find_id_is_exist("test", "11"))
@@ -1,10 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
-
4
- # 说明:
5
- # 程序说明xxxxxxxxxxxxxxxxxxx
6
- # History:
7
- # Date Author Version Modification
8
- # --------------------------------------------------------------------------------------------------
9
- # 2024/4/18 xiatn V00.01.000 新建
10
- # --------------------------------------------------------------------------------------------------