xtn-tools-pro 1.0.0.0.2__tar.gz → 1.0.0.0.3__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (19) hide show
  1. {xtn-tools-pro-1.0.0.0.2/xtn_tools_pro.egg-info → xtn-tools-pro-1.0.0.0.3}/PKG-INFO +1 -1
  2. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/setup.py +3 -2
  3. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/db/RedisDB.py +47 -0
  4. xtn-tools-pro-1.0.0.0.3/xtn_tools_pro/proxy/XiaoXiangProxy.py +207 -0
  5. xtn-tools-pro-1.0.0.0.3/xtn_tools_pro/proxy/__init__.py +10 -0
  6. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3/xtn_tools_pro.egg-info}/PKG-INFO +1 -1
  7. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro.egg-info/SOURCES.txt +3 -1
  8. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro.egg-info/requires.txt +1 -0
  9. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/LICENSE +0 -0
  10. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/README.md +0 -0
  11. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/setup.cfg +0 -0
  12. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/__init__.py +0 -0
  13. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/db/MongoDB.py +0 -0
  14. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/db/__init__.py +0 -0
  15. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/tools.py +0 -0
  16. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/tools_flie.py +0 -0
  17. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro/tools_time.py +0 -0
  18. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro.egg-info/dependency_links.txt +0 -0
  19. {xtn-tools-pro-1.0.0.0.2 → xtn-tools-pro-1.0.0.0.3}/xtn_tools_pro.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.2
3
+ Version: 1.0.0.0.3
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -15,7 +15,7 @@ with open("README.md", "r") as f:
15
15
 
16
16
  setuptools.setup(
17
17
  name="xtn-tools-pro", # 模块名称
18
- version="1.0.0.0.2", # 版本
18
+ version="1.0.0.0.3", # 版本
19
19
  author="xtn", # 作者
20
20
  author_email="czw011122@163.com", # 作者邮箱
21
21
  description="xtn 开发工具", # 模块简介
@@ -29,7 +29,8 @@ setuptools.setup(
29
29
  # 依赖模块
30
30
  install_requires=[
31
31
  "pymongo",
32
- "redis"
32
+ "redis",
33
+ "requests"
33
34
  ],
34
35
  python_requires='>=3',
35
36
  )
@@ -27,6 +27,9 @@ class RedisDBPro:
27
27
  self.__redis = None
28
28
  self.get_connect()
29
29
 
30
+ def __del__(self):
31
+ self.__redis.close()
32
+
30
33
  @classmethod
31
34
  def from_url(cls, url):
32
35
  """
@@ -110,9 +113,53 @@ class RedisDBPro:
110
113
  else:
111
114
  return self._redis.sadd(table, values)
112
115
 
116
def incr(self, key):
    """Increment the value stored under ``key``.

    Delegates to ``self._redis.incr`` and hands back its result unchanged.

    :param key: the key whose value should be incremented
    :return: whatever the underlying client's ``incr`` returns
    """
    client = self._redis
    new_value = client.incr(key)
    return new_value
123
+
124
def get_all_key(self, path):
    """Collect every key that matches a pattern.

    Typical patterns: ``test*`` for keys starting with ``test`` and
    ``*test*`` for keys containing ``test``.

    :param path: the match pattern handed to ``self._redis.scan_iter``
    :return: a list with all keys yielded by the scan
    """
    matched_keys = []
    # scan_iter yields lazily; drain it completely into a concrete list.
    matched_keys.extend(self._redis.scan_iter(path))
    return matched_keys
132
+
133
def get(self, table):
    """Fetch the value stored under ``table``.

    :param table: the key to read
    :return: whatever ``self._redis.get`` returns for that key
    """
    client = self._redis
    stored = client.get(table)
    return stored
135
+
136
def set(self, table, value, **kwargs):
    """Store a string value under ``table``.

    All keyword arguments are forwarded untouched to ``self._redis.set``.
    The options below are those of redis-py's ``Redis.set`` (assumes
    ``self._redis`` is a redis-py client -- TODO confirm):

    :param table: the key to write
    :param value: the value to store
    :param kwargs: optional settings, any of:
        ``ex``      -- expire time in seconds (``ex=10`` expires after 10 s)
        ``px``      -- expire time in milliseconds (``px=10000`` is 10 s)
        ``nx``      -- if True, only set the key when it does not exist yet
        ``xx``      -- if True, only set the key when it already exists
        ``keepttl`` -- if True, keep the TTL currently attached to the key
        ``get``     -- if True, return the previous value of the key
        ``exat``    -- absolute expiry as a UNIX timestamp in seconds
        ``pxat``    -- absolute expiry as a UNIX timestamp in milliseconds
    :return: whatever the underlying client's ``set`` returns
    """
    return self._redis.set(table, value, **kwargs)
153
+
154
def delete(self, table):
    """Remove the key ``table``.

    :param table: the key to delete
    :return: whatever ``self._redis.delete`` returns
    """
    client = self._redis
    outcome = client.delete(table)
    return outcome
156
+
113
157
 
114
158
if __name__ == '__main__':
    # Manual smoke test: connect and delete a scratch key.
    # The connection URL (including any password) is read from the
    # environment so that no credential is shipped inside the package.
    import os

    r = RedisDBPro.from_url(os.environ.get("REDIS_URL", "redis://127.0.0.1:6379/0"))
    print(r.delete("test_redis_pro"))
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 小象代理专用
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
11
+ import requests, time, random
12
+ from xtn_tools_pro.db.RedisDB import RedisDBPro
13
+ from xtn_tools_pro.tools_time import get_time_now_timestamp, get_time_now_day59_timestamp
14
+
15
+ import warnings
16
+ from urllib3.exceptions import InsecureRequestWarning
17
+
18
+ warnings.filterwarnings("ignore", category=InsecureRequestWarning)
19
+
20
+
21
class ProxyPool:
    """Redis-backed pool of XiaoXiang proxies.

    Every proxy is stored as one Redis key of the form
    ``<redis_proxy_name>:http://<ip>:<port>:<created_timestamp>``.  The value
    is a usage counter: it is created through ``incr`` when the proxy is
    fetched, and a value of ``999999`` marks the proxy as unusable.

    ``run()`` is the maintenance loop (fetch new proxies, evict stale ones);
    ``get_proxy()`` and ``set_proxy_error()`` are the consumer-side API.
    """

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="XiaoXiangProxy",
                 XiaoXiangProxyAppKey=None, XiaoXiangProxyAppSecret=None, usage_cnt=100, usage_time=100):
        """Create the pool and its Redis connection.

        :param ip: Redis host
        :param port: Redis port
        :param db: Redis database index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param XiaoXiangProxyAppKey: XiaoXiang application id (appKey)
        :param XiaoXiangProxyAppSecret: XiaoXiang application secret (appSecret)
        :param usage_cnt: maximum usage count of one proxy before eviction
        :param usage_time: maximum lifetime of one proxy, in seconds, before eviction
        :raises ValueError: if the app key or the app secret is missing
        """
        if not XiaoXiangProxyAppSecret or not XiaoXiangProxyAppKey:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("应用密码或应用id 不能为空")

        self.__redis_pool = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redisProxyName = redis_proxy_name
        self.__XiaoXiangProxyAPI = "https://api.xiaoxiangdaili.com/ip/get?appKey={appKey}&appSecret={appSecret}&cnt=&wt=json".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)
        self.__XiaoXiangAutoBinding = "https://api.xiaoxiangdaili.com/app/bindIp?appKey={appKey}&appSecret={appSecret}&i=1".format(
            appKey=XiaoXiangProxyAppKey, appSecret=XiaoXiangProxyAppSecret)

        # Cut-off after which run() stops; presumably today's 23:59 timestamp,
        # judging by the helper's name and the log message in run().
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time

    @staticmethod
    def __to_text(value):
        """Return ``value`` as ``str``, decoding it first when Redis handed back bytes."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return str(value)

    def __pool_keys(self):
        """Return the raw Redis keys of all proxies stored under this pool's prefix."""
        return list(self.__redis_pool.get_all_key("{}:*".format(self.__redisProxyName)))

    def __log(self, text):
        """Write one log line (currently just printed).

        :param text: the message to log
        """
        print(text)

    def __check_proxy(self):
        """Evict proxies that should no longer be handed out.

        A proxy is deleted when (1) it is older than ``usage_time`` seconds,
        or (2) its counter reached ``usage_cnt`` -- which includes proxies a
        crawler marked as unusable by setting the counter to 999999.
        """
        for raw_key in self.__pool_keys():
            key_text = self.__to_text(raw_key)
            try:
                # The creation timestamp is the last ":"-separated field of the key.
                created_at = int(key_text.split(":")[-1])
            except ValueError:
                # Not a key written by this pool; leave it alone.
                continue

            now = get_time_now_timestamp(is_time_10=True)
            if created_at + self.__usage_time < now:
                del_state = self.__redis_pool.delete(raw_key)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                    format(proxy_val=key_text, del_state=del_state,
                           time_out=created_at, py_time=now))
                continue

            raw_count = self.__redis_pool.get(raw_key)
            if raw_count is None:
                # The key disappeared between the scan and the read.
                continue
            try:
                # Redis returns the counter as str/bytes; compare as an integer.
                used = int(raw_count)
            except (TypeError, ValueError):
                continue

            if used >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(raw_key)
                reason = "当前代理被爬虫标记为不可用" if used >= 999999 else "当前代理已超过使用次数"
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(proxy_val=key_text,
                                                                       text=reason,
                                                                       del_state=del_state))

    def __get_proxy_length(self):
        """Return how many proxies are currently stored in the pool."""
        return len(self.__pool_keys())

    def __incr_proxy(self, proxy_val):
        """Increment the usage counter of one proxy key.

        :param proxy_val: the full Redis key of the proxy
        :return: the result of the ``incr`` call
        """
        return self.__redis_pool.incr(proxy_val)

    def __get_api_proxy(self):
        """Fetch proxies from the XiaoXiang API and store them in Redis.

        Retries every 2 seconds until the API delivers at least one proxy.
        Example responses::

            {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
            {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}

        :return: ``True`` once proxies were stored
        """
        while True:
            self.__check_proxy()
            try:
                # NOTE(review): verify=False disables TLS certificate checks while
                # the appSecret travels in the URL -- confirm this is intended.
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                payload = response.json()  # parse the body only once
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                proxy_data_list = payload.get("data", [])
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                for data in proxy_data_list:
                    ip = "http://{ip}".format(ip=data.get("ip"))
                    port = data.get("port")
                    time_out = get_time_now_timestamp(is_time_10=True)
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip=ip,
                        port=port,
                        timeOut=time_out,
                    )
                    # incr on a new key creates it, which is how the proxy is inserted.
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
                                                                                 proxy_key_con=proxy_key_con))
                return True  # fetched successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                # Back off before retrying so a network outage does not turn
                # this loop into a busy loop hammering the API.
                time.sleep(2)

    def run(self):
        """Run the maintenance loop until the end-of-day cut-off is reached.

        Binds the current machine's IP with the XiaoXiang service once, then
        repeatedly evicts stale proxies and fetches fresh ones.  Any exception
        ends the loop and is logged instead of being raised.
        """
        try:
            # Bind this machine's IP with the proxy service.
            response = requests.get(url=self.__XiaoXiangAutoBinding, verify=False, timeout=3)
            self.__log(response.text)
            while True:
                self.__check_proxy()
                self.__get_api_proxy()
                time.sleep(1)
                # Stop once the cut-off computed in __init__ has passed.
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """Pick a random proxy from the pool, waiting until one is available.

        :return: the proxy as ``http://<ip>:<port>``, or ``None`` if reading
                 from Redis failed (the error is logged)
        """
        try:
            while True:
                proxy_val_list = self.__pool_keys()
                if proxy_val_list:
                    chosen = self.__to_text(random.choice(proxy_val_list))
                    # Drop the leading pool name and the trailing timestamp.
                    proxy_v = ":".join(chosen.split(":")[1:-1])
                    self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                    return proxy_v
                self.__log("暂无代理,等待中")
                time.sleep(2)
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))

    def set_proxy_error(self, proxy_v):
        """Mark a proxy as unusable by setting its counter to 999999.

        Accepts either the value returned by ``get_proxy()``
        (``http://<ip>:<port>``) or a full pool key.  The former is resolved
        to the matching pool key(s) first, because the counter lives under
        the full key; writing to the bare proxy address would only create an
        unrelated key and the proxy would never be evicted.

        :param proxy_v: the proxy (or full pool key) to mark
        """
        try:
            prefix = "{}:".format(self.__redisProxyName)
            proxy_text = self.__to_text(proxy_v)
            if proxy_text.startswith(prefix):
                targets = [proxy_v]
            else:
                targets = list(self.__redis_pool.get_all_key("{}{}:*".format(prefix, proxy_text)))

            if not targets:
                self.__log("未找到需要标记的代理:{proxy_v}".format(proxy_v=proxy_text))
                return

            for target in targets:
                self.__redis_pool.set(target, "999999")
                self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=self.__to_text(target)))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
202
+
203
+
204
if __name__ == '__main__':
    # Manual smoke test: fetch one proxy from the pool.
    # Credentials are read from the environment so that no Redis password
    # or XiaoXiang appKey/appSecret is shipped inside the package.
    import os

    p = ProxyPool(ip=os.environ.get("REDIS_HOST", "127.0.0.1"),
                  port=int(os.environ.get("REDIS_PORT", "6379")),
                  db=0,
                  user_pass=os.environ.get("REDIS_PASSWORD", ""),
                  XiaoXiangProxyAppKey=os.environ.get("XIAOXIANG_APP_KEY"),
                  XiaoXiangProxyAppSecret=os.environ.get("XIAOXIANG_APP_SECRET"))
    print(p.get_proxy())
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 代理工具包:小象代理(XiaoXiangProxy)代理池
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.2
3
+ Version: 1.0.0.0.3
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -12,4 +12,6 @@ xtn_tools_pro.egg-info/requires.txt
12
12
  xtn_tools_pro.egg-info/top_level.txt
13
13
  xtn_tools_pro/db/MongoDB.py
14
14
  xtn_tools_pro/db/RedisDB.py
15
- xtn_tools_pro/db/__init__.py
15
+ xtn_tools_pro/db/__init__.py
16
+ xtn_tools_pro/proxy/XiaoXiangProxy.py
17
+ xtn_tools_pro/proxy/__init__.py