xtn-tools-pro 1.0.0.0.6__tar.gz → 1.0.0.0.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27) hide show
  1. {xtn-tools-pro-1.0.0.0.6/xtn_tools_pro.egg-info → xtn-tools-pro-1.0.0.0.7}/PKG-INFO +1 -1
  2. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/setup.py +1 -1
  3. xtn-tools-pro-1.0.0.0.7/xtn_tools_pro/proxy/proxy.py +189 -0
  4. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/crypto.py +19 -2
  5. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/helpers.py +6 -3
  6. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7/xtn_tools_pro.egg-info}/PKG-INFO +1 -1
  7. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro.egg-info/SOURCES.txt +1 -0
  8. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/LICENSE +0 -0
  9. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/README.md +0 -0
  10. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/setup.cfg +0 -0
  11. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/__init__.py +0 -0
  12. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/db/MongoDB.py +0 -0
  13. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/db/MysqlDB.py +0 -0
  14. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/db/RedisDB.py +0 -0
  15. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/db/__init__.py +0 -0
  16. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/proxy/XiaoXiangProxy.py +0 -0
  17. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/proxy/__init__.py +0 -0
  18. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/tools.py +0 -0
  19. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/__init__.py +0 -0
  20. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/file_utils.py +0 -0
  21. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/log.py +0 -0
  22. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/retry.py +0 -0
  23. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/sql.py +0 -0
  24. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro/utils/time_utils.py +0 -0
  25. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro.egg-info/dependency_links.txt +0 -0
  26. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro.egg-info/requires.txt +0 -0
  27. {xtn-tools-pro-1.0.0.0.6 → xtn-tools-pro-1.0.0.0.7}/xtn_tools_pro.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.6
3
+ Version: 1.0.0.0.7
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -15,7 +15,7 @@ with open("README.md", "r") as f:
15
15
 
16
16
  setuptools.setup(
17
17
  name="xtn-tools-pro", # 模块名称
18
- version="1.0.0.0.6", # 版本
18
+ version="1.0.0.0.7", # 版本
19
19
  author="xtn", # 作者
20
20
  author_email="czw011122@163.com", # 作者邮箱
21
21
  description="xtn 开发工具", # 模块简介
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 小象代理专用
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
11
+ import requests, time, random
12
+ from xtn_tools_pro.db.RedisDB import RedisDBPro
13
+ from xtn_tools_pro.utils.time_utils import get_time_now_timestamp, get_time_now_day59_timestamp
14
+
15
+ import warnings
16
+ from urllib3.exceptions import InsecureRequestWarning
17
+
18
+ warnings.filterwarnings("ignore", category=InsecureRequestWarning)
19
+
20
+
21
class ProxyPool:
    """
    Redis-backed pool of XiaoXiang proxies.

    Each proxy is stored as one Redis key of the form
    ``{redis_proxy_name}:http://{ip}:{port}:{fetch_timestamp}`` whose value is a
    usage counter. ``run()`` is the maintenance loop (evict stale proxies, fetch
    new ones); ``get_proxy()`` / ``set_proxy_error()`` are the crawler-facing API.
    """

    # Counter value a crawler writes to flag a proxy as unusable.
    _UNUSABLE_COUNT = 999999
    # Seconds to wait before retrying a failed or throttled API request.
    _RETRY_DELAY = 2

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="", usage_cnt=100, usage_time=100, *args,
                 **kwargs):
        """
        :param ip: Redis host
        :param port: Redis port
        :param db: Redis db index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param usage_cnt: maximum number of uses per proxy (used by maintenance)
        :param usage_time: maximum lifetime of a proxy in seconds (used by maintenance)
        :param kwargs: optional ``XiaoXiangProxyAPI`` - URL of the proxy extraction API.
            NOTE(review): the original code read this attribute without ever
            assigning it; the kwarg name is chosen here - confirm against how the
            sibling XiaoXiangProxy module builds its API URL.
        """
        r = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redis_pool = r
        self.__redisProxyName = redis_proxy_name

        # Timestamp at which run() stops (today's 23:59, judging by the helper
        # name and the log message in run()).
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time
        # Must always exist, otherwise __get_api_proxy fails with AttributeError.
        self.__XiaoXiangProxyAPI = kwargs.get("XiaoXiangProxyAPI", "")

    def __log(self, text):
        """
        Write a log line (currently just prints it).
        :param text: message to log
        :return: None
        """
        print(text)

    def __check_proxy(self):
        """
        Maintenance pass: delete proxies that are no longer usable.
        A proxy is deleted when 1. its lifetime exceeds ``usage_time`` seconds,
        2. its usage counter reached ``usage_cnt``, or 3. a crawler flagged it
        by setting the counter to 999999.
        :return: None
        """
        # Snapshot the keys first because entries are deleted while looping.
        proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
        for proxy_val in proxy_val_list:
            # The last key segment is the fetch timestamp.
            time_out = proxy_val.split(":")[-1]
            try:
                fetched_at = int(time_out)
            except ValueError:
                # Not a key written by this pool (an empty prefix matches every
                # key in the db); leave it alone instead of aborting the loop.
                continue
            proxy_val_count = self.__redis_pool.get(proxy_val)
            py_time = get_time_now_timestamp(is_time_10=True)
            if fetched_at + self.__usage_time < py_time:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                    format(proxy_val=proxy_val, del_state=del_state,
                           time_out=time_out, py_time=py_time))
                continue
            try:
                # Convert once; the raw Redis value is not guaranteed to be an int.
                used_count = int(proxy_val_count)
            except (TypeError, ValueError):
                # Key vanished between listing and reading, or holds a non-counter value.
                continue
            if used_count >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(proxy_val)
                if used_count >= self._UNUSABLE_COUNT:
                    text = "当前代理被爬虫标记为不可用"
                else:
                    text = "当前代理已超过使用次数"
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(proxy_val=proxy_val,
                                                                         text=text,
                                                                         del_state=del_state))

    def __get_proxy_length(self):
        """
        Count the proxies currently stored in the pool.
        :return: number of keys matching the pool prefix
        """
        proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
        return len(proxy_val_list)

    def __incr_proxy(self, proxy_val):
        """
        Increment the usage counter of a proxy.
        :param proxy_val: full Redis key of the proxy
        :return: whatever the Redis wrapper's ``incr`` returns
        """
        proxy_val_con = self.__redis_pool.incr(proxy_val)
        return proxy_val_con

    def __get_api_proxy(self):
        """
        Fetch proxies from the XiaoXiang API and store them in Redis, retrying
        every 2 seconds until one request succeeds.
        Example responses:
        {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
        {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
        :return: True once proxies were stored; False when no API URL is configured
        """
        if not self.__XiaoXiangProxyAPI:
            # Without a URL every request would fail forever; report and give up.
            self.__log("获取小象代理报错:未配置小象代理API地址")
            return False
        while True:
            self.__check_proxy()
            try:
                # NOTE(review): verify=False disables TLS certificate validation.
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue
                payload = response.json()
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue
                proxy_data_list = payload.get("data", [])
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(self._RETRY_DELAY)
                    continue
                for data in proxy_data_list:
                    ip = "http://{ip}".format(ip=data.get("ip"))
                    port = data.get("port")
                    time_out = get_time_now_timestamp(is_time_10=True)
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip=ip,
                        port=port,
                        timeOut=time_out,
                    )
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(proxy_key=proxy_key,
                                                                                proxy_key_con=proxy_key_con))
                return True  # fetched and stored successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                # Back off so a persistent failure does not become a busy loop.
                time.sleep(self._RETRY_DELAY)

    def run(self):
        """
        Maintenance loop: evict stale proxies and fetch new ones once per second
        until the 23:59 timestamp captured at construction time is reached.
        Any exception ends the loop and is logged.
        :return: None
        """
        try:
            while True:
                # Evict unusable proxies.
                self.__check_proxy()
                # Fetch fresh proxies; False means no API URL is configured.
                if not self.__get_api_proxy():
                    break
                time.sleep(1)
                # Stop once the end-of-day timestamp has passed.
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """
        Pick a random proxy from the pool, polling every 2 seconds while the
        pool is empty.
        :return: proxy as ``http://{ip}:{port}``; None if an error occurred (it is logged)
        """
        try:
            while True:
                proxy_val_list = list(self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName)))
                if proxy_val_list:
                    proxy_val = random.choice(proxy_val_list)
                    # Drop the leading prefix and the trailing timestamp segment.
                    proxy_v = ":".join(str(proxy_val).split(":")[1:-1])
                    self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                    return proxy_v
                else:
                    self.__log("暂无代理,等待中")
                    time.sleep(self._RETRY_DELAY)
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))

    def set_proxy_error(self, proxy_v):
        """
        Flag a proxy as unusable by setting its counter to 999999, so the next
        maintenance pass deletes it.
        :param proxy_v: proxy as returned by ``get_proxy()`` (``http://{ip}:{port}``),
            or the full Redis key of a pool entry
        :return: None
        """
        try:
            # get_proxy() hands out only the middle part of the key, so look up
            # the real pool key(s) instead of writing to a new, unrelated key.
            pool_keys = self.__redis_pool.get_all_key("{}*".format(self.__redisProxyName))
            targets = [
                key for key in pool_keys
                if key == proxy_v or ":".join(str(key).split(":")[1:-1]) == proxy_v
            ]
            if not targets:
                self.__log("未在代理池中找到代理 {proxy_v},无需标记".format(proxy_v=proxy_v))
                return
            for key in targets:
                self.__redis_pool.set(key, str(self._UNUSABLE_COUNT))
                self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=key))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
183
+
184
+
185
+ if __name__ == '__main__':
186
+ # p = ProxyPool(ip="127.0.0.1", port=6379, db=0, user_pass="xtn-kk", XiaoXiangProxyAppKey="1101384562594172928",
187
+ # XiaoXiangProxyAppSecret="PJ0QBWML")
188
+ # print(p.get_proxy())
189
+ pass
@@ -11,7 +11,7 @@
11
11
  import hashlib
12
12
 
13
13
 
14
- def get_md5_32(s, is_upper=False):
14
+ def get_md5_32(s: str, is_upper=False):
15
15
  """
16
16
  获取文本的md5值 32位
17
17
  :param s: 文本
@@ -25,7 +25,7 @@ def get_md5_32(s, is_upper=False):
25
25
  return m.hexdigest()
26
26
 
27
27
 
28
- def get_md5_16(s, is_upper=False):
28
+ def get_md5_16(s: str, is_upper=False):
29
29
  """
30
30
  获取文本的md5值 16位
31
31
  :param s: 文本
@@ -85,3 +85,20 @@ def get_file_md5_16(file_path, is_upper=False):
85
85
  """
86
86
  result = get_file_md5_32(file_path, is_upper)
87
87
  return result[8:24]
88
+
89
+
90
def get_sha1(s: "str | bytes", is_upper=False):
    """
    Return the SHA-1 hex digest of a text or of raw bytes.
    :param s: text (encoded as UTF-8 before hashing) or a bytes-like object
    :param is_upper: return the digest in upper case; defaults to False
    :return: 40-character hexadecimal digest
    """
    # Text is encoded explicitly; bytes-like input is hashed as-is.
    data = s.encode("utf-8") if isinstance(s, str) else s
    sha1_hash = hashlib.sha1(data).hexdigest()
    return sha1_hash.upper() if is_upper else sha1_hash
102
+
103
+ if __name__ == '__main__':
104
+ print(get_sha1("111"))
@@ -38,11 +38,11 @@ def get_uuid(version=4, namespace: UUID = uuid.NAMESPACE_DNS, name=""):
38
38
  else:
39
39
  result = uuid.uuid4()
40
40
 
41
- uuid_str = str(result)
41
+ # uuid_str = str(result)
42
42
  # uuid_hex = uuid_obj.hex
43
43
  # uuid_int = uuid_obj.int
44
44
  # uuid_bytes = uuid_obj.bytes
45
- return uuid_str
45
+ return result
46
46
 
47
47
 
48
48
  def get_str_to_json(str_json):
@@ -120,4 +120,7 @@ def get_build_url_with_params(url, params):
120
120
  """
121
121
  encoded_params = urlencode(params)
122
122
  full_url = url + "?" + encoded_params
123
- return full_url
123
+ return full_url
124
+
125
+ if __name__ == '__main__':
126
+ print(get_uuid(4))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.6
3
+ Version: 1.0.0.0.7
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -14,6 +14,7 @@ xtn_tools_pro/db/RedisDB.py
14
14
  xtn_tools_pro/db/__init__.py
15
15
  xtn_tools_pro/proxy/XiaoXiangProxy.py
16
16
  xtn_tools_pro/proxy/__init__.py
17
+ xtn_tools_pro/proxy/proxy.py
17
18
  xtn_tools_pro/utils/__init__.py
18
19
  xtn_tools_pro/utils/crypto.py
19
20
  xtn_tools_pro/utils/file_utils.py