xtn-tools-pro 1.0.0.0.6__py3-none-any.whl → 1.0.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 小象代理专用
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2024/4/27 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
11
+ import requests, time, random
12
+ from xtn_tools_pro.db.RedisDB import RedisDBPro
13
+ from xtn_tools_pro.utils.time_utils import get_time_now_timestamp, get_time_now_day59_timestamp
14
+
15
+ import warnings
16
+ from urllib3.exceptions import InsecureRequestWarning
17
+
18
+ warnings.filterwarnings("ignore", category=InsecureRequestWarning)
19
+
20
+
21
class ProxyPool:
    """Redis-backed pool of short-lived HTTP proxies fetched from a vendor API.

    Every proxy is stored as one Redis key of the form
    ``{redis_proxy_name}:http://{ip}:{port}:{fetch_timestamp}`` whose value is
    a usage counter.  ``run`` keeps the pool fresh (evicts stale proxies and
    fetches new ones); crawlers call ``get_proxy`` to borrow a proxy and
    ``set_proxy_error`` to flag a broken one for eviction.
    """

    # Counter value written by crawlers to flag a proxy as unusable.
    _UNUSABLE_MARK = 999999

    def __init__(self, ip, port, db=0, user_pass="", redis_proxy_name="", usage_cnt=100, usage_time=100, *args, **kwargs):
        """
        :param ip: Redis host
        :param port: Redis port
        :param db: Redis db index
        :param user_pass: Redis password
        :param redis_proxy_name: key prefix under which proxies are stored
        :param usage_cnt: maximum number of uses per proxy (a count) before eviction
        :param usage_time: maximum lifetime of a proxy in seconds before eviction
        :param kwargs: ``proxy_api_url`` - URL of the vendor endpoint that
            returns new proxies; required by ``run``.  Other keys are ignored.
        """
        self.__redis_pool = RedisDBPro(ip=ip, port=port, db=db, user_pass=user_pass)
        self.__redisProxyName = redis_proxy_name
        # Previously this attribute was read but never assigned, so every
        # fetch failed with AttributeError.  It is now taken from kwargs.
        self.__XiaoXiangProxyAPI = kwargs.get("proxy_api_url", "")

        # Timestamp at which ``run`` stops (23:59 of the current day, judging
        # by the helper's name and the log message in ``run``).
        self.__now_day59_timestamp = get_time_now_day59_timestamp()
        self.__usage_cnt = usage_cnt
        self.__usage_time = usage_time

    @staticmethod
    def _to_text(value):
        """Return *value* as ``str``, decoding UTF-8 bytes when the client returns bytes."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return str(value)

    def __log(self, text):
        """
        Emit a log line (currently plain ``print``).
        :param text: message to output
        :return:
        """
        print(text)

    def __list_proxy_keys(self, pattern=None):
        """
        List Redis keys matching *pattern* as ``str``.
        :param pattern: glob pattern; defaults to every key starting with the pool prefix
        :return: list of key names
        """
        if pattern is None:
            pattern = "{}*".format(self.__redisProxyName)
        return [self._to_text(key) for key in self.__redis_pool.get_all_key(pattern)]

    def __proxy_from_key(self, proxy_key):
        """
        Strip the pool prefix and the trailing timestamp from a pool key.
        :param proxy_key: ``{prefix}:http://{ip}:{port}:{timestamp}``
        :return: ``http://{ip}:{port}``
        """
        prefix = "{}:".format(self.__redisProxyName)
        if proxy_key.startswith(prefix):
            # Prefix-aware so that a pool name containing ":" still works.
            remainder = proxy_key[len(prefix):]
        else:
            remainder = proxy_key.split(":", 1)[-1]
        return remainder.rsplit(":", 1)[0]

    def __check_proxy(self):
        """
        Maintain the pool by deleting proxies that should no longer be used.
        A proxy is deleted when: 1. it is older than ``usage_time`` seconds;
        2. it has been used ``usage_cnt`` times or more; 3. a crawler flagged
        it by setting its counter to 999999.
        :return:
        """
        # NOTE(review): is_time_10=True presumably yields a 10-digit (seconds)
        # timestamp - confirm against time_utils.
        now = get_time_now_timestamp(is_time_10=True)
        for proxy_val in self.__list_proxy_keys():
            # The fetch timestamp is the last ":"-separated segment of the key.
            time_out = proxy_val.split(":")[-1]
            try:
                fetched_at = int(time_out)
            except ValueError:
                # Not a pool-formatted key (the glob can match foreign keys); leave it alone.
                continue

            if fetched_at + self.__usage_time < now:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{time_out}_{py_time}当前代理已超过使用时间,删除状态为:{del_state}".
                    format(proxy_val=proxy_val, del_state=del_state,
                           time_out=time_out, py_time=now))
                continue

            try:
                # Convert once: Redis hands back str/bytes, and comparing that
                # to an int raises TypeError on Python 3.
                used_count = int(self.__redis_pool.get(proxy_val))
            except (TypeError, ValueError):
                # Key vanished between listing and reading, or holds a non-counter value.
                continue

            flagged = used_count >= self._UNUSABLE_MARK
            if flagged or used_count >= self.__usage_cnt:
                del_state = self.__redis_pool.delete(proxy_val)
                self.__log(
                    "当前代理状态:{proxy_val},{text},删除状态为:{del_state}".format(
                        proxy_val=proxy_val,
                        text="当前代理被爬虫标记为不可用" if flagged else "当前代理已超过使用次数",
                        del_state=del_state))

    def __get_proxy_length(self):
        """
        Count the proxies currently stored in the pool.
        :return: number of keys matching the pool prefix
        """
        return len(self.__list_proxy_keys())

    def __incr_proxy(self, proxy_val):
        """
        Increment the usage counter of a proxy key (creating it if missing).
        :param proxy_val: full Redis key of the proxy
        :return: whatever the Redis client's ``incr`` returns
        """
        return self.__redis_pool.incr(proxy_val)

    def __get_api_proxy(self):
        """
        Fetch proxies from the vendor API and store them in Redis, retrying
        every 2 seconds until one batch has been stored.
        Example responses:
        {"code":1010,"success":false,"data":null,"msg":"请求过于频繁"}
        {"code":200,"success":true,"data":[{"ip":"125.123.244.60","port":37635,"realIp":null,"startTime":"2024-04-27 14:09:42","during":2}],"msg":"操作成功"}
        :return: True once a batch of proxies has been stored
        :raises ValueError: if no API URL was configured
        """
        if not self.__XiaoXiangProxyAPI:
            # Fail fast instead of spinning forever on a request that can never succeed.
            raise ValueError("proxy API URL is not configured; pass proxy_api_url=... to ProxyPool")

        while True:
            self.__check_proxy()
            try:
                response = requests.get(url=self.__XiaoXiangProxyAPI, verify=False, timeout=3)
                if response.status_code != 200:
                    self.__log("获取小象代理返回响应码不为200,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                payload = response.json()  # parse the body once
                if payload.get("msg") == "请求过于频繁":
                    self.__log("获取小象代理过于频繁,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                # "data" is null on failure responses, hence the ``or []``.
                proxy_data_list = payload.get("data") or []
                if not proxy_data_list:
                    self.__log("获取小象代理失败 data 为空,等待2s,{content}".format(content=response.text))
                    time.sleep(2)
                    continue

                for data in proxy_data_list:
                    proxy_key = "{redis_proxy_name}:{ip}:{port}:{timeOut}".format(
                        redis_proxy_name=self.__redisProxyName,
                        ip="http://{ip}".format(ip=data.get("ip")),
                        port=data.get("port"),
                        timeOut=get_time_now_timestamp(is_time_10=True),
                    )
                    proxy_key_con = self.__incr_proxy(proxy_key)
                    self.__log("获取代理:{proxy_key},插入数据库状态为{proxy_key_con}".format(
                        proxy_key=proxy_key, proxy_key_con=proxy_key_con))
                return True  # fetched successfully
            except Exception as e:
                self.__log("获取小象代理报错:{e}".format(e=e))
                # Back off like the other failure paths; without this a
                # persistent network error becomes a busy loop.
                time.sleep(2)

    def run(self):
        """
        Maintenance loop: evict stale proxies, fetch new ones, sleep 1 second,
        and stop once the current time reaches the end-of-day timestamp
        captured in ``__init__``.  Any exception is logged and ends the loop.
        :return:
        """
        try:
            while True:
                # evict stale proxies
                self.__check_proxy()
                # fetch new proxies from the API
                self.__get_api_proxy()
                time.sleep(1)
                # stop once the end-of-day (23:59) timestamp has been reached
                if get_time_now_timestamp(is_time_10=True) >= self.__now_day59_timestamp:
                    self.__log("时间23:59,结束循环,{t}".format(t=int(time.time())))
                    break
        except Exception as eee:
            self.__log("程序异常报错:{eee}".format(eee=eee))

    def get_proxy(self):
        """
        Pick a random proxy from the pool, polling every 2 seconds while the
        pool is empty.
        :return: proxy as ``http://{ip}:{port}``; None if an error occurred
        """
        try:
            while True:
                proxy_val_list = self.__list_proxy_keys()
                if proxy_val_list:
                    proxy_v = self.__proxy_from_key(random.choice(proxy_val_list))
                    self.__log("获取到的代理为:{proxy_v}".format(proxy_v=proxy_v))
                    return proxy_v
                self.__log("暂无代理,等待中")
                time.sleep(2)
        except Exception as e:
            self.__log("从代理池中获取代理:{e}".format(e=e))

    def set_proxy_error(self, proxy_v):
        """
        Flag a proxy as unusable by setting its counter to 999999, so that the
        next maintenance pass deletes it.
        :param proxy_v: proxy as returned by ``get_proxy`` (``http://{ip}:{port}``),
            or a full pool key
        :return:
        """
        try:
            # ``get_proxy`` strips prefix and timestamp, so resolve the value
            # back to the real pool key(s) instead of writing a stray key.
            pattern = "{name}:{proxy}:*".format(name=self.__redisProxyName, proxy=proxy_v)
            targets = self.__list_proxy_keys(pattern)
            if not targets and str(proxy_v).startswith("{}:".format(self.__redisProxyName)):
                # Caller passed a full pool key; keep supporting that.
                targets = [proxy_v]
            if not targets:
                self.__log("代理 {proxy_v} 不在代理池中,无需标记".format(proxy_v=proxy_v))
                return
            for proxy_key in targets:
                self.__redis_pool.set(proxy_key, "999999")
                self.__log("设置不可用的代理 {proxy_v} 为 999999".format(proxy_v=proxy_key))
        except Exception as e:
            self.__log("爬虫手动传入代理:{e}".format(e=e))
183
+
184
+
185
if __name__ == '__main__':
    # Example usage. Connection password and vendor credentials are redacted:
    # load real values from configuration instead of committing them to source.
    # p = ProxyPool(ip="127.0.0.1", port=6379, db=0, user_pass="<redis-password>", XiaoXiangProxyAppKey="<app-key>",
    # XiaoXiangProxyAppSecret="<app-secret>")
    # print(p.get_proxy())
    pass
@@ -8,10 +8,11 @@
8
8
  # --------------------------------------------------------------------------------------------------
9
9
  # 2024/5/13 xiatn V00.01.000 新建
10
10
  # --------------------------------------------------------------------------------------------------
11
+ import base64
11
12
  import hashlib
12
13
 
13
14
 
14
- def get_md5_32(s, is_upper=False):
15
+ def get_md5_32(s: str, is_upper=False):
15
16
  """
16
17
  获取文本的md5值 32位
17
18
  :param s: 文本
@@ -25,7 +26,7 @@ def get_md5_32(s, is_upper=False):
25
26
  return m.hexdigest()
26
27
 
27
28
 
28
- def get_md5_16(s, is_upper=False):
29
+ def get_md5_16(s: str, is_upper=False):
29
30
  """
30
31
  获取文本的md5值 16位
31
32
  :param s: 文本
@@ -85,3 +86,36 @@ def get_file_md5_16(file_path, is_upper=False):
85
86
  """
86
87
  result = get_file_md5_32(file_path, is_upper)
87
88
  return result[8:24]
89
+
90
+
91
def get_sha1(s: str, is_upper=False):
    """
    Return the SHA-1 hex digest of a text.
    :param s: text to hash; encoded as UTF-8 before hashing
              (``bytes`` input is also accepted and hashed as-is)
    :param is_upper: return the digest in upper case when True (default False)
    :return: 40-character hexadecimal digest string
    """
    # Explicit UTF-8 keeps this in line with get_base64_encode; bytes pass through unchanged.
    data = s if isinstance(s, (bytes, bytearray)) else s.encode("utf-8")
    sha1_hash = hashlib.sha1(data).hexdigest()
    return sha1_hash.upper() if is_upper else sha1_hash
103
+
104
+
105
def get_base64_encode(s: str):
    """
    Base64-encode a text.
    :param s: text to encode; converted to UTF-8 bytes first
              (``bytes`` input is also accepted and encoded as-is)
    :return: Base64 representation as a ``str``
    """
    data_bytes = s if isinstance(s, (bytes, bytearray)) else s.encode('utf-8')
    # Base64 output is pure ASCII, so decoding it back to str cannot fail.
    return base64.b64encode(data_bytes).decode('ascii')
118
+
119
+
120
if __name__ == '__main__':
    # Manual smoke check when the module is run directly: prints the Base64
    # encoding of the empty string.
    print(get_base64_encode(''))
@@ -38,11 +38,11 @@ def get_uuid(version=4, namespace: UUID = uuid.NAMESPACE_DNS, name=""):
38
38
  else:
39
39
  result = uuid.uuid4()
40
40
 
41
- uuid_str = str(result)
41
+ # uuid_str = str(result)
42
42
  # uuid_hex = uuid_obj.hex
43
43
  # uuid_int = uuid_obj.int
44
44
  # uuid_bytes = uuid_obj.bytes
45
- return uuid_str
45
+ return result
46
46
 
47
47
 
48
48
  def get_str_to_json(str_json):
@@ -120,4 +120,7 @@ def get_build_url_with_params(url, params):
120
120
  """
121
121
  encoded_params = urlencode(params)
122
122
  full_url = url + "?" + encoded_params
123
- return full_url
123
+ return full_url
124
+
125
if __name__ == '__main__':
    # Manual smoke check when the module is run directly: prints the result
    # of get_uuid(4).
    print(get_uuid(4))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.0.6
3
+ Version: 1.0.0.0.8
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@163.com
@@ -8,16 +8,17 @@ xtn_tools_pro/db/RedisDB.py,sha256=ep32Yj8AAkUHRshSBhKsdl06UwO7Z-gQJLaezspVRKw,6
8
8
  xtn_tools_pro/db/__init__.py,sha256=Zg91UWS02TO0Ba_0AY56s0oabRy93xLNFkpIIL_6mMM,416
9
9
  xtn_tools_pro/proxy/XiaoXiangProxy.py,sha256=6jzGgN2t2zLPIKbSgN8seixwDLY4IjoZvB26f8yTUME,9848
10
10
  xtn_tools_pro/proxy/__init__.py,sha256=WRwh6s2lruMu5buh0ejo9EK54kWT_VQhCsFGNFAmcyo,418
11
+ xtn_tools_pro/proxy/proxy.py,sha256=No6E1pFY5yx2F4976pXPrLtq-QEVp79KupzcufjSN58,8703
11
12
  xtn_tools_pro/utils/__init__.py,sha256=I1_n_NP23F2lBqlF4EOlnOdLYxM8M4pbn63UhJN1hRE,418
12
- xtn_tools_pro/utils/crypto.py,sha256=1hwpD3SW0PI2fa79-_Xh37ACzeeKFH775GiVTlp5xjA,2379
13
+ xtn_tools_pro/utils/crypto.py,sha256=RZ5AET4udlraACWMeNF-17JiZ2R6Ahb47_j4tjkV7LE,3190
13
14
  xtn_tools_pro/utils/file_utils.py,sha256=4Bmb1ISxqQC-7doPXDyY2o-H0m4upj_-hlLrO36sKJg,1509
14
- xtn_tools_pro/utils/helpers.py,sha256=C6w0Nc8ngpWtdwyKDAACtobnAe-qQNVCRgk5n9iM0o8,3719
15
+ xtn_tools_pro/utils/helpers.py,sha256=VzevgDk0tZAxFmuFlMBIaJ3QyiFxp5Qg-mLPgE83jRI,3773
15
16
  xtn_tools_pro/utils/log.py,sha256=8CRMiY7Q9LWGJLhfL0YfpANBwLets9f2qWfMOe5PpNM,8139
16
17
  xtn_tools_pro/utils/retry.py,sha256=0wjHsR5DBBKpv4naMfxiky8kprrZes4WURIfFQ4H708,1657
17
18
  xtn_tools_pro/utils/sql.py,sha256=EAKzbkZP7Q09j15Gm6o0_uq0qgQmcCQT6EAawbpp4v0,6263
18
19
  xtn_tools_pro/utils/time_utils.py,sha256=TUtzG61PeVYXhaQd6pBrXAdlz7tBispNIRQRcGhE2No,4859
19
- xtn_tools_pro-1.0.0.0.6.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- xtn_tools_pro-1.0.0.0.6.dist-info/METADATA,sha256=1vth1IfbsqTpVsmz24eaHlsUlxX2apH9eUz2gMdRjKs,431
21
- xtn_tools_pro-1.0.0.0.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
22
- xtn_tools_pro-1.0.0.0.6.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
23
- xtn_tools_pro-1.0.0.0.6.dist-info/RECORD,,
20
+ xtn_tools_pro-1.0.0.0.8.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ xtn_tools_pro-1.0.0.0.8.dist-info/METADATA,sha256=CN33o7GbV3bBP0HYYrlBem6vBcoHD6u64GRnlEBENKQ,431
22
+ xtn_tools_pro-1.0.0.0.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
23
+ xtn_tools_pro-1.0.0.0.8.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
24
+ xtn_tools_pro-1.0.0.0.8.dist-info/RECORD,,