xtn-tools-pro 1.0.0.7.1__py3-none-any.whl → 1.0.0.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,9 +34,11 @@ class GoFunTaskV3:
34
34
  restart_time = ini_dict.get('restart_time', 0) # 间隔x秒强制重启
35
35
  restart_time = 30 * 60 if restart_time <= 0 else restart_time # 间隔x秒强制重启时间不传默认60分钟
36
36
  update_proxies_time = ini_dict.get('update_proxies_time', 0) # 间隔x秒更新代理
37
- upload_task_tine = ini_dict.get('upload_task_tine', 0) # 回写间隔
38
- download_not_task_tine = ini_dict.get('download_not_task_tine', 0) # 当遇到下载任务接口返回空任务时,间隔x秒再继续请求,默认2秒
39
- download_not_task_tine = 2 if download_not_task_tine <= 0 else download_not_task_tine # 当遇到下载任务接口返回空任务时,间隔x秒再继续请求,默认2秒
37
+ upload_task_time = ini_dict.get('upload_task_time', 0) # 回写间隔
38
+ download_not_task_time = ini_dict.get('download_not_task_time', 0) # 当遇到下载任务接口返回空任务时,间隔x秒再继续请求,默认2秒
39
+ download_not_task_time = 2 if download_not_task_time <= 0 else download_not_task_time # 当遇到下载任务接口返回空任务时,间隔x秒再继续请求,默认2秒
40
+
41
+ # 默认进程数和线程数
40
42
  processes_num = 1 if processes_num <= 0 else processes_num
41
43
  thread_num = 1 if thread_num <= 0 else thread_num
42
44
 
@@ -62,18 +64,18 @@ class GoFunTaskV3:
62
64
  "thread_num": int(thread_num),
63
65
  "restart_time": int(restart_time),
64
66
  "update_proxies_time": int(update_proxies_time),
65
- "upload_task_tine": int(upload_task_tine),
67
+ "upload_task_time": int(upload_task_time),
66
68
  "download_url": download_url, # 获取任务地址
67
69
  "upload_url": upload_url, # 回写任务地址
68
70
  "update_proxy_url": update_proxy_url, # 更新代理地址
69
71
  "external_ip": external_ip,
70
- "download_not_task_tine": download_not_task_tine,
72
+ "download_not_task_time": download_not_task_time,
71
73
  }
72
74
 
73
75
  logger.debug(
74
76
  f"\n无敌框架来咯~~~当前设置配置如下:"
75
77
  f"\n\t功能函数重启间隔:{restart_time};进程数:{processes_num};线程数:{thread_num}"
76
- f"\n\t代理更新间隔:{update_proxies_time};回写间隔{upload_task_tine};\n"
78
+ f"\n\t代理更新间隔:{update_proxies_time};回写间隔{upload_task_time};\n"
77
79
  )
78
80
 
79
81
  # 共享任务队列
@@ -173,6 +175,7 @@ class GoFunTaskV3:
173
175
  download_url = ini_info["download_url"]
174
176
  auto = ini_info["auto"]
175
177
  task = ini_info["task"]
178
+ download_not_task_time = ini_info["download_not_task_time"]
176
179
  headers = {"Authorization": auto}
177
180
  params = {"taskType": task}
178
181
 
@@ -182,7 +185,7 @@ class GoFunTaskV3:
182
185
  qsize = download_queue.qsize()
183
186
  logger.info(f"当前队列剩余任务数:{qsize}")
184
187
  if qsize >= 10:
185
- time.sleep(2)
188
+ time.sleep(download_not_task_time)
186
189
  continue
187
190
  resp = requests.get(download_url, headers=headers, params=params, timeout=5)
188
191
  json_data = resp.json()
@@ -218,7 +221,7 @@ class GoFunTaskV3:
218
221
  external_ip = ini_info["external_ip"]
219
222
  auto = ini_info["auto"]
220
223
  task = ini_info["task"]
221
- upload_task_tine = ini_info["upload_task_tine"]
224
+ upload_task_time = ini_info["upload_task_time"]
222
225
  headers = {"Authorization": auto}
223
226
  params = {"taskType": task}
224
227
  while True:
@@ -245,7 +248,8 @@ class GoFunTaskV3:
245
248
  }
246
249
  result_list.append(task_item)
247
250
  except Exception as e:
248
- logger.critical(f"循环全部获取队列的任务{e}")
251
+ pass
252
+ # logger.critical(f"循环全部获取队列的任务{e}")
249
253
 
250
254
  # 回写任务
251
255
  data = {"result": result_list, "remoteAddr": external_ip}
@@ -260,10 +264,10 @@ class GoFunTaskV3:
260
264
  logger.critical(f"回写异常,{len(result_list)},{e}")
261
265
  time.sleep(2)
262
266
 
263
- if not upload_task_tine:
267
+ if not upload_task_time:
264
268
  # 一直执行 不退出
265
269
  continue
266
- time.sleep(upload_task_tine)
270
+ time.sleep(upload_task_time)
267
271
 
268
272
  def __update_proxy(self, proxies_dict, manager_info, ini_info, logger):
269
273
  """
@@ -8,8 +8,11 @@
8
8
  # --------------------------------------------------------------------------------------------------
9
9
  # 2024/5/13 xiatn V00.01.000 新建
10
10
  # --------------------------------------------------------------------------------------------------
11
+ import jwt
11
12
  import base64
12
13
  import hashlib
14
+ import secrets
15
+ import datetime
13
16
 
14
17
 
15
18
  def get_md5_32(s: str, is_upper=False):
@@ -117,5 +120,33 @@ def get_base64_encode(s: str):
117
120
  return encoded_string
118
121
 
119
122
 
123
def generate_bearer_token(secret_key, expiration=0, algorithm='HS256', **kwargs):
    """
    Generate a signed JWT intended for use as a Bearer token.

    Only the encoded token is returned; the ``Bearer `` scheme prefix is not
    added here.

    :param secret_key: key passed to ``jwt.encode`` to sign the token
    :param expiration: value stored in the ``exp`` claim (expiry timestamp);
                       a falsy value such as 0 leaves the claim out, so the
                       token does not expire
    :param algorithm: signing algorithm passed to ``jwt.encode``
                      (default HS256; e.g. HS256, HS384, HS512)
    :param kwargs: extra custom claims copied into the token payload
    :return: the encoded token as ``str``
    """
    payload = dict(kwargs)
    if expiration:
        # A non-zero expiration takes precedence over an ``exp`` given in kwargs.
        payload['exp'] = expiration

    token = jwt.encode(payload, secret_key, algorithm=algorithm)
    # PyJWT 1.x returns bytes while 2.x returns str; normalise so callers
    # always receive text regardless of the installed version.
    if isinstance(token, bytes):
        token = token.decode('utf-8')
    return token
140
+
141
+
142
def generate_secret_key(length=32):
    """
    Generate a random secret key as a hexadecimal string.

    :param length: number of random bytes to draw (default 32). Each byte is
                   rendered as two hex digits, so the returned string has
                   ``2 * length`` characters. ``None`` defers to the default
                   of ``secrets.token_hex``.
    :return: the generated secret key (hex string)
    :raises ValueError: if ``length`` is zero or negative, because that would
                        produce an empty (useless) key
    """
    if length is not None and length <= 0:
        raise ValueError("length must be a positive number of bytes")
    return secrets.token_hex(length)
149
+
150
+
120
151
  if __name__ == '__main__':
121
152
  print(get_base64_encode(''))
@@ -58,6 +58,28 @@ def check_file_exists(file_path):
58
58
  return False
59
59
 
60
60
 
61
def is_dir(dir_path):
    """
    Tell whether the given path is an existing directory.

    A path that points to a regular file, or to a directory that does not
    exist, yields False.

    :param dir_path: path to check
    :return: True if ``dir_path`` is an existing directory, otherwise False
    """
    return os.path.isdir(dir_path)
72
+
73
+
74
def get_listdir(dir_path):
    """
    List the names of all entries directly inside a directory.

    The result contains every entry name (files and sub-directories alike),
    not full paths, and is not recursive. Names are sorted so the order is
    stable between calls and platforms.

    :param dir_path: directory to list
    :return: sorted list of entry names
    """
    return sorted(os.listdir(dir_path))
81
+
82
+
61
83
  if __name__ == '__main__':
62
84
  pass
63
85
  print(get_file_extension('file/2024-04-19/BOSCH GEX 125-1A/125-1AE砂磨机操作说明书:[1]_jingyan.txt'))
@@ -107,7 +107,8 @@ class Log:
107
107
  # 记录日志文件的路径模板,用于之后创建新的日志文件
108
108
  self.path_template = self.path_template = f"{save_time_log_path}/{{date}}/{name}.log"
109
109
  path = self.path_template.format(date=self.current_date)
110
- mkdirs_dir(self.path_template.format(date=self.current_date))
110
+ if is_write_to_file:
111
+ mkdirs_dir(self.path_template.format(date=self.current_date))
111
112
 
112
113
  # 创建日志格式化器
113
114
  # formatter = logging.Formatter('[%(now_datestr)s] [%(levelname)s] [%(func_name)s] - %(message)s') # 原
@@ -0,0 +1,155 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # 说明:
5
+ # 大文件去重
6
+ # History:
7
+ # Date Author Version Modification
8
+ # --------------------------------------------------------------------------------------------------
9
+ # 2025/3/11 xiatn V00.01.000 新建
10
+ # --------------------------------------------------------------------------------------------------
11
+ import os
12
+ import fnmatch
13
+ import hashlib
14
+ from tqdm import tqdm
15
+ from xtn_tools_pro.utils.log import Log
16
+ from xtn_tools_pro.utils.helpers import get_orderId_random
17
+ from xtn_tools_pro.utils.file_utils import mkdirs_dir, get_file_extension,is_dir,get_listdir
18
+
19
+
20
class PppSetDataObj:
    """
    Deduplicate the lines of large ``.txt`` files by hash-sharding them on disk.

    Each instance works inside its own ``temp_<order id>`` directory created
    under the current working directory; shard files and the result file are
    written there.
    """

    _MAX_SHARDS = 3000  # upper bound on the number of shard files
    _DEFAULT_SHARDS = 1000  # used when the caller passes a non-positive count
    _HUGE_FILE_LINES = 30000000  # inputs this large always use _MAX_SHARDS

    def __init__(self):
        # Create a randomly named temporary working directory.
        self.__order_id = get_orderId_random()
        # The trailing separator is kept on purpose (the original hard-coded a
        # Windows "\\"); joining with "" yields the platform's own separator.
        # NOTE(review): presumably mkdirs_dir needs the trailing separator to
        # treat the path as a directory - confirm against its implementation.
        self.__now_current_working_dir = os.path.join(os.getcwd(), f"temp_{self.__order_id}", "")
        mkdirs_dir(self.__now_current_working_dir)

        self.__logger = Log('set_data', './xxx.log', log_level='DEBUG', is_write_to_console=True,
                            is_write_to_file=False,
                            color=True, mode='a', save_time_log_path='./logs')

    @staticmethod
    def _count_lines(file_path):
        """Return the number of lines in the UTF-8 text file at ``file_path``."""
        with open(file_path, "r", encoding="utf-8") as fp_r:
            return sum(1 for _ in fp_r)

    @classmethod
    def _resolve_shard_count(cls, num_shards, line_count):
        """Clamp the requested shard count to the supported range."""
        if line_count >= cls._HUGE_FILE_LINES or num_shards >= cls._MAX_SHARDS:
            return cls._MAX_SHARDS
        if num_shards <= 0:
            return cls._DEFAULT_SHARDS
        return num_shards

    def set_file_data_air(self, set_file_path, num_shards=1000):
        """
        Deduplicate a single ``.txt`` file ("air" version).

        The input file is only read, never modified. Lines are compared after
        ``strip()``, so lines differing only in surrounding whitespace count
        as duplicates. The unique lines are written to ``000_去重结果.txt``
        inside this instance's temporary directory, grouped by shard and in
        first-seen order within each shard.

        :param set_file_path: path of the file to deduplicate
        :param num_shards: number of temporary shard files; larger inputs
                           benefit from larger values (10, 100, 1000, ...).
                           Values are clamped to at most 3000, and
                           non-positive values fall back to 1000.
        :return: None
        """
        # Local import so the module's import block does not need to change.
        from contextlib import ExitStack

        if get_file_extension(set_file_path) != ".txt":
            self.__logger.critical("文件不合法,只接受.txt文件")
            return
        self.__logger.info("正在读取文件总行数...")

        line_count = self._count_lines(set_file_path)
        self.__logger.info(f"读取文件完成,总行数为:{line_count}")

        num_shards = self._resolve_shard_count(num_shards, line_count)
        shard_path_list = [
            os.path.join(self.__now_current_working_dir, f"{self.__order_id}_shard_{shard_no}.tmp")
            for shard_no in range(num_shards)
        ]

        # Pass 1: route every line to a shard chosen by the MD5 of its
        # stripped text, so identical lines always land in the same shard.
        # NOTE: all shard files are open at once; ExitStack guarantees they
        # are closed even if opening or writing fails part-way through.
        with ExitStack() as stack:
            shard_files = [
                stack.enter_context(open(shard_path, "w", encoding="utf-8"))
                for shard_path in shard_path_list
            ]
            with open(set_file_path, "r", encoding="utf-8") as f_r:
                for line_i in tqdm(f_r, total=line_count, desc="正在去重(1/2)", unit="lines"):
                    line_hash = hashlib.md5(line_i.strip().encode()).hexdigest()
                    shard_files[int(line_hash, 16) % num_shards].write(line_i)

        # Pass 2: deduplicate each shard in memory and append it to the result.
        result_w_path = os.path.join(self.__now_current_working_dir, "000_去重结果.txt")
        with open(result_w_path, "w", encoding="utf-8") as f_w:
            for shard_path in tqdm(shard_path_list, total=len(shard_path_list), desc="正在去重(2/2)", unit="lines"):
                with open(shard_path, "r", encoding="utf-8") as f_r:
                    # dict.fromkeys drops duplicates and keeps first-seen order.
                    unique_lines = dict.fromkeys(line_i.strip() for line_i in f_r)
                if unique_lines:
                    # Skip empty shards: writing "\n" for them would add blank
                    # lines to the result and inflate the final line count.
                    f_w.write("\n".join(unique_lines) + "\n")
                # Delete the temporary shard once its handle is closed.
                os.remove(shard_path)

        line_count = self._count_lines(result_w_path)
        self.__logger.info(f"文件处理完毕,去重后总行数为:{line_count},结果路径:{result_w_path}")

    def set_file_data_pro(self, set_file_dir_path, num_shards=1000):
        """
        Deduplicate every ``.txt`` file in a directory ("pro" version).

        Currently this only validates the directory and logs how many
        ``.txt`` files it contains; no deduplication is performed yet.

        :param set_file_dir_path: directory holding the files to deduplicate
        :param num_shards: number of temporary shard files (currently unused)
        :return: None
        """
        if not is_dir(set_file_dir_path):
            self.__logger.critical("文件夹不存在或不合法")
            return

        self.__logger.info("正在统计文件可去重数量...")
        set_file_path_list = [
            os.path.join(set_file_dir_path, set_file_name)
            for set_file_name in get_listdir(set_file_dir_path)
            if fnmatch.fnmatch(set_file_name, '*.txt')
        ]
        self.__logger.info(f"当前文件夹下可去重文件数量为:{len(set_file_path_list)}")

        # TODO: shard and deduplicate the collected files. The single-file
        # routine cannot simply be called per file, because it always writes
        # to the same result path and would overwrite earlier results.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xtn-tools-pro
3
- Version: 1.0.0.7.1
3
+ Version: 1.0.0.7.3
4
4
  Summary: xtn 开发工具
5
5
  Author: xtn
6
6
  Author-email: czw011122@gmail.com
@@ -15,5 +15,7 @@ Requires-Dist: dbutils
15
15
  Requires-Dist: colorlog
16
16
  Requires-Dist: requests
17
17
  Requires-Dist: Faker
18
+ Requires-Dist: PyJWT
19
+ Requires-Dist: tqdm
18
20
 
19
21
  xtnkk-tools
@@ -12,17 +12,18 @@ xtn_tools_pro/proxy/proxy.py,sha256=No6E1pFY5yx2F4976pXPrLtq-QEVp79KupzcufjSN58,
12
12
  xtn_tools_pro/task_pro/__init__.py,sha256=nK3U47hWwE1H875ieEToH9r-jzXHS-PXk8cDstOvRE8,418
13
13
  xtn_tools_pro/task_pro/go_fun.py,sha256=hWEt2uJ9FCvJH7PhVZttS-11A7J6zbRKwX7c5YLYQag,19144
14
14
  xtn_tools_pro/task_pro/go_fun_v2.py,sha256=SgcXgtEBGSVL1V2LyqO0z8Md2H8JZxucYrLLIwqtiLM,18489
15
- xtn_tools_pro/task_pro/go_fun_v3.py,sha256=wPd7LFQlDhLH-5SIlq_1k8og5oW6KWeY6g3QvmZSFSU,16013
15
+ xtn_tools_pro/task_pro/go_fun_v3.py,sha256=J8jJLRckbbk4AaHsxUSQ_NKTCIcf71hcPFTDKtJzYD4,16168
16
16
  xtn_tools_pro/utils/__init__.py,sha256=I1_n_NP23F2lBqlF4EOlnOdLYxM8M4pbn63UhJN1hRE,418
17
- xtn_tools_pro/utils/crypto.py,sha256=RZ5AET4udlraACWMeNF-17JiZ2R6Ahb47_j4tjkV7LE,3190
18
- xtn_tools_pro/utils/file_utils.py,sha256=VfdIxog4s1UW5NpKkCvQsUs9qHjLoNCnstZbnftkT4w,2046
17
+ xtn_tools_pro/utils/crypto.py,sha256=oyzFqWum_oimUtzhfVCELQhdMjxDbLu-nOWfcNmazcc,4087
18
+ xtn_tools_pro/utils/file_utils.py,sha256=obaBP7CaBCsXxzqGeWzV2l0yw7vicgKOaXzmpMV8ips,2567
19
19
  xtn_tools_pro/utils/helpers.py,sha256=H-a3gnahIah3kJqyKzzKlPWtVQYcFlJncz2rAfBqIiw,4444
20
- xtn_tools_pro/utils/log.py,sha256=m0WtTWkkwtrki1ftP8vCDR8bMfK2gcfUGx5J2x2IlLQ,10138
20
+ xtn_tools_pro/utils/log.py,sha256=pAye_sXH-y-8v2vNf-OwOTk2Exkjl6y7V_y_Hpk_d0s,10176
21
21
  xtn_tools_pro/utils/retry.py,sha256=0wjHsR5DBBKpv4naMfxiky8kprrZes4WURIfFQ4H708,1657
22
+ xtn_tools_pro/utils/set_data.py,sha256=vNhE_jCG-3p6KFnY_jbQ0vQ7EV1gB9D4Jb0S5ZoD4IM,7529
22
23
  xtn_tools_pro/utils/sql.py,sha256=EAKzbkZP7Q09j15Gm6o0_uq0qgQmcCQT6EAawbpp4v0,6263
23
24
  xtn_tools_pro/utils/time_utils.py,sha256=TUtzG61PeVYXhaQd6pBrXAdlz7tBispNIRQRcGhE2No,4859
24
- xtn_tools_pro-1.0.0.7.1.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- xtn_tools_pro-1.0.0.7.1.dist-info/METADATA,sha256=Yl6AU8kAvrr4nKEBgCHpLCJbOXtZ7UksCEF7XU-Gtuo,455
26
- xtn_tools_pro-1.0.0.7.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
27
- xtn_tools_pro-1.0.0.7.1.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
28
- xtn_tools_pro-1.0.0.7.1.dist-info/RECORD,,
25
+ xtn_tools_pro-1.0.0.7.3.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ xtn_tools_pro-1.0.0.7.3.dist-info/METADATA,sha256=e7BnO3AKRCicejlwWdIVafht9964e0EJmA5ORQeRPa0,498
27
+ xtn_tools_pro-1.0.0.7.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
28
+ xtn_tools_pro-1.0.0.7.3.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
29
+ xtn_tools_pro-1.0.0.7.3.dist-info/RECORD,,