xtn-tools-pro 1.0.1.0.4__py3-none-any.whl → 1.0.1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xtn_tools_pro/utils/set_data.py +138 -6
- xtn_tools_pro/utils/shell.py +62 -0
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/METADATA +3 -2
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/RECORD +8 -7
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/LICENSE +0 -0
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/WHEEL +0 -0
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/entry_points.txt +0 -0
- {xtn_tools_pro-1.0.1.0.4.dist-info → xtn_tools_pro-1.0.1.0.6.dist-info}/top_level.txt +0 -0
xtn_tools_pro/utils/set_data.py
CHANGED
@@ -30,15 +30,28 @@ class PSetDataObj:
|
|
30
30
|
is_write_to_file=False,
|
31
31
|
color=True, mode='a', save_time_log_path='./logs')
|
32
32
|
|
33
|
+
def get_file_line_cnt(self, txt_file_path):
    """
    Count the lines of a text file.

    :param txt_file_path: path of the file to count
    :return: number of lines in the file; 0 when get_file_extension()
        does not report ".txt" for the path
    """
    is_txt = get_file_extension(txt_file_path) == ".txt"
    if not is_txt:
        return 0

    total = 0
    with open(txt_file_path, "r", encoding="utf-8") as reader:
        # Stream the file so that it never has to fit in memory.
        for _line in reader:
            total += 1
    return total
|
45
|
+
|
33
46
|
def set_file_data_air(self, set_file_path, num_shards=1000):
|
34
47
|
"""
|
35
|
-
|
48
|
+
对单个文件去重,air版本,不对文件做任何修改,去重任何数据
|
36
49
|
:param set_file_path:单文件路径
|
37
|
-
:param num_shards
|
50
|
+
:param num_shards:临时文件切片,推荐:数据越大值越大 1000
|
38
51
|
:return:
|
39
52
|
"""
|
40
53
|
if get_file_extension(set_file_path) != ".txt":
|
41
|
-
self.__logger.critical("
|
54
|
+
self.__logger.critical("文件不合法,只接受.txt文件")
|
42
55
|
return
|
43
56
|
self.__logger.info("正在读取文件总行数...")
|
44
57
|
|
@@ -90,9 +103,9 @@ class PSetDataObj:
|
|
90
103
|
|
91
104
|
def set_file_data_pro(self, set_file_dir_path, num_shards=3000):
|
92
105
|
"""
|
93
|
-
对文件夹下的所有txt
|
106
|
+
对文件夹下的所有txt文件去重,pro版本,不对文件做任何修改,去重任何数据
|
94
107
|
:param set_file_dir_path:文件夹路径
|
95
|
-
:param num_shards
|
108
|
+
:param num_shards:临时文件切片,推荐:数据越大值越大 1000
|
96
109
|
:return:
|
97
110
|
"""
|
98
111
|
if not is_dir(set_file_dir_path):
|
@@ -154,6 +167,111 @@ class PSetDataObj:
|
|
154
167
|
line_count = sum(1 for _ in fp_r)
|
155
168
|
self.__logger.info(f"文件处理完毕,去重后总行数为:{line_count},结果路径:{result_w_path}")
|
156
169
|
|
170
|
+
def set_file_data_max(self, a_set_file_path, b_set_file_path):
    """
    Remove from file a every line that also occurs in file b (a minus b).

    Both files are first split into ``num_shards`` pairs of temporary shard
    files in the working directory. Each pair of shards is then loaded, the
    b lines are subtracted from the a lines and the remainder is appended to
    the result file. Lines are compared after ``strip()``; duplicates inside
    a are collapsed and the order of the output lines is not defined.

    :param a_set_file_path: path of file a (must be .txt)
    :param b_set_file_path: path of file b (must be .txt)
    :return: None; the result path and its line count are reported via the logger
    """
    if get_file_extension(a_set_file_path) != ".txt" or get_file_extension(b_set_file_path) != ".txt":
        self.__logger.critical("文件不合法,只接受.txt文件")
        return
    self.__logger.info("正在读取a、b文件总行数...")
    a_line_count = self.get_file_line_cnt(a_set_file_path)
    self.__logger.info(f"读取a文件完成,总行数为:{a_line_count},路径:{a_set_file_path}")
    b_line_count = self.get_file_line_cnt(b_set_file_path)
    self.__logger.info(f"读取b文件完成,总行数为:{b_line_count},路径:{b_set_file_path}")

    num_shards = 50000
    buffer_size = 2500  # lines buffered per shard: larger means less I/O but more memory

    # Create (or truncate) both temporary files of every shard up front, so
    # the merge step can open each pair even if no line was routed to it.
    ab_shard_path_dict = {}
    for shard_id in range(num_shards):
        a_shard_path = os.path.join(self.__now_current_working_dir, f'{self.__order_id}_shard_{shard_id}_a.tmp')
        b_shard_path = os.path.join(self.__now_current_working_dir, f'{self.__order_id}_shard_{shard_id}_b.tmp')
        ab_shard_path_dict[shard_id] = {
            "a": a_shard_path,
            "b": b_shard_path,
        }
        for shard_path in (a_shard_path, b_shard_path):
            with open(shard_path, "w", encoding="utf-8"):
                pass

    self.__set_file_data_max_writelinesBuffers(a_set_file_path, a_line_count, num_shards, buffer_size, "a")
    self.__set_file_data_max_writelinesBuffers(b_set_file_path, b_line_count, num_shards, buffer_size, "b")

    result_w_path = os.path.join(self.__now_current_working_dir, "000_去重结果.txt")
    result_tqdm_f = tqdm(ab_shard_path_dict, total=len(ab_shard_path_dict),
                         desc=f"正在合并文件", bar_format="{l_bar}{bar}|{n}/{total} [预计完成时间:{remaining}]")

    with open(result_w_path, "w", encoding="utf-8") as result_f_w:
        for shard_id in result_tqdm_f:
            a_shard_path = ab_shard_path_dict[shard_id]["a"]
            b_shard_path = ab_shard_path_dict[shard_id]["b"]
            with open(a_shard_path, "r", encoding="utf-8") as a_f_r:
                a_seen = {line_i.strip() for line_i in a_f_r}
            with open(b_shard_path, "r", encoding="utf-8") as b_f_r:
                b_seen = {line_i.strip() for line_i in b_f_r}

            remaining_lines = a_seen - b_seen
            # Only write when something survived: an unconditional
            # write("" + "\n") would add one blank line to the result for
            # every shard without surviving lines.
            if remaining_lines:
                result_f_w.write("\n".join(remaining_lines) + "\n")
            os.remove(a_shard_path)  # delete the temporary files
            os.remove(b_shard_path)

    with open(result_w_path, "r", encoding="utf-8") as fp_r:
        line_count = sum(1 for _ in fp_r)
    self.__logger.info(f"文件处理完毕,去重后总行数为:{line_count},结果路径:{result_w_path}")
|
236
|
+
|
237
|
+
def __set_file_data_max_writelinesBuffers(self, set_file_path, file_line_count, num_shards, buffer_size,
                                          tmp_file_suffix):
    """
    Helper of set_file_data_max: spread the lines of one file over shard files.

    The shard of a line is the MD5 of the stripped line modulo ``num_shards``.
    Lines are collected per shard in memory and appended to the shard's
    temporary file whenever ``buffer_size`` lines have piled up.

    :param set_file_path: file whose lines are distributed
    :param file_line_count: number of lines of that file (progress-bar total)
    :param num_shards: number of shard files
    :param buffer_size: lines buffered per shard before they are written out
    :param tmp_file_suffix: suffix used in the shard file name and the progress text
    :return: None
    """
    pending = {}
    for index in range(num_shards):
        pending[index] = []

    def flush(target_id):
        # Append the buffered lines of one shard to its temporary file and empty the buffer.
        shard_file = f"{os.path.join(self.__now_current_working_dir, f'{self.__order_id}_shard_{target_id}_{tmp_file_suffix}.tmp')}"
        with open(shard_file, "a", encoding="utf-8") as shard_fw:
            shard_fw.writelines(pending[target_id])
        pending[target_id].clear()

    with open(set_file_path, "r", encoding="utf-8") as source_f:
        progress = tqdm(source_f,
                        total=file_line_count,
                        desc=f"正在去重{tmp_file_suffix}文件",
                        bar_format="{l_bar}{bar}|{n}/{total} [预计完成时间:{remaining}]")
        for raw_line in progress:
            # The hash is taken from the stripped text, the buffer keeps the raw line.
            digest = hashlib.md5(raw_line.strip().encode()).hexdigest()
            target = int(digest, 16) % num_shards
            bucket = pending[target]
            bucket.append(raw_line)

            # Write the shard out as soon as its buffer is full.
            if len(bucket) >= buffer_size:
                flush(target)

    # Final pass: write whatever is still buffered.
    for target, bucket in pending.items():
        if bucket:
            flush(target)
|
274
|
+
|
157
275
|
def merging_data(self, file_dir_path, merging_new_file_name="合并"):
|
158
276
|
"""
|
159
277
|
传入一个文件夹,合并这个文件夹下所有.txt的数据
|
@@ -190,8 +308,16 @@ class PSetDataObj:
|
|
190
308
|
f_w.write(line + "\n")
|
191
309
|
|
192
310
|
def split_data(self, file_path, split_new_file_name="分割", file_index=1, file_max_line=1000000):
|
311
|
+
"""
|
312
|
+
传入一个txt文件,按 file_max_line 分割
|
313
|
+
:param file_path:
|
314
|
+
:param split_new_file_name:
|
315
|
+
:param file_index:
|
316
|
+
:param file_max_line:
|
317
|
+
:return:
|
318
|
+
"""
|
193
319
|
if get_file_extension(file_path) != ".txt":
|
194
|
-
self.__logger.critical("
|
320
|
+
self.__logger.critical("文件不合法,只接受.txt文件")
|
195
321
|
return
|
196
322
|
self.__logger.info("正在读取文件总行数...")
|
197
323
|
|
@@ -217,5 +343,11 @@ class PSetDataObj:
|
|
217
343
|
self.__list_to_write_file(result_w_path, temp_line_list)
|
218
344
|
|
219
345
|
def __list_to_write_file(self, file_w_path, data_list):
    """
    Overwrite a file with the items of a list, joined by newlines.

    :param file_w_path: path of the file to (over)write
    :param data_list: strings to write; no newline is added after the last item
    :return: None
    """
    with open(file_w_path, mode="w", encoding="utf-8") as out_f:
        payload = "\n".join(data_list)
        out_f.write(payload)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# 说明:
|
5
|
+
# 服务器批量操作
|
6
|
+
# History:
|
7
|
+
# Date Author Version Modification
|
8
|
+
# --------------------------------------------------------------------------------------------------
|
9
|
+
# 2025/4/2 xiatn V00.01.000 新建
|
10
|
+
# --------------------------------------------------------------------------------------------------
|
11
|
+
from fabric import Connection, Config
|
12
|
+
from xtn_tools_pro.utils.log import Log
|
13
|
+
|
14
|
+
|
15
|
+
class ShellPro:
    """Batch operations (run commands, upload files) on a list of servers via fabric."""

    def __init__(self, server_info_list):
        """
        Create a fabric ``Connection`` for every server entry.

        :param server_info_list: list of dicts, each providing the keys "ip",
            "pwd" and "tips"; every dict is extended in place with a "conn"
            key that holds the Connection object
        """
        self.server_info_list = server_info_list
        self.__logger = Log('shell', './xxx.log', log_level='DEBUG', is_write_to_console=True,
                            is_write_to_file=False,
                            color=True, mode='a', save_time_log_path='./logs')

        for server_item in self.server_info_list:
            ip, pwd, tips = server_item["ip"], server_item["pwd"], server_item["tips"]
            self.__logger.info(f"{tips} 正在连接...")
            # The same password is used for the SSH login and for sudo.
            config = Config(overrides={'sudo': {'password': pwd}})
            conn = Connection(
                host=ip,
                user="root",  # change the user name if needed
                connect_kwargs={"password": pwd},
                config=config
            )
            server_item["conn"] = conn
            # NOTE(review): fabric presumably opens the connection lazily on
            # first use, so this message may not prove the host is reachable
            # - confirm.
            self.__logger.info(f"{tips} 连接成功!!!")

    def run_shell(self, conn, cmd, warn=False):
        """
        Run a command on the given connection.

        :param conn: connection object, e.g. the "conn" value of a server entry
        :param cmd: command line to execute
        :param warn: forwarded to ``conn.run`` as its ``warn`` argument
        :return: whatever ``conn.run`` returns
        """
        return conn.run(cmd, warn=warn)

    def update_file(self, LOCAL_FILE, REMOTE_FILE):
        """
        Upload a local file to every server, overwriting the remote file.

        :param LOCAL_FILE: local file path
        :param REMOTE_FILE: remote file path
        :return: None
        """
        for server_item in self.server_info_list:
            # __init__ stores the connection under "conn"; the key
            # "server_item" is never set and raised KeyError.
            conn = server_item["conn"]
            conn.put(LOCAL_FILE, REMOTE_FILE)
|
56
|
+
|
57
|
+
|
58
|
+
if __name__ == '__main__':
    # Manual demo: placeholder credentials for a single server.
    demo_server = {"ip": "xxx.xxx.xx.xxx", "pwd": "123456", "tips": "服务器_01"}
    server_info_list = [demo_server]
    sh = ShellPro(server_info_list=server_info_list)
|
@@ -1,11 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: xtn-tools-pro
|
3
|
-
Version: 1.0.1.0.4
|
3
|
+
Version: 1.0.1.0.6
|
4
4
|
Summary: xtn 开发工具
|
5
5
|
Author: xtn
|
6
6
|
Author-email: czw011122@gmail.com
|
7
7
|
Classifier: Programming Language :: Python :: 3
|
8
|
-
Requires-Python: >=3
|
8
|
+
Requires-Python: >=3.7
|
9
9
|
Description-Content-Type: text/markdown
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Dist: pymongo
|
@@ -17,5 +17,6 @@ Requires-Dist: requests
|
|
17
17
|
Requires-Dist: Faker
|
18
18
|
Requires-Dist: PyJWT
|
19
19
|
Requires-Dist: tqdm
|
20
|
+
Requires-Dist: fabric
|
20
21
|
|
21
22
|
xtnkk-tools
|
@@ -58,12 +58,13 @@ xtn_tools_pro/utils/file_utils.py,sha256=obaBP7CaBCsXxzqGeWzV2l0yw7vicgKOaXzmpMV
|
|
58
58
|
xtn_tools_pro/utils/helpers.py,sha256=OmPRxB6vDN1S7OhxQWgoOXnxs2YPorwSQKBVzYOiKoA,4928
|
59
59
|
xtn_tools_pro/utils/log.py,sha256=mf5huJDA8xVxxFWPG_tl_vOsAA2_ywGDFycYSGHIDCo,10202
|
60
60
|
xtn_tools_pro/utils/retry.py,sha256=0wjHsR5DBBKpv4naMfxiky8kprrZes4WURIfFQ4H708,1657
|
61
|
-
xtn_tools_pro/utils/set_data.py,sha256=
|
61
|
+
xtn_tools_pro/utils/set_data.py,sha256=BzCLbEDO83csDHBey8kP7SoE6kx6EjqE7OqRBY7k82s,17608
|
62
|
+
xtn_tools_pro/utils/shell.py,sha256=CD0eXlCH_fZ9ylGo_H948aVe04Hd9oAHLYIGo3I71Bg,2300
|
62
63
|
xtn_tools_pro/utils/sql.py,sha256=EAKzbkZP7Q09j15Gm6o0_uq0qgQmcCQT6EAawbpp4v0,6263
|
63
64
|
xtn_tools_pro/utils/time_utils.py,sha256=TUtzG61PeVYXhaQd6pBrXAdlz7tBispNIRQRcGhE2No,4859
|
64
|
-
xtn_tools_pro-1.0.1.0.
|
65
|
-
xtn_tools_pro-1.0.1.0.
|
66
|
-
xtn_tools_pro-1.0.1.0.
|
67
|
-
xtn_tools_pro-1.0.1.0.
|
68
|
-
xtn_tools_pro-1.0.1.0.
|
69
|
-
xtn_tools_pro-1.0.1.0.
|
65
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
66
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/METADATA,sha256=5p96ZExOVfN5YFQh9p8zY4PQc-f5Fr_ImJWKC5BkN9Q,523
|
67
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
68
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/entry_points.txt,sha256=t8CtXWOgw7nRDW3XNlZh8MT_P4l8EzsFnyWi5b3ovrc,68
|
69
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/top_level.txt,sha256=jyB3FLDEr8zE1U7wHczTgIbvUpALhR-ULF7RVEO7O2U,14
|
70
|
+
xtn_tools_pro-1.0.1.0.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|