tfduck-bsd 0.19.0__tar.gz → 0.19.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tfduck-bsd might be problematic; see the registry's advisory page for details.

Files changed (50)
  1. {tfduck-bsd-0.19.0/tfduck_bsd.egg-info → tfduck-bsd-0.19.1}/PKG-INFO +1 -1
  2. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/setup.py +1 -1
  3. tfduck-bsd-0.19.1/tfduck/__init__.py +1 -0
  4. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/s3/s3oper.py +172 -71
  5. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1/tfduck_bsd.egg-info}/PKG-INFO +1 -1
  6. tfduck-bsd-0.19.0/tfduck/__init__.py +0 -1
  7. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/LICENSE +0 -0
  8. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/README.md +0 -0
  9. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/bin/tfduck +0 -0
  10. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/setup.cfg +0 -0
  11. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/__init__.py +0 -0
  12. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/config/__init__.py +0 -0
  13. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/config/bdpmanager.py +0 -0
  14. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/config/table_config.py +0 -0
  15. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/example.py +0 -0
  16. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/opends/__init__.py +0 -0
  17. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/opends/opends.py +0 -0
  18. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/bdp_sdk_py/opends/sdk.py +0 -0
  19. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/common/__init__.py +0 -0
  20. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/common/defines.py +0 -0
  21. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/common/defines_clean.py +0 -0
  22. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/common/extendEncoder.py +0 -0
  23. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/main.py +0 -0
  24. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/oss/__init__.py +0 -0
  25. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/oss/oss.py +0 -0
  26. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/pyspark_k8s/__init__.py +0 -0
  27. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/pyspark_k8s/k8s_manage.py +0 -0
  28. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/pyspark_k8s/spark_manage.py +0 -0
  29. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/s3/__init__.py +0 -0
  30. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/sagemaker/__init__.py +0 -0
  31. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/sagemaker/saoper.py +0 -0
  32. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/serverless_k8s/__init__.py +0 -0
  33. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/serverless_k8s/k8s_manage.py +0 -0
  34. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/serverless_k8s/k8s_task.py +0 -0
  35. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/__init__.py +0 -0
  36. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/base_tga.py +0 -0
  37. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/predict_sql_ltv.py +0 -0
  38. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/predict_sql_retain.py +0 -0
  39. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/predict_sql_yh.py +0 -0
  40. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/tga.py +0 -0
  41. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/tga_test.py +0 -0
  42. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/train_sql_ltv.py +0 -0
  43. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/train_sql_retain.py +0 -0
  44. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/tga/train_sql_yh.py +0 -0
  45. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/thinkdata/__init__.py +0 -0
  46. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck/thinkdata/query.py +0 -0
  47. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck_bsd.egg-info/SOURCES.txt +0 -0
  48. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck_bsd.egg-info/dependency_links.txt +0 -0
  49. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck_bsd.egg-info/requires.txt +0 -0
  50. {tfduck-bsd-0.19.0 → tfduck-bsd-0.19.1}/tfduck_bsd.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.19.0
3
+ Version: 0.19.1
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -8,7 +8,7 @@ with open("README.md", "r") as fh:
8
8
 
9
9
  setuptools.setup(
10
10
  name="tfduck-bsd",
11
- version="0.19.0",
11
+ version="0.19.1",
12
12
  author="yuanxiao",
13
13
  author_email="yuan6785@163.com",
14
14
  description="A small example package",
@@ -0,0 +1 @@
1
+ __version__="0.19.1"
@@ -2,6 +2,7 @@
2
2
  s3的公共操作
3
3
  版本号见setup.py
4
4
  """
5
+
5
6
  # coding=utf-8
6
7
  import boto3
7
8
  import time
@@ -14,7 +15,13 @@ import gzip
14
15
  import pprint
15
16
  from tfduck.common.defines import BMOBJ, Et
16
17
  from botocore.exceptions import ClientError
17
- from concurrent.futures import ThreadPoolExecutor, as_completed, wait, ALL_COMPLETED, FIRST_COMPLETED
18
+ from concurrent.futures import (
19
+ ThreadPoolExecutor,
20
+ as_completed,
21
+ wait,
22
+ ALL_COMPLETED,
23
+ FIRST_COMPLETED,
24
+ )
18
25
  from io import StringIO, BytesIO
19
26
  from botocore.client import Config
20
27
 
@@ -24,7 +31,9 @@ class S3Oper(object):
24
31
  @des: S3的公共操作, 上传文件
25
32
  """
26
33
 
27
- def __init__(self, ctx={}, ak="", sk="", bucket="", region_name="", print_files_info=False):
34
+ def __init__(
35
+ self, ctx={}, ak="", sk="", bucket="", region_name="", print_files_info=False
36
+ ):
28
37
  """
29
38
  @des: 初始化
30
39
  """
@@ -44,7 +53,6 @@ class S3Oper(object):
44
53
  初始化client
45
54
  """
46
55
  self.set_s3_client()
47
-
48
56
 
49
57
  def get_s3_config(self):
50
58
  """
@@ -52,11 +60,14 @@ class S3Oper(object):
52
60
  """
53
61
  # connect_timeout 和 read_timeout默认值都是60秒
54
62
  # max_pool_connections 最大的连接池,默认10
55
- config = Config(connect_timeout=60,
56
- read_timeout=600,
57
- retries={'max_attempts': 0}, # 将默认重试次数设为0,不重试
58
- # 如果是多线程共享client, upload_foler的max_workers必须比这个值小;  目前是每个线程一个client,所以这里不受影响
59
- max_pool_connections=10)
63
+ config = Config(
64
+ connect_timeout=60,
65
+ read_timeout=600,
66
+ retries={"max_attempts": 0}, # 将默认重试次数设为0,不重试
67
+ # retries={"max_attempts": 5, "mode": "standard"}, # 将默认重试次数设为5
68
+ # 如果是多线程共享client, upload_foler的max_workers必须比这个值小;  但是目前_upload_i是每个线程一个client,所以这里就算超过也不受影响
69
+ max_pool_connections=10,
70
+ )
60
71
  # s3 = boto3.client('s3', config=config)
61
72
  return config
62
73
 
@@ -67,7 +78,7 @@ class S3Oper(object):
67
78
  s3_session = boto3.Session(
68
79
  aws_access_key_id=self.access_key,
69
80
  aws_secret_access_key=self.secret_key,
70
- region_name=self.region_name
81
+ region_name=self.region_name,
71
82
  )
72
83
  return s3_session
73
84
 
@@ -77,7 +88,7 @@ class S3Oper(object):
77
88
  """
78
89
  s3_session = self.get_s3_session()
79
90
  config = self.get_s3_config()
80
- s3_resource = s3_session.resource('s3')
91
+ s3_resource = s3_session.resource("s3")
81
92
  # s3_resource = s3_session.resource('s3', config=config)
82
93
  return s3_resource
83
94
 
@@ -143,16 +154,16 @@ class S3Oper(object):
143
154
  可以超过1000
144
155
  """
145
156
  paginator = client.get_paginator(
146
- 'list_objects_v2') # MaxKeys的最大值为1000, 不能超过1000,
157
+ "list_objects_v2"
158
+ ) # MaxKeys的最大值为1000, 不能超过1000,
147
159
  # 所以用分页的方式,这样就可以通过分很多页超过1000
148
- pages = paginator.paginate(
149
- Bucket=self.bucket, Prefix=remote_path, MaxKeys=1000)
160
+ pages = paginator.paginate(Bucket=self.bucket, Prefix=remote_path, MaxKeys=1000)
150
161
  remote_path_parents_count = len(pathlib.Path(remote_path).parents)
151
162
  for page in pages:
152
- for obj in page['Contents']:
153
- key = obj['Key']
163
+ for obj in page["Contents"]:
164
+ key = obj["Key"]
154
165
  if not isrm:
155
- if len(pathlib.Path(key).parents)-1 == remote_path_parents_count:
166
+ if len(pathlib.Path(key).parents) - 1 == remote_path_parents_count:
156
167
  file_list.append(key)
157
168
  else:
158
169
  file_list.append(key)
@@ -166,23 +177,96 @@ class S3Oper(object):
166
177
  s3_client = self.get_s3_client()
167
178
  try:
168
179
  resp = s3_client.list_objects(
169
- Bucket=self.bucket, Delimiter='/', Prefix=remote_path+'/')
180
+ Bucket=self.bucket, Delimiter="/", Prefix=remote_path + "/"
181
+ )
170
182
  return [d.get("Prefix") for d in resp.get("CommonPrefixes")]
171
183
  except ClientError as e:
172
184
  BMOBJ.clog(self.ctx, "get s3 folders error:", e)
173
185
  return []
174
186
 
175
- def upload_file(self, local_path, remote_path):
187
+ # def upload_file(self, local_path, remote_path):
188
+ # """
189
+ # @des: 将本地文件上传到s3
190
+ # """
191
+ # s3_client = self.get_s3_client()
192
+ # try:
193
+ # _ = s3_client.upload_file(local_path, self.bucket, remote_path)
194
+ # except ClientError as e:
195
+ # BMOBJ.clog(self.ctx, "upload s3 file error:", e)
196
+ # return False
197
+ # return True
198
+
199
+ def upload_file(self, local_path, remote_path, max_attempts=5, initial_delay=1):
176
200
  """
177
- @des: 将本地文件上传到s3
201
+ @des: 将本地文件上传到s3,增加了健壮的重试逻辑。
202
+ @param max_attempts: 最大尝试次数(包括第一次)。
203
+ @param initial_delay: 初始重试等待时间(秒)。
178
204
  """
179
205
  s3_client = self.get_s3_client()
180
- try:
181
- _ = s3_client.upload_file(local_path, self.bucket, remote_path)
182
- except ClientError as e:
183
- BMOBJ.clog(self.ctx, "upload s3 file error:", e)
184
- return False
185
- return True
206
+ last_exception = None
207
+
208
+ # 定义哪些S3错误代码是可重试的
209
+ # 'InternalError' 'SlowDown' 是最常见的。'ThrottlingException' 也可能出现。
210
+ retryable_error_codes = {"InternalError", "SlowDown", "ThrottlingException"}
211
+
212
+ for attempt in range(max_attempts):
213
+ try:
214
+ # 尝试上传文件
215
+ _ = s3_client.upload_file(local_path, self.bucket, remote_path)
216
+ # 如果上传成功,记录日志并直接返回True
217
+ if attempt > 0: # 如果不是第一次尝试就成功了,可以加个日志
218
+ BMOBJ.clog(
219
+ self.ctx,
220
+ f"Successfully uploaded {local_path} on attempt {attempt + 1}",
221
+ )
222
+ return True
223
+ except ClientError as e:
224
+ last_exception = e
225
+ error_code = e.response.get("Error", {}).get("Code")
226
+
227
+ # 检查错误代码是否在我们的可重试列表中
228
+ if error_code in retryable_error_codes:
229
+ # 如果是最后一次尝试,则不再等待,直接跳出循环去处理失败
230
+ if attempt == max_attempts - 1:
231
+ BMOBJ.clog(
232
+ self.ctx,
233
+ f"Upload of {local_path} failed on the final attempt ({max_attempts}). Error: {e}",
234
+ )
235
+ break
236
+
237
+ # 计算下一次重试的等待时间(指数退避 + 随机抖动)
238
+ # 等待时间 = initial_delay * 2^attempt + 随机数(0~1秒)
239
+ # 例如:1s, 2s, 4s, 8s, 16s
240
+ sleep_time = (initial_delay * (2**attempt)) + random.randint(0, 10)
241
+
242
+ BMOBJ.clog(
243
+ self.ctx,
244
+ f"Attempt {attempt + 1}/{max_attempts} to upload {local_path} failed with a retryable error: {error_code}. "
245
+ f"Retrying in {sleep_time:.2f} seconds...",
246
+ )
247
+ time.sleep(sleep_time)
248
+ else:
249
+ # 如果是不可重试的错误(如权限问题),立即记录错误并返回False
250
+ BMOBJ.clog(
251
+ self.ctx,
252
+ f"Upload of {local_path} failed with a non-retryable error: {e}",
253
+ )
254
+ return False
255
+ except Exception as e:
256
+ # 捕获其他可能的异常 (如网络问题),并将其视为失败
257
+ BMOBJ.clog(
258
+ self.ctx,
259
+ f"An unexpected error occurred during upload of {local_path}: {e}",
260
+ )
261
+ last_exception = e
262
+ break # 出现未知异常,终止重试
263
+
264
+ # 如果循环结束仍未成功,说明所有重试都失败了
265
+ BMOBJ.clog(
266
+ self.ctx,
267
+ f"Failed to upload {local_path} to {remote_path} after {max_attempts} attempts. Last error: {last_exception}",
268
+ )
269
+ return False
186
270
 
187
271
  def upload_fileobj(self, io_obj, remote_path):
188
272
  """
@@ -251,7 +335,7 @@ class S3Oper(object):
251
335
  try:
252
336
  s3_client.head_object(Bucket=self.bucket, Key=remote_path)
253
337
  except ClientError as e:
254
- return int(e.response['Error']['Code']) != 404
338
+ return int(e.response["Error"]["Code"]) != 404
255
339
  return True
256
340
 
257
341
  def delete_file(self, remote_path):
@@ -274,7 +358,7 @@ class S3Oper(object):
274
358
  bucket.objects.filter(Prefix=remote_path).delete()
275
359
  return True
276
360
 
277
- def _upload_i(self, remote_file, local_file, retry_count):
361
+ def _upload_i(self, remote_file, local_file, retry_count):
278
362
  """
279
363
  @des: 多线程批量上传
280
364
  """
@@ -286,33 +370,39 @@ class S3Oper(object):
286
370
  """
287
371
  for i in range(retry_count): # 最多重试三次,由于网络不稳定等问题
288
372
  try:
289
-
290
373
  _s = time.time()
291
374
  _ = s3_client.upload_file( # 这个方法本来就是分块多线程上传,所以开一个和多个在大文件来说上传速度区别不大
292
- local_file, self.bucket, remote_file) # 返回值为None
375
+ local_file, self.bucket, remote_file
376
+ ) # 返回值为None
293
377
  _e = time.time()
294
378
  if self.print_files_info:
295
379
  BMOBJ.clog(
296
- ctx, f"{local_file} upload success, sub time {_e-_s}", )
380
+ ctx,
381
+ f"{local_file} upload success, sub time {_e - _s}",
382
+ )
297
383
  break
298
384
  except Exception as e:
299
- BMOBJ.clog(
300
- ctx, f"{local_file} upload fail, repeat {i}, error: {e}")
385
+ BMOBJ.clog(ctx, f"{local_file} upload fail, repeat {i}, error: {e}")
301
386
  #
302
- if i < retry_count-1:
387
+ if i < retry_count - 1:
303
388
  sleep_time = random.randint(60, 120)
304
389
  time.sleep(sleep_time)
305
390
  continue
306
391
  else:
307
- BMOBJ.clog(
308
- ctx, f"{local_file} upload finally fail: {e}")
392
+ BMOBJ.clog(ctx, f"{local_file} upload finally fail: {e}")
309
393
  raise Et(2, f"upload fail {remote_file} {local_file}")
310
394
 
311
- def upload_folder(self, local_path, remote_path,
312
- add_success=False, add_empty=False,
313
- max_workers=50, isrm=True,
314
- isdel=True, retry_count=5
315
- ):
395
+ def upload_folder(
396
+ self,
397
+ local_path,
398
+ remote_path,
399
+ add_success=False,
400
+ add_empty=False,
401
+ max_workers=50,
402
+ isrm=True,
403
+ isdel=True,
404
+ retry_count=5,
405
+ ):
316
406
  """
317
407
  @des: 上传到s3---多线程上传---上传文件夹
318
408
  """
@@ -330,17 +420,22 @@ class S3Oper(object):
330
420
  total_files = []
331
421
  total_size = 0
332
422
  for subfile in subfiles:
333
- size = round(subfile.stat().st_size/1024/1024, 4)
423
+ size = round(subfile.stat().st_size / 1024 / 1024, 4)
334
424
  total_size += size
335
425
  name = subfile.name
336
426
  total_files.append(f"{size}M {name}")
337
- _infos = '\n'.join(total_files)
427
+ _infos = "\n".join(total_files)
338
428
  if self.print_files_info:
339
429
  BMOBJ.clog(
340
- ctx, f"""upload file info * file total count {len(subfiles)} file total size {total_size}M""", _infos)
430
+ ctx,
431
+ f"""upload file info * file total count {len(subfiles)} file total size {total_size}M""",
432
+ _infos,
433
+ )
341
434
  else:
342
435
  BMOBJ.clog(
343
- ctx, f"""upload file info * file total count {len(subfiles)} file total size {total_size}M""")
436
+ ctx,
437
+ f"""upload file info * file total count {len(subfiles)} file total size {total_size}M""",
438
+ )
344
439
 
345
440
  # 参考 https://www.jianshu.com/p/b9b3d66aa0be
346
441
  # 控制最大队列数200,记得修改settings.py的redis队列数必须大于这个
@@ -357,7 +452,8 @@ class S3Oper(object):
357
452
  local_file = str(subfile)
358
453
  # 通过submit函数提交执行的函数到线程池中,submit函数立即返回,不阻塞
359
454
  task_i = executor.submit(
360
- self._upload_i, *(remote_file, local_file, retry_count))
455
+ self._upload_i, *(remote_file, local_file, retry_count)
456
+ )
361
457
  all_tasks.append(task_i)
362
458
  # 等待所有任务完成后
363
459
  # wait(all_tasks, timeout=timeout, return_when=ALL_COMPLETED)
@@ -367,17 +463,16 @@ class S3Oper(object):
367
463
  if add_success:
368
464
  # 上传成功后,上传一个空文件代表成功
369
465
  with BytesIO() as f:
370
- with gzip.open(f, 'wb') as r:
371
- r.write(b'')
466
+ with gzip.open(f, "wb") as r:
467
+ r.write(b"")
372
468
  f.seek(0)
373
- self.upload_fileobj(
374
- f, os.path.join(remote_path, "_SUCCESS"))
469
+ self.upload_fileobj(f, os.path.join(remote_path, "_SUCCESS"))
375
470
  else:
376
471
  if add_empty:
377
472
  # 上传一个empty文件,代表没有数据
378
473
  with BytesIO() as f:
379
- with gzip.open(f, 'wb') as r:
380
- r.write(b'')
474
+ with gzip.open(f, "wb") as r:
475
+ r.write(b"")
381
476
  f.seek(0)
382
477
  self.upload_fileobj(f, os.path.join(remote_path, "_EMPTY"))
383
478
  e = time.time()
@@ -387,8 +482,7 @@ class S3Oper(object):
387
482
  raise Et(2, "cannt del root folder")
388
483
  BMOBJ.remove_folder(local_path)
389
484
  #
390
- BMOBJ.clog(
391
- ctx, f"{remote_path} upload all time", e-s)
485
+ BMOBJ.clog(ctx, f"{remote_path} upload all time", e - s)
392
486
 
393
487
  def _download_i(self, remote_file, local_file, retry_count):
394
488
  """
@@ -402,26 +496,33 @@ class S3Oper(object):
402
496
  try:
403
497
  _s = time.time()
404
498
  _ = s3_client.download_file(
405
- self.bucket, remote_file, local_file) # 返回值为None
499
+ self.bucket, remote_file, local_file
500
+ ) # 返回值为None
406
501
  _e = time.time()
407
502
  BMOBJ.clog(
408
- ctx, f"{local_file} download success, sub time {_e-_s}", )
503
+ ctx,
504
+ f"{local_file} download success, sub time {_e - _s}",
505
+ )
409
506
  break
410
507
  except Exception as e:
411
- BMOBJ.clog(
412
- ctx, f"{local_file} download fail, repeat {i}, error: {e}")
413
- if i < retry_count-1:
508
+ BMOBJ.clog(ctx, f"{local_file} download fail, repeat {i}, error: {e}")
509
+ if i < retry_count - 1:
414
510
  sleep_time = random.randint(60, 120)
415
511
  time.sleep(sleep_time)
416
512
  continue
417
513
  else:
418
- BMOBJ.clog(
419
- ctx, f"{local_file} download finally fail: {e}")
514
+ BMOBJ.clog(ctx, f"{local_file} download finally fail: {e}")
420
515
  raise Et(2, f"download fail {remote_file} {local_file}")
421
516
 
422
- def download_folder(self, local_path, remote_path,
423
- max_workers=50, isrm=True,
424
- isdel=True, retry_count=5):
517
+ def download_folder(
518
+ self,
519
+ local_path,
520
+ remote_path,
521
+ max_workers=50,
522
+ isrm=True,
523
+ isdel=True,
524
+ retry_count=5,
525
+ ):
425
526
  """
426
527
  @des: 下载到本地---多线程下载---下载文件夹--下载后删除s3的文件
427
528
  """
@@ -442,15 +543,16 @@ class S3Oper(object):
442
543
  if not isrm:
443
544
  local_file = os.path.join(local_path, subfile_name)
444
545
  else:
445
- l_name = str(pathlib.PurePath(remote_file)
446
- ).replace(remote_path, "")
546
+ l_name = str(pathlib.PurePath(remote_file)).replace(remote_path, "")
447
547
  l_name = l_name.lstrip("/")
448
548
  local_file = os.path.join(local_path, l_name)
449
- os.makedirs(os.path.dirname(local_file),
450
- exist_ok=True) # 创建不存在的子文件夹
549
+ os.makedirs(
550
+ os.path.dirname(local_file), exist_ok=True
551
+ ) # 创建不存在的子文件夹
451
552
  # 通过submit函数提交执行的函数到线程池中,submit函数立即返回,不阻塞
452
553
  task_i = executor.submit(
453
- self._download_i, *(remote_file, local_file, retry_count))
554
+ self._download_i, *(remote_file, local_file, retry_count)
555
+ )
454
556
  all_tasks.append(task_i)
455
557
  # 等待所有任务完成后
456
558
  # wait(all_tasks, timeout=timeout, return_when=ALL_COMPLETED)
@@ -464,11 +566,10 @@ class S3Oper(object):
464
566
  raise Et(2, "cannt del root folder")
465
567
  self.delete_folder(remote_path)
466
568
  #
467
- BMOBJ.clog(
468
- ctx, f"{remote_path} download all time", e-s)
569
+ BMOBJ.clog(ctx, f"{remote_path} download all time", e - s)
469
570
 
470
571
 
471
- if __name__ == '__main__': # 打版本的时候一定记得记得脱敏
572
+ if __name__ == "__main__": # 打版本的时候一定记得记得脱敏
472
573
  pass
473
574
  # s3 = S3Oper(ctx = {}, ak="xx", sk="yy", bucket="xx", region_name="us-east-2")
474
575
  # s3.upload_folder(local_path="/Users/yuanxiao/Downloads/train/samples",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tfduck-bsd
3
- Version: 0.19.0
3
+ Version: 0.19.1
4
4
  Summary: A small example package
5
5
  Home-page: UNKNOWN
6
6
  Author: yuanxiao
@@ -1 +0,0 @@
1
- __version__="0.19.0"
File without changes
File without changes
File without changes
File without changes
File without changes