tamar-file-hub-client 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. file_hub_client/__init__.py +88 -0
  2. file_hub_client/client.py +414 -0
  3. file_hub_client/enums/__init__.py +12 -0
  4. file_hub_client/enums/export_format.py +16 -0
  5. file_hub_client/enums/role.py +7 -0
  6. file_hub_client/enums/upload_mode.py +11 -0
  7. file_hub_client/errors/__init__.py +30 -0
  8. file_hub_client/errors/exceptions.py +93 -0
  9. file_hub_client/py.typed +1 -0
  10. file_hub_client/rpc/__init__.py +10 -0
  11. file_hub_client/rpc/async_client.py +312 -0
  12. file_hub_client/rpc/gen/__init__.py +1 -0
  13. file_hub_client/rpc/gen/file_service_pb2.py +74 -0
  14. file_hub_client/rpc/gen/file_service_pb2_grpc.py +533 -0
  15. file_hub_client/rpc/gen/folder_service_pb2.py +53 -0
  16. file_hub_client/rpc/gen/folder_service_pb2_grpc.py +269 -0
  17. file_hub_client/rpc/generate_grpc.py +76 -0
  18. file_hub_client/rpc/protos/file_service.proto +147 -0
  19. file_hub_client/rpc/protos/folder_service.proto +65 -0
  20. file_hub_client/rpc/sync_client.py +313 -0
  21. file_hub_client/schemas/__init__.py +43 -0
  22. file_hub_client/schemas/context.py +160 -0
  23. file_hub_client/schemas/file.py +89 -0
  24. file_hub_client/schemas/folder.py +29 -0
  25. file_hub_client/services/__init__.py +17 -0
  26. file_hub_client/services/file/__init__.py +14 -0
  27. file_hub_client/services/file/async_blob_service.py +482 -0
  28. file_hub_client/services/file/async_file_service.py +257 -0
  29. file_hub_client/services/file/base_file_service.py +103 -0
  30. file_hub_client/services/file/sync_blob_service.py +478 -0
  31. file_hub_client/services/file/sync_file_service.py +255 -0
  32. file_hub_client/services/folder/__init__.py +10 -0
  33. file_hub_client/services/folder/async_folder_service.py +206 -0
  34. file_hub_client/services/folder/sync_folder_service.py +205 -0
  35. file_hub_client/utils/__init__.py +48 -0
  36. file_hub_client/utils/converter.py +108 -0
  37. file_hub_client/utils/download_helper.py +355 -0
  38. file_hub_client/utils/file_utils.py +105 -0
  39. file_hub_client/utils/retry.py +69 -0
  40. file_hub_client/utils/upload_helper.py +527 -0
  41. tamar_file_hub_client-0.0.1.dist-info/METADATA +874 -0
  42. tamar_file_hub_client-0.0.1.dist-info/RECORD +44 -0
  43. tamar_file_hub_client-0.0.1.dist-info/WHEEL +5 -0
  44. tamar_file_hub_client-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,48 @@
1
+ """
2
+ 工具函数模块
3
+ """
4
+ from .converter import (
5
+ timestamp_to_datetime
6
+ )
7
+ from .file_utils import (
8
+ get_file_mime_type,
9
+ get_file_extension,
10
+ humanize_file_size,
11
+ calculate_file_hash,
12
+ split_file_chunks,
13
+ )
14
+ from .retry import retry_with_backoff
15
+ from .upload_helper import (
16
+ HttpUploader,
17
+ AsyncHttpUploader,
18
+ UploadProgress,
19
+ calculate_file_md5,
20
+ )
21
+ from .download_helper import (
22
+ HttpDownloader,
23
+ AsyncHttpDownloader,
24
+ DownloadProgress,
25
+ )
26
+
27
+ __all__ = [
28
+ # 文件工具
29
+ "get_file_mime_type",
30
+ "get_file_extension",
31
+ "humanize_file_size",
32
+ "calculate_file_hash",
33
+ "split_file_chunks",
34
+
35
+ # 重试工具
36
+ "retry_with_backoff",
37
+
38
+ # 上传助手
39
+ "HttpUploader",
40
+ "AsyncHttpUploader",
41
+ "UploadProgress",
42
+ "calculate_file_md5",
43
+
44
+ # 下载助手
45
+ "HttpDownloader",
46
+ "AsyncHttpDownloader",
47
+ "DownloadProgress",
48
+ ]
@@ -0,0 +1,108 @@
1
+ """
2
+ Proto和Model之间的转换工具
3
+ """
4
from datetime import datetime, timedelta
from typing import Any

from google.protobuf.timestamp_pb2 import Timestamp

from ..schemas import File, FolderInfo
9
+
10
+
11
def timestamp_to_datetime(timestamp: Timestamp) -> datetime:
    """Convert a protobuf ``Timestamp`` to a naive UTC ``datetime``.

    A protobuf ``Timestamp`` encodes seconds/nanos since the UTC Unix
    epoch, so the conversion must not depend on the machine's timezone
    (the previous ``datetime.fromtimestamp(seconds + nanos / 1e9)``
    returned local wall time and lost sub-microsecond precision to float
    rounding). Nanoseconds are truncated to microseconds, the finest
    resolution a ``datetime`` can represent.
    """
    return datetime(1970, 1, 1) + timedelta(
        seconds=timestamp.seconds, microseconds=timestamp.nanos // 1000
    )
14
+ #
15
+ #
16
+ # def datetime_to_timestamp(dt: datetime) -> Timestamp:
17
+ # """将datetime转换为protobuf timestamp"""
18
+ # timestamp = Timestamp()
19
+ # timestamp.FromDatetime(dt)
20
+ # return timestamp
21
+ #
22
+ #
23
+ # def convert_proto_to_model(proto_obj: Any) -> Any:
24
+ # """
25
+ # 将Proto对象转换为Model对象
26
+ #
27
+ # Args:
28
+ # proto_obj: Proto对象
29
+ #
30
+ # Returns:
31
+ # Model对象
32
+ # """
33
+ # # 动态导入,避免循环导入
34
+ # from ..rpc.gen import file_service_pb2 as file_hub_pb2
35
+ #
36
+ # if isinstance(proto_obj, file_hub_pb2.FileInfo):
37
+ # return File(
38
+ # id=proto_obj.id,
39
+ # #name=proto_obj.name,
40
+ # folder_id=proto_obj.folder_id or None,
41
+ # #type=ModelFileType.TRADITIONAL if proto_obj.type == file_hub_pb2.FILE_TYPE_TRADITIONAL else ModelFileType.CUSTOM,
42
+ # #size=proto_obj.size,
43
+ # mime_type=proto_obj.mime_type or None,
44
+ # created_at=timestamp_to_datetime(proto_obj.created_at),
45
+ # updated_at=timestamp_to_datetime(proto_obj.updated_at),
46
+ # #metadata=dict(proto_obj.metadata),
47
+ # #storage_path=proto_obj.storage_path or None,
48
+ # )
49
+ #
50
+ # elif isinstance(proto_obj, file_hub_pb2.FolderInfo):
51
+ # return FolderInfo(
52
+ # id=proto_obj.id,
53
+ # folder_name=proto_obj.name,
54
+ # parent_id=proto_obj.parent_id or None,
55
+ # created_at=timestamp_to_datetime(proto_obj.created_at),
56
+ # updated_at=timestamp_to_datetime(proto_obj.updated_at),
57
+ # #metadata=dict(proto_obj.metadata),
58
+ # )
59
+ #
60
+ # else:
61
+ # raise ValueError(f"不支持的Proto类型: {type(proto_obj)}")
62
+ #
63
+ #
64
+ # def convert_model_to_proto(model_obj: Any) -> Any:
65
+ # """
66
+ # 将Model对象转换为Proto对象
67
+ #
68
+ # Args:
69
+ # model_obj: Model对象
70
+ #
71
+ # Returns:
72
+ # Proto对象
73
+ # """
74
+ # # 动态导入,避免循环导入
75
+ # from ..rpc.gen import file_service_pb2 as file_hub_pb2
76
+ #
77
+ # if isinstance(model_obj, File):
78
+ # proto_type = (
79
+ # file_hub_pb2.FILE_TYPE_TRADITIONAL
80
+ # if model_obj.type == ModelFileType.TRADITIONAL
81
+ # else file_hub_pb2.FILE_TYPE_CUSTOM
82
+ # )
83
+ #
84
+ # return file_hub_pb2.FileInfo(
85
+ # id=model_obj.id,
86
+ # name=model_obj.name,
87
+ # folder_id=model_obj.folder_id or "",
88
+ # type=proto_type,
89
+ # size=model_obj.size,
90
+ # mime_type=model_obj.mime_type or "",
91
+ # created_at=datetime_to_timestamp(model_obj.created_at),
92
+ # updated_at=datetime_to_timestamp(model_obj.updated_at),
93
+ # metadata=model_obj.metadata,
94
+ # storage_path=model_obj.storage_path or "",
95
+ # )
96
+ #
97
+ # elif isinstance(model_obj, FolderInfo):
98
+ # return file_hub_pb2.FolderInfo(
99
+ # id=model_obj.id,
100
+ # name=model_obj.name,
101
+ # parent_id=model_obj.parent_id or "",
102
+ # created_at=datetime_to_timestamp(model_obj.created_at),
103
+ # updated_at=datetime_to_timestamp(model_obj.updated_at),
104
+ # metadata=model_obj.metadata,
105
+ # )
106
+ #
107
+ # else:
108
+ # raise ValueError(f"不支持的Model类型: {type(model_obj)}")
@@ -0,0 +1,355 @@
1
+ """
2
+ 下载助手模块
3
+
4
+ 提供HTTP下载、流式下载、进度监控等功能
5
+ """
6
+ import time
7
+ import asyncio
8
+ from urllib.parse import urlparse
9
+
10
+ import aiohttp
11
+ import requests
12
+ from pathlib import Path
13
+ from typing import Union, Optional, Callable
14
+ from dataclasses import dataclass
15
+
16
+
17
@dataclass
class DownloadProgress:
    """Snapshot of the state of an in-flight download."""
    total_size: int        # expected total bytes (0 when the size is unknown)
    downloaded_size: int   # bytes received so far
    percentage: float
    speed: float  # bytes per second
    remaining_time: float  # seconds

    @property
    def is_completed(self) -> bool:
        """Whether all expected bytes have arrived (False when size is unknown)."""
        if self.total_size <= 0:
            return False
        return self.downloaded_size >= self.total_size
29
+
30
+
31
class HttpDownloader:
    """Synchronous HTTP downloader.

    Supports chunked streaming, progress reporting, automatic retries and
    resuming an interrupted download from a ``.part`` temp file.
    """

    def __init__(self, chunk_size: int = 1024 * 1024, total_retries: int = 3, retry_delay_seconds: int = 5):  # default 1MB chunks
        self.chunk_size = chunk_size
        self.total_retries = total_retries
        self.retry_delay_seconds = retry_delay_seconds

    def download(
            self,
            url: str,
            save_path: Optional[Union[str, Path]] = None,
            chunk_size: Optional[int] = None,
            headers: Optional[dict] = None,
            progress_callback: Optional[Callable[[DownloadProgress], None]] = None,
            timeout: Optional[int] = None
    ) -> Union[bytes, Path]:
        """
        Download a file from a URL.

        Args:
            url: Download URL.
            save_path: Where to save the file; when ``None`` the bytes are returned.
            chunk_size: Chunk size in bytes (defaults to the instance setting).
            headers: Extra request headers; the caller's dict is never mutated.
            progress_callback: Invoked with a ``DownloadProgress`` after each chunk.
            timeout: Request timeout in seconds.

        Returns:
            The downloaded content (bytes) or the path the file was saved to.

        Raises:
            ValueError: If ``save_path``'s extension conflicts with the URL's.
        """
        # Copy so the Range header added below never leaks into (or
        # mutates) the caller's dict across calls/attempts.
        headers = dict(headers) if headers else {}
        chunk_size = chunk_size or self.chunk_size
        save_path = Path(save_path) if save_path else None

        # Validate extension consistency (only when both sides have one).
        parsed_url = urlparse(url)
        url_suffix = Path(parsed_url.path).suffix.lower()
        if save_path and save_path.suffix and save_path.suffix.lower() != url_suffix:
            raise ValueError(
                f"File extension mismatch: download_url ends with '{url_suffix}', but save_path ends with '{save_path.suffix.lower()}'")

        for attempt in range(self.total_retries):
            try:
                resume_from = 0
                temp_path = None
                if save_path:
                    temp_path = save_path.with_suffix(save_path.suffix + ".part")
                    if temp_path.exists():
                        # A partial file exists: ask the server for the rest.
                        resume_from = temp_path.stat().st_size
                        headers["Range"] = f"bytes={resume_from}-"

                response = requests.get(url, headers=headers, stream=True, timeout=timeout)
                response.raise_for_status()

                if resume_from and response.status_code != 206:
                    # The server ignored the Range header and sent the full
                    # body; appending it to the partial file would corrupt
                    # it, so restart the download from scratch.
                    resume_from = 0
                    headers.pop("Range", None)
                    if temp_path is not None and temp_path.exists():
                        temp_path.unlink()

                content_length = int(response.headers.get('content-length', 0))
                total_size = content_length + resume_from

                if save_path is None:
                    return self._download_to_memory(response, total_size, chunk_size, progress_callback, resume_from)
                return self._download_to_file(response, save_path, total_size, chunk_size, progress_callback,
                                              resume_from)
            except Exception:
                if attempt == self.total_retries - 1:
                    raise
                # Back off before the next attempt.
                time.sleep(self.retry_delay_seconds)

    def _download_to_memory(
            self,
            response: requests.Response,
            total_size: int,
            chunk_size: int,
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> bytes:
        """Stream the response body into memory and return it as bytes."""
        chunks = []
        downloaded_size = resume_from
        start_time = time.time()

        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:
                chunks.append(chunk)
                downloaded_size += len(chunk)

                if progress_callback:
                    self._report_progress(
                        downloaded_size, total_size, start_time, progress_callback
                    )

        return b''.join(chunks)

    def _download_to_file(
            self,
            response: requests.Response,
            save_path: Path,
            total_size: int,
            chunk_size: int,
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> Path:
        """Stream the response body to ``save_path`` via a ``.part`` temp file."""
        # Ensure the target directory exists.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to a temporary .part file, renamed only on success so an
        # interrupted download can be resumed later.
        temp_path = save_path.with_suffix(save_path.suffix + ".part")

        downloaded_size = resume_from
        start_time = time.time()

        # Append when resuming, truncate otherwise.
        mode = 'ab' if resume_from > 0 else 'wb'

        with open(temp_path, mode) as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    downloaded_size += len(chunk)
                    if progress_callback:
                        self._report_progress(downloaded_size, total_size, start_time, progress_callback)

        if save_path.exists():
            save_path.unlink()
        temp_path.rename(save_path)
        return save_path

    def _report_progress(
            self,
            downloaded_size: int,
            total_size: int,
            start_time: float,
            callback: Callable[[DownloadProgress], None]
    ):
        """Build a DownloadProgress snapshot and hand it to the callback."""
        elapsed = time.time() - start_time
        speed = downloaded_size / elapsed if elapsed > 0 else 0
        percentage = (downloaded_size / total_size * 100) if total_size > 0 else 0
        remaining = (total_size - downloaded_size) / speed if speed > 0 else 0

        progress = DownloadProgress(
            total_size=total_size,
            downloaded_size=downloaded_size,
            percentage=percentage,
            speed=speed,
            remaining_time=remaining
        )
        callback(progress)
179
+
180
+
181
class AsyncHttpDownloader:
    """Asynchronous HTTP downloader (aiohttp based).

    Mirrors ``HttpDownloader``: chunked streaming, progress reporting,
    retries and resumable downloads via a ``.part`` temp file.
    """

    def __init__(self, chunk_size: int = 1024 * 1024, total_retries: int = 3, retry_delay_seconds: int = 5):  # default 1MB chunks
        self.chunk_size = chunk_size
        self.total_retries = total_retries
        self.retry_delay_seconds = retry_delay_seconds

    async def download(
            self,
            url: str,
            save_path: Optional[Union[str, Path]] = None,
            chunk_size: Optional[int] = None,
            headers: Optional[dict] = None,
            progress_callback: Optional[Callable[[DownloadProgress], None]] = None,
            timeout: Optional[int] = None
    ) -> Union[bytes, Path]:
        """
        Asynchronously download a file from a URL.

        Args:
            url: Download URL.
            save_path: Where to save the file; when ``None`` the bytes are returned.
            chunk_size: Chunk size in bytes (defaults to the instance setting).
            headers: Extra request headers; the caller's dict is never mutated.
            progress_callback: Sync or async callable invoked per chunk.
            timeout: Total request timeout in seconds.

        Returns:
            The downloaded content (bytes) or the path the file was saved to.

        Raises:
            ValueError: If ``save_path``'s extension conflicts with the URL's.
        """
        # Copy so the Range header added below never leaks into (or
        # mutates) the caller's dict across calls/attempts.
        headers = dict(headers) if headers else {}
        save_path = Path(save_path) if save_path else None
        chunk_size = chunk_size or self.chunk_size

        # Validate extension consistency (only when both sides have one).
        parsed_url = urlparse(url)
        url_suffix = Path(parsed_url.path).suffix.lower()
        if save_path and save_path.suffix and save_path.suffix.lower() != url_suffix:
            raise ValueError(
                f"File extension mismatch: download_url ends with '{url_suffix}', but save_path ends with '{save_path.suffix.lower()}'")

        for attempt in range(self.total_retries):
            try:
                resume_from = 0
                temp_path = None
                if save_path:
                    temp_path = save_path.with_suffix(save_path.suffix + ".part")
                    if temp_path.exists():
                        # A partial file exists: ask the server for the rest.
                        resume_from = temp_path.stat().st_size
                        headers["Range"] = f"bytes={resume_from}-"

                timeout_config = aiohttp.ClientTimeout(total=timeout) if timeout else None

                async with aiohttp.ClientSession() as session:
                    async with session.get(url, headers=headers, timeout=timeout_config) as response:
                        if response.status not in (200, 206):
                            raise Exception(f"Unexpected status: {response.status}")

                        if resume_from and response.status != 206:
                            # The server ignored the Range header and sent
                            # the full body; appending it would corrupt the
                            # partial file, so restart from scratch.
                            resume_from = 0
                            headers.pop("Range", None)
                            if temp_path is not None and temp_path.exists():
                                temp_path.unlink()

                        total_size = int(response.headers.get("content-length", 0)) + resume_from

                        if save_path is None:
                            return await self._download_to_memory(response, total_size, chunk_size,
                                                                  progress_callback, resume_from)
                        return await self._download_to_file(response, save_path, total_size, chunk_size,
                                                            progress_callback, resume_from)
            except Exception:
                if attempt == self.total_retries - 1:
                    raise
                # Back off before the next attempt.
                await asyncio.sleep(self.retry_delay_seconds)

    async def _download_to_memory(
            self,
            response: aiohttp.ClientResponse,
            total_size: int,
            chunk_size: Optional[int],
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> bytes:
        """Stream the response body into memory and return it as bytes."""
        chunks = []
        downloaded_size = resume_from
        start_time = time.monotonic()

        async for chunk in response.content.iter_chunked(chunk_size):
            chunks.append(chunk)
            downloaded_size += len(chunk)

            if progress_callback:
                await self._report_progress(
                    downloaded_size, total_size, start_time, progress_callback
                )

        return b''.join(chunks)

    async def _download_to_file(
            self,
            response: aiohttp.ClientResponse,
            save_path: Path,
            total_size: int,
            chunk_size: Optional[int],
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> Path:
        """Stream the response body to ``save_path`` via a ``.part`` temp file."""
        # Ensure the target directory exists.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to a temporary .part file, renamed only on success.
        temp_path = save_path.with_suffix(save_path.suffix + ".part")

        # Start from the resumed byte count so reported percentages include
        # the bytes already on disk (previously this started at 0 and
        # under-reported progress on resume, unlike the sync downloader).
        downloaded_size = resume_from
        start_time = time.monotonic()

        # Append when resuming, truncate otherwise.
        mode = 'ab' if resume_from > 0 else 'wb'

        if aiofiles:
            # Use async file IO when the optional aiofiles dependency exists.
            async with aiofiles.open(temp_path, mode) as f:
                async for chunk in response.content.iter_chunked(chunk_size):
                    await f.write(chunk)
                    downloaded_size += len(chunk)

                    if progress_callback:
                        await self._report_progress(
                            downloaded_size, total_size, start_time, progress_callback
                        )
        else:
            # Fall back to synchronous file IO.
            with open(temp_path, mode) as f:
                async for chunk in response.content.iter_chunked(chunk_size):
                    f.write(chunk)
                    downloaded_size += len(chunk)

                    if progress_callback:
                        await self._report_progress(
                            downloaded_size, total_size, start_time, progress_callback
                        )
        if save_path.exists():
            save_path.unlink()
        temp_path.rename(save_path)
        return save_path

    async def _report_progress(
            self,
            downloaded_size: int,
            total_size: int,
            start_time: float,
            callback: Callable[[DownloadProgress], None]
    ):
        """Build a DownloadProgress and invoke the (possibly async) callback.

        Elapsed time is measured with ``time.monotonic()``, the same clock
        used for the ``start_time`` stamps above; the previous code mixed
        ``time.time()`` starts with event-loop-clock elapsed readings,
        which produced meaningless speed/ETA values.
        """
        elapsed = time.monotonic() - start_time
        speed = downloaded_size / elapsed if elapsed > 0 else 0
        percentage = (downloaded_size / total_size * 100) if total_size > 0 else 0
        remaining = (total_size - downloaded_size) / speed if speed > 0 else 0

        progress = DownloadProgress(
            total_size=total_size,
            downloaded_size=downloaded_size,
            percentage=percentage,
            speed=speed,
            remaining_time=remaining
        )

        if asyncio.iscoroutinefunction(callback):
            await callback(progress)
        else:
            callback(progress)
349
+
350
+
351
+ # 尝试导入aiofiles(可选依赖)
352
+ try:
353
+ import aiofiles
354
+ except ImportError:
355
+ aiofiles = None
@@ -0,0 +1,105 @@
1
+ """
2
+ 文件工具函数
3
+ """
4
+ import hashlib
5
+ import mimetypes
6
+ from pathlib import Path
7
+ from typing import Generator, Optional, BinaryIO, Union
8
+
9
+
10
def get_file_mime_type(file_path: Union[str, Path]) -> str:
    """
    Guess a file's MIME type from its name.

    Args:
        file_path: Path to the file.

    Returns:
        The guessed MIME type, falling back to ``application/octet-stream``
        when the extension is unknown.
    """
    guessed, _encoding = mimetypes.guess_type(str(Path(file_path)))
    return guessed if guessed else "application/octet-stream"
23
+
24
+
25
def get_file_extension(file_name: str) -> str:
    """
    Return the lower-cased extension of a file name.

    Args:
        file_name: The file name.

    Returns:
        The extension including the leading dot, or an empty string when
        the name has none.
    """
    suffix = Path(file_name).suffix
    return suffix.lower()
36
+
37
+
38
def humanize_file_size(size_bytes: int) -> str:
    """
    Render a byte count in a human-readable form.

    Args:
        size_bytes: Size in bytes.

    Returns:
        A string such as ``"1.50 KB"``, using 1024-based units up to PB.
    """
    value = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if value < 1024.0:
            return f"{value:.2f} {unit}"
        value /= 1024.0
    # Anything past TB is reported in petabytes.
    return f"{value:.2f} PB"
53
+
54
+
55
def calculate_file_hash(file_path: Union[str, Path], algorithm: str = "sha256") -> str:
    """
    Compute the hash digest of a file's contents.

    Args:
        file_path: Path to the file.
        algorithm: Any algorithm name accepted by ``hashlib.new``
            (md5, sha1, sha256, ...).

    Returns:
        The hex-encoded digest string.
    """
    digest = hashlib.new(algorithm)

    # Read in fixed-size chunks so large files never sit fully in memory.
    with open(Path(file_path), "rb") as stream:
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)

    return digest.hexdigest()
74
+
75
+
76
def split_file_chunks(
        file_obj: BinaryIO,
        chunk_size: int = 1024 * 1024,  # default 1MB
        start_offset: int = 0
) -> Generator[tuple[bytes, int, bool], None, None]:
    """
    Split a file into chunks.

    Args:
        file_obj: Binary file object.
        chunk_size: Chunk size in bytes.
        start_offset: Offset to start reading from.

    Yields:
        (chunk data, offset, whether this is the last chunk)

    Note:
        The last-chunk flag is determined by reading one chunk ahead, so
        it is correct even when the remaining size is an exact multiple of
        ``chunk_size`` (the previous ``len(chunk) < chunk_size`` test
        never flagged the final chunk in that case).
    """
    file_obj.seek(start_offset)
    offset = start_offset

    chunk = file_obj.read(chunk_size)
    while chunk:
        # Peek at the next chunk to know whether the current one is last.
        next_chunk = file_obj.read(chunk_size)
        yield chunk, offset, not next_chunk
        offset += len(chunk)
        chunk = next_chunk
@@ -0,0 +1,69 @@
1
+ """
2
+ 重试工具
3
+ """
4
+ import asyncio
5
+ import functools
6
+ import time
7
+ from typing import TypeVar, Callable, Type, Tuple
8
+
9
+ T = TypeVar("T")
10
+
11
+
12
def retry_with_backoff(
        max_retries: int = 3,
        initial_delay: float = 1.0,
        backoff_factor: float = 2.0,
        max_delay: float = 60.0,
        exceptions: Tuple[Type[Exception], ...] = (Exception,)
):
    """
    Decorator retrying the wrapped callable with exponential backoff.

    Works transparently for both plain functions and coroutine functions.

    Args:
        max_retries: Maximum number of retries (total calls = max_retries + 1).
        initial_delay: Delay before the first retry, in seconds.
        backoff_factor: Multiplier applied to the delay after each retry.
        max_delay: Upper bound on the delay, in seconds.
        exceptions: Exception types that trigger a retry; anything else
            propagates immediately.
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs) -> T:
                delay = initial_delay
                for attempt in range(max_retries + 1):
                    try:
                        return await func(*args, **kwargs)
                    except exceptions:
                        # Re-raise once the retry budget is exhausted.
                        # (The old trailing `raise last_exception` after the
                        # loop was unreachable dead code and is removed.)
                        if attempt >= max_retries:
                            raise
                        await asyncio.sleep(delay)
                        delay = min(delay * backoff_factor, max_delay)
            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs) -> T:
            delay = initial_delay
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    # Re-raise once the retry budget is exhausted.
                    if attempt >= max_retries:
                        raise
                    time.sleep(delay)
                    delay = min(delay * backoff_factor, max_delay)
        return sync_wrapper

    return decorator