tamar-file-hub-client 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- file_hub_client/__init__.py +88 -0
- file_hub_client/client.py +414 -0
- file_hub_client/enums/__init__.py +12 -0
- file_hub_client/enums/export_format.py +16 -0
- file_hub_client/enums/role.py +7 -0
- file_hub_client/enums/upload_mode.py +11 -0
- file_hub_client/errors/__init__.py +30 -0
- file_hub_client/errors/exceptions.py +93 -0
- file_hub_client/py.typed +1 -0
- file_hub_client/rpc/__init__.py +10 -0
- file_hub_client/rpc/async_client.py +312 -0
- file_hub_client/rpc/gen/__init__.py +1 -0
- file_hub_client/rpc/gen/file_service_pb2.py +74 -0
- file_hub_client/rpc/gen/file_service_pb2_grpc.py +533 -0
- file_hub_client/rpc/gen/folder_service_pb2.py +53 -0
- file_hub_client/rpc/gen/folder_service_pb2_grpc.py +269 -0
- file_hub_client/rpc/generate_grpc.py +76 -0
- file_hub_client/rpc/protos/file_service.proto +147 -0
- file_hub_client/rpc/protos/folder_service.proto +65 -0
- file_hub_client/rpc/sync_client.py +313 -0
- file_hub_client/schemas/__init__.py +43 -0
- file_hub_client/schemas/context.py +160 -0
- file_hub_client/schemas/file.py +89 -0
- file_hub_client/schemas/folder.py +29 -0
- file_hub_client/services/__init__.py +17 -0
- file_hub_client/services/file/__init__.py +14 -0
- file_hub_client/services/file/async_blob_service.py +482 -0
- file_hub_client/services/file/async_file_service.py +257 -0
- file_hub_client/services/file/base_file_service.py +103 -0
- file_hub_client/services/file/sync_blob_service.py +478 -0
- file_hub_client/services/file/sync_file_service.py +255 -0
- file_hub_client/services/folder/__init__.py +10 -0
- file_hub_client/services/folder/async_folder_service.py +206 -0
- file_hub_client/services/folder/sync_folder_service.py +205 -0
- file_hub_client/utils/__init__.py +48 -0
- file_hub_client/utils/converter.py +108 -0
- file_hub_client/utils/download_helper.py +355 -0
- file_hub_client/utils/file_utils.py +105 -0
- file_hub_client/utils/retry.py +69 -0
- file_hub_client/utils/upload_helper.py +527 -0
- tamar_file_hub_client-0.0.1.dist-info/METADATA +874 -0
- tamar_file_hub_client-0.0.1.dist-info/RECORD +44 -0
- tamar_file_hub_client-0.0.1.dist-info/WHEEL +5 -0
- tamar_file_hub_client-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,48 @@
|
|
1
|
+
"""
Utility functions module.
"""
from .converter import (
    timestamp_to_datetime
)
from .file_utils import (
    get_file_mime_type,
    get_file_extension,
    humanize_file_size,
    calculate_file_hash,
    split_file_chunks,
)
from .retry import retry_with_backoff
from .upload_helper import (
    HttpUploader,
    AsyncHttpUploader,
    UploadProgress,
    calculate_file_md5,
)
from .download_helper import (
    HttpDownloader,
    AsyncHttpDownloader,
    DownloadProgress,
)

__all__ = [
    # Converter utilities
    # (previously imported but omitted from __all__, so `from ... import *`
    # silently failed to export it)
    "timestamp_to_datetime",

    # File utilities
    "get_file_mime_type",
    "get_file_extension",
    "humanize_file_size",
    "calculate_file_hash",
    "split_file_chunks",

    # Retry utilities
    "retry_with_backoff",

    # Upload helpers
    "HttpUploader",
    "AsyncHttpUploader",
    "UploadProgress",
    "calculate_file_md5",

    # Download helpers
    "HttpDownloader",
    "AsyncHttpDownloader",
    "DownloadProgress",
]
|
@@ -0,0 +1,108 @@
|
|
1
|
+
"""
|
2
|
+
Proto和Model之间的转换工具
|
3
|
+
"""
|
4
|
+
from datetime import datetime, timedelta
from typing import Any

from google.protobuf.timestamp_pb2 import Timestamp

from ..schemas import File, FolderInfo
|
9
|
+
|
10
|
+
|
11
|
+
def timestamp_to_datetime(timestamp: "Timestamp") -> datetime:
    """Convert a protobuf ``Timestamp`` into a naive local ``datetime``.

    Args:
        timestamp: object exposing integer ``seconds`` and ``nanos`` fields
            (a ``google.protobuf.Timestamp`` in practice).

    Returns:
        Naive ``datetime`` in the local timezone, with the nanosecond part
        truncated to microsecond precision.
    """
    # Combine seconds and nanoseconds using integer microseconds. The
    # original ``seconds + nanos / 1e9`` float addition can lose
    # sub-microsecond precision at modern epoch magnitudes (~1.7e9 s),
    # where float64 resolution is close to the microsecond boundary.
    return datetime.fromtimestamp(timestamp.seconds) + timedelta(
        microseconds=timestamp.nanos // 1000
    )
|
14
|
+
#
|
15
|
+
#
|
16
|
+
# def datetime_to_timestamp(dt: datetime) -> Timestamp:
|
17
|
+
# """将datetime转换为protobuf timestamp"""
|
18
|
+
# timestamp = Timestamp()
|
19
|
+
# timestamp.FromDatetime(dt)
|
20
|
+
# return timestamp
|
21
|
+
#
|
22
|
+
#
|
23
|
+
# def convert_proto_to_model(proto_obj: Any) -> Any:
|
24
|
+
# """
|
25
|
+
# 将Proto对象转换为Model对象
|
26
|
+
#
|
27
|
+
# Args:
|
28
|
+
# proto_obj: Proto对象
|
29
|
+
#
|
30
|
+
# Returns:
|
31
|
+
# Model对象
|
32
|
+
# """
|
33
|
+
# # 动态导入,避免循环导入
|
34
|
+
# from ..rpc.gen import file_service_pb2 as file_hub_pb2
|
35
|
+
#
|
36
|
+
# if isinstance(proto_obj, file_hub_pb2.FileInfo):
|
37
|
+
# return File(
|
38
|
+
# id=proto_obj.id,
|
39
|
+
# #name=proto_obj.name,
|
40
|
+
# folder_id=proto_obj.folder_id or None,
|
41
|
+
# #type=ModelFileType.TRADITIONAL if proto_obj.type == file_hub_pb2.FILE_TYPE_TRADITIONAL else ModelFileType.CUSTOM,
|
42
|
+
# #size=proto_obj.size,
|
43
|
+
# mime_type=proto_obj.mime_type or None,
|
44
|
+
# created_at=timestamp_to_datetime(proto_obj.created_at),
|
45
|
+
# updated_at=timestamp_to_datetime(proto_obj.updated_at),
|
46
|
+
# #metadata=dict(proto_obj.metadata),
|
47
|
+
# #storage_path=proto_obj.storage_path or None,
|
48
|
+
# )
|
49
|
+
#
|
50
|
+
# elif isinstance(proto_obj, file_hub_pb2.FolderInfo):
|
51
|
+
# return FolderInfo(
|
52
|
+
# id=proto_obj.id,
|
53
|
+
# folder_name=proto_obj.name,
|
54
|
+
# parent_id=proto_obj.parent_id or None,
|
55
|
+
# created_at=timestamp_to_datetime(proto_obj.created_at),
|
56
|
+
# updated_at=timestamp_to_datetime(proto_obj.updated_at),
|
57
|
+
# #metadata=dict(proto_obj.metadata),
|
58
|
+
# )
|
59
|
+
#
|
60
|
+
# else:
|
61
|
+
# raise ValueError(f"不支持的Proto类型: {type(proto_obj)}")
|
62
|
+
#
|
63
|
+
#
|
64
|
+
# def convert_model_to_proto(model_obj: Any) -> Any:
|
65
|
+
# """
|
66
|
+
# 将Model对象转换为Proto对象
|
67
|
+
#
|
68
|
+
# Args:
|
69
|
+
# model_obj: Model对象
|
70
|
+
#
|
71
|
+
# Returns:
|
72
|
+
# Proto对象
|
73
|
+
# """
|
74
|
+
# # 动态导入,避免循环导入
|
75
|
+
# from ..rpc.gen import file_service_pb2 as file_hub_pb2
|
76
|
+
#
|
77
|
+
# if isinstance(model_obj, File):
|
78
|
+
# proto_type = (
|
79
|
+
# file_hub_pb2.FILE_TYPE_TRADITIONAL
|
80
|
+
# if model_obj.type == ModelFileType.TRADITIONAL
|
81
|
+
# else file_hub_pb2.FILE_TYPE_CUSTOM
|
82
|
+
# )
|
83
|
+
#
|
84
|
+
# return file_hub_pb2.FileInfo(
|
85
|
+
# id=model_obj.id,
|
86
|
+
# name=model_obj.name,
|
87
|
+
# folder_id=model_obj.folder_id or "",
|
88
|
+
# type=proto_type,
|
89
|
+
# size=model_obj.size,
|
90
|
+
# mime_type=model_obj.mime_type or "",
|
91
|
+
# created_at=datetime_to_timestamp(model_obj.created_at),
|
92
|
+
# updated_at=datetime_to_timestamp(model_obj.updated_at),
|
93
|
+
# metadata=model_obj.metadata,
|
94
|
+
# storage_path=model_obj.storage_path or "",
|
95
|
+
# )
|
96
|
+
#
|
97
|
+
# elif isinstance(model_obj, FolderInfo):
|
98
|
+
# return file_hub_pb2.FolderInfo(
|
99
|
+
# id=model_obj.id,
|
100
|
+
# name=model_obj.name,
|
101
|
+
# parent_id=model_obj.parent_id or "",
|
102
|
+
# created_at=datetime_to_timestamp(model_obj.created_at),
|
103
|
+
# updated_at=datetime_to_timestamp(model_obj.updated_at),
|
104
|
+
# metadata=model_obj.metadata,
|
105
|
+
# )
|
106
|
+
#
|
107
|
+
# else:
|
108
|
+
# raise ValueError(f"不支持的Model类型: {type(model_obj)}")
|
@@ -0,0 +1,355 @@
|
|
1
|
+
"""
|
2
|
+
下载助手模块
|
3
|
+
|
4
|
+
提供HTTP下载、流式下载、进度监控等功能
|
5
|
+
"""
|
6
|
+
import time
|
7
|
+
import asyncio
|
8
|
+
from urllib.parse import urlparse
|
9
|
+
|
10
|
+
import aiohttp
|
11
|
+
import requests
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import Union, Optional, Callable
|
14
|
+
from dataclasses import dataclass
|
15
|
+
|
16
|
+
|
17
|
+
@dataclass
class DownloadProgress:
    """Snapshot of the state of an in-flight download."""
    total_size: int          # expected total bytes (0 when unknown)
    downloaded_size: int     # bytes received so far
    percentage: float        # completion percentage, 0-100
    speed: float             # transfer rate, bytes per second
    remaining_time: float    # estimated seconds until completion

    @property
    def is_completed(self) -> bool:
        """Whether every expected byte has arrived (False when size unknown)."""
        if self.total_size <= 0:
            return False
        return self.downloaded_size >= self.total_size
|
29
|
+
|
30
|
+
|
31
|
+
class HttpDownloader:
    """Synchronous HTTP downloader with chunked streaming, resume support and retries."""

    def __init__(self, chunk_size: int = 1024 * 1024, total_retries: int = 3, retry_delay_seconds: int = 5):  # 1MB chunks by default
        self.chunk_size = chunk_size
        self.total_retries = total_retries
        self.retry_delay_seconds = retry_delay_seconds

    def download(
            self,
            url: str,
            save_path: Optional[Union[str, Path]] = None,
            chunk_size: Optional[int] = None,
            headers: Optional[dict] = None,
            progress_callback: Optional[Callable[[DownloadProgress], None]] = None,
            timeout: Optional[int] = None
    ) -> Union[bytes, Path]:
        """
        Download a file from a URL.

        Args:
            url: download URL
            save_path: where to save the file (when None the content is returned as bytes)
            chunk_size: chunk size in bytes
            headers: extra request headers
            progress_callback: callback invoked with DownloadProgress updates
            timeout: request timeout in seconds

        Returns:
            The downloaded content (bytes) or the path the file was saved to.
        """
        headers = headers or {}
        chunk_size = chunk_size or self.chunk_size
        save_path = Path(save_path) if save_path else None

        # Sanity-check that the target extension matches the URL's (when both exist).
        parsed_url = urlparse(url)
        url_suffix = Path(parsed_url.path).suffix.lower()
        if save_path and save_path.suffix and save_path.suffix.lower() != url_suffix:
            raise ValueError(
                f"File extension mismatch: download_url ends with '{url_suffix}', but save_path ends with '{save_path.suffix.lower()}'")

        for attempt in range(self.total_retries):
            try:
                resume_from = 0
                if save_path:
                    temp_path = save_path.with_suffix(save_path.suffix + ".part")
                    if temp_path.exists():
                        # A partial .part file exists: resume from its current size.
                        resume_from = temp_path.stat().st_size
                        headers["Range"] = f"bytes={resume_from}-"

                # Stream the response inside a context manager so the
                # connection is always released, even when an error
                # interrupts the transfer (the original implementation
                # leaked the streamed connection on error paths).
                with requests.get(url, headers=headers, stream=True, timeout=timeout) as response:
                    response.raise_for_status()

                    content_length = int(response.headers.get('content-length', 0))
                    # On a resumed request content-length only covers the remainder.
                    total_size = content_length + resume_from if save_path else content_length

                    if save_path is None:
                        return self._download_to_memory(response, total_size, chunk_size, progress_callback, resume_from)
                    else:
                        return self._download_to_file(response, save_path, total_size, chunk_size, progress_callback,
                                                      resume_from)
            except Exception:
                # Out of retries: surface the last error to the caller.
                if attempt == self.total_retries - 1:
                    raise
                time.sleep(self.retry_delay_seconds)

    def _download_to_memory(
            self,
            response: "requests.Response",
            total_size: int,
            chunk_size: int,
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> bytes:
        """Accumulate the response body in memory and return it as bytes."""
        chunks = []
        downloaded_size = resume_from
        start_time = time.time()

        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:
                chunks.append(chunk)
                downloaded_size += len(chunk)

                if progress_callback:
                    self._report_progress(
                        downloaded_size, total_size, start_time, progress_callback
                    )

        return b''.join(chunks)

    def _download_to_file(
            self,
            response: "requests.Response",
            save_path: Path,
            total_size: int,
            chunk_size: int,
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> Path:
        """Stream the response body to disk via a temporary .part file."""
        # Make sure the target directory exists.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to a temporary .part file and rename on success so an
        # interrupted download never leaves a truncated final file behind.
        temp_path = save_path.with_suffix(save_path.suffix + ".part")

        downloaded_size = resume_from
        start_time = time.time()

        # Append when resuming, otherwise start fresh.
        mode = 'ab' if resume_from > 0 else 'wb'

        with open(temp_path, mode) as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    downloaded_size += len(chunk)
                    if progress_callback:
                        self._report_progress(downloaded_size, total_size, start_time, progress_callback)

        if save_path.exists():
            save_path.unlink()
        temp_path.rename(save_path)
        return save_path

    def _report_progress(
            self,
            downloaded_size: int,
            total_size: int,
            start_time: float,
            callback: Callable[[DownloadProgress], None]
    ):
        """Build a DownloadProgress snapshot and hand it to the callback."""
        elapsed = time.time() - start_time
        speed = downloaded_size / elapsed if elapsed > 0 else 0
        percentage = (downloaded_size / total_size * 100) if total_size > 0 else 0
        remaining = (total_size - downloaded_size) / speed if speed > 0 else 0

        progress = DownloadProgress(
            total_size=total_size,
            downloaded_size=downloaded_size,
            percentage=percentage,
            speed=speed,
            remaining_time=remaining
        )
        callback(progress)
|
179
|
+
|
180
|
+
|
181
|
+
class AsyncHttpDownloader:
    """Asynchronous HTTP downloader with chunked streaming, resume support and retries."""

    def __init__(self, chunk_size: int = 1024 * 1024, total_retries: int = 3, retry_delay_seconds: int = 5):  # 1MB chunks by default
        self.chunk_size = chunk_size
        self.total_retries = total_retries
        self.retry_delay_seconds = retry_delay_seconds

    async def download(
            self,
            url: str,
            save_path: Optional[Union[str, Path]] = None,
            chunk_size: Optional[int] = None,
            headers: Optional[dict] = None,
            progress_callback: Optional[Callable[[DownloadProgress], None]] = None,
            timeout: Optional[int] = None
    ) -> Union[bytes, Path]:
        """
        Asynchronously download a file from a URL.

        Args:
            url: download URL
            save_path: where to save the file (when None the content is returned as bytes)
            chunk_size: chunk size in bytes
            headers: extra request headers
            progress_callback: callback invoked with DownloadProgress updates
            timeout: request timeout in seconds

        Returns:
            The downloaded content (bytes) or the path the file was saved to.
        """
        headers = headers or {}
        save_path = Path(save_path) if save_path else None
        chunk_size = chunk_size or self.chunk_size

        # Sanity-check that the target extension matches the URL's (when both exist).
        parsed_url = urlparse(url)
        url_suffix = Path(parsed_url.path).suffix.lower()
        if save_path and save_path.suffix and save_path.suffix.lower() != url_suffix:
            raise ValueError(
                f"File extension mismatch: download_url ends with '{url_suffix}', but save_path ends with '{save_path.suffix.lower()}'")

        for attempt in range(self.total_retries):
            try:
                resume_from = 0
                if save_path:
                    temp_path = save_path.with_suffix(save_path.suffix + ".part")
                    if temp_path.exists():
                        # A partial .part file exists: resume from its current size.
                        resume_from = temp_path.stat().st_size
                        headers["Range"] = f"bytes={resume_from}-"

                timeout_config = aiohttp.ClientTimeout(total=timeout) if timeout else None

                async with aiohttp.ClientSession() as session:
                    async with session.get(url, headers=headers, timeout=timeout_config) as response:
                        # 200 = full content, 206 = partial content (resumed).
                        if response.status not in (200, 206):
                            raise Exception(f"Unexpected status: {response.status}")

                        # On a resumed request content-length only covers the remainder.
                        total_size = int(response.headers.get("content-length", 0)) + resume_from

                        if save_path is None:
                            return await self._download_to_memory(response, total_size, chunk_size, progress_callback,
                                                                  resume_from)
                        else:
                            return await self._download_to_file(response, save_path, total_size, chunk_size,
                                                                progress_callback, resume_from)
            except Exception:
                # Out of retries: surface the last error to the caller.
                if attempt == self.total_retries - 1:
                    raise
                await asyncio.sleep(self.retry_delay_seconds)

    async def _download_to_memory(
            self,
            response: "aiohttp.ClientResponse",
            total_size: int,
            chunk_size: Optional[int],
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> bytes:
        """Asynchronously accumulate the response body in memory and return it as bytes."""
        chunks = []
        downloaded_size = resume_from
        # Use time.time() so elapsed time is measured with the same clock
        # as _report_progress. The original mixed event-loop time and
        # wall-clock time between callers and the reporter, producing
        # bogus speed/ETA values.
        start_time = time.time()

        async for chunk in response.content.iter_chunked(chunk_size):
            chunks.append(chunk)
            downloaded_size += len(chunk)

            if progress_callback:
                await self._report_progress(
                    downloaded_size, total_size, start_time, progress_callback
                )

        return b''.join(chunks)

    async def _download_to_file(
            self,
            response: "aiohttp.ClientResponse",
            save_path: Path,
            total_size: int,
            chunk_size: Optional[int],
            progress_callback: Optional[Callable[[DownloadProgress], None]],
            resume_from: int = 0
    ) -> Path:
        """Asynchronously stream the response body to disk via a temporary .part file."""
        # Make sure the target directory exists.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to a temporary .part file and rename on success.
        temp_path = save_path.with_suffix(save_path.suffix + ".part")

        # Start counting from the resumed offset so progress reflects the
        # whole file. The original reset this to 0, which under-reported
        # progress (and percentage) on resumed downloads.
        downloaded_size = resume_from
        start_time = time.time()

        # Append when resuming, otherwise start fresh.
        mode = 'ab' if resume_from > 0 else 'wb'

        if aiofiles:
            # Use async file IO when the optional aiofiles dependency is
            # available (imported at the bottom of this module).
            async with aiofiles.open(temp_path, mode) as f:
                async for chunk in response.content.iter_chunked(chunk_size):
                    await f.write(chunk)
                    downloaded_size += len(chunk)

                    if progress_callback:
                        await self._report_progress(
                            downloaded_size, total_size, start_time, progress_callback
                        )
        else:
            # Fall back to blocking file IO.
            with open(temp_path, mode) as f:
                async for chunk in response.content.iter_chunked(chunk_size):
                    f.write(chunk)
                    downloaded_size += len(chunk)

                    if progress_callback:
                        await self._report_progress(
                            downloaded_size, total_size, start_time, progress_callback
                        )
        if save_path.exists():
            save_path.unlink()
        temp_path.rename(save_path)
        return save_path

    async def _report_progress(
            self,
            downloaded_size: int,
            total_size: int,
            start_time: float,
            callback: Callable[[DownloadProgress], None]
    ):
        """Build a DownloadProgress snapshot and deliver it to a sync or async callback."""
        # start_time comes from time.time() in the callers; use the same
        # clock here so the elapsed interval is meaningful.
        elapsed = time.time() - start_time
        speed = downloaded_size / elapsed if elapsed > 0 else 0
        percentage = (downloaded_size / total_size * 100) if total_size > 0 else 0
        remaining = (total_size - downloaded_size) / speed if speed > 0 else 0

        progress = DownloadProgress(
            total_size=total_size,
            downloaded_size=downloaded_size,
            percentage=percentage,
            speed=speed,
            remaining_time=remaining
        )

        if asyncio.iscoroutinefunction(callback):
            await callback(progress)
        else:
            callback(progress)
|
349
|
+
|
350
|
+
|
351
|
+
# 尝试导入aiofiles(可选依赖)
|
352
|
+
try:
|
353
|
+
import aiofiles
|
354
|
+
except ImportError:
|
355
|
+
aiofiles = None
|
@@ -0,0 +1,105 @@
|
|
1
|
+
"""
|
2
|
+
文件工具函数
|
3
|
+
"""
|
4
|
+
import hashlib
|
5
|
+
import mimetypes
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Generator, Optional, BinaryIO, Union
|
8
|
+
|
9
|
+
|
10
|
+
def get_file_mime_type(file_path: Union[str, Path]) -> str:
    """
    Guess a file's MIME type from its name.

    Args:
        file_path: path to the file

    Returns:
        The guessed MIME type, falling back to "application/octet-stream"
        when the extension is unknown.
    """
    guessed, _encoding = mimetypes.guess_type(str(Path(file_path)))
    if guessed is None:
        return "application/octet-stream"
    return guessed
|
23
|
+
|
24
|
+
|
25
|
+
def get_file_extension(file_name: str) -> str:
    """
    Return the extension of a file name.

    Args:
        file_name: the file name

    Returns:
        The lower-cased extension including the leading dot, or an empty
        string when there is none.
    """
    suffix = Path(file_name).suffix
    return suffix.lower()
|
36
|
+
|
37
|
+
|
38
|
+
def humanize_file_size(size_bytes: int) -> str:
    """
    Format a byte count as a human-readable size string.

    Args:
        size_bytes: size in bytes

    Returns:
        A string such as "1.50 KB", using 1024-based units up to PB.
    """
    size = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    # Anything past TB is reported in petabytes.
    return f"{size:.2f} PB"
|
53
|
+
|
54
|
+
|
55
|
+
def calculate_file_hash(file_path: Union[str, Path], algorithm: str = "sha256") -> str:
    """
    Compute the hex digest of a file's contents.

    Args:
        file_path: path of the file to hash
        algorithm: any algorithm name accepted by hashlib (md5, sha1, sha256, ...)

    Returns:
        The hexadecimal digest string.
    """
    digest = hashlib.new(algorithm)

    # Read in 8 KiB blocks so large files never need to fit in memory.
    with Path(file_path).open("rb") as stream:
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)

    return digest.hexdigest()
|
74
|
+
|
75
|
+
|
76
|
+
def split_file_chunks(
        file_obj: BinaryIO,
        chunk_size: int = 1024 * 1024,  # default 1MB
        start_offset: int = 0
) -> Generator[tuple[bytes, int, bool], None, None]:
    """
    Split a file object into chunks.

    Args:
        file_obj: binary file object
        chunk_size: chunk size in bytes
        start_offset: offset to start reading from

    Yields:
        (chunk data, offset, whether this is the last chunk)

    Note:
        The original implementation inferred ``is_last`` from
        ``len(chunk) < chunk_size``, so for files whose size is an exact
        multiple of ``chunk_size`` no chunk was ever flagged as last.
        This version reads one chunk ahead so the flag is always correct.
    """
    file_obj.seek(start_offset)
    offset = start_offset

    chunk = file_obj.read(chunk_size)
    while chunk:
        # Peek at the next chunk: the current one is last iff nothing follows.
        next_chunk = file_obj.read(chunk_size)
        yield chunk, offset, not next_chunk
        offset += len(chunk)
        chunk = next_chunk
|
@@ -0,0 +1,69 @@
|
|
1
|
+
"""
|
2
|
+
重试工具
|
3
|
+
"""
|
4
|
+
import asyncio
|
5
|
+
import functools
|
6
|
+
import time
|
7
|
+
from typing import TypeVar, Callable, Type, Tuple
|
8
|
+
|
9
|
+
T = TypeVar("T")


def retry_with_backoff(
        max_retries: int = 3,
        initial_delay: float = 1.0,
        backoff_factor: float = 2.0,
        max_delay: float = 60.0,
        exceptions: Tuple[Type[Exception], ...] = (Exception,)
):
    """
    Decorator that retries a callable with exponential backoff.

    Supports both synchronous and asynchronous callables; the appropriate
    wrapper flavor is selected automatically.

    Args:
        max_retries: maximum number of retries (total attempts = max_retries + 1)
        initial_delay: delay before the first retry, in seconds
        backoff_factor: multiplier applied to the delay after each retry
        max_delay: upper bound on the delay, in seconds
        exceptions: exception types that trigger a retry; anything else
            propagates immediately
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs) -> T:
                delay = initial_delay
                # The loop always either returns or re-raises, so no code is
                # needed after it. (The original tracked ``last_exception``
                # and had an unreachable ``raise last_exception`` there.)
                for attempt in range(max_retries + 1):
                    try:
                        return await func(*args, **kwargs)
                    except exceptions:
                        # Final attempt: re-raise instead of sleeping again.
                        if attempt >= max_retries:
                            raise
                        await asyncio.sleep(delay)
                        delay = min(delay * backoff_factor, max_delay)
            return async_wrapper
        else:
            @functools.wraps(func)
            def sync_wrapper(*args, **kwargs) -> T:
                delay = initial_delay
                for attempt in range(max_retries + 1):
                    try:
                        return func(*args, **kwargs)
                    except exceptions:
                        # Final attempt: re-raise instead of sleeping again.
                        if attempt >= max_retries:
                            raise
                        time.sleep(delay)
                        delay = min(delay * backoff_factor, max_delay)
            return sync_wrapper

    return decorator
|