tamar-file-hub-client 0.1.7__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from .base_file_service import BaseFileService
9
9
  from ...enums import UploadMode
10
10
  from ...errors import ValidationError
11
11
  from ...rpc import SyncGrpcClient
12
- from ...schemas import FileUploadResponse, UploadUrlResponse, BatchDownloadUrlResponse, DownloadUrlInfo, GcsUrlInfo, GetGcsUrlResponse, BatchGcsUrlResponse, CompressionStatusResponse, GetVariantsResponse, RecompressionResponse, VariantDownloadUrlResponse, CompressedVariant
12
+ from ...schemas import FileUploadResponse, UploadUrlResponse, BatchDownloadUrlResponse, DownloadUrlInfo, GcsUrlInfo, GetGcsUrlResponse, BatchGcsUrlResponse, CompressionStatusResponse, GetVariantsResponse, RecompressionResponse, VariantDownloadUrlResponse, CompressedVariant, BatchFileStatusResponse, FileStatusInfo, FileStatusDetails, FileUploadStatus, FileCompressionStatus, FileSyncStatus
13
13
  from ...utils import HttpUploader, HttpDownloader, retry_with_backoff, get_file_mime_type
14
14
 
15
15
 
@@ -116,6 +116,7 @@ class SyncBlobService(BaseFileService):
116
116
  is_temporary: Optional[bool] = False,
117
117
  expire_seconds: Optional[int] = None,
118
118
  keep_original_filename: Optional[bool] = False,
119
+ forbid_overwrite: Optional[bool] = True,
119
120
  request_id: Optional[str] = None,
120
121
  **metadata
121
122
  ) -> FileUploadResponse:
@@ -132,6 +133,7 @@ class SyncBlobService(BaseFileService):
132
133
  is_temporary=is_temporary,
133
134
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
134
135
  keep_original_filename=keep_original_filename,
136
+ forbid_overwrite=forbid_overwrite,
135
137
  request_id=request_id,
136
138
  **metadata
137
139
  )
@@ -149,12 +151,13 @@ class SyncBlobService(BaseFileService):
149
151
  "Cache-Control": "public, max-age=86400" # 24小时公共缓存
150
152
  }
151
153
 
152
- # 上传文件到对象存储
154
+ # 上传文件到对象存储,传递forbid_overwrite参数
153
155
  self.http_uploader.upload(
154
156
  url=upload_url_resp.upload_url,
155
157
  content=content,
156
158
  headers=headers,
157
159
  total_size=file_size,
160
+ forbid_overwrite=forbid_overwrite,
158
161
  )
159
162
 
160
163
  # 确认上传完成
@@ -182,6 +185,7 @@ class SyncBlobService(BaseFileService):
182
185
  is_temporary: Optional[bool] = False,
183
186
  expire_seconds: Optional[int] = None,
184
187
  keep_original_filename: Optional[bool] = False,
188
+ forbid_overwrite: Optional[bool] = False,
185
189
  request_id: Optional[str] = None,
186
190
  **metadata
187
191
  ) -> FileUploadResponse:
@@ -198,6 +202,7 @@ class SyncBlobService(BaseFileService):
198
202
  is_temporary=is_temporary,
199
203
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
200
204
  keep_original_filename=keep_original_filename,
205
+ forbid_overwrite=forbid_overwrite,
201
206
  request_id=request_id,
202
207
  **metadata
203
208
  )
@@ -222,13 +227,14 @@ class SyncBlobService(BaseFileService):
222
227
  mime_type=mime_type,
223
228
  )
224
229
 
225
- # 上传文件到对象存储
230
+ # 上传文件到对象存储,传递forbid_overwrite参数
226
231
  self.http_uploader.upload(
227
232
  url=upload_url,
228
233
  content=content,
229
234
  headers=headers,
230
235
  total_size=file_size,
231
- is_resume=True
236
+ is_resume=True,
237
+ forbid_overwrite=forbid_overwrite,
232
238
  )
233
239
 
234
240
  # 确认上传完成
@@ -276,6 +282,7 @@ class SyncBlobService(BaseFileService):
276
282
  is_temporary: Optional[bool] = False,
277
283
  expire_seconds: Optional[int] = None,
278
284
  keep_original_filename: Optional[bool] = False,
285
+ forbid_overwrite: Optional[bool] = False,
279
286
  request_id: Optional[str] = None,
280
287
  **metadata
281
288
  ) -> UploadUrlResponse:
@@ -292,6 +299,7 @@ class SyncBlobService(BaseFileService):
292
299
  is_temporary: 是否为临时文件
293
300
  expire_seconds: 过期秒数
294
301
  keep_original_filename: 是否保留原始文件名(默认False)
302
+ forbid_overwrite: 防止覆盖同名文件(默认False)
295
303
  request_id: 请求ID(可选,如果不提供则自动生成)
296
304
  **metadata: 额外的元数据(如 x-org-id, x-user-id 等)
297
305
 
@@ -311,6 +319,7 @@ class SyncBlobService(BaseFileService):
311
319
  is_temporary=is_temporary,
312
320
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
313
321
  keep_original_filename=keep_original_filename,
322
+ forbid_overwrite=forbid_overwrite,
314
323
  )
315
324
 
316
325
  if folder_id:
@@ -338,6 +347,7 @@ class SyncBlobService(BaseFileService):
338
347
  is_temporary: Optional[bool] = False,
339
348
  expire_seconds: Optional[int] = None,
340
349
  keep_original_filename: Optional[bool] = False,
350
+ forbid_overwrite: Optional[bool] = True,
341
351
  request_id: Optional[str] = None,
342
352
  **metadata
343
353
  ) -> UploadUrlResponse:
@@ -354,6 +364,7 @@ class SyncBlobService(BaseFileService):
354
364
  is_temporary: 是否为临时文件
355
365
  expire_seconds: 过期秒数
356
366
  keep_original_filename: 是否保留原始文件名(默认False)
367
+ forbid_overwrite: 防止覆盖同名文件(默认True)
357
368
  request_id: 请求ID(可选,如果不提供则自动生成)
358
369
  **metadata: 额外的元数据(如 x-org-id, x-user-id 等)
359
370
 
@@ -375,6 +386,7 @@ class SyncBlobService(BaseFileService):
375
386
  is_temporary=is_temporary,
376
387
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
377
388
  keep_original_filename=keep_original_filename,
389
+ forbid_overwrite=forbid_overwrite,
378
390
  )
379
391
 
380
392
  if folder_id:
@@ -400,6 +412,7 @@ class SyncBlobService(BaseFileService):
400
412
  is_temporary: Optional[bool] = False,
401
413
  expire_seconds: Optional[int] = None,
402
414
  keep_original_filename: Optional[bool] = False,
415
+ forbid_overwrite: Optional[bool] = True,
403
416
  url: Optional[str] = None,
404
417
  file_name: Optional[str] = None,
405
418
  mime_type: Optional[str] = None,
@@ -416,6 +429,7 @@ class SyncBlobService(BaseFileService):
416
429
  is_temporary: 是否为临时文件
417
430
  expire_seconds: 过期秒数
418
431
  keep_original_filename: 是否保留原始文件名(默认False)
432
+ forbid_overwrite: 防止覆盖同名文件(默认True)
419
433
  url: 要下载并上传的URL(可选)
420
434
  file_name: 当使用url参数时指定的文件名(可选)
421
435
  mime_type: MIME类型(可选,用于推断文件扩展名,特别适用于AI生成的字节数据)
@@ -510,8 +524,16 @@ class SyncBlobService(BaseFileService):
510
524
  hundred_mb = 1024 * 1024 * 100
511
525
  if file_size >= ten_mb and file_size < hundred_mb: # 10MB
512
526
  mode = UploadMode.STREAM # 大文件自动使用流式上传模式
527
+ # 暂时屏蔽 RESUMABLE 模式,因为OSS断点续传尚未完成开发
528
+ # elif file_size > hundred_mb:
529
+ # mode = UploadMode.RESUMABLE # 特大文件自动使用断点续传模式
513
530
  elif file_size > hundred_mb:
514
- mode = UploadMode.RESUMABLE # 特大文件自动使用断点续传模式
531
+ mode = UploadMode.STREAM # 暂时使用流式上传代替断点续传
532
+
533
+ # OSS断点续传尚未完成,将RESUMABLE模式自动转为STREAM模式
534
+ if mode == UploadMode.RESUMABLE:
535
+ mode = UploadMode.STREAM
536
+ # TODO: 当OSS断点续传功能完成后,移除此转换逻辑
515
537
 
516
538
  # 根据上传模式执行不同的上传逻辑
517
539
  if mode == UploadMode.NORMAL:
@@ -542,6 +564,7 @@ class SyncBlobService(BaseFileService):
542
564
  is_temporary=is_temporary,
543
565
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
544
566
  keep_original_filename=keep_original_filename,
567
+ forbid_overwrite=forbid_overwrite,
545
568
  request_id=request_id,
546
569
  **metadata
547
570
  )
@@ -559,6 +582,7 @@ class SyncBlobService(BaseFileService):
559
582
  is_temporary=is_temporary,
560
583
  expire_seconds=expire_seconds if is_temporary and expire_seconds else None,
561
584
  keep_original_filename=keep_original_filename,
585
+ forbid_overwrite=forbid_overwrite,
562
586
  request_id=request_id,
563
587
  **metadata
564
588
  )
@@ -993,3 +1017,115 @@ class SyncBlobService(BaseFileService):
993
1017
  error=response.error if response.error else None,
994
1018
  variant_info=variant_info
995
1019
  )
1020
+
1021
def batch_get_file_status(
        self,
        file_ids: List[str],
        *,
        include_details: Optional[bool] = False,
        request_id: Optional[str] = None,
        **metadata
) -> BatchFileStatusResponse:
    """
    Query the status of several files with a single RPC.

    Args:
        file_ids: IDs of the files to look up (documented limit: 100 IDs)
        include_details: Whether detailed status information is requested
            (default False; None is sent as False)
        request_id: Request ID used for tracing
        **metadata: Extra gRPC metadata

    Returns:
        BatchFileStatusResponse: One FileStatusInfo per status entry in the
        response, plus the response's timestamp and cache_hit_count
    """
    from ...rpc.gen import file_service_pb2, file_service_pb2_grpc

    def _field_or_none(message, field_name):
        # Optional proto fields are reported as None when they are not set.
        if message.HasField(field_name):
            return getattr(message, field_name)
        return None

    # Detail fields that are copied only when present on the message.
    optional_detail_fields = (
        'file_size',
        'storage_type',
        'storage_region',
        'compression_task_id',
        'compression_variants_count',
        'compression_progress',
        'sync_regions_total',
        'sync_regions_completed',
    )

    service_stub = self.client.get_stub(file_service_pb2_grpc.FileServiceStub)

    rpc_request = file_service_pb2.BatchFileStatusRequest(
        file_ids=file_ids,
        include_details=False if include_details is None else include_details,
    )

    # Build the call metadata
    call_metadata = self.client.build_metadata(request_id=request_id, **metadata)

    rpc_response = service_stub.BatchGetFileStatus(rpc_request, metadata=call_metadata)

    # Convert every status entry into its schema counterpart
    converted = []
    for entry in rpc_response.statuses:
        detail_obj = None
        if entry.HasField('details'):
            raw_details = entry.details
            detail_kwargs = {
                name: _field_or_none(raw_details, name)
                for name in optional_detail_fields
            }
            detail_obj = FileStatusDetails(
                sync_pending_regions=list(raw_details.sync_pending_regions),
                **detail_kwargs
            )

        converted.append(FileStatusInfo(
            file_id=entry.file_id,
            upload_status=self._convert_upload_status(entry.upload_status),
            compression_status=self._convert_compression_status(entry.compression_status),
            sync_status=self._convert_sync_status(entry.sync_status),
            details=detail_obj,
            error_message=_field_or_none(entry, 'error_message'),
        ))

    return BatchFileStatusResponse(
        statuses=converted,
        timestamp=rpc_response.timestamp,
        cache_hit_count=rpc_response.cache_hit_count,
    )
1092
+
1093
def _convert_upload_status(self, proto_status: int) -> FileUploadStatus:
    """Map a numeric upload status to FileUploadStatus (UPLOAD_UNKNOWN if unmapped)."""
    # Position in the tuple is the numeric value being mapped (0..5).
    ordered_members = (
        FileUploadStatus.UPLOAD_UNKNOWN,
        FileUploadStatus.UPLOAD_PENDING,
        FileUploadStatus.UPLOAD_PROCESSING,
        FileUploadStatus.UPLOAD_COMPLETED,
        FileUploadStatus.UPLOAD_FAILED,
        FileUploadStatus.UPLOAD_FILE_NOT_FOUND,
    )
    lookup = dict(enumerate(ordered_members))
    return lookup.get(proto_status, FileUploadStatus.UPLOAD_UNKNOWN)
1104
+
1105
def _convert_compression_status(self, proto_status: int) -> FileCompressionStatus:
    """Map a numeric compression status to FileCompressionStatus (COMPRESSION_UNKNOWN if unmapped)."""
    # Position in the tuple is the numeric value being mapped (0..7).
    ordered_members = (
        FileCompressionStatus.COMPRESSION_UNKNOWN,
        FileCompressionStatus.COMPRESSION_NOT_APPLICABLE,
        FileCompressionStatus.COMPRESSION_PENDING,
        FileCompressionStatus.COMPRESSION_PROCESSING,
        FileCompressionStatus.COMPRESSION_COMPLETED,
        FileCompressionStatus.COMPRESSION_FAILED,
        FileCompressionStatus.COMPRESSION_SKIPPED,
        FileCompressionStatus.COMPRESSION_FILE_NOT_FOUND,
    )
    lookup = dict(enumerate(ordered_members))
    return lookup.get(proto_status, FileCompressionStatus.COMPRESSION_UNKNOWN)
1118
+
1119
def _convert_sync_status(self, proto_status: int) -> FileSyncStatus:
    """Map a numeric sync status to FileSyncStatus (SYNC_UNKNOWN if unmapped)."""
    # Position in the tuple is the numeric value being mapped (0..7).
    ordered_members = (
        FileSyncStatus.SYNC_UNKNOWN,
        FileSyncStatus.SYNC_NOT_REQUIRED,
        FileSyncStatus.SYNC_PENDING,
        FileSyncStatus.SYNC_PROCESSING,
        FileSyncStatus.SYNC_PARTIAL,
        FileSyncStatus.SYNC_COMPLETED,
        FileSyncStatus.SYNC_FAILED,
        FileSyncStatus.SYNC_FILE_NOT_FOUND,
    )
    lookup = dict(enumerate(ordered_members))
    return lookup.get(proto_status, FileSyncStatus.SYNC_UNKNOWN)
@@ -490,3 +490,69 @@ class SyncFileService(BaseFileService):
490
490
  error=response.error if response.error else None,
491
491
  variant_info=variant_info
492
492
  )
493
+
494
def import_from_gcs(
        self,
        gcs_uri: str,
        operation_type: str,
        *,
        folder_id: Optional[str] = None,
        file_name: Optional[str] = None,
        keep_original_filename: bool = False,
        created_by_role: Optional[str] = None,
        created_by: Optional[str] = None,
        request_id: Optional[str] = None,
        **metadata
) -> 'ImportFromGcsResponse':
    """
    Import a file from GCS through the file service.

    Args:
        gcs_uri: GCS URI, e.g. gs://bucket/path/to/file
        operation_type: Operation type, "copy" or "move"
        folder_id: Target folder ID (optional)
        file_name: Custom file name (optional)
        keep_original_filename: Keep the original file name (default False)
        created_by_role: Creator role (optional)
        created_by: Creator ID (optional)
        request_id: Request ID used for tracing
        **metadata: Extra gRPC metadata

    Returns:
        ImportFromGcsResponse: Converted file info and upload-file info
        taken from the RPC response
    """
    from ...rpc.gen import file_service_pb2, file_service_pb2_grpc
    from ...schemas import ImportFromGcsResponse

    service_stub = self.client.get_stub(file_service_pb2_grpc.FileServiceStub)

    rpc_request = file_service_pb2.ImportFromGcsRequest(
        gcs_uri=gcs_uri,
        operation_type=operation_type,
        keep_original_filename=keep_original_filename,
    )

    # Optional fields are set on the request only when a truthy value was given.
    optional_fields = (
        ('folder_id', folder_id),
        ('file_name', file_name),
        ('created_by_role', created_by_role),
        ('created_by', created_by),
    )
    for field_name, field_value in optional_fields:
        if field_value:
            setattr(rpc_request, field_name, field_value)

    # Build the call metadata
    call_metadata = self.client.build_metadata(request_id=request_id, **metadata)

    rpc_response = service_stub.ImportFromGcs(rpc_request, metadata=call_metadata)

    # Convert both parts of the response into their schema objects
    return ImportFromGcsResponse(
        file=self._convert_file_info(rpc_response.file),
        upload_file=self._convert_upload_file_info(rpc_response.upload_file),
    )
@@ -115,22 +115,18 @@ def setup_logging(
115
115
 
116
116
  # 初始化日志(使用JSON格式)
117
117
  if enable_grpc_logging:
118
- log_record = logging.LogRecord(
119
- name=logger.name,
120
- level=logging.INFO,
121
- pathname="",
122
- lineno=0,
123
- msg="📡 文件中心客户端 gRPC 日志已初始化",
124
- args=(),
125
- exc_info=None
118
+ # 使用 logger.debug() 以遵守日志级别设置
119
+ logger.debug(
120
+ "📡 文件中心客户端 gRPC 日志已初始化",
121
+ extra={
122
+ "log_type": "debug",
123
+ "data": {
124
+ "level": level,
125
+ "grpc_logging": enable_grpc_logging,
126
+ "json_format": use_json_format
127
+ }
128
+ }
126
129
  )
127
- log_record.log_type = "info"
128
- log_record.data = {
129
- "level": level,
130
- "grpc_logging": enable_grpc_logging,
131
- "json_format": use_json_format
132
- }
133
- logger.handle(log_record)
134
130
 
135
131
 
136
132
  def get_logger() -> logging.Logger:
@@ -9,11 +9,61 @@ import asyncio
9
9
  import aiohttp
10
10
  import requests
11
11
  from pathlib import Path
12
- from typing import Union, BinaryIO, Optional, Callable, Dict, Any, AsyncGenerator
12
+ from typing import Union, BinaryIO, Optional, Callable, Dict, Any, AsyncGenerator, Tuple
13
13
  from dataclasses import dataclass
14
14
  import hashlib
15
15
 
16
16
 
17
def detect_storage_type(url: str) -> str:
    """
    Detect which object-storage backend an upload URL points at.

    The host name of the URL is checked first, so a path or query string
    that merely mentions another provider's domain (for example a callback
    parameter) cannot override the real host. When the host is missing or
    not recognised, the whole URL string is searched, which is what the
    previous implementation always did.

    Args:
        url: Upload URL

    Returns:
        'gcs' when a Google Cloud Storage domain is recognised, 'oss' when
        an Alibaba Cloud OSS domain is recognised. Anything else falls back
        to 'gcs'; 'unknown' is never returned.
    """
    from urllib.parse import urlsplit

    gcs_markers = ('storage.googleapis.com', 'storage.cloud.google.com')
    oss_markers = ('aliyuncs.com',)

    def _classify(text):
        # GCS is checked before OSS, matching the original precedence.
        if any(marker in text for marker in gcs_markers):
            return 'gcs'
        if any(marker in text for marker in oss_markers):
            return 'oss'
        return None

    try:
        host = urlsplit(url).hostname
    except ValueError:
        # Malformed authority (e.g. bad IPv6 literal): rely on the raw string.
        host = None

    # Host first (authoritative), then the raw URL for backward compatibility.
    for candidate in (host, url):
        if candidate:
            detected = _classify(candidate)
            if detected is not None:
                return detected

    # Unrecognised URLs are deliberately treated as GCS.
    return 'gcs'
33
+
34
+
35
def get_forbid_overwrite_headers(url: str, forbid_overwrite: bool = False) -> Dict[str, str]:
    """
    Build the HTTP headers that ask the storage backend not to overwrite.

    Args:
        url: Upload URL, used to decide which backend's header applies
        forbid_overwrite: Whether overwrite protection is requested

    Returns:
        A dict holding the backend-specific header, or an empty dict when
        protection is not requested or the backend is not recognised

    Note:
        - GCS: x-goog-if-generation-match: 0
        - OSS: x-oss-forbid-overwrite: true
    """
    if forbid_overwrite:
        # Built on every call so each caller receives its own dict.
        headers_by_backend = {
            'gcs': {'x-goog-if-generation-match': '0'},
            'oss': {'x-oss-forbid-overwrite': 'true'},
        }
        return headers_by_backend.get(detect_storage_type(url), {})

    return {}
65
+
66
+
17
67
  @dataclass
18
68
  class UploadProgress:
19
69
  """上传进度信息"""
@@ -127,10 +177,11 @@ class HttpUploader:
127
177
  progress_callback: Optional[Callable[[UploadProgress], None]] = None,
128
178
  total_size: Optional[int] = None,
129
179
  is_resume: bool = False,
180
+ forbid_overwrite: bool = False,
130
181
  ) -> requests.Response:
131
182
  """
132
183
  上传文件到指定URL
133
-
184
+
134
185
  Args:
135
186
  url: 上传URL
136
187
  content: 文件内容
@@ -138,9 +189,15 @@ class HttpUploader:
138
189
  headers: 请求头
139
190
  progress_callback: 进度回调函数
140
191
  is_resume: 是否断点续传
192
+ forbid_overwrite: 是否防止覆盖(添加相应的header)
141
193
  """
142
194
  headers = headers or {}
143
195
 
196
+ # 添加防止覆盖的headers
197
+ if forbid_overwrite:
198
+ forbid_headers = get_forbid_overwrite_headers(url, forbid_overwrite)
199
+ headers.update(forbid_headers)
200
+
144
201
  # 获取文件大小(不生成 chunk,避免提前读取)
145
202
  final_total_size = self._calculate_total_size(content) if total_size is None else total_size
146
203
 
@@ -360,19 +417,26 @@ class AsyncHttpUploader:
360
417
  progress_callback: Optional[Callable[[UploadProgress], None]] = None,
361
418
  total_size: Optional[int] = None,
362
419
  is_resume: bool = False,
420
+ forbid_overwrite: bool = False,
363
421
  ) -> aiohttp.ClientResponse:
364
422
  """
365
423
  异步上传文件到指定URL
366
-
424
+
367
425
  Args:
368
426
  url: 上传URL
369
427
  content: 文件内容
370
428
  headers: 请求头
371
429
  progress_callback: 进度回调函数
372
430
  is_resume: 是否断点续传
431
+ forbid_overwrite: 是否防止覆盖(添加相应的header)
373
432
  """
374
433
  headers = headers or {}
375
434
 
435
+ # 添加防止覆盖的headers
436
+ if forbid_overwrite:
437
+ forbid_headers = get_forbid_overwrite_headers(url, forbid_overwrite)
438
+ headers.update(forbid_headers)
439
+
376
440
  # 获取文件大小(避免读取内容)
377
441
  final_total_size = total_size or await self._calculate_total_size(content)
378
442