tamar-file-hub-client 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- file_hub_client/__init__.py +39 -0
- file_hub_client/client.py +43 -6
- file_hub_client/rpc/async_client.py +91 -11
- file_hub_client/rpc/gen/taple_service_pb2.py +225 -0
- file_hub_client/rpc/gen/taple_service_pb2_grpc.py +1626 -0
- file_hub_client/rpc/generate_grpc.py +2 -2
- file_hub_client/rpc/interceptors.py +550 -0
- file_hub_client/rpc/protos/taple_service.proto +874 -0
- file_hub_client/rpc/sync_client.py +91 -9
- file_hub_client/schemas/__init__.py +60 -0
- file_hub_client/schemas/taple.py +413 -0
- file_hub_client/services/__init__.py +5 -0
- file_hub_client/services/file/async_blob_service.py +558 -482
- file_hub_client/services/file/async_file_service.py +18 -9
- file_hub_client/services/file/base_file_service.py +19 -6
- file_hub_client/services/file/sync_blob_service.py +554 -478
- file_hub_client/services/file/sync_file_service.py +18 -9
- file_hub_client/services/folder/async_folder_service.py +20 -11
- file_hub_client/services/folder/sync_folder_service.py +20 -11
- file_hub_client/services/taple/__init__.py +10 -0
- file_hub_client/services/taple/async_taple_service.py +2281 -0
- file_hub_client/services/taple/base_taple_service.py +353 -0
- file_hub_client/services/taple/idempotent_taple_mixin.py +142 -0
- file_hub_client/services/taple/sync_taple_service.py +2256 -0
- file_hub_client/utils/__init__.py +43 -1
- file_hub_client/utils/file_utils.py +59 -11
- file_hub_client/utils/idempotency.py +196 -0
- file_hub_client/utils/logging.py +315 -0
- file_hub_client/utils/retry.py +241 -2
- file_hub_client/utils/smart_retry.py +403 -0
- tamar_file_hub_client-0.0.2.dist-info/METADATA +2050 -0
- tamar_file_hub_client-0.0.2.dist-info/RECORD +57 -0
- tamar_file_hub_client-0.0.1.dist-info/METADATA +0 -874
- tamar_file_hub_client-0.0.1.dist-info/RECORD +0 -44
- {tamar_file_hub_client-0.0.1.dist-info → tamar_file_hub_client-0.0.2.dist-info}/WHEEL +0 -0
- {tamar_file_hub_client-0.0.1.dist-info → tamar_file_hub_client-0.0.2.dist-info}/top_level.txt +0 -0
file_hub_client/utils/retry.py
CHANGED
@@ -1,12 +1,26 @@
|
|
1
1
|
"""
|
2
2
|
重试工具
|
3
|
+
|
4
|
+
提供向后兼容的重试机制,同时支持智能重试
|
3
5
|
"""
|
4
6
|
import asyncio
|
5
7
|
import functools
|
6
8
|
import time
|
7
|
-
from typing import TypeVar, Callable, Type, Tuple
|
9
|
+
from typing import TypeVar, Callable, Type, Tuple, Any, Dict
|
10
|
+
import logging
|
11
|
+
|
12
|
+
# 导入智能重试功能
|
13
|
+
from .smart_retry import (
|
14
|
+
smart_retry,
|
15
|
+
retry_on_network_errors,
|
16
|
+
retry_on_conflict,
|
17
|
+
no_retry,
|
18
|
+
ErrorClassifier,
|
19
|
+
RetryStrategy
|
20
|
+
)
|
8
21
|
|
9
22
|
T = TypeVar("T")
|
23
|
+
logger = logging.getLogger(__name__)
|
10
24
|
|
11
25
|
|
12
26
|
def retry_with_backoff(
|
@@ -19,6 +33,8 @@ def retry_with_backoff(
|
|
19
33
|
"""
|
20
34
|
带退避策略的重试装饰器
|
21
35
|
|
36
|
+
现在会智能判断错误是否可重试,避免对明显不可恢复的错误进行重试
|
37
|
+
|
22
38
|
Args:
|
23
39
|
max_retries: 最大重试次数
|
24
40
|
initial_delay: 初始延迟(秒)
|
@@ -38,10 +54,33 @@ def retry_with_backoff(
|
|
38
54
|
return await func(*args, **kwargs)
|
39
55
|
except exceptions as e:
|
40
56
|
last_exception = e
|
57
|
+
|
58
|
+
# 使用 ErrorClassifier 检查错误是否可重试
|
59
|
+
is_retryable, _ = ErrorClassifier.is_retryable(e)
|
60
|
+
|
61
|
+
if not is_retryable:
|
62
|
+
logger.debug(
|
63
|
+
f"🚫 不可重试错误 | 操作: {func.__name__} | "
|
64
|
+
f"错误: {type(e).__name__}: {str(e)} | "
|
65
|
+
f"直接抛出异常"
|
66
|
+
)
|
67
|
+
raise
|
68
|
+
|
41
69
|
if attempt < max_retries:
|
70
|
+
logger.warning(
|
71
|
+
f"🔄 触发重试 | 操作: {func.__name__} | "
|
72
|
+
f"尝试: {attempt + 1}/{max_retries + 1} | "
|
73
|
+
f"错误: {type(e).__name__}: {str(e)} | "
|
74
|
+
f"延迟: {delay:.1f}秒"
|
75
|
+
)
|
42
76
|
await asyncio.sleep(delay)
|
43
77
|
delay = min(delay * backoff_factor, max_delay)
|
44
78
|
else:
|
79
|
+
logger.error(
|
80
|
+
f"❌ 重试失败 | 操作: {func.__name__} | "
|
81
|
+
f"已达最大重试次数: {max_retries} | "
|
82
|
+
f"最终错误: {type(e).__name__}: {str(e)}"
|
83
|
+
)
|
45
84
|
raise
|
46
85
|
|
47
86
|
raise last_exception
|
@@ -57,13 +96,213 @@ def retry_with_backoff(
|
|
57
96
|
return func(*args, **kwargs)
|
58
97
|
except exceptions as e:
|
59
98
|
last_exception = e
|
99
|
+
|
100
|
+
# 使用 ErrorClassifier 检查错误是否可重试
|
101
|
+
is_retryable, _ = ErrorClassifier.is_retryable(e)
|
102
|
+
|
103
|
+
if not is_retryable:
|
104
|
+
logger.debug(
|
105
|
+
f"🚫 不可重试错误 | 操作: {func.__name__} | "
|
106
|
+
f"错误: {type(e).__name__}: {str(e)} | "
|
107
|
+
f"直接抛出异常"
|
108
|
+
)
|
109
|
+
raise
|
110
|
+
|
60
111
|
if attempt < max_retries:
|
112
|
+
logger.warning(
|
113
|
+
f"🔄 触发重试 | 操作: {func.__name__} | "
|
114
|
+
f"尝试: {attempt + 1}/{max_retries + 1} | "
|
115
|
+
f"错误: {type(e).__name__}: {str(e)} | "
|
116
|
+
f"延迟: {delay:.1f}秒"
|
117
|
+
)
|
61
118
|
time.sleep(delay)
|
62
119
|
delay = min(delay * backoff_factor, max_delay)
|
63
120
|
else:
|
121
|
+
logger.error(
|
122
|
+
f"❌ 重试失败 | 操作: {func.__name__} | "
|
123
|
+
f"已达最大重试次数: {max_retries} | "
|
124
|
+
f"最终错误: {type(e).__name__}: {str(e)}"
|
125
|
+
)
|
64
126
|
raise
|
65
127
|
|
66
128
|
raise last_exception
|
67
129
|
return sync_wrapper
|
68
130
|
|
69
|
-
return decorator
|
131
|
+
return decorator
|
132
|
+
|
133
|
+
|
134
|
+
def retry_on_lock_conflict(
    max_retries: int = 5,
    initial_delay: float = 0.5,
    backoff_factor: float = 1.5,
    max_delay: float = 10.0
):
    """
    Decorator that re-invokes a call while its *result* reports a lock conflict.

    The returned value of the wrapped callable is inspected with
    ``_is_lock_conflict``; exceptions raised by the callable are not caught
    here and propagate to the caller. Both coroutine functions and plain
    functions are supported (chosen via ``asyncio.iscoroutinefunction``).

    Args:
        max_retries: Maximum number of retries after the first call
            (default 5). Negative values are treated as 0 so the wrapped
            callable is always invoked at least once.
        initial_delay: Delay in seconds before the first retry (default 0.5).
        backoff_factor: Multiplier applied to the delay after each retry
            (default 1.5).
        max_delay: Upper bound in seconds for the delay (default 10).

    Returns:
        A decorator. The decorated callable returns the first result that is
        not a lock conflict, or the last conflicting result once the retry
        budget is used up.
    """
    # Guarantee at least one invocation even for a nonsensical negative budget.
    retries = max(max_retries, 0)

    def _log_retry(name: str, attempt: int, delay: float) -> None:
        # Shared by the sync and async wrappers so both emit identical text.
        logger.warning(
            f"🔒 锁冲突重试 | 操作: {name} | "
            f"尝试: {attempt + 1}/{retries + 1} | "
            f"冲突类型: lock_conflict | "
            f"延迟: {delay:.1f}秒"
        )

    def _log_exhausted(name: str) -> None:
        logger.error(
            f"❌ 锁冲突重试失败 | 操作: {name} | "
            f"已达最大重试次数: {retries} | "
            f"返回最后的冲突结果"
        )

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs) -> T:
                delay = initial_delay
                attempt = 0
                while True:
                    result = await func(*args, **kwargs)

                    if not _is_lock_conflict(result):
                        return result

                    if attempt >= retries:
                        # Budget exhausted: hand the conflicting result back
                        # to the caller instead of raising.
                        _log_exhausted(func.__name__)
                        return result

                    _log_retry(func.__name__, attempt, delay)
                    await asyncio.sleep(delay)
                    delay = min(delay * backoff_factor, max_delay)
                    attempt += 1

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs) -> T:
            delay = initial_delay
            attempt = 0
            while True:
                result = func(*args, **kwargs)

                if not _is_lock_conflict(result):
                    return result

                if attempt >= retries:
                    # Budget exhausted: hand the conflicting result back
                    # to the caller instead of raising.
                    _log_exhausted(func.__name__)
                    return result

                _log_retry(func.__name__, attempt, delay)
                time.sleep(delay)
                delay = min(delay * backoff_factor, max_delay)
                attempt += 1

        return sync_wrapper

    return decorator
|
222
|
+
|
223
|
+
|
224
|
+
def _is_lock_conflict(result: Any) -> bool:
|
225
|
+
"""
|
226
|
+
检查结果是否包含锁冲突
|
227
|
+
|
228
|
+
Args:
|
229
|
+
result: API调用的结果
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
是否是锁冲突
|
233
|
+
"""
|
234
|
+
if result is None:
|
235
|
+
return False
|
236
|
+
|
237
|
+
# 检查错误信息中是否包含锁冲突的关键字(统一转换为小写比对)
|
238
|
+
lock_conflict_messages = [
|
239
|
+
'failed to acquire necessary locks',
|
240
|
+
'lock_conflict',
|
241
|
+
'lock conflict'
|
242
|
+
]
|
243
|
+
|
244
|
+
# 添加调试信息
|
245
|
+
import logging
|
246
|
+
logger = logging.getLogger(__name__)
|
247
|
+
logger.debug(f"Checking for lock conflict in result: {type(result)}")
|
248
|
+
|
249
|
+
# 处理字典格式的响应
|
250
|
+
if isinstance(result, dict):
|
251
|
+
# 检查 conflict_info
|
252
|
+
conflict_info = result.get('conflict_info', {})
|
253
|
+
if isinstance(conflict_info, dict):
|
254
|
+
# 检查 conflict_type
|
255
|
+
if conflict_info.get('conflict_type') == 'lock_conflict':
|
256
|
+
return True
|
257
|
+
# 检查 resolution_suggestion 中的信息
|
258
|
+
suggestion = conflict_info.get('resolution_suggestion', '').lower()
|
259
|
+
if any(msg in suggestion for msg in lock_conflict_messages):
|
260
|
+
return True
|
261
|
+
|
262
|
+
# 检查顶层的 conflict_type
|
263
|
+
if result.get('conflict_type') == 'lock_conflict':
|
264
|
+
return True
|
265
|
+
|
266
|
+
# 检查 error_message
|
267
|
+
error_msg = result.get('error_message', '').lower()
|
268
|
+
logger.debug(f"Checking error_message: '{error_msg}'")
|
269
|
+
if any(msg in error_msg for msg in lock_conflict_messages):
|
270
|
+
logger.debug(f"Found lock conflict in error_message: '{error_msg}'")
|
271
|
+
return True
|
272
|
+
|
273
|
+
# 处理对象格式的响应
|
274
|
+
elif hasattr(result, 'conflict_info'):
|
275
|
+
conflict_info = getattr(result, 'conflict_info', None)
|
276
|
+
if conflict_info:
|
277
|
+
# 检查 conflict_type
|
278
|
+
if hasattr(conflict_info, 'conflict_type'):
|
279
|
+
if getattr(conflict_info, 'conflict_type') == 'lock_conflict':
|
280
|
+
return True
|
281
|
+
# 检查 resolution_suggestion
|
282
|
+
if hasattr(conflict_info, 'resolution_suggestion'):
|
283
|
+
suggestion = getattr(conflict_info, 'resolution_suggestion', '').lower()
|
284
|
+
if any(msg in suggestion for msg in lock_conflict_messages):
|
285
|
+
return True
|
286
|
+
|
287
|
+
# 检查 success 字段和 error_message
|
288
|
+
if hasattr(result, 'success') and not getattr(result, 'success', True):
|
289
|
+
# 检查 conflict_info
|
290
|
+
if hasattr(result, 'conflict_info'):
|
291
|
+
conflict_info = getattr(result, 'conflict_info', None)
|
292
|
+
if conflict_info and hasattr(conflict_info, 'conflict_type'):
|
293
|
+
if getattr(conflict_info, 'conflict_type') == 'lock_conflict':
|
294
|
+
return True
|
295
|
+
|
296
|
+
# 检查 error_message
|
297
|
+
if hasattr(result, 'error_message'):
|
298
|
+
error_msg = getattr(result, 'error_message', '').lower()
|
299
|
+
if any(msg in error_msg for msg in lock_conflict_messages):
|
300
|
+
return True
|
301
|
+
|
302
|
+
# 对于纯对象响应,检查 error_message 字段
|
303
|
+
if hasattr(result, 'error_message'):
|
304
|
+
error_msg = getattr(result, 'error_message', '').lower()
|
305
|
+
if any(msg in error_msg for msg in lock_conflict_messages):
|
306
|
+
return True
|
307
|
+
|
308
|
+
return False
|
@@ -0,0 +1,403 @@
|
|
1
|
+
"""
|
2
|
+
智能重试机制
|
3
|
+
|
4
|
+
提供基于错误类型的智能重试策略
|
5
|
+
"""
|
6
|
+
import asyncio
|
7
|
+
import functools
|
8
|
+
import time
|
9
|
+
import random
|
10
|
+
import grpc
|
11
|
+
from typing import TypeVar, Callable, Type, Tuple, Any, Optional, Dict, Set
|
12
|
+
from enum import Enum
|
13
|
+
import logging
|
14
|
+
|
15
|
+
T = TypeVar("T")
|
16
|
+
logger = logging.getLogger(__name__)
|
17
|
+
|
18
|
+
|
19
|
+
class RetryStrategy(Enum):
    """Back-off strategies a retry loop can apply between attempts."""

    # Do not retry at all.
    NO_RETRY = "no_retry"
    # Retry right away, without waiting.
    IMMEDIATE = "immediate"
    # Linear back-off.
    LINEAR = "linear"
    # Exponential back-off.
    EXPONENTIAL = "exponential"
    # Exponential back-off with random jitter.
    EXPONENTIAL_JITTER = "exp_jitter"
|
26
|
+
|
27
|
+
|
28
|
+
class ErrorClassifier:
    """
    Decide whether an exception is worth retrying and which back-off to use.

    Classification looks, in order, at: the gRPC status code, an HTTP status
    code found on ``error.response.status_code``, the project's own exception
    types, and finally keywords in the error message. Anything that matches
    none of these is treated as non-retryable.
    """

    # gRPC status codes treated as retryable.
    RETRYABLE_GRPC_CODES = {
        grpc.StatusCode.UNAVAILABLE,         # service unavailable
        grpc.StatusCode.DEADLINE_EXCEEDED,   # timed out
        grpc.StatusCode.RESOURCE_EXHAUSTED,  # resource exhausted (may need back-off)
        grpc.StatusCode.ABORTED,             # operation aborted (e.g. lock conflict)
        grpc.StatusCode.INTERNAL,            # internal error (retryable in some cases)
    }

    # gRPC status codes that are never retried.
    NON_RETRYABLE_GRPC_CODES = {
        grpc.StatusCode.INVALID_ARGUMENT,     # invalid argument
        grpc.StatusCode.NOT_FOUND,            # resource does not exist
        grpc.StatusCode.ALREADY_EXISTS,       # resource already exists
        grpc.StatusCode.PERMISSION_DENIED,    # permission denied
        grpc.StatusCode.UNAUTHENTICATED,      # not authenticated
        grpc.StatusCode.FAILED_PRECONDITION,  # precondition failed
        grpc.StatusCode.OUT_OF_RANGE,         # out of range
        grpc.StatusCode.UNIMPLEMENTED,        # not implemented
        grpc.StatusCode.DATA_LOSS,            # data loss
    }

    # HTTP status codes treated as retryable.
    RETRYABLE_HTTP_CODES = {408, 429, 500, 502, 503, 504}

    # Message keywords (lower-case) that mark an error as retryable.
    RETRYABLE_ERROR_KEYWORDS = [
        'timeout', 'timed out',
        'connection reset', 'connection refused', 'connection error',
        'temporarily unavailable', 'service unavailable',
        'too many requests', 'rate limit',
        'lock conflict', 'version conflict',
        'resource busy', 'resource contention',
        'gateway timeout', 'bad gateway',
        'network unreachable', 'dns resolution failed',
    ]

    # Message keywords (lower-case) that mark an error as non-retryable.
    NON_RETRYABLE_ERROR_KEYWORDS = [
        'invalid argument', 'invalid parameter', 'validation error',
        'not found', 'does not exist',
        'already exists', 'duplicate',
        'permission denied', 'forbidden', 'unauthorized',
        'authentication failed', 'invalid credentials',
        'insufficient funds', 'quota exceeded',
        'constraint violation', 'foreign key violation',
    ]

    @classmethod
    def is_retryable(cls, error: Exception) -> Tuple[bool, RetryStrategy]:
        """
        Classify an exception.

        Explicit signals (status codes, then the project's exception types)
        take precedence over substring matching on the message, so that e.g.
        a validation error whose text happens to contain "timeout" is not
        retried.

        Args:
            error: The exception to classify.

        Returns:
            ``(is_retryable, strategy)``; ``strategy`` is
            ``RetryStrategy.NO_RETRY`` whenever ``is_retryable`` is False.
        """
        # 1. gRPC errors, by status code.
        if isinstance(error, grpc.RpcError):
            # The grpc.RpcError base class itself does not define code();
            # guard so classification never masks the original error.
            code_getter = getattr(error, 'code', None)
            code = code_getter() if callable(code_getter) else None
            if code in cls.NON_RETRYABLE_GRPC_CODES:
                return False, RetryStrategy.NO_RETRY
            if code in cls.RETRYABLE_GRPC_CODES:
                # Resource exhaustion gets jitter; every other retryable
                # code (including ABORTED) uses plain exponential back-off.
                if code == grpc.StatusCode.RESOURCE_EXHAUSTED:
                    return True, RetryStrategy.EXPONENTIAL_JITTER
                return True, RetryStrategy.EXPONENTIAL

        # 2. HTTP errors, by status code.
        if hasattr(error, 'response') and hasattr(error.response, 'status_code'):
            status_code = error.response.status_code
            if status_code in cls.RETRYABLE_HTTP_CODES:
                # 429 gets jitter on top of exponential back-off.
                if status_code == 429:
                    return True, RetryStrategy.EXPONENTIAL_JITTER
                return True, RetryStrategy.EXPONENTIAL
            elif 400 <= status_code < 500:
                # Remaining client errors are not retried.
                return False, RetryStrategy.NO_RETRY

        # 3. Project exception types. Imported inside the method as in the
        #    original code; within this method these names refer to the
        #    project's classes, not the builtins of the same name.
        from ..errors import (
            ConnectionError, TimeoutError,  # retryable
            ValidationError, PermissionError, FileNotFoundError  # non-retryable
        )

        if isinstance(error, (ValidationError, PermissionError, FileNotFoundError)):
            return False, RetryStrategy.NO_RETRY

        if isinstance(error, (ConnectionError, TimeoutError)):
            return True, RetryStrategy.EXPONENTIAL

        # 4. Fall back to keyword matching on the message.
        error_msg = str(error).lower()

        if any(keyword in error_msg for keyword in cls.NON_RETRYABLE_ERROR_KEYWORDS):
            return False, RetryStrategy.NO_RETRY

        if any(keyword in error_msg for keyword in cls.RETRYABLE_ERROR_KEYWORDS):
            # Rate limiting gets jitter.
            if 'rate limit' in error_msg or 'too many requests' in error_msg:
                return True, RetryStrategy.EXPONENTIAL_JITTER
            return True, RetryStrategy.EXPONENTIAL

        # 5. Unknown errors are not retried by default.
        return False, RetryStrategy.NO_RETRY

    @classmethod
    def get_retry_delay(
        cls,
        strategy: RetryStrategy,
        attempt: int,
        base_delay: float,
        max_delay: float,
        backoff_factor: float = 2.0
    ) -> float:
        """
        Compute how long to wait before the next attempt.

        Args:
            strategy: Back-off strategy to apply.
            attempt: Number of the attempt that just failed; ``smart_retry``
                passes 1 for the first attempt.
            base_delay: Base delay in seconds.
            max_delay: Upper bound in seconds for the returned delay.
            backoff_factor: Base of the exponential growth.

        Returns:
            The delay in seconds, capped at ``max_delay``. ``NO_RETRY`` and
            ``IMMEDIATE`` always yield 0.
        """
        if strategy in (RetryStrategy.NO_RETRY, RetryStrategy.IMMEDIATE):
            return 0.0

        if strategy == RetryStrategy.LINEAR:
            delay = base_delay * attempt
        elif strategy == RetryStrategy.EXPONENTIAL:
            delay = base_delay * (backoff_factor ** (attempt - 1))
        elif strategy == RetryStrategy.EXPONENTIAL_JITTER:
            # Exponential back-off plus a random jitter of up to +/-25%.
            base = base_delay * (backoff_factor ** (attempt - 1))
            jitter = base * 0.25 * (2 * random.random() - 1)
            delay = base + jitter
        else:
            delay = base_delay

        return min(delay, max_delay)
|
177
|
+
|
178
|
+
|
179
|
+
def smart_retry(
    max_retries: int = 3,
    base_delay: float = 1.0,
    max_delay: float = 60.0,
    backoff_factor: float = 2.0,
    retry_on: Optional[Tuple[Type[Exception], ...]] = None,
    dont_retry_on: Optional[Tuple[Type[Exception], ...]] = None,
    on_retry: Optional[Callable[[Exception, int], None]] = None
):
    """
    Retry decorator driven by ``ErrorClassifier``.

    Works for both coroutine functions and plain functions. An exception is
    retried only if it passes the ``dont_retry_on`` / ``retry_on`` filters
    AND ``ErrorClassifier.is_retryable`` reports it as retryable; otherwise
    it is re-raised unchanged. The wait between attempts comes from
    ``ErrorClassifier.get_retry_delay`` using the strategy the classifier
    picked.

    Args:
        max_retries: Maximum number of retries after the first attempt.
            Negative values are treated as 0.
        base_delay: Base delay in seconds.
        max_delay: Maximum delay in seconds.
        backoff_factor: Back-off multiplier.
        retry_on: If given, only exceptions of these types are candidates
            for a retry.
        dont_retry_on: Exceptions of these types are never retried (takes
            precedence over ``retry_on``).
        on_retry: Callback ``(exception, attempt) -> None`` invoked before
            each retry. In the async wrapper a coroutine returned by the
            callback is awaited.

    Returns:
        A decorator preserving the wrapped callable's sync/async nature.
    """
    # A negative budget would otherwise skip the call and end in `raise None`.
    retries = max(max_retries, 0)

    def decorator(func: Callable[..., T]) -> Callable[..., T]:

        def _log_recovery(attempt: int) -> None:
            # Only worth a log line when at least one retry was needed.
            if attempt > 1:
                logger.info(
                    f"✅ 重试成功 | 操作: {func.__name__} | "
                    f"在第 {attempt} 次尝试后成功"
                )

        def _plan_retry(exc: Exception, attempt: int, args, kwargs) -> Optional[float]:
            """Return the delay before the next attempt, or None to re-raise."""
            # Explicit deny-list wins over everything else.
            if dont_retry_on and isinstance(exc, dont_retry_on):
                logger.debug(f"错误类型 {type(exc).__name__} 在不重试列表中,直接抛出")
                return None

            # Explicit allow-list, when provided.
            if retry_on and not isinstance(exc, retry_on):
                logger.debug(f"错误类型 {type(exc).__name__} 不在重试列表中,直接抛出")
                return None

            is_retryable, strategy = ErrorClassifier.is_retryable(exc)

            if attempt > retries:
                logger.warning(f"达到最大重试次数 {retries},停止重试")
                return None
            if not is_retryable:
                logger.debug(f"错误不可重试: {type(exc).__name__}: {str(exc)}")
                return None

            delay = ErrorClassifier.get_retry_delay(
                strategy, attempt, base_delay, max_delay, backoff_factor
            )

            logger.warning(
                f"🔄 触发重试机制 | "
                f"操作: {func.__name__} | "
                f"尝试: {attempt}/{retries + 1} | "
                f"错误类型: {type(exc).__name__} | "
                f"错误信息: {str(exc)} | "
                f"重试策略: {strategy.value} | "
                f"延迟时间: {delay:.1f}秒"
            )

            # Lazy %-formatting: call arguments can be arbitrarily large, so
            # they are only rendered when DEBUG logging is actually enabled.
            logger.debug(
                "重试详情 - 函数: %s.%s, 参数: args=%s, kwargs=%s",
                func.__module__, func.__name__, args, kwargs,
            )
            return delay

        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs) -> T:
                attempt = 0
                while True:
                    attempt += 1
                    try:
                        result = await func(*args, **kwargs)
                    except Exception as e:
                        delay = _plan_retry(e, attempt, args, kwargs)
                        if delay is None:
                            raise

                        if on_retry:
                            outcome = on_retry(e, attempt)
                            # Support async callbacks instead of leaving the
                            # coroutine un-awaited.
                            if asyncio.iscoroutine(outcome):
                                await outcome

                        if delay > 0:
                            await asyncio.sleep(delay)
                    else:
                        _log_recovery(attempt)
                        return result

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs) -> T:
            attempt = 0
            while True:
                attempt += 1
                try:
                    result = func(*args, **kwargs)
                except Exception as e:
                    delay = _plan_retry(e, attempt, args, kwargs)
                    if delay is None:
                        raise

                    if on_retry:
                        on_retry(e, attempt)

                    if delay > 0:
                        time.sleep(delay)
                else:
                    _log_recovery(attempt)
                    return result

        return sync_wrapper

    return decorator
|
341
|
+
|
342
|
+
|
343
|
+
# 保留原有的装饰器兼容性
|
344
|
+
def retry_with_backoff(
    max_retries: int = 3,
    initial_delay: float = 1.0,
    backoff_factor: float = 2.0,
    max_delay: float = 60.0,
    exceptions: Tuple[Type[Exception], ...] = (Exception,)
):
    """
    Legacy-compatible retry decorator that delegates to ``smart_retry``.

    ``initial_delay`` is forwarded as ``base_delay``. The default
    ``exceptions`` value ``(Exception,)`` is forwarded as "no type filter"
    (``retry_on=None``); any other tuple is passed through as ``retry_on``.
    """
    if exceptions == (Exception,):
        type_filter = None
    else:
        type_filter = exceptions

    options = {
        "max_retries": max_retries,
        "base_delay": initial_delay,
        "max_delay": max_delay,
        "backoff_factor": backoff_factor,
        "retry_on": type_filter,
    }
    return smart_retry(**options)
|
361
|
+
|
362
|
+
|
363
|
+
# 特定场景的预配置装饰器
|
364
|
+
def retry_on_network_errors(max_retries: int = 3):
    """
    Preconfigured ``smart_retry`` restricted to network-type errors.

    Only the project's ``ConnectionError`` / ``TimeoutError`` and
    ``grpc.RpcError`` are candidates for a retry; the base delay is 1 second
    and the delay is capped at 30 seconds.
    """
    from ..errors import ConnectionError, TimeoutError

    network_error_types = (ConnectionError, TimeoutError, grpc.RpcError)
    return smart_retry(
        max_retries=max_retries,
        base_delay=1.0,
        max_delay=30.0,
        retry_on=network_error_types,
    )
|
373
|
+
|
374
|
+
|
375
|
+
def retry_on_conflict(max_retries: int = 5):
    """
    Preconfigured ``smart_retry`` intended for conflicts (lock or version).

    Uses a 0.5 second base delay, a back-off factor of 1.5 and a 10 second
    cap. No exception-type filter is passed, so which errors are retried is
    decided by ``smart_retry``'s default classification.
    """
    conflict_settings = dict(
        max_retries=max_retries,
        base_delay=0.5,
        max_delay=10.0,
        backoff_factor=1.5,
    )
    return smart_retry(**conflict_settings)
|
383
|
+
|
384
|
+
|
385
|
+
def no_retry():
    """Return a ``smart_retry`` decorator configured with zero retries."""
    retry_budget = 0
    return smart_retry(max_retries=retry_budget)
|
388
|
+
|
389
|
+
|
390
|
+
def retry_on_transient_errors(max_retries: int = 2):
    """
    Preconfigured ``smart_retry`` for transient failures (suited to queries).

    Uses few retries and short waits: 0.5 second base delay, back-off factor
    2.0, capped at 5 seconds. No exception-type filter is passed, so whether
    an error is retried is decided by ``smart_retry``'s default
    classification.
    """
    transient_settings = dict(
        max_retries=max_retries,
        base_delay=0.5,
        max_delay=5.0,
        backoff_factor=2.0,
    )
    return smart_retry(**transient_settings)
|