tamar-model-client 0.1.22__py3-none-any.whl → 0.1.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tamar_model_client/async_client.py +1 -1
- tamar_model_client/error_handler.py +60 -60
- tamar_model_client/exceptions.py +2 -2
- tamar_model_client/sync_client.py +1 -1
- {tamar_model_client-0.1.22.dist-info → tamar_model_client-0.1.24.dist-info}/METADATA +1 -1
- {tamar_model_client-0.1.22.dist-info → tamar_model_client-0.1.24.dist-info}/RECORD +9 -9
- tests/test_google_azure_final.py +4 -4
- {tamar_model_client-0.1.22.dist-info → tamar_model_client-0.1.24.dist-info}/WHEEL +0 -0
- {tamar_model_client-0.1.22.dist-info → tamar_model_client-0.1.24.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@ import logging
|
|
11
11
|
from typing import Optional, Dict, Any, Callable, Union
|
12
12
|
from collections import defaultdict
|
13
13
|
|
14
|
+
from .core import get_protected_logger
|
14
15
|
from .exceptions import (
|
15
16
|
ErrorContext, TamarModelException,
|
16
17
|
NetworkException, ConnectionException, TimeoutException,
|
@@ -20,17 +21,16 @@ from .exceptions import (
|
|
20
21
|
ERROR_CATEGORIES, RETRY_POLICY, ErrorStats
|
21
22
|
)
|
22
23
|
|
23
|
-
|
24
|
-
logger = logging.getLogger(__name__)
|
24
|
+
logger = get_protected_logger(__name__)
|
25
25
|
|
26
26
|
|
27
27
|
class GrpcErrorHandler:
|
28
28
|
"""统一的 gRPC 错误处理器"""
|
29
|
-
|
29
|
+
|
30
30
|
def __init__(self, client_logger: Optional[logging.Logger] = None):
|
31
31
|
self.logger = client_logger or logger
|
32
32
|
self.error_stats = ErrorStats()
|
33
|
-
|
33
|
+
|
34
34
|
def handle_error(self, error: Union[grpc.RpcError, Exception], context: dict) -> TamarModelException:
|
35
35
|
"""
|
36
36
|
统一错误处理流程:
|
@@ -41,7 +41,7 @@ class GrpcErrorHandler:
|
|
41
41
|
5. 返回相应异常
|
42
42
|
"""
|
43
43
|
error_context = ErrorContext(error, context)
|
44
|
-
|
44
|
+
|
45
45
|
# 记录详细错误日志
|
46
46
|
# 将error_context的重要信息平铺到日志的data字段中
|
47
47
|
log_data = {
|
@@ -61,64 +61,64 @@ class GrpcErrorHandler:
|
|
61
61
|
"is_network_cancelled": error_context.is_network_cancelled() if error_context.error_code == grpc.StatusCode.CANCELLED else None
|
62
62
|
}
|
63
63
|
}
|
64
|
-
|
64
|
+
|
65
65
|
# 如果上下文中有 duration,添加到日志中
|
66
66
|
if 'duration' in context:
|
67
67
|
log_data['duration'] = context['duration']
|
68
|
-
|
68
|
+
|
69
69
|
self.logger.error(
|
70
70
|
f"gRPC Error occurred: {error_context.error_code.name if error_context.error_code else 'UNKNOWN'}",
|
71
71
|
extra=log_data
|
72
72
|
)
|
73
|
-
|
73
|
+
|
74
74
|
# 更新错误统计
|
75
75
|
if error_context.error_code:
|
76
76
|
self.error_stats.record_error(error_context.error_code)
|
77
|
-
|
77
|
+
|
78
78
|
# 根据错误类型返回相应异常
|
79
79
|
return self._create_exception(error_context)
|
80
|
-
|
80
|
+
|
81
81
|
def _create_exception(self, error_context: ErrorContext) -> TamarModelException:
|
82
82
|
"""根据错误上下文创建相应的异常"""
|
83
83
|
error_code = error_context.error_code
|
84
|
-
|
84
|
+
|
85
85
|
if not error_code:
|
86
86
|
return TamarModelException(error_context)
|
87
|
-
|
87
|
+
|
88
88
|
# 认证相关错误
|
89
89
|
if error_code in ERROR_CATEGORIES['AUTH']:
|
90
90
|
if error_code == grpc.StatusCode.UNAUTHENTICATED:
|
91
91
|
return TokenExpiredException(error_context)
|
92
92
|
else:
|
93
93
|
return PermissionDeniedException(error_context)
|
94
|
-
|
94
|
+
|
95
95
|
# 网络相关错误
|
96
96
|
elif error_code in ERROR_CATEGORIES['NETWORK']:
|
97
97
|
if error_code == grpc.StatusCode.DEADLINE_EXCEEDED:
|
98
98
|
return TimeoutException(error_context)
|
99
99
|
else:
|
100
100
|
return ConnectionException(error_context)
|
101
|
-
|
101
|
+
|
102
102
|
# 验证相关错误
|
103
103
|
elif error_code in ERROR_CATEGORIES['VALIDATION']:
|
104
104
|
return InvalidParameterException(error_context)
|
105
|
-
|
105
|
+
|
106
106
|
# 资源相关错误
|
107
107
|
elif error_code == grpc.StatusCode.RESOURCE_EXHAUSTED:
|
108
108
|
return RateLimitException(error_context)
|
109
|
-
|
109
|
+
|
110
110
|
# 服务商相关错误
|
111
111
|
elif error_code in ERROR_CATEGORIES['PROVIDER']:
|
112
112
|
return ProviderException(error_context)
|
113
|
-
|
113
|
+
|
114
114
|
# 默认错误
|
115
115
|
else:
|
116
116
|
return TamarModelException(error_context)
|
117
|
-
|
117
|
+
|
118
118
|
def get_error_stats(self) -> Dict[str, Any]:
|
119
119
|
"""获取错误统计信息"""
|
120
120
|
return self.error_stats.get_stats()
|
121
|
-
|
121
|
+
|
122
122
|
def reset_stats(self):
|
123
123
|
"""重置错误统计"""
|
124
124
|
self.error_stats.reset()
|
@@ -126,60 +126,60 @@ class GrpcErrorHandler:
|
|
126
126
|
|
127
127
|
class ErrorRecoveryStrategy:
|
128
128
|
"""错误恢复策略"""
|
129
|
-
|
129
|
+
|
130
130
|
RECOVERY_ACTIONS = {
|
131
131
|
'refresh_token': 'handle_token_refresh',
|
132
132
|
'reconnect': 'handle_reconnect',
|
133
133
|
'backoff': 'handle_backoff',
|
134
134
|
'circuit_break': 'handle_circuit_break',
|
135
135
|
}
|
136
|
-
|
136
|
+
|
137
137
|
def __init__(self, client):
|
138
138
|
self.client = client
|
139
|
-
|
139
|
+
|
140
140
|
async def recover_from_error(self, error_context: ErrorContext):
|
141
141
|
"""根据错误类型执行恢复动作"""
|
142
142
|
if not error_context.error_code:
|
143
143
|
return
|
144
|
-
|
144
|
+
|
145
145
|
policy = RETRY_POLICY.get(error_context.error_code, {})
|
146
|
-
|
146
|
+
|
147
147
|
if action := policy.get('action'):
|
148
148
|
if action in self.RECOVERY_ACTIONS:
|
149
149
|
handler = getattr(self, self.RECOVERY_ACTIONS[action])
|
150
150
|
await handler(error_context)
|
151
|
-
|
151
|
+
|
152
152
|
async def handle_token_refresh(self, error_context: ErrorContext):
|
153
153
|
"""处理 Token 刷新"""
|
154
154
|
self.client.logger.info("Attempting to refresh JWT token")
|
155
155
|
# 这里需要客户端实现 _refresh_jwt_token 方法
|
156
156
|
if hasattr(self.client, '_refresh_jwt_token'):
|
157
157
|
await self.client._refresh_jwt_token()
|
158
|
-
|
158
|
+
|
159
159
|
async def handle_reconnect(self, error_context: ErrorContext):
|
160
160
|
"""处理重连"""
|
161
161
|
self.client.logger.info("Attempting to reconnect channel")
|
162
162
|
# 这里需要客户端实现 _reconnect_channel 方法
|
163
163
|
if hasattr(self.client, '_reconnect_channel'):
|
164
164
|
await self.client._reconnect_channel()
|
165
|
-
|
165
|
+
|
166
166
|
async def handle_backoff(self, error_context: ErrorContext):
|
167
167
|
"""处理退避等待"""
|
168
168
|
wait_time = self._calculate_backoff(error_context.retry_count)
|
169
169
|
await asyncio.sleep(wait_time)
|
170
|
-
|
170
|
+
|
171
171
|
async def handle_circuit_break(self, error_context: ErrorContext):
|
172
172
|
"""处理熔断"""
|
173
173
|
self.client.logger.warning("Circuit breaker activated")
|
174
174
|
# 这里可以实现熔断逻辑
|
175
175
|
pass
|
176
|
-
|
176
|
+
|
177
177
|
def _calculate_backoff(self, retry_count: int) -> float:
|
178
178
|
"""计算退避时间"""
|
179
179
|
base_delay = 1.0
|
180
180
|
max_delay = 60.0
|
181
181
|
jitter_factor = 0.1
|
182
|
-
|
182
|
+
|
183
183
|
delay = min(base_delay * (2 ** retry_count), max_delay)
|
184
184
|
jitter = random.uniform(0, delay * jitter_factor)
|
185
185
|
return delay + jitter
|
@@ -187,18 +187,18 @@ class ErrorRecoveryStrategy:
|
|
187
187
|
|
188
188
|
class EnhancedRetryHandler:
|
189
189
|
"""增强的重试处理器"""
|
190
|
-
|
190
|
+
|
191
191
|
def __init__(self, max_retries: int = 3, base_delay: float = 1.0):
|
192
192
|
self.max_retries = max_retries
|
193
193
|
self.base_delay = base_delay
|
194
194
|
self.error_handler = GrpcErrorHandler()
|
195
|
-
|
195
|
+
|
196
196
|
async def execute_with_retry(
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
197
|
+
self,
|
198
|
+
func: Callable,
|
199
|
+
*args,
|
200
|
+
context: Optional[Dict[str, Any]] = None,
|
201
|
+
**kwargs
|
202
202
|
):
|
203
203
|
"""
|
204
204
|
执行函数并处理重试
|
@@ -218,19 +218,19 @@ class EnhancedRetryHandler:
|
|
218
218
|
# 记录开始时间
|
219
219
|
import time
|
220
220
|
method_start_time = time.time()
|
221
|
-
|
221
|
+
|
222
222
|
context = context or {}
|
223
223
|
last_exception = None
|
224
|
-
|
224
|
+
|
225
225
|
for attempt in range(self.max_retries + 1):
|
226
226
|
try:
|
227
227
|
context['retry_count'] = attempt
|
228
228
|
return await func(*args, **kwargs)
|
229
|
-
|
229
|
+
|
230
230
|
except (grpc.RpcError, grpc.aio.AioRpcError) as e:
|
231
231
|
# 创建错误上下文
|
232
232
|
error_context = ErrorContext(e, context)
|
233
|
-
|
233
|
+
|
234
234
|
# 判断是否可以重试
|
235
235
|
if not self._should_retry(e, attempt):
|
236
236
|
# 不可重试或已达到最大重试次数
|
@@ -238,10 +238,10 @@ class EnhancedRetryHandler:
|
|
238
238
|
context['duration'] = current_duration
|
239
239
|
last_exception = self.error_handler.handle_error(e, context)
|
240
240
|
break
|
241
|
-
|
241
|
+
|
242
242
|
# 计算当前耗时
|
243
243
|
current_duration = time.time() - method_start_time
|
244
|
-
|
244
|
+
|
245
245
|
# 记录重试日志
|
246
246
|
log_data = {
|
247
247
|
"log_type": "info",
|
@@ -261,15 +261,15 @@ class EnhancedRetryHandler:
|
|
261
261
|
f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}",
|
262
262
|
extra=log_data
|
263
263
|
)
|
264
|
-
|
264
|
+
|
265
265
|
# 执行退避等待
|
266
266
|
if attempt < self.max_retries:
|
267
267
|
delay = self._calculate_backoff(attempt)
|
268
268
|
await asyncio.sleep(delay)
|
269
|
-
|
269
|
+
|
270
270
|
context['duration'] = current_duration
|
271
271
|
last_exception = self.error_handler.handle_error(e, context)
|
272
|
-
|
272
|
+
|
273
273
|
except Exception as e:
|
274
274
|
# 非 gRPC 错误,直接包装抛出
|
275
275
|
context['retry_count'] = attempt
|
@@ -277,28 +277,28 @@ class EnhancedRetryHandler:
|
|
277
277
|
error_context.error_message = str(e)
|
278
278
|
last_exception = TamarModelException(error_context)
|
279
279
|
break
|
280
|
-
|
280
|
+
|
281
281
|
# 抛出最后的异常
|
282
282
|
if last_exception:
|
283
283
|
raise last_exception
|
284
284
|
else:
|
285
285
|
raise TamarModelException("Unknown error occurred")
|
286
|
-
|
286
|
+
|
287
287
|
def _should_retry(self, error: grpc.RpcError, attempt: int) -> bool:
|
288
288
|
"""判断是否应该重试"""
|
289
289
|
error_code = error.code()
|
290
290
|
policy = RETRY_POLICY.get(error_code, {})
|
291
|
-
|
291
|
+
|
292
292
|
# 先检查错误级别的 max_attempts 配置
|
293
293
|
# max_attempts 表示最大重试次数(不包括初始请求)
|
294
294
|
error_max_attempts = policy.get('max_attempts', self.max_retries)
|
295
295
|
if attempt >= error_max_attempts:
|
296
296
|
return False
|
297
|
-
|
297
|
+
|
298
298
|
# 再检查全局的 max_retries
|
299
299
|
if attempt >= self.max_retries:
|
300
300
|
return False
|
301
|
-
|
301
|
+
|
302
302
|
# 检查基本重试策略
|
303
303
|
retryable = policy.get('retryable', False)
|
304
304
|
if retryable == False:
|
@@ -308,30 +308,30 @@ class EnhancedRetryHandler:
|
|
308
308
|
elif retryable == 'conditional':
|
309
309
|
# 条件重试,需要检查错误详情
|
310
310
|
return self._check_conditional_retry(error)
|
311
|
-
|
311
|
+
|
312
312
|
return False
|
313
|
-
|
313
|
+
|
314
314
|
def _check_conditional_retry(self, error: grpc.RpcError) -> bool:
|
315
315
|
"""检查条件重试"""
|
316
316
|
error_message = error.details().lower() if error.details() else ""
|
317
|
-
|
317
|
+
|
318
318
|
# 一些可重试的内部错误模式
|
319
319
|
retryable_patterns = [
|
320
|
-
'temporary', 'timeout', 'unavailable',
|
320
|
+
'temporary', 'timeout', 'unavailable',
|
321
321
|
'connection', 'network', 'try again'
|
322
322
|
]
|
323
|
-
|
323
|
+
|
324
324
|
for pattern in retryable_patterns:
|
325
325
|
if pattern in error_message:
|
326
326
|
return True
|
327
|
-
|
327
|
+
|
328
328
|
return False
|
329
|
-
|
329
|
+
|
330
330
|
def _calculate_backoff(self, attempt: int) -> float:
|
331
331
|
"""计算退避时间"""
|
332
332
|
max_delay = 60.0
|
333
333
|
jitter_factor = 0.1
|
334
|
-
|
334
|
+
|
335
335
|
delay = min(self.base_delay * (2 ** attempt), max_delay)
|
336
336
|
jitter = random.uniform(0, delay * jitter_factor)
|
337
|
-
return delay + jitter
|
337
|
+
return delay + jitter
|
tamar_model_client/exceptions.py
CHANGED
@@ -65,9 +65,9 @@ RETRY_POLICY = {
|
|
65
65
|
'max_attempts': 3
|
66
66
|
},
|
67
67
|
grpc.StatusCode.INTERNAL: {
|
68
|
-
'retryable':
|
68
|
+
'retryable': False, # 内部错误通常不应重试
|
69
69
|
'check_details': True,
|
70
|
-
'max_attempts':
|
70
|
+
'max_attempts': 0
|
71
71
|
},
|
72
72
|
grpc.StatusCode.UNAUTHENTICATED: {
|
73
73
|
'retryable': True,
|
@@ -1,12 +1,12 @@
|
|
1
1
|
tamar_model_client/__init__.py,sha256=4DEIUGlLTeiaECjJQbGYik7C0JO6hHwwfbLYpYpMdzg,444
|
2
|
-
tamar_model_client/async_client.py,sha256=
|
2
|
+
tamar_model_client/async_client.py,sha256=H6IhGI415DGXoeNAd4A0anw1oL4Ss3LYdcEVeG_Co68,34416
|
3
3
|
tamar_model_client/auth.py,sha256=gbwW5Aakeb49PMbmYvrYlVx1mfyn1LEDJ4qQVs-9DA4,438
|
4
4
|
tamar_model_client/circuit_breaker.py,sha256=0XHJXBYA4O8vwsDGwqNrae9zxNJphY5Rfucc9ytVFGA,5419
|
5
|
-
tamar_model_client/error_handler.py,sha256=
|
6
|
-
tamar_model_client/exceptions.py,sha256=
|
5
|
+
tamar_model_client/error_handler.py,sha256=oI_jUTjnq4OXu8fwJoGXNmQpddEgOFF9ZUhbytq7H6c,12384
|
6
|
+
tamar_model_client/exceptions.py,sha256=o1SxCiHVPsJwqP3bWIVPPDFcJOKFRg7zdp8oy9eaV3A,13073
|
7
7
|
tamar_model_client/json_formatter.py,sha256=IyBv_pEEzjF-KaMF-7rxRpNc_fxRYK2A-pu_2n4Liow,1990
|
8
8
|
tamar_model_client/logging_icons.py,sha256=MRTZ1Xvkep9ce_jdltj54_XZUXvIpQ95soRNmLdJ4qw,1837
|
9
|
-
tamar_model_client/sync_client.py,sha256=
|
9
|
+
tamar_model_client/sync_client.py,sha256=RDM-ptIL0cNIie-2qpkTEFh60XTks8p2Wdz0Q5YHA1Q,36241
|
10
10
|
tamar_model_client/utils.py,sha256=Kn6pFz9GEC96H4eejEax66AkzvsrXI3WCSDtgDjnVTI,5238
|
11
11
|
tamar_model_client/core/__init__.py,sha256=bJRJllrp4Xc0g_qu1pW9G-lsXNB7c1r0NBIfb2Ypxe0,832
|
12
12
|
tamar_model_client/core/base_client.py,sha256=sYvJZsDu_66akddAMowSnihFtgOoVKaQJxxnVruF9Ms,8995
|
@@ -27,10 +27,10 @@ tamar_model_client/schemas/inputs.py,sha256=dz1m8NbUIxA99JXZc8WlyzbKpDuz1lEzx3Vg
|
|
27
27
|
tamar_model_client/schemas/outputs.py,sha256=M_fcqUtXPJnfiLabHlyA8BorlC5pYkf5KLjXO1ysKIQ,1031
|
28
28
|
tests/__init__.py,sha256=kbmImddLDwdqlkkmkyKtl4bQy_ipe-R8eskpaBylU9w,38
|
29
29
|
tests/stream_hanging_analysis.py,sha256=W3W48IhQbNAR6-xvMpoWZvnWOnr56CTaH4-aORNBuD4,14807
|
30
|
-
tests/test_google_azure_final.py,sha256=
|
30
|
+
tests/test_google_azure_final.py,sha256=7SaVv8l0n5OXLdzzVOLgK8wC_kFFVx0tULZ8Y9_QAAw,26380
|
31
31
|
tests/test_logging_issue.py,sha256=JTMbotfHpAEPMBj73pOwxPn-Zn4QVQJX6scMz48FRDQ,2427
|
32
32
|
tests/test_simple.py,sha256=Xf0U-J9_xn_LzUsmYu06suK0_7DrPeko8OHoHldsNxE,7169
|
33
|
-
tamar_model_client-0.1.
|
34
|
-
tamar_model_client-0.1.
|
35
|
-
tamar_model_client-0.1.
|
36
|
-
tamar_model_client-0.1.
|
33
|
+
tamar_model_client-0.1.24.dist-info/METADATA,sha256=ilgWWWLCU-vEJe1BmIBXrwGUvG3cnaVh-z49nbIIXcg,23453
|
34
|
+
tamar_model_client-0.1.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
35
|
+
tamar_model_client-0.1.24.dist-info/top_level.txt,sha256=f1I-S8iWN-cgv4gB8gxRg9jJOTJMumvm4oGKVPfGg6A,25
|
36
|
+
tamar_model_client-0.1.24.dist-info/RECORD,,
|
tests/test_google_azure_final.py
CHANGED
@@ -27,7 +27,7 @@ test_logger.addHandler(test_handler)
|
|
27
27
|
logger = test_logger
|
28
28
|
|
29
29
|
os.environ['MODEL_MANAGER_SERVER_GRPC_USE_TLS'] = "true"
|
30
|
-
os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "
|
30
|
+
os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "localhost:50051"
|
31
31
|
os.environ['MODEL_MANAGER_SERVER_JWT_SECRET_KEY'] = "model-manager-server-jwt-key"
|
32
32
|
|
33
33
|
# 导入客户端模块
|
@@ -414,7 +414,7 @@ def test_concurrent_requests(num_requests: int = 150):
|
|
414
414
|
model="tamar-google-gemini-flash-lite",
|
415
415
|
contents="1+1等于几?",
|
416
416
|
user_context=UserContext(
|
417
|
-
user_id=f"
|
417
|
+
user_id=f"{os.environ.get('INSTANCE_ID', '0')}_{request_id:03d}",
|
418
418
|
org_id="test_org",
|
419
419
|
client_type="concurrent_test"
|
420
420
|
),
|
@@ -533,7 +533,7 @@ async def test_async_concurrent_requests(num_requests: int = 150):
|
|
533
533
|
model="tamar-google-gemini-flash-lite",
|
534
534
|
contents="1+1等于几?",
|
535
535
|
user_context=UserContext(
|
536
|
-
user_id=f"
|
536
|
+
user_id=f"{os.environ.get('INSTANCE_ID', '0')}_{request_id:03d}",
|
537
537
|
org_id="test_org",
|
538
538
|
client_type="async_concurrent_test"
|
539
539
|
),
|
@@ -648,7 +648,7 @@ async def main():
|
|
648
648
|
#test_concurrent_requests(150) # 测试150个并发请求
|
649
649
|
|
650
650
|
# 异步并发测试
|
651
|
-
await test_async_concurrent_requests(
|
651
|
+
await test_async_concurrent_requests(50) # 测试150个异步并发请求
|
652
652
|
|
653
653
|
print("\n✅ 测试完成")
|
654
654
|
|
File without changes
|
File without changes
|