tamar-model-client 0.1.24__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/PKG-INFO +1 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/setup.py +1 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/async_client.py +156 -2
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/base_client.py +1 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/exceptions.py +1 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/sync_client.py +155 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/PKG-INFO +1 -1
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tests/test_google_azure_final.py +3 -3
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/README.md +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/setup.cfg +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/__init__.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/auth.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/circuit_breaker.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/__init__.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/http_fallback.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/logging_setup.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/request_builder.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/response_handler.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/utils.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/__init__.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/channel.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/invoke.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/providers.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/error_handler.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/generated/__init__.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/generated/model_service_pb2.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/generated/model_service_pb2_grpc.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/json_formatter.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/logging_icons.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/__init__.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/inputs.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/outputs.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/utils.py +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/SOURCES.txt +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/dependency_links.txt +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/requires.txt +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/top_level.txt +0 -0
- {tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tests/__init__.py +0 -0
@@ -98,6 +98,9 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
98
98
|
# === gRPC 通道和连接管理 ===
|
99
99
|
self.channel: Optional[grpc.aio.Channel] = None
|
100
100
|
self.stub: Optional[model_service_pb2_grpc.ModelServiceStub] = None
|
101
|
+
self._channel_error_count = 0
|
102
|
+
self._last_channel_error_time = None
|
103
|
+
self._channel_lock = asyncio.Lock() # 异步锁
|
101
104
|
|
102
105
|
# === 增强的重试处理器 ===
|
103
106
|
self.retry_handler = EnhancedRetryHandler(
|
@@ -176,9 +179,23 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
176
179
|
Raises:
|
177
180
|
ConnectionError: 当达到最大重试次数仍无法连接时
|
178
181
|
"""
|
179
|
-
if self.channel and self.stub:
|
182
|
+
if self.channel and self.stub and await self._is_channel_healthy():
|
180
183
|
return
|
181
184
|
|
185
|
+
# 如果 channel 存在但不健康,记录日志
|
186
|
+
if self.channel and self.stub:
|
187
|
+
logger.warning(
|
188
|
+
"Channel exists but unhealthy, will recreate",
|
189
|
+
extra={
|
190
|
+
"log_type": "channel_recreate",
|
191
|
+
"data": {
|
192
|
+
"channel_error_count": self._channel_error_count,
|
193
|
+
"time_since_last_error": time.time() - self._last_channel_error_time if self._last_channel_error_time else None
|
194
|
+
}
|
195
|
+
}
|
196
|
+
)
|
197
|
+
await self._recreate_channel()
|
198
|
+
|
182
199
|
retry_count = 0
|
183
200
|
options = self.build_channel_options()
|
184
201
|
|
@@ -228,6 +245,111 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
228
245
|
await asyncio.sleep(self.retry_delay * retry_count)
|
229
246
|
|
230
247
|
raise ConnectionError(f"Failed to connect to {self.server_address} after {self.max_retries} retries")
|
248
|
+
|
249
|
+
async def _is_channel_healthy(self) -> bool:
    """Report whether the current gRPC channel can keep being used.

    The channel is considered unhealthy when any of the following holds:

    * no channel has been created yet;
    * its connectivity state is ``SHUTDOWN`` or ``TRANSIENT_FAILURE``;
    * more than 3 (weighted) errors are on record and the most recent
      one happened less than 60 seconds ago;
    * querying the state raised an exception.

    Returns:
        bool: True if the channel is healthy, False if it should be rebuilt.
    """
    if not self.channel:
        return False

    try:
        connectivity = self.channel.get_state()

        # States in which the channel cannot serve calls right now.
        broken_states = (
            grpc.ChannelConnectivity.SHUTDOWN,
            grpc.ChannelConnectivity.TRANSIENT_FAILURE,
        )
        if connectivity in broken_states:
            logger.warning(
                f"Channel in unhealthy state: {connectivity}",
                extra={
                    "log_type": "info",
                    "data": {"channel_state": str(connectivity)},
                },
            )
            return False

        # A burst of recent errors also counts as unhealthy, even when the
        # connectivity state itself still looks fine.
        error_total = self._channel_error_count
        last_error_at = self._last_channel_error_time
        if error_total > 3 and last_error_at and time.time() - last_error_at < 60:
            logger.warning(
                "Too many channel errors recently, marking as unhealthy",
                extra={
                    "log_type": "info",
                    "data": {"error_count": self._channel_error_count},
                },
            )
            return False
    except Exception as exc:
        # Any failure while inspecting the channel is treated as "unhealthy".
        logger.error(
            f"Error checking channel health: {exc}",
            extra={"log_type": "info", "data": {"error": str(exc)}},
        )
        return False

    return True
|
286
|
+
|
287
|
+
async def _recreate_channel(self):
    """Discard the current gRPC channel so that a fresh one can be set up.

    While holding ``self._channel_lock`` this closes the existing channel
    (a failure to close is logged and otherwise ignored), clears the
    ``channel`` and ``stub`` references and resets the channel error
    bookkeeping. This method does not open a new channel itself.
    """
    async with self._channel_lock:
        stale_channel = self.channel
        if stale_channel:
            try:
                await stale_channel.close()
                logger.info("Closed unhealthy channel", extra={"log_type": "info"})
            except Exception as exc:
                # Best effort: a channel that cannot be closed is dropped anyway.
                logger.warning(
                    f"Error closing channel: {exc}",
                    extra={"log_type": "info"},
                )

        # Drop the references to the old channel and its stub.
        self.channel = self.stub = None

        # Start the health bookkeeping from scratch.
        self._channel_error_count = 0
        self._last_channel_error_time = None

        logger.info("Recreating gRPC channel...", extra={"log_type": "info"})
|
314
|
+
|
315
|
+
def _record_channel_error(self, error: grpc.RpcError):
    """Record a gRPC error against the channel for later health checks.

    Increments ``self._channel_error_count`` by 1 (plus 2 more when the
    status code is ``INTERNAL`` or ``UNAVAILABLE``), stamps
    ``self._last_channel_error_time`` with the current time and emits a
    warning log describing the error and the channel state.

    Args:
        error: The gRPC error to record.
    """
    now = time.time()
    # Remember the previous error time BEFORE overwriting it; computing the
    # elapsed time from the freshly written timestamp would always give ~0.
    previous_error_time = self._last_channel_error_time

    self._channel_error_count += 1
    self._last_channel_error_time = now

    # Snapshot the current channel state for the log record.
    channel_state = None
    if self.channel:
        try:
            channel_state = self.channel.get_state()
        except Exception:
            # Not a bare ``except``: BaseException subclasses such as
            # KeyboardInterrupt must not be swallowed here.
            channel_state = "UNKNOWN"

    # Severe errors weigh more heavily in the health check.
    error_code = error.code()
    if error_code in (grpc.StatusCode.INTERNAL, grpc.StatusCode.UNAVAILABLE):
        self._channel_error_count += 2

    # None means "no earlier error on record", matching the other log sites
    # that report time_since_last_error.
    if previous_error_time is not None:
        time_since_last_error = now - previous_error_time
    else:
        time_since_last_error = None

    logger.warning(
        f"Channel error recorded: {error_code.name}",
        extra={
            "log_type": "channel_error",
            "data": {
                "error_code": error_code.name,
                "error_count": self._channel_error_count,
                "channel_state": str(channel_state) if channel_state is not None else "NO_CHANNEL",
                "time_since_last_error": time_since_last_error,
                "error_details": error.details() if hasattr(error, 'details') else "",
                "debug_string": error.debug_error_string() if hasattr(error, 'debug_error_string') else "",
            },
        },
    )
|
231
353
|
|
232
354
|
async def _retry_request(self, func, *args, **kwargs):
|
233
355
|
"""
|
@@ -315,7 +437,33 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
315
437
|
elif retryable == 'conditional':
|
316
438
|
# 条件重试,特殊处理 CANCELLED
|
317
439
|
if error_code == grpc.StatusCode.CANCELLED:
|
318
|
-
|
440
|
+
# 获取 channel 状态信息
|
441
|
+
channel_state = None
|
442
|
+
if self.channel:
|
443
|
+
try:
|
444
|
+
channel_state = self.channel.get_state()
|
445
|
+
except:
|
446
|
+
channel_state = "UNKNOWN"
|
447
|
+
|
448
|
+
is_network_cancelled = error_context.is_network_cancelled()
|
449
|
+
|
450
|
+
logger.warning(
|
451
|
+
f"CANCELLED error in stream, channel state: {channel_state}",
|
452
|
+
extra={
|
453
|
+
"log_type": "cancelled_debug",
|
454
|
+
"request_id": context.get('request_id'),
|
455
|
+
"data": {
|
456
|
+
"channel_state": str(channel_state) if channel_state else "NO_CHANNEL",
|
457
|
+
"channel_error_count": self._channel_error_count,
|
458
|
+
"time_since_last_error": time.time() - self._last_channel_error_time if self._last_channel_error_time else None,
|
459
|
+
"channel_healthy": await self._is_channel_healthy(),
|
460
|
+
"is_network_cancelled": is_network_cancelled,
|
461
|
+
"debug_string": e.debug_error_string() if hasattr(e, 'debug_error_string') else ""
|
462
|
+
}
|
463
|
+
}
|
464
|
+
)
|
465
|
+
|
466
|
+
should_retry = is_network_cancelled
|
319
467
|
else:
|
320
468
|
should_retry = self._check_error_details_for_retry(e)
|
321
469
|
else:
|
@@ -363,6 +511,8 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
363
511
|
)
|
364
512
|
context['duration'] = current_duration
|
365
513
|
last_exception = self.error_handler.handle_error(e, context)
|
514
|
+
# 记录 channel 错误
|
515
|
+
self._record_channel_error(e)
|
366
516
|
break
|
367
517
|
|
368
518
|
last_exception = e
|
@@ -674,6 +824,10 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
|
|
674
824
|
)
|
675
825
|
})
|
676
826
|
|
827
|
+
# 记录 channel 错误
|
828
|
+
if isinstance(e, grpc.RpcError):
|
829
|
+
self._record_channel_error(e)
|
830
|
+
|
677
831
|
# 记录失败并尝试降级(如果启用了熔断)
|
678
832
|
if self.resilient_enabled and self.circuit_breaker:
|
679
833
|
# 将错误码传递给熔断器,用于智能失败统计
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/base_client.py
RENAMED
@@ -74,7 +74,7 @@ class BaseClient(ABC):
|
|
74
74
|
|
75
75
|
# === 重试配置 ===
|
76
76
|
self.max_retries = max_retries if max_retries is not None else int(
|
77
|
-
os.getenv("MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES",
|
77
|
+
os.getenv("MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES", 6))
|
78
78
|
self.retry_delay = retry_delay if retry_delay is not None else float(
|
79
79
|
os.getenv("MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY", 1.0))
|
80
80
|
|
@@ -77,7 +77,7 @@ RETRY_POLICY = {
|
|
77
77
|
grpc.StatusCode.CANCELLED: {
|
78
78
|
'retryable': True,
|
79
79
|
'backoff': 'linear', # 线性退避,网络问题通常不需要指数退避
|
80
|
-
'max_attempts':
|
80
|
+
'max_attempts': 5, # 最大重试次数(不包括初始请求),总共会尝试6次
|
81
81
|
'check_details': False # 不检查详细信息,统一重试
|
82
82
|
},
|
83
83
|
grpc.StatusCode.ABORTED: {
|
@@ -22,6 +22,7 @@ Tamar Model Client 同步客户端实现
|
|
22
22
|
import json
|
23
23
|
import logging
|
24
24
|
import random
|
25
|
+
import threading
|
25
26
|
import time
|
26
27
|
from typing import Optional, Union, Iterator
|
27
28
|
|
@@ -95,6 +96,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
95
96
|
# === gRPC 通道和连接管理 ===
|
96
97
|
self.channel: Optional[grpc.Channel] = None
|
97
98
|
self.stub: Optional[model_service_pb2_grpc.ModelServiceStub] = None
|
99
|
+
self._channel_error_count = 0
|
100
|
+
self._last_channel_error_time = None
|
101
|
+
self._channel_lock = threading.Lock() # 线程安全的channel操作
|
98
102
|
|
99
103
|
def close(self):
|
100
104
|
"""
|
@@ -143,8 +147,22 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
143
147
|
Raises:
|
144
148
|
ConnectionError: 当达到最大重试次数仍无法连接时
|
145
149
|
"""
|
146
|
-
if self.channel and self.stub:
|
150
|
+
if self.channel and self.stub and self._is_channel_healthy():
|
147
151
|
return
|
152
|
+
|
153
|
+
# 如果 channel 存在但不健康,记录日志
|
154
|
+
if self.channel and self.stub:
|
155
|
+
logger.warning(
|
156
|
+
"Channel exists but unhealthy, will recreate",
|
157
|
+
extra={
|
158
|
+
"log_type": "channel_recreate",
|
159
|
+
"data": {
|
160
|
+
"channel_error_count": self._channel_error_count,
|
161
|
+
"time_since_last_error": time.time() - self._last_channel_error_time if self._last_channel_error_time else None
|
162
|
+
}
|
163
|
+
}
|
164
|
+
)
|
165
|
+
self._recreate_channel()
|
148
166
|
|
149
167
|
retry_count = 0
|
150
168
|
options = self.build_channel_options()
|
@@ -196,6 +214,111 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
196
214
|
time.sleep(self.retry_delay * retry_count)
|
197
215
|
|
198
216
|
raise ConnectionError(f"Failed to connect to {self.server_address} after {self.max_retries} retries")
|
217
|
+
|
218
|
+
def _is_channel_healthy(self) -> bool:
    """Report whether the current gRPC channel can keep being used.

    The channel is considered unhealthy when any of the following holds:

    * no channel has been created yet;
    * its connectivity state is ``SHUTDOWN`` or ``TRANSIENT_FAILURE``;
    * more than 3 (weighted) errors are on record and the most recent
      one happened less than 60 seconds ago;
    * querying the state raised an exception.

    Returns:
        bool: True if the channel is healthy, False if it should be rebuilt.
    """
    if not self.channel:
        return False

    try:
        # The low-level check returns the gRPC core's raw connectivity code
        # (an int), not a grpc.ChannelConnectivity member, so comparing it
        # directly with enum members never matches. Each member's value is
        # a (code, description) tuple; use the code to normalise. If the
        # call ever returns an enum member already, it is kept unchanged.
        raw_state = self.channel._channel.check_connectivity_state(False)
        state = next(
            (member for member in grpc.ChannelConnectivity
             if member.value[0] == raw_state),
            raw_state,
        )

        # A closed or failing channel has to be rebuilt.
        if state in (grpc.ChannelConnectivity.SHUTDOWN,
                     grpc.ChannelConnectivity.TRANSIENT_FAILURE):
            logger.warning(f"Channel in unhealthy state: {state}",
                           extra={"log_type": "info",
                                  "data": {"channel_state": str(state)}})
            return False

        # A burst of recent errors also counts as unhealthy.
        if self._channel_error_count > 3 and self._last_channel_error_time:
            if time.time() - self._last_channel_error_time < 60:  # within 60 seconds
                logger.warning("Too many channel errors recently, marking as unhealthy",
                               extra={"log_type": "info",
                                      "data": {"error_count": self._channel_error_count}})
                return False

        return True

    except Exception as e:
        # Any failure while inspecting the channel is treated as "unhealthy".
        logger.error(f"Error checking channel health: {e}",
                     extra={"log_type": "info",
                            "data": {"error": str(e)}})
        return False
|
255
|
+
|
256
|
+
def _recreate_channel(self):
    """Discard the current gRPC channel so that a fresh one can be set up.

    While holding ``self._channel_lock`` this closes the existing channel
    (a failure to close is logged and otherwise ignored), clears the
    ``channel`` and ``stub`` references and resets the channel error
    bookkeeping. This method does not open a new channel itself.
    """
    with self._channel_lock:
        stale_channel = self.channel
        if stale_channel:
            try:
                stale_channel.close()
                logger.info("Closed unhealthy channel", extra={"log_type": "info"})
            except Exception as exc:
                # Best effort: a channel that cannot be closed is dropped anyway.
                logger.warning(
                    f"Error closing channel: {exc}",
                    extra={"log_type": "info"},
                )

        # Drop the references to the old channel and its stub.
        self.channel = self.stub = None

        # Start the health bookkeeping from scratch.
        self._channel_error_count = 0
        self._last_channel_error_time = None

        logger.info("Recreating gRPC channel...", extra={"log_type": "info"})
|
283
|
+
|
284
|
+
def _record_channel_error(self, error: grpc.RpcError):
    """Record a gRPC error against the channel for later health checks.

    Increments ``self._channel_error_count`` by 1 (plus 2 more when the
    status code is ``INTERNAL`` or ``UNAVAILABLE``), stamps
    ``self._last_channel_error_time`` with the current time and emits a
    warning log describing the error and the channel state.

    Args:
        error: The gRPC error to record.
    """
    now = time.time()
    # Remember the previous error time BEFORE overwriting it; computing the
    # elapsed time from the freshly written timestamp would always give ~0.
    previous_error_time = self._last_channel_error_time

    self._channel_error_count += 1
    self._last_channel_error_time = now

    # Snapshot the current channel state for the log record.
    channel_state = None
    if self.channel:
        try:
            # The low-level check returns the core's raw connectivity code
            # (an int); translate it to the matching ChannelConnectivity
            # member so the log shows a readable name and the IDLE code
            # (0, falsy) is not mistaken for "no channel".
            raw_state = self.channel._channel.check_connectivity_state(False)
            channel_state = next(
                (member for member in grpc.ChannelConnectivity
                 if member.value[0] == raw_state),
                raw_state,
            )
        except Exception:
            # Not a bare ``except``: BaseException subclasses such as
            # KeyboardInterrupt must not be swallowed here.
            channel_state = "UNKNOWN"

    # Severe errors weigh more heavily in the health check.
    error_code = error.code()
    if error_code in (grpc.StatusCode.INTERNAL, grpc.StatusCode.UNAVAILABLE):
        self._channel_error_count += 2

    # None means "no earlier error on record", matching the other log sites
    # that report time_since_last_error.
    if previous_error_time is not None:
        time_since_last_error = now - previous_error_time
    else:
        time_since_last_error = None

    logger.warning(
        f"Channel error recorded: {error_code.name}",
        extra={
            "log_type": "channel_error",
            "data": {
                "error_code": error_code.name,
                "error_count": self._channel_error_count,
                "channel_state": str(channel_state) if channel_state is not None else "NO_CHANNEL",
                "time_since_last_error": time_since_last_error,
                "error_details": error.details() if hasattr(error, 'details') else "",
                "debug_string": error.debug_error_string() if hasattr(error, 'debug_error_string') else "",
            },
        },
    )
|
199
322
|
|
200
323
|
def _retry_request(self, func, *args, **kwargs):
|
201
324
|
"""
|
@@ -237,6 +360,30 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
237
360
|
# 计算当前的耗时
|
238
361
|
current_duration = time.time() - method_start_time
|
239
362
|
|
363
|
+
# 特殊处理 CANCELLED 错误
|
364
|
+
if e.code() == grpc.StatusCode.CANCELLED:
|
365
|
+
channel_state = None
|
366
|
+
if self.channel:
|
367
|
+
try:
|
368
|
+
channel_state = self.channel._channel.check_connectivity_state(False)
|
369
|
+
except:
|
370
|
+
channel_state = "UNKNOWN"
|
371
|
+
|
372
|
+
logger.warning(
|
373
|
+
f"CANCELLED error detected, channel state: {channel_state}",
|
374
|
+
extra={
|
375
|
+
"log_type": "cancelled_debug",
|
376
|
+
"request_id": context.get('request_id'),
|
377
|
+
"data": {
|
378
|
+
"channel_state": str(channel_state) if channel_state else "NO_CHANNEL",
|
379
|
+
"channel_error_count": self._channel_error_count,
|
380
|
+
"time_since_last_error": time.time() - self._last_channel_error_time if self._last_channel_error_time else None,
|
381
|
+
"channel_healthy": self._is_channel_healthy(),
|
382
|
+
"debug_string": e.debug_error_string() if hasattr(e, 'debug_error_string') else ""
|
383
|
+
}
|
384
|
+
}
|
385
|
+
)
|
386
|
+
|
240
387
|
# 记录重试日志
|
241
388
|
log_data = {
|
242
389
|
"log_type": "info",
|
@@ -261,6 +408,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
261
408
|
|
262
409
|
context['duration'] = current_duration
|
263
410
|
last_exception = self.error_handler.handle_error(e, context)
|
411
|
+
|
412
|
+
# 记录 channel 错误
|
413
|
+
self._record_channel_error(e)
|
264
414
|
|
265
415
|
except Exception as e:
|
266
416
|
# 非 gRPC 错误,直接包装抛出
|
@@ -742,6 +892,10 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
|
|
742
892
|
)
|
743
893
|
})
|
744
894
|
|
895
|
+
# 记录 channel 错误
|
896
|
+
if isinstance(e, grpc.RpcError):
|
897
|
+
self._record_channel_error(e)
|
898
|
+
|
745
899
|
# 记录失败并尝试降级(如果启用了熔断)
|
746
900
|
if self.resilient_enabled and self.circuit_breaker:
|
747
901
|
# 将错误码传递给熔断器,用于智能失败统计
|
@@ -27,7 +27,7 @@ test_logger.addHandler(test_handler)
|
|
27
27
|
logger = test_logger
|
28
28
|
|
29
29
|
os.environ['MODEL_MANAGER_SERVER_GRPC_USE_TLS'] = "true"
|
30
|
-
os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "
|
30
|
+
os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "model-manager-server-grpc-131786869360.asia-northeast1.run.app"
|
31
31
|
os.environ['MODEL_MANAGER_SERVER_JWT_SECRET_KEY'] = "model-manager-server-jwt-key"
|
32
32
|
|
33
33
|
# 导入客户端模块
|
@@ -645,10 +645,10 @@ async def main():
|
|
645
645
|
# await asyncio.wait_for(test_batch_requests(), timeout=120.0)
|
646
646
|
|
647
647
|
# 同步并发测试
|
648
|
-
|
648
|
+
test_concurrent_requests(150) # 测试150个并发请求
|
649
649
|
|
650
650
|
# 异步并发测试
|
651
|
-
await test_async_concurrent_requests(
|
651
|
+
await test_async_concurrent_requests(150) # 测试150个异步并发请求
|
652
652
|
|
653
653
|
print("\n✅ 测试完成")
|
654
654
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/circuit_breaker.py
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/http_fallback.py
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/logging_setup.py
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/request_builder.py
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/core/response_handler.py
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/enums/providers.py
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/generated/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/json_formatter.py
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/__init__.py
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/inputs.py
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client/schemas/outputs.py
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/requires.txt
RENAMED
File without changes
|
{tamar_model_client-0.1.24 → tamar_model_client-0.1.26}/tamar_model_client.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|