tamar-model-client 0.1.28__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- tamar_model_client/async_client.py +71 -42
- tamar_model_client/auth.py +31 -2
- tamar_model_client/core/base_client.py +29 -11
- tamar_model_client/core/http_fallback.py +101 -17
- tamar_model_client/error_handler.py +8 -6
- tamar_model_client/json_formatter.py +9 -0
- tamar_model_client/sync_client.py +59 -24
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/METADATA +496 -7
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/RECORD +13 -12
- tests/test_circuit_breaker.py +269 -0
- tests/test_google_azure_final.py +589 -5
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/WHEEL +0 -0
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/top_level.txt +0 -0
@@ -159,7 +159,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        # If the channel exists but is unhealthy, log it
        if self.channel and self.stub:
            logger.warning(
-               "Channel exists but unhealthy, will recreate",
+               "⚠️ Channel exists but unhealthy, will recreate",
                extra={
                    "log_type": "channel_recreate",
                    "data": {
@@ -187,7 +187,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                            "data": {"tls_enabled": True, "server_address": self.server_address}})
        else:
            self.channel = grpc.insecure_channel(
-               self.server_address,
+               f"dns:///{self.server_address}",
                options=options
            )
            logger.info("🔓 Using insecure gRPC channel (TLS disabled)",
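The only functional change in this hunk is the target string: prefixing the address with an explicit dns:/// scheme makes gRPC use its DNS resolver, which can return every backend address behind the name and lets a client-side load-balancing policy spread calls across them. A minimal sketch of the idea (the address and service config below are illustrative, not taken from the package):

```python
import grpc

server_address = "model-manager.internal:50051"  # hypothetical address

# "dns:///<host>:<port>" explicitly selects gRPC's DNS resolver; combined with a
# round_robin policy the client can balance across all resolved backend IPs.
channel = grpc.insecure_channel(
    f"dns:///{server_address}",
    options=[
        ("grpc.service_config",
         '{"loadBalancingConfig": [{"round_robin": {}}]}'),
    ],
)
```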
@@ -238,7 +238,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            # If the channel is shut down or has failed, it needs to be recreated
            if state in [grpc.ChannelConnectivity.SHUTDOWN,
                         grpc.ChannelConnectivity.TRANSIENT_FAILURE]:
-               logger.warning(f"Channel in unhealthy state: {state}",
+               logger.warning(f"⚠️ Channel in unhealthy state: {state}",
                               extra={"log_type": "info",
                                      "data": {"channel_state": str(state)}})
                return False
@@ -246,7 +246,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            # If there have been several recent errors, the channel also needs to be recreated
            if self._channel_error_count > 3 and self._last_channel_error_time:
                if time.time() - self._last_channel_error_time < 60:  # within the last 60 seconds
-                   logger.warning("Too many channel errors recently, marking as unhealthy",
+                   logger.warning("⚠️ Too many channel errors recently, marking as unhealthy",
                                   extra={"log_type": "info",
                                          "data": {"error_count": self._channel_error_count}})
                    return False
@@ -254,7 +254,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            return True

        except Exception as e:
-           logger.error(f"Error checking channel health: {e}",
+           logger.error(f"❌ Error checking channel health: {e}",
                         extra={"log_type": "info",
                                "data": {"error": str(e)}})
            return False
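Taken together, the three hunks above spell out the channel health check: a channel is unhealthy when it reports SHUTDOWN or TRANSIENT_FAILURE, or when more than three errors have been recorded within the last 60 seconds, and any exception while checking is treated as unhealthy. A condensed, standalone paraphrase of that logic (illustrative; the real method lives on the client and reads its own counters):

```python
import time
from typing import Optional

import grpc


def is_channel_healthy(state: grpc.ChannelConnectivity,
                       error_count: int,
                       last_error_time: Optional[float]) -> bool:
    """Condensed paraphrase of the health check shown in the hunks above."""
    # A channel that is shut down or in transient failure must be recreated.
    if state in (grpc.ChannelConnectivity.SHUTDOWN,
                 grpc.ChannelConnectivity.TRANSIENT_FAILURE):
        return False
    # More than three recorded errors within the last 60 seconds also
    # marks the channel as unhealthy.
    if error_count > 3 and last_error_time is not None:
        if time.time() - last_error_time < 60:
            return False
    return True
```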
@@ -270,10 +270,10 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        if self.channel:
            try:
                self.channel.close()
-               logger.info("Closed unhealthy channel",
+               logger.info("🔚 Closed unhealthy channel",
                            extra={"log_type": "info"})
            except Exception as e:
-               logger.warning(f"Error closing channel: {e}",
+               logger.warning(f"⚠️ Error closing channel: {e}",
                               extra={"log_type": "info"})

        # Clear the references
@@ -284,7 +284,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        self._channel_error_count = 0
        self._last_channel_error_time = None

-       logger.info("Recreating gRPC channel...",
+       logger.info("🔄 Recreating gRPC channel...",
                    extra={"log_type": "info"})

    def _record_channel_error(self, error: grpc.RpcError):
@@ -312,7 +312,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):

        # Log detailed error information
        logger.warning(
-           f"Channel error recorded: {error.code().name}",
+           f"⚠️ Channel error recorded: {error.code().name}",
            extra={
                "log_type": "channel_error",
                "data": {
@@ -371,7 +371,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            channel_state = "UNKNOWN"

        logger.warning(
-           f"CANCELLED error detected, channel state: {channel_state}",
+           f"⚠️ CANCELLED error detected, channel state: {channel_state}",
            extra={
                "log_type": "cancelled_debug",
                "request_id": context.get('request_id'),
@@ -469,6 +469,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                "request_id": context.get('request_id'),
                "data": {
                    "error_code": e.code().name if e.code() else 'UNKNOWN',
+                   "error_details": e.details() if hasattr(e, 'details') else '',
                    "retry_count": attempt,
                    "max_retries": self.max_retries,
                    "method": context.get('method', 'unknown'),
@@ -476,8 +477,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                },
                "duration": current_duration
            }
+           error_detail = f" - {e.details()}" if e.details() else ""
            logger.warning(
-               f"Final attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (no more retries)",
+               f"⚠️ Final attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}{error_detail} (no more retries)",
                extra=log_data
            )

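This hunk, and the several that follow, apply the same pattern: the structured log payload gains an error_details field and the human-readable message is suffixed with the status details from the RpcError. A small standalone sketch of that pattern, assuming a caught grpc.RpcError named e (not copied from the client):

```python
import logging

import grpc

logger = logging.getLogger("tamar_model_client")


def log_rpc_failure(e: grpc.RpcError, attempt: int, max_retries: int) -> None:
    """Illustrative only: mirrors the error_details enrichment added in 0.2.0."""
    log_data = {
        "data": {
            "error_code": e.code().name if e.code() else "UNKNOWN",
            # e.details() carries the server-supplied status message, which makes
            # UNAVAILABLE / CANCELLED failures much easier to triage from logs.
            "error_details": e.details() if hasattr(e, "details") else "",
            "retry_count": attempt,
            "max_retries": max_retries,
        }
    }
    error_detail = f" - {e.details()}" if e.details() else ""
    logger.warning(
        f"Attempt {attempt + 1}/{max_retries + 1} failed: {e.code()}{error_detail}",
        extra=log_data,
    )
```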
@@ -490,6 +492,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                "request_id": context.get('request_id'),
                "data": {
                    "error_code": e.code().name if e.code() else 'UNKNOWN',
+                   "error_details": e.details() if hasattr(e, 'details') else '',
                    "retry_count": attempt,
                    "max_retries": self.max_retries,
                    "method": context.get('method', 'unknown'),
@@ -498,8 +501,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                },
                "duration": current_duration
            }
+           error_detail = f" - {e.details()}" if e.details() else ""
            logger.warning(
-               f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (will retry)",
+               f"🔄 Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}{error_detail} (will retry)",
                extra=log_data
            )

@@ -683,6 +687,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                "request_id": context.get('request_id'),
                "data": {
                    "error_code": e.code().name if e.code() else 'UNKNOWN',
+                   "error_details": e.details() if hasattr(e, 'details') else '',
                    "retry_count": attempt,
                    "max_retries": self.max_retries,
                    "method": "stream",
@@ -690,8 +695,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                },
                "duration": current_duration
            }
+           error_detail = f" - {e.details()}" if e.details() else ""
            logger.error(
-               f"Stream failed: {e.code()} (no retry)",
+               f"❌ Stream failed: {e.code()}{error_detail} (no retry)",
                extra=log_data
            )
            context['duration'] = current_duration
@@ -704,14 +710,16 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                "request_id": context.get('request_id'),
                "data": {
                    "error_code": e.code().name if e.code() else 'UNKNOWN',
+                   "error_details": e.details() if hasattr(e, 'details') else '',
                    "retry_count": attempt,
                    "max_retries": self.max_retries,
                    "method": "stream"
                },
                "duration": current_duration
            }
+           error_detail = f" - {e.details()}" if e.details() else ""
            logger.warning(
-               f"Stream attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (will retry)",
+               f"🔄 Stream attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}{error_detail} (will retry)",
                extra=log_data
            )

@@ -734,14 +742,16 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        else:
            raise TamarModelException("Unknown streaming error occurred")

-   def _stream(self, request, metadata, invoke_timeout) -> Iterator[ModelResponse]:
+   def _stream(self, request, metadata, invoke_timeout, request_id=None, origin_request_id=None) -> Iterator[ModelResponse]:
        """
        Handle the streaming response

        Args:
            request: gRPC request object
-           metadata:
+           metadata: request metadata (kept for compatibility, but ignored)
            invoke_timeout: overall timeout
+           request_id: request ID
+           origin_request_id: original request ID

        Yields:
            ModelResponse: each chunk of the streaming response
@@ -749,6 +759,11 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        Raises:
            TimeoutError: when waiting for the next chunk times out
        """
+       # Regenerate the metadata on every call so the JWT token is always fresh
+       fresh_metadata = self._build_auth_metadata(
+           request_id or get_request_id(),
+           origin_request_id
+       )
        import threading
        import queue

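The practical effect is that every stream (and, below, every unary attempt) signs its own metadata at call time instead of reusing metadata captured when the request was first assembled, so a JWT that expires between retries gets replaced. The package's _build_auth_metadata lives in core/base_client.py and is not shown in this diff; the sketch below is only a guess at its shape, with PyJWT and the header names as assumptions:

```python
import time
import uuid
from typing import List, Optional, Tuple

import jwt  # PyJWT -- an assumption; the real client may sign tokens differently


def build_auth_metadata(jwt_secret: str,
                        request_id: Optional[str] = None,
                        origin_request_id: Optional[str] = None) -> List[Tuple[str, str]]:
    """Hypothetical sketch: assemble gRPC metadata with a freshly signed JWT."""
    now = int(time.time())
    token = jwt.encode({"iat": now, "exp": now + 300}, jwt_secret, algorithm="HS256")
    metadata = [
        ("authorization", f"Bearer {token}"),  # header name is an assumption
        ("x-request-id", request_id or str(uuid.uuid4())),
    ]
    if origin_request_id:
        metadata.append(("x-origin-request-id", origin_request_id))
    return metadata
```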
@@ -759,7 +774,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        def fetch_responses():
            """Fetch the streaming responses in a separate thread"""
            try:
-               for response in self.stub.Invoke(request, metadata=
+               for response in self.stub.Invoke(request, metadata=fresh_metadata, timeout=invoke_timeout):
                    response_queue.put(response)
                response_queue.put(None)  # mark the end of the stream
            except Exception as e:
@@ -791,7 +806,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            except queue.Empty:
                raise TimeoutError(f"流式响应在等待下一个数据块时超时 ({chunk_timeout}s)")

-   def _stream_with_logging(self, request, metadata, invoke_timeout, start_time, model_request) -> Iterator[
+   def _stream_with_logging(self, request, metadata, invoke_timeout, start_time, model_request, request_id=None, origin_request_id=None) -> Iterator[
        ModelResponse]:
        """Wrapper around the streaming response that records the full response log and handles retries"""
        total_content = ""
@@ -800,7 +815,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
        chunk_count = 0

        try:
-           for response in self._stream(request, metadata, invoke_timeout):
+           for response in self._stream(request, metadata, invoke_timeout, request_id, origin_request_id):
                chunk_count += 1
                if response.content:
                    total_content += response.content
@@ -871,9 +886,22 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            )
            raise

-   def _invoke_request(self, request, metadata, invoke_timeout):
-       """Execute a single non-streaming request
-
+   def _invoke_request(self, request, metadata, invoke_timeout, request_id=None, origin_request_id=None):
+       """Execute a single non-streaming request
+
+       Args:
+           request: gRPC request object
+           metadata: request metadata (kept for compatibility, but ignored)
+           invoke_timeout: request timeout
+           request_id: request ID
+           origin_request_id: original request ID
+       """
+       # Regenerate the metadata on every call so the JWT token is always fresh
+       fresh_metadata = self._build_auth_metadata(
+           request_id or get_request_id(),
+           origin_request_id
+       )
+       response = self.stub.Invoke(request, metadata=fresh_metadata, timeout=invoke_timeout)
        for response in response:
            return ResponseHandler.build_model_response(response)

@@ -972,14 +1000,14 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
            return self._retry_request_stream(
                self._stream_with_logging,
                request, metadata, invoke_timeout, start_time, model_request,
-               request_id=request_id
+               request_id=request_id, origin_request_id=origin_request_id
            )
        else:
            # Store model_request and origin_request_id for the retry methods
            self._current_model_request = model_request
            self._current_origin_request_id = origin_request_id
            try:
-               result = self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
+               result = self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id, origin_request_id=origin_request_id)
            finally:
                # Clean up the temporary storage
                if hasattr(self, '_current_model_request'):
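Forwarding origin_request_id through the retry helpers means each retry attempt re-enters _invoke_request (or _stream_with_logging) with enough context to rebuild its own metadata. The wrapper itself is not part of this diff; a rough, hypothetical sketch of how such a pass-through retry helper behaves:

```python
import time

import grpc


def retry_request(func, *args, max_retries=3, backoff=0.5, **kwargs):
    """Hypothetical pass-through retry wrapper (not the package's _retry_request).

    kwargs such as request_id / origin_request_id are forwarded unchanged,
    so func can rebuild fresh auth metadata on every attempt.
    """
    for attempt in range(max_retries + 1):
        try:
            return func(*args, **kwargs)
        except grpc.RpcError:
            if attempt == max_retries:
                raise
            time.sleep(backoff * (2 ** attempt))  # simple exponential backoff
```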
@@ -1191,6 +1219,13 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
                    "batch_size": len(batch_request_model.items)
                }
            })
+
+           # Record the failure (if circuit breaking is enabled)
+           if self.resilient_enabled and self.circuit_breaker:
+               # Pass the error code to the circuit breaker for smarter failure accounting
+               error_code = e.code() if hasattr(e, 'code') else None
+               self.circuit_breaker.record_failure(error_code)
+
            raise e
        except Exception as e:
            duration = time.time() - start_time