tamar-model-client 0.1.26__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,8 +31,11 @@ import grpc
31
31
  from .core import (
32
32
  generate_request_id,
33
33
  set_request_id,
34
+ set_origin_request_id,
34
35
  get_protected_logger,
35
- MAX_MESSAGE_LENGTH, get_request_id
36
+ MAX_MESSAGE_LENGTH,
37
+ get_request_id,
38
+ RequestIdManager
36
39
  )
37
40
  from .core.base_client import BaseClient
38
41
  from .core.request_builder import RequestBuilder
@@ -99,6 +102,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
99
102
  self._channel_error_count = 0
100
103
  self._last_channel_error_time = None
101
104
  self._channel_lock = threading.Lock() # 线程安全的channel操作
105
+
106
+ # === Request ID 管理 ===
107
+ self._request_id_manager = RequestIdManager()
102
108
 
103
109
  def close(self):
104
110
  """
@@ -347,20 +353,15 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
347
353
  except grpc.RpcError as e:
348
354
  # 使用新的错误处理逻辑
349
355
  context['retry_count'] = attempt
356
+ current_duration = time.time() - method_start_time
350
357
 
351
358
  # 判断是否可以重试
352
359
  should_retry = self._should_retry(e, attempt)
353
- if not should_retry or attempt >= self.max_retries:
354
- # 不可重试或已达到最大重试次数
355
- current_duration = time.time() - method_start_time
356
- context['duration'] = current_duration
357
- last_exception = self.error_handler.handle_error(e, context)
358
- break
359
-
360
- # 计算当前的耗时
361
- current_duration = time.time() - method_start_time
362
360
 
363
- # 特殊处理 CANCELLED 错误
361
+ # 记录 channel 错误
362
+ self._record_channel_error(e)
363
+
364
+ # 特殊处理 CANCELLED 错误的日志
364
365
  if e.code() == grpc.StatusCode.CANCELLED:
365
366
  channel_state = None
366
367
  if self.channel:
@@ -384,7 +385,106 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
384
385
  }
385
386
  )
386
387
 
387
- # 记录重试日志
388
+ # 检查是否应该尝试快速降级
389
+ should_try_fallback = self._should_try_fallback(e.code(), attempt)
390
+
391
+ if should_try_fallback:
392
+ # 尝试快速降级到HTTP
393
+ logger.warning(
394
+ f"🚀 Fast fallback triggered for {e.code().name} after {attempt + 1} attempts",
395
+ extra={
396
+ "log_type": "fast_fallback",
397
+ "request_id": context.get('request_id'),
398
+ "data": {
399
+ "error_code": e.code().name,
400
+ "attempt": attempt,
401
+ "fallback_reason": "immediate" if e.code() in self.immediate_fallback_errors else "after_retries"
402
+ }
403
+ }
404
+ )
405
+
406
+ try:
407
+ # 从 kwargs 中提取降级所需的参数
408
+ fallback_kwargs = kwargs.copy()
409
+
410
+ # 如果是 _invoke_request,需要提取 model_request
411
+ if func.__name__ == '_invoke_request' and len(args) >= 3:
412
+ # args 结构: (request, metadata, invoke_timeout)
413
+ # 需要从原始参数中恢复 model_request
414
+ if hasattr(self, '_current_model_request'):
415
+ model_request = self._current_model_request
416
+ origin_request_id = getattr(self, '_current_origin_request_id', None)
417
+ timeout = args[2] if len(args) > 2 else None
418
+ request_id = context.get('request_id')
419
+
420
+ # 尝试HTTP降级
421
+ result = self._invoke_http_fallback(model_request, timeout, request_id, origin_request_id)
422
+ # 如果是 BatchInvoke,需要使用批量降级
423
+ elif func.__name__ == 'BatchInvoke' and hasattr(self, '_current_batch_request'):
424
+ batch_request = self._current_batch_request
425
+ origin_request_id = getattr(self, '_current_origin_request_id', None)
426
+ timeout = fallback_kwargs.get('timeout')
427
+ request_id = context.get('request_id')
428
+
429
+ # 尝试批量HTTP降级
430
+ result = self._invoke_batch_http_fallback(batch_request, timeout, request_id, origin_request_id)
431
+ else:
432
+ # 其他情况,无法处理降级
433
+ raise ValueError(f"Unable to perform HTTP fallback for {func.__name__}")
434
+
435
+ logger.info(
436
+ f"✅ Fast fallback to HTTP succeeded",
437
+ extra={
438
+ "log_type": "fast_fallback_success",
439
+ "request_id": request_id,
440
+ "data": {
441
+ "grpc_attempts": attempt + 1,
442
+ "fallback_duration": time.time() - method_start_time
443
+ }
444
+ }
445
+ )
446
+
447
+ return result
448
+ except Exception as fallback_error:
449
+ # 降级失败,记录日志但继续原有重试逻辑
450
+ logger.warning(
451
+ f"⚠️ Fast fallback to HTTP failed: {str(fallback_error)}",
452
+ extra={
453
+ "log_type": "fast_fallback_failed",
454
+ "request_id": context.get('request_id'),
455
+ "data": {
456
+ "fallback_error": str(fallback_error),
457
+ "will_continue_grpc_retry": should_retry and attempt < self.max_retries
458
+ }
459
+ }
460
+ )
461
+
462
+ if not should_retry or attempt >= self.max_retries:
463
+ # 不可重试或已达到最大重试次数
464
+ context['duration'] = current_duration
465
+
466
+ # 记录最终失败日志
467
+ log_data = {
468
+ "log_type": "info",
469
+ "request_id": context.get('request_id'),
470
+ "data": {
471
+ "error_code": e.code().name if e.code() else 'UNKNOWN',
472
+ "retry_count": attempt,
473
+ "max_retries": self.max_retries,
474
+ "method": context.get('method', 'unknown'),
475
+ "final_failure": True
476
+ },
477
+ "duration": current_duration
478
+ }
479
+ logger.warning(
480
+ f"Final attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (no more retries)",
481
+ extra=log_data
482
+ )
483
+
484
+ last_exception = self.error_handler.handle_error(e, context)
485
+ break
486
+
487
+ # 可以重试,记录重试日志
388
488
  log_data = {
389
489
  "log_type": "info",
390
490
  "request_id": context.get('request_id'),
@@ -392,12 +492,14 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
392
492
  "error_code": e.code().name if e.code() else 'UNKNOWN',
393
493
  "retry_count": attempt,
394
494
  "max_retries": self.max_retries,
395
- "method": context.get('method', 'unknown')
495
+ "method": context.get('method', 'unknown'),
496
+ "will_retry": True,
497
+ "fallback_attempted": should_try_fallback
396
498
  },
397
499
  "duration": current_duration
398
500
  }
399
501
  logger.warning(
400
- f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}",
502
+ f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (will retry)",
401
503
  extra=log_data
402
504
  )
403
505
 
@@ -405,12 +507,9 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
405
507
  if attempt < self.max_retries:
406
508
  delay = self._calculate_backoff(attempt, e.code())
407
509
  time.sleep(delay)
408
-
409
- context['duration'] = current_duration
410
- last_exception = self.error_handler.handle_error(e, context)
411
510
 
412
- # 记录 channel 错误
413
- self._record_channel_error(e)
511
+ # 保存异常,以备后续使用
512
+ last_exception = e
414
513
 
415
514
  except Exception as e:
416
515
  # 非 gRPC 错误,直接包装抛出
@@ -798,7 +897,12 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
798
897
  if self.resilient_enabled and self.circuit_breaker and self.circuit_breaker.is_open:
799
898
  if self.http_fallback_url:
800
899
  logger.warning("🔻 Circuit breaker is OPEN, using HTTP fallback")
801
- return self._invoke_http_fallback(model_request, timeout, request_id)
900
+ # 在这里还没有计算origin_request_id,所以先计算
901
+ temp_origin_request_id = None
902
+ temp_request_id = request_id
903
+ if request_id:
904
+ temp_request_id, temp_origin_request_id = self._request_id_manager.get_composite_id(request_id)
905
+ return self._invoke_http_fallback(model_request, timeout, temp_request_id, temp_origin_request_id)
802
906
 
803
907
  self._ensure_initialized()
804
908
 
@@ -808,10 +912,24 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
808
912
  "user_id": model_request.user_context.user_id or ""
809
913
  }
810
914
 
811
- if not request_id:
915
+ # 处理 request_id
916
+ origin_request_id = None
917
+ if request_id:
918
+ # 用户提供了 request_id,生成组合 ID
919
+ request_id, origin_request_id = self._request_id_manager.get_composite_id(request_id)
920
+ else:
921
+ # 没有提供,生成新的
812
922
  request_id = generate_request_id()
923
+
813
924
  set_request_id(request_id)
814
- metadata = self._build_auth_metadata(request_id)
925
+ if origin_request_id:
926
+ set_origin_request_id(origin_request_id)
927
+ metadata = self._build_auth_metadata(request_id, origin_request_id)
928
+
929
+ # 构建日志数据
930
+ log_data = ResponseHandler.build_log_data(model_request)
931
+ if origin_request_id:
932
+ log_data['origin_request_id'] = origin_request_id
815
933
 
816
934
  # 记录开始日志
817
935
  start_time = time.time()
@@ -820,7 +938,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
820
938
  extra={
821
939
  "log_type": "request",
822
940
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
823
- "data": ResponseHandler.build_log_data(model_request)
941
+ "data": log_data
824
942
  })
825
943
 
826
944
  try:
@@ -857,18 +975,34 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
857
975
  request_id=request_id
858
976
  )
859
977
  else:
860
- result = self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
978
+ # 存储model_request和origin_request_id供重试方法使用
979
+ self._current_model_request = model_request
980
+ self._current_origin_request_id = origin_request_id
981
+ try:
982
+ result = self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
983
+ finally:
984
+ # 清理临时存储
985
+ if hasattr(self, '_current_model_request'):
986
+ delattr(self, '_current_model_request')
987
+ if hasattr(self, '_current_origin_request_id'):
988
+ delattr(self, '_current_origin_request_id')
861
989
 
862
990
  # 记录非流式响应的成功日志
863
991
  duration = time.time() - start_time
864
992
  content_length = len(result.content) if result.content else 0
993
+
994
+ # 构建响应日志数据
995
+ response_log_data = ResponseHandler.build_log_data(model_request, result)
996
+ if origin_request_id:
997
+ response_log_data['origin_request_id'] = origin_request_id
998
+
865
999
  logger.info(
866
1000
  f"✅ Request completed | content_length: {content_length}",
867
1001
  extra={
868
1002
  "log_type": "response",
869
1003
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
870
1004
  "duration": duration,
871
- "data": ResponseHandler.build_log_data(model_request, result)
1005
+ "data": response_log_data
872
1006
  }
873
1007
  )
874
1008
 
@@ -881,31 +1015,29 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
881
1015
  except (ConnectionError, grpc.RpcError) as e:
882
1016
  duration = time.time() - start_time
883
1017
  error_message = f"❌ Invoke gRPC failed: {str(e)}"
1018
+
1019
+ # 构建错误日志数据
1020
+ error_log_data = ResponseHandler.build_log_data(model_request, error=e)
1021
+ if origin_request_id:
1022
+ error_log_data['origin_request_id'] = origin_request_id
1023
+
884
1024
  logger.error(error_message, exc_info=True,
885
1025
  extra={
886
1026
  "log_type": "response",
887
1027
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
888
1028
  "duration": duration,
889
- "data": ResponseHandler.build_log_data(
890
- model_request,
891
- error=e
892
- )
1029
+ "data": error_log_data
893
1030
  })
894
1031
 
895
1032
  # 记录 channel 错误
896
1033
  if isinstance(e, grpc.RpcError):
897
1034
  self._record_channel_error(e)
898
1035
 
899
- # 记录失败并尝试降级(如果启用了熔断)
1036
+ # 记录失败(如果启用了熔断)
900
1037
  if self.resilient_enabled and self.circuit_breaker:
901
1038
  # 将错误码传递给熔断器,用于智能失败统计
902
1039
  error_code = e.code() if hasattr(e, 'code') else None
903
1040
  self.circuit_breaker.record_failure(error_code)
904
-
905
- # 如果可以降级,则降级
906
- if self.http_fallback_url and self.circuit_breaker.should_fallback():
907
- logger.warning(f"🔻 gRPC failed, falling back to HTTP: {str(e)}")
908
- return self._invoke_http_fallback(model_request, timeout, request_id)
909
1041
 
910
1042
  raise e
911
1043
  except Exception as e:
@@ -935,6 +1067,17 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
935
1067
  Returns:
936
1068
  BatchModelResponse: 批量请求的结果
937
1069
  """
1070
+ # 如果启用了熔断且熔断器打开,直接走 HTTP
1071
+ if self.resilient_enabled and self.circuit_breaker and self.circuit_breaker.is_open:
1072
+ if self.http_fallback_url:
1073
+ logger.warning("🔻 Circuit breaker is OPEN, using HTTP fallback for batch request")
1074
+ # 在这里还没有计算origin_request_id,所以先计算
1075
+ temp_origin_request_id = None
1076
+ temp_request_id = request_id
1077
+ if request_id:
1078
+ temp_request_id, temp_origin_request_id = self._request_id_manager.get_composite_id(request_id)
1079
+ return self._invoke_batch_http_fallback(batch_request_model, timeout, temp_request_id, temp_origin_request_id)
1080
+
938
1081
  self._ensure_initialized()
939
1082
 
940
1083
  if not self.default_payload:
@@ -943,10 +1086,29 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
943
1086
  "user_id": batch_request_model.user_context.user_id or ""
944
1087
  }
945
1088
 
946
- if not request_id:
1089
+ # 处理 request_id
1090
+ origin_request_id = None
1091
+ if request_id:
1092
+ # 用户提供了 request_id,生成组合 ID
1093
+ request_id, origin_request_id = self._request_id_manager.get_composite_id(request_id)
1094
+ else:
1095
+ # 没有提供,生成新的
947
1096
  request_id = generate_request_id()
1097
+
948
1098
  set_request_id(request_id)
949
- metadata = self._build_auth_metadata(request_id)
1099
+ if origin_request_id:
1100
+ set_origin_request_id(origin_request_id)
1101
+ metadata = self._build_auth_metadata(request_id, origin_request_id)
1102
+
1103
+ # 构建日志数据
1104
+ batch_log_data = {
1105
+ "batch_size": len(batch_request_model.items),
1106
+ "org_id": batch_request_model.user_context.org_id,
1107
+ "user_id": batch_request_model.user_context.user_id,
1108
+ "client_type": batch_request_model.user_context.client_type
1109
+ }
1110
+ if origin_request_id:
1111
+ batch_log_data['origin_request_id'] = origin_request_id
950
1112
 
951
1113
  # 记录开始日志
952
1114
  start_time = time.time()
@@ -955,12 +1117,7 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
955
1117
  extra={
956
1118
  "log_type": "request",
957
1119
  "uri": "/batch_invoke",
958
- "data": {
959
- "batch_size": len(batch_request_model.items),
960
- "org_id": batch_request_model.user_context.org_id,
961
- "user_id": batch_request_model.user_context.user_id,
962
- "client_type": batch_request_model.user_context.client_type
963
- }
1120
+ "data": batch_log_data
964
1121
  })
965
1122
 
966
1123
  try:
@@ -987,6 +1144,11 @@ class TamarModelClient(BaseClient, HttpFallbackMixin):
987
1144
 
988
1145
  try:
989
1146
  invoke_timeout = timeout or self.default_invoke_timeout
1147
+
1148
+ # 保存批量请求信息用于降级
1149
+ self._current_batch_request = batch_request_model
1150
+ self._current_origin_request_id = origin_request_id
1151
+
990
1152
  batch_response = self._retry_request(
991
1153
  self.stub.BatchInvoke,
992
1154
  batch_request,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tamar-model-client
3
- Version: 0.1.26
3
+ Version: 0.1.28
4
4
  Summary: A Python SDK for interacting with the Model Manager gRPC service
5
5
  Home-page: http://gitlab.tamaredge.top/project-tap/AgentOS/model-manager-client
6
6
  Author: Oscar Ou
@@ -18,6 +18,8 @@ Requires-Dist: PyJWT
18
18
  Requires-Dist: nest_asyncio
19
19
  Requires-Dist: openai
20
20
  Requires-Dist: google-genai
21
+ Requires-Dist: requests>=2.25.0
22
+ Requires-Dist: aiohttp>=3.7.0
21
23
  Dynamic: author
22
24
  Dynamic: author-email
23
25
  Dynamic: classifier
@@ -65,6 +67,8 @@ Dynamic: summary
65
67
  - 🔄 **自动重试** 指数退避策略
66
68
 
67
69
  ### 🛡️ 生产级特性
70
+ - 🛡️ **熔断降级** 服务故障时自动切换到 HTTP
71
+ - 🚀 **快速降级** 失败立即降级,最大化成功率
68
72
  - 🔐 **JWT 认证** 安全可靠
69
73
  - 📊 **使用量追踪** Token 统计与成本计算
70
74
  - 🆔 **请求追踪** 唯一 request_id
@@ -87,7 +91,14 @@ pip install tamar-model-client
87
91
 
88
92
  - Python ≥ 3.8
89
93
  - 支持 Windows / Linux / macOS
90
- - 依赖项会自动安装(grpcio, pydantic, python-dotenv 等)
94
+ - 依赖项会自动安装(包括以下核心库):
95
+ - `grpcio>=1.67.1` - gRPC 通信协议
96
+ - `pydantic` - 数据验证和序列化
97
+ - `PyJWT` - JWT 认证
98
+ - `requests>=2.25.0` - HTTP 降级功能(同步)
99
+ - `aiohttp>=3.7.0` - HTTP 降级功能(异步)
100
+ - `openai` - OpenAI 服务商支持
101
+ - `google-genai` - Google AI 服务商支持
91
102
 
92
103
  ## 🏗️ 项目架构
93
104
 
@@ -339,7 +350,7 @@ async def main():
339
350
  )
340
351
 
341
352
  # 发送请求并获取响应
342
- async for r in await client.invoke(model_request):
353
+ async for r in await client.invoke(request_data):
343
354
  if r.error:
344
355
  print(f"错误: {r.error}")
345
356
  else:
@@ -587,6 +598,62 @@ metrics = client.get_resilient_metrics()
587
598
  # }
588
599
  ```
589
600
 
601
+ ### 🚀 快速降级功能(用户体验优化)
602
+
603
+ 在传统的熔断降级基础上,SDK 新增了快速降级功能,进一步提升用户体验:
604
+
605
+ #### 传统降级 vs 快速降级
606
+
607
+ **传统模式**:
608
+ ```
609
+ gRPC请求 → 失败 → 重试1 → 失败 → 重试2 → 失败 → ... → 重试N → 失败 → HTTP降级
610
+ 耗时:(重试次数 × 退避时间) + 降级时间 // 可能需要十几秒
611
+ ```
612
+
613
+ **快速降级模式**:
614
+ ```
615
+ gRPC请求 → 失败 → 立即HTTP降级 (或重试1次后降级)
616
+ 耗时:降级时间 // 通常1-2秒内完成
617
+ ```
618
+
619
+ #### 降级策略配置
620
+
621
+ - **立即降级错误**:`UNAVAILABLE`, `DEADLINE_EXCEEDED`, `CANCELLED` (网络问题)
622
+ - **延迟降级错误**:其他错误重试指定次数后降级
623
+ - **永不降级错误**:`UNAUTHENTICATED`, `PERMISSION_DENIED`, `INVALID_ARGUMENT` (客户端问题)
624
+
625
+ #### 使用示例
626
+
627
+ ```python
628
+ from tamar_model_client import TamarModelClient
629
+
630
+ # 启用快速降级(通过环境变量)
631
+ # MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
632
+ # MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
633
+
634
+ client = TamarModelClient()
635
+
636
+ # 正常使用,快速降级对用户透明
637
+ response = client.invoke(request)
638
+ # 如果gRPC不可用,会在1-2秒内自动切换到HTTP并返回结果
639
+ ```
640
+
641
+ #### 配置选项详解
642
+
643
+ ```bash
644
+ # 启用快速降级(默认开启)
645
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
646
+
647
+ # 非立即降级的错误,重试多少次后降级(默认1次)
648
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
649
+
650
+ # 网络错误立即降级(默认配置)
651
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
652
+
653
+ # 认证错误永不降级(避免无效降级)
654
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
655
+ ```
656
+
590
657
  ### ⚠️ 注意事项
591
658
 
592
659
  1. **参数说明**
@@ -614,6 +681,11 @@ export MODEL_MANAGER_SERVER_GRPC_USE_TLS="false"
614
681
  export MODEL_MANAGER_SERVER_GRPC_DEFAULT_AUTHORITY="localhost"
615
682
  export MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES="5"
616
683
  export MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY="1.5"
684
+
685
+ # 快速降级配置(可选,优化用户体验)
686
+ export MODEL_CLIENT_FAST_FALLBACK_ENABLED="true"
687
+ export MODEL_CLIENT_HTTP_FALLBACK_URL="http://localhost:8080"
688
+ export MODEL_CLIENT_FALLBACK_AFTER_RETRIES="1"
617
689
  ```
618
690
 
619
691
  或者本地 `.env` 文件
@@ -667,6 +739,27 @@ MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD=5
667
739
 
668
740
  # 熔断器恢复超时(秒,熔断后多久尝试恢复,默认 60)
669
741
  MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT=60
742
+
743
+
744
+ # ========================
745
+ # 🚀 快速降级配置(可选,优化体验)
746
+ # ========================
747
+
748
+ # 是否启用快速降级功能(默认 true)
749
+ # 启用后,gRPC 请求失败时会立即尝试 HTTP 降级,而不是等待所有重试完成
750
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
751
+
752
+ # 降级前的最大 gRPC 重试次数(默认 1)
753
+ # 对于非立即降级的错误,重试指定次数后才尝试降级
754
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
755
+
756
+ # 立即降级的错误类型(逗号分隔,默认网络相关错误)
757
+ # 这些错误类型会在第一次失败后立即尝试降级
758
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
759
+
760
+ # 永不降级的错误类型(逗号分隔,默认认证相关错误)
761
+ # 这些错误类型不会触发降级,通常是客户端问题而非服务不可用
762
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
670
763
  ```
671
764
 
672
765
  加载后,初始化时无需传参:
@@ -1,20 +1,21 @@
1
1
  tamar_model_client/__init__.py,sha256=4DEIUGlLTeiaECjJQbGYik7C0JO6hHwwfbLYpYpMdzg,444
2
- tamar_model_client/async_client.py,sha256=2m3-oMkIjhJRdhuJpXSV3HZxqJXjQEshD677yXBsZzo,41368
2
+ tamar_model_client/async_client.py,sha256=x1hFOpwrRS2bhLKDct-wO4bdrkfovwvCMYyJ_6XlDvU,44655
3
3
  tamar_model_client/auth.py,sha256=gbwW5Aakeb49PMbmYvrYlVx1mfyn1LEDJ4qQVs-9DA4,438
4
- tamar_model_client/circuit_breaker.py,sha256=0XHJXBYA4O8vwsDGwqNrae9zxNJphY5Rfucc9ytVFGA,5419
5
- tamar_model_client/error_handler.py,sha256=oI_jUTjnq4OXu8fwJoGXNmQpddEgOFF9ZUhbytq7H6c,12384
4
+ tamar_model_client/circuit_breaker.py,sha256=Y3AVp7WzVYU-ubcmovKsJ8DRJbbO4G7vdZgSjnwcWJQ,5550
5
+ tamar_model_client/error_handler.py,sha256=iEgaJOCoQJ4riVSi9ehLl7514jjbfBI9QDd-OTTXpnc,18091
6
6
  tamar_model_client/exceptions.py,sha256=EOr4JMYI7hVszRvNYJ1JqsUNpVmd16T2KpJ0MkFTsUE,13073
7
7
  tamar_model_client/json_formatter.py,sha256=IyBv_pEEzjF-KaMF-7rxRpNc_fxRYK2A-pu_2n4Liow,1990
8
8
  tamar_model_client/logging_icons.py,sha256=MRTZ1Xvkep9ce_jdltj54_XZUXvIpQ95soRNmLdJ4qw,1837
9
- tamar_model_client/sync_client.py,sha256=mAe-yVCXyLVQCCTkHczA3289pbIL5Fw7zeGOoY8gqP8,42976
9
+ tamar_model_client/sync_client.py,sha256=NcruPoLVmrIiyOy8hExqt3uZRE5UvBWDqmaCgl2J3jc,51497
10
10
  tamar_model_client/utils.py,sha256=Kn6pFz9GEC96H4eejEax66AkzvsrXI3WCSDtgDjnVTI,5238
11
- tamar_model_client/core/__init__.py,sha256=bJRJllrp4Xc0g_qu1pW9G-lsXNB7c1r0NBIfb2Ypxe0,832
12
- tamar_model_client/core/base_client.py,sha256=3dQLhv8-Lnh2VxKBut_TW-lPS5LBuj7pbJkIwUGq0QI,8995
13
- tamar_model_client/core/http_fallback.py,sha256=1OuSMxzhDyxy07JZa5artMTNdPNMyAhI7By3RUCSPDw,9872
14
- tamar_model_client/core/logging_setup.py,sha256=h1aky1uslIQnx4NxMqjoDMxwlc4Vg46KYTjW9yPu2xQ,6032
11
+ tamar_model_client/core/__init__.py,sha256=RMiZjV1S4csWPLxB_JfdOea8fYPz97Oj3humQSBw1OI,1054
12
+ tamar_model_client/core/base_client.py,sha256=0oEhmxz_Y60c04XFziLv70PLDijLWYAeRVSg1oldYjI,12992
13
+ tamar_model_client/core/http_fallback.py,sha256=_3Cd0ziv-w2BeWhZPlrtzVRD90WaggB0Fg_Jaup1F9E,19058
14
+ tamar_model_client/core/logging_setup.py,sha256=-MXzTR4Ax50H16cbq1jCXbxgayf5fZ0U3o0--fMmxD8,6692
15
15
  tamar_model_client/core/request_builder.py,sha256=yi8iy2Ps2m4d1YwIFiQLRxTvxQxgEGV576aXnNYRl7E,8507
16
+ tamar_model_client/core/request_id_manager.py,sha256=S-Mliaby9zN_bx-B85FvVnttal-w0skkjy2ZvWoQ5vw,3689
16
17
  tamar_model_client/core/response_handler.py,sha256=_q5galAT0_RaUT5C_yZsjg-9VnT9CBjmIASOt28BUmQ,4616
17
- tamar_model_client/core/utils.py,sha256=8jSx8UOE6ukbiIgruCX7SXN8J5FyuGbqENOmJDsxaSM,5084
18
+ tamar_model_client/core/utils.py,sha256=AcbsGfNQEaZLYI4OZJs-BdmJgxAoLUC5LFoiYmji820,5875
18
19
  tamar_model_client/enums/__init__.py,sha256=3cYYn8ztNGBa_pI_5JGRVYf2QX8fkBVWdjID1PLvoBQ,182
19
20
  tamar_model_client/enums/channel.py,sha256=wCzX579nNpTtwzGeS6S3Ls0UzVAgsOlfy4fXMzQTCAw,199
20
21
  tamar_model_client/enums/invoke.py,sha256=Up87myAg4-0SDJV5a82ggPDpYHSLEtIco8BF_5Ph1nY,322
@@ -27,10 +28,10 @@ tamar_model_client/schemas/inputs.py,sha256=dz1m8NbUIxA99JXZc8WlyzbKpDuz1lEzx3Vg
27
28
  tamar_model_client/schemas/outputs.py,sha256=M_fcqUtXPJnfiLabHlyA8BorlC5pYkf5KLjXO1ysKIQ,1031
28
29
  tests/__init__.py,sha256=kbmImddLDwdqlkkmkyKtl4bQy_ipe-R8eskpaBylU9w,38
29
30
  tests/stream_hanging_analysis.py,sha256=W3W48IhQbNAR6-xvMpoWZvnWOnr56CTaH4-aORNBuD4,14807
30
- tests/test_google_azure_final.py,sha256=BRKUpaCfL5Gd3ApxpLkOwSFfuFI5ibSm_oh6LxXgGNY,26427
31
+ tests/test_google_azure_final.py,sha256=YRBn1JH1fMJMOQHk6a04NYR9fybboOhqR4urU7vnShI,26330
31
32
  tests/test_logging_issue.py,sha256=JTMbotfHpAEPMBj73pOwxPn-Zn4QVQJX6scMz48FRDQ,2427
32
33
  tests/test_simple.py,sha256=Xf0U-J9_xn_LzUsmYu06suK0_7DrPeko8OHoHldsNxE,7169
33
- tamar_model_client-0.1.26.dist-info/METADATA,sha256=fNpG1edeXqkPy8svo2-ZSo7g2S6YTEzfyW8QC-SvgAU,23453
34
- tamar_model_client-0.1.26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
- tamar_model_client-0.1.26.dist-info/top_level.txt,sha256=f1I-S8iWN-cgv4gB8gxRg9jJOTJMumvm4oGKVPfGg6A,25
36
- tamar_model_client-0.1.26.dist-info/RECORD,,
34
+ tamar_model_client-0.1.28.dist-info/METADATA,sha256=wGQvtURWcFMoreHt52qQhjgCPjIgUruUuWaMZd4-B6A,26880
35
+ tamar_model_client-0.1.28.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
36
+ tamar_model_client-0.1.28.dist-info/top_level.txt,sha256=f1I-S8iWN-cgv4gB8gxRg9jJOTJMumvm4oGKVPfGg6A,25
37
+ tamar_model_client-0.1.28.dist-info/RECORD,,
@@ -26,8 +26,8 @@ test_logger.addHandler(test_handler)
26
26
 
27
27
  logger = test_logger
28
28
 
29
- os.environ['MODEL_MANAGER_SERVER_GRPC_USE_TLS'] = "true"
30
- os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "model-manager-server-grpc-131786869360.asia-northeast1.run.app"
29
+ os.environ['MODEL_MANAGER_SERVER_GRPC_USE_TLS'] = "false"
30
+ os.environ['MODEL_MANAGER_SERVER_ADDRESS'] = "localhost:50051"
31
31
  os.environ['MODEL_MANAGER_SERVER_JWT_SECRET_KEY'] = "model-manager-server-jwt-key"
32
32
 
33
33
  # 导入客户端模块
@@ -630,25 +630,25 @@ async def main():
630
630
 
631
631
  try:
632
632
  # # 同步测试
633
- # test_google_ai_studio()
634
- # test_google_vertex_ai()
635
- # test_azure_openai()
636
- #
637
- # # 同步批量测试
638
- # test_sync_batch_requests()
639
- #
640
- # # 异步流式测试
641
- # await asyncio.wait_for(test_google_streaming(), timeout=60.0)
642
- # await asyncio.wait_for(test_azure_streaming(), timeout=60.0)
643
- #
644
- # # 异步批量测试
645
- # await asyncio.wait_for(test_batch_requests(), timeout=120.0)
633
+ test_google_ai_studio()
634
+ test_google_vertex_ai()
635
+ test_azure_openai()
636
+
637
+ # 同步批量测试
638
+ test_sync_batch_requests()
639
+
640
+ # 异步流式测试
641
+ await asyncio.wait_for(test_google_streaming(), timeout=60.0)
642
+ await asyncio.wait_for(test_azure_streaming(), timeout=60.0)
643
+
644
+ # 异步批量测试
645
+ await asyncio.wait_for(test_batch_requests(), timeout=120.0)
646
646
 
647
647
  # 同步并发测试
648
- test_concurrent_requests(150) # 测试150个并发请求
648
+ test_concurrent_requests(2) # 测试2个并发请求
649
649
 
650
650
  # 异步并发测试
651
- await test_async_concurrent_requests(150) # 测试150个异步并发请求
651
+ await test_async_concurrent_requests(2) # 测试2个异步并发请求
652
652
 
653
653
  print("\n✅ 测试完成")
654
654