tamar-model-client 0.1.26__tar.gz → 0.1.28__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (40)
  1. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/PKG-INFO +96 -3
  2. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/README.md +93 -2
  3. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/setup.py +3 -1
  4. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/async_client.py +97 -25
  5. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/circuit_breaker.py +6 -3
  6. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/__init__.py +9 -1
  7. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/base_client.py +137 -37
  8. tamar_model_client-0.1.28/tamar_model_client/core/http_fallback.py +470 -0
  9. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/logging_setup.py +15 -1
  10. tamar_model_client-0.1.28/tamar_model_client/core/request_id_manager.py +112 -0
  11. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/utils.py +27 -1
  12. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/error_handler.py +106 -13
  13. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/sync_client.py +205 -43
  14. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client.egg-info/PKG-INFO +96 -3
  15. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client.egg-info/SOURCES.txt +1 -0
  16. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client.egg-info/requires.txt +2 -0
  17. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tests/test_google_azure_final.py +17 -17
  18. tamar_model_client-0.1.26/tamar_model_client/core/http_fallback.py +0 -249
  19. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/setup.cfg +0 -0
  20. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/__init__.py +0 -0
  21. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/auth.py +0 -0
  22. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/request_builder.py +0 -0
  23. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/core/response_handler.py +0 -0
  24. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/enums/__init__.py +0 -0
  25. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/enums/channel.py +0 -0
  26. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/enums/invoke.py +0 -0
  27. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/enums/providers.py +0 -0
  28. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/exceptions.py +0 -0
  29. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/generated/__init__.py +0 -0
  30. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/generated/model_service_pb2.py +0 -0
  31. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/generated/model_service_pb2_grpc.py +0 -0
  32. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/json_formatter.py +0 -0
  33. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/logging_icons.py +0 -0
  34. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/schemas/__init__.py +0 -0
  35. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/schemas/inputs.py +0 -0
  36. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/schemas/outputs.py +0 -0
  37. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client/utils.py +0 -0
  38. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client.egg-info/dependency_links.txt +0 -0
  39. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tamar_model_client.egg-info/top_level.txt +0 -0
  40. {tamar_model_client-0.1.26 → tamar_model_client-0.1.28}/tests/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tamar-model-client
3
- Version: 0.1.26
3
+ Version: 0.1.28
4
4
  Summary: A Python SDK for interacting with the Model Manager gRPC service
5
5
  Home-page: http://gitlab.tamaredge.top/project-tap/AgentOS/model-manager-client
6
6
  Author: Oscar Ou
@@ -18,6 +18,8 @@ Requires-Dist: PyJWT
18
18
  Requires-Dist: nest_asyncio
19
19
  Requires-Dist: openai
20
20
  Requires-Dist: google-genai
21
+ Requires-Dist: requests>=2.25.0
22
+ Requires-Dist: aiohttp>=3.7.0
21
23
  Dynamic: author
22
24
  Dynamic: author-email
23
25
  Dynamic: classifier
@@ -65,6 +67,8 @@ Dynamic: summary
65
67
  - 🔄 **自动重试** 指数退避策略
66
68
 
67
69
  ### 🛡️ 生产级特性
70
+ - 🛡️ **熔断降级** 服务故障时自动切换到 HTTP
71
+ - 🚀 **快速降级** 失败立即降级,最大化成功率
68
72
  - 🔐 **JWT 认证** 安全可靠
69
73
  - 📊 **使用量追踪** Token 统计与成本计算
70
74
  - 🆔 **请求追踪** 唯一 request_id
@@ -87,7 +91,14 @@ pip install tamar-model-client
87
91
 
88
92
  - Python ≥ 3.8
89
93
  - 支持 Windows / Linux / macOS
90
- - 依赖项会自动安装(grpcio, pydantic, python-dotenv 等)
94
+ - 依赖项会自动安装(包括以下核心库):
95
+ - `grpcio>=1.67.1` - gRPC 通信协议
96
+ - `pydantic` - 数据验证和序列化
97
+ - `PyJWT` - JWT 认证
98
+ - `requests>=2.25.0` - HTTP 降级功能(同步)
99
+ - `aiohttp>=3.7.0` - HTTP 降级功能(异步)
100
+ - `openai` - OpenAI 服务商支持
101
+ - `google-genai` - Google AI 服务商支持
91
102
 
92
103
  ## 🏗️ 项目架构
93
104
 
@@ -339,7 +350,7 @@ async def main():
339
350
  )
340
351
 
341
352
  # 发送请求并获取响应
342
- async for r in await client.invoke(model_request):
353
+ async for r in await client.invoke(request_data):
343
354
  if r.error:
344
355
  print(f"错误: {r.error}")
345
356
  else:
@@ -587,6 +598,62 @@ metrics = client.get_resilient_metrics()
587
598
  # }
588
599
  ```
589
600
 
601
+ ### 🚀 快速降级功能(用户体验优化)
602
+
603
+ 在传统的熔断降级基础上,SDK 新增了快速降级功能,进一步提升用户体验:
604
+
605
+ #### 传统降级 vs 快速降级
606
+
607
+ **传统模式**:
608
+ ```
609
+ gRPC请求 → 失败 → 重试1 → 失败 → 重试2 → 失败 → ... → 重试N → 失败 → HTTP降级
610
+ 耗时:(重试次数 × 退避时间) + 降级时间 // 可能需要十几秒
611
+ ```
612
+
613
+ **快速降级模式**:
614
+ ```
615
+ gRPC请求 → 失败 → 立即HTTP降级 (或重试1次后降级)
616
+ 耗时:降级时间 // 通常1-2秒内完成
617
+ ```
618
+
619
+ #### 降级策略配置
620
+
621
+ - **立即降级错误**:`UNAVAILABLE`, `DEADLINE_EXCEEDED`, `CANCELLED` (网络问题)
622
+ - **延迟降级错误**:其他错误重试指定次数后降级
623
+ - **永不降级错误**:`UNAUTHENTICATED`, `PERMISSION_DENIED`, `INVALID_ARGUMENT` (客户端问题)
624
+
625
+ #### 使用示例
626
+
627
+ ```python
628
+ from tamar_model_client import TamarModelClient
629
+
630
+ # 启用快速降级(通过环境变量)
631
+ # MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
632
+ # MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
633
+
634
+ client = TamarModelClient()
635
+
636
+ # 正常使用,快速降级对用户透明
637
+ response = client.invoke(request)
638
+ # 如果gRPC不可用,会在1-2秒内自动切换到HTTP并返回结果
639
+ ```
640
+
641
+ #### 配置选项详解
642
+
643
+ ```bash
644
+ # 启用快速降级(默认开启)
645
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
646
+
647
+ # 非立即降级的错误,重试多少次后降级(默认1次)
648
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
649
+
650
+ # 网络错误立即降级(默认配置)
651
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
652
+
653
+ # 认证错误永不降级(避免无效降级)
654
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
655
+ ```
656
+
590
657
  ### ⚠️ 注意事项
591
658
 
592
659
  1. **参数说明**
@@ -614,6 +681,11 @@ export MODEL_MANAGER_SERVER_GRPC_USE_TLS="false"
614
681
  export MODEL_MANAGER_SERVER_GRPC_DEFAULT_AUTHORITY="localhost"
615
682
  export MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES="5"
616
683
  export MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY="1.5"
684
+
685
+ # 快速降级配置(可选,优化用户体验)
686
+ export MODEL_CLIENT_FAST_FALLBACK_ENABLED="true"
687
+ export MODEL_CLIENT_HTTP_FALLBACK_URL="http://localhost:8080"
688
+ export MODEL_CLIENT_FALLBACK_AFTER_RETRIES="1"
617
689
  ```
618
690
 
619
691
  或者本地 `.env` 文件
@@ -667,6 +739,27 @@ MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD=5
667
739
 
668
740
  # 熔断器恢复超时(秒,熔断后多久尝试恢复,默认 60)
669
741
  MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT=60
742
+
743
+
744
+ # ========================
745
+ # 🚀 快速降级配置(可选,优化体验)
746
+ # ========================
747
+
748
+ # 是否启用快速降级功能(默认 true)
749
+ # 启用后,gRPC 请求失败时会立即尝试 HTTP 降级,而不是等待所有重试完成
750
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
751
+
752
+ # 降级前的最大 gRPC 重试次数(默认 1)
753
+ # 对于非立即降级的错误,重试指定次数后才尝试降级
754
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
755
+
756
+ # 立即降级的错误类型(逗号分隔,默认网络相关错误)
757
+ # 这些错误类型会在第一次失败后立即尝试降级
758
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
759
+
760
+ # 永不降级的错误类型(逗号分隔,默认认证相关错误)
761
+ # 这些错误类型不会触发降级,通常是客户端问题而非服务不可用
762
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
670
763
  ```
671
764
 
672
765
  加载后,初始化时无需传参:
@@ -35,6 +35,8 @@
35
35
  - 🔄 **自动重试** 指数退避策略
36
36
 
37
37
  ### 🛡️ 生产级特性
38
+ - 🛡️ **熔断降级** 服务故障时自动切换到 HTTP
39
+ - 🚀 **快速降级** 失败立即降级,最大化成功率
38
40
  - 🔐 **JWT 认证** 安全可靠
39
41
  - 📊 **使用量追踪** Token 统计与成本计算
40
42
  - 🆔 **请求追踪** 唯一 request_id
@@ -57,7 +59,14 @@ pip install tamar-model-client
57
59
 
58
60
  - Python ≥ 3.8
59
61
  - 支持 Windows / Linux / macOS
60
- - 依赖项会自动安装(grpcio, pydantic, python-dotenv 等)
62
+ - 依赖项会自动安装(包括以下核心库):
63
+ - `grpcio>=1.67.1` - gRPC 通信协议
64
+ - `pydantic` - 数据验证和序列化
65
+ - `PyJWT` - JWT 认证
66
+ - `requests>=2.25.0` - HTTP 降级功能(同步)
67
+ - `aiohttp>=3.7.0` - HTTP 降级功能(异步)
68
+ - `openai` - OpenAI 服务商支持
69
+ - `google-genai` - Google AI 服务商支持
61
70
 
62
71
  ## 🏗️ 项目架构
63
72
 
@@ -309,7 +318,7 @@ async def main():
309
318
  )
310
319
 
311
320
  # 发送请求并获取响应
312
- async for r in await client.invoke(model_request):
321
+ async for r in await client.invoke(request_data):
313
322
  if r.error:
314
323
  print(f"错误: {r.error}")
315
324
  else:
@@ -557,6 +566,62 @@ metrics = client.get_resilient_metrics()
557
566
  # }
558
567
  ```
559
568
 
569
+ ### 🚀 快速降级功能(用户体验优化)
570
+
571
+ 在传统的熔断降级基础上,SDK 新增了快速降级功能,进一步提升用户体验:
572
+
573
+ #### 传统降级 vs 快速降级
574
+
575
+ **传统模式**:
576
+ ```
577
+ gRPC请求 → 失败 → 重试1 → 失败 → 重试2 → 失败 → ... → 重试N → 失败 → HTTP降级
578
+ 耗时:(重试次数 × 退避时间) + 降级时间 // 可能需要十几秒
579
+ ```
580
+
581
+ **快速降级模式**:
582
+ ```
583
+ gRPC请求 → 失败 → 立即HTTP降级 (或重试1次后降级)
584
+ 耗时:降级时间 // 通常1-2秒内完成
585
+ ```
586
+
587
+ #### 降级策略配置
588
+
589
+ - **立即降级错误**:`UNAVAILABLE`, `DEADLINE_EXCEEDED`, `CANCELLED` (网络问题)
590
+ - **延迟降级错误**:其他错误重试指定次数后降级
591
+ - **永不降级错误**:`UNAUTHENTICATED`, `PERMISSION_DENIED`, `INVALID_ARGUMENT` (客户端问题)
592
+
593
+ #### 使用示例
594
+
595
+ ```python
596
+ from tamar_model_client import TamarModelClient
597
+
598
+ # 启用快速降级(通过环境变量)
599
+ # MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
600
+ # MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
601
+
602
+ client = TamarModelClient()
603
+
604
+ # 正常使用,快速降级对用户透明
605
+ response = client.invoke(request)
606
+ # 如果gRPC不可用,会在1-2秒内自动切换到HTTP并返回结果
607
+ ```
608
+
609
+ #### 配置选项详解
610
+
611
+ ```bash
612
+ # 启用快速降级(默认开启)
613
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
614
+
615
+ # 非立即降级的错误,重试多少次后降级(默认1次)
616
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
617
+
618
+ # 网络错误立即降级(默认配置)
619
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
620
+
621
+ # 认证错误永不降级(避免无效降级)
622
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
623
+ ```
624
+
560
625
  ### ⚠️ 注意事项
561
626
 
562
627
  1. **参数说明**
@@ -584,6 +649,11 @@ export MODEL_MANAGER_SERVER_GRPC_USE_TLS="false"
584
649
  export MODEL_MANAGER_SERVER_GRPC_DEFAULT_AUTHORITY="localhost"
585
650
  export MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES="5"
586
651
  export MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY="1.5"
652
+
653
+ # 快速降级配置(可选,优化用户体验)
654
+ export MODEL_CLIENT_FAST_FALLBACK_ENABLED="true"
655
+ export MODEL_CLIENT_HTTP_FALLBACK_URL="http://localhost:8080"
656
+ export MODEL_CLIENT_FALLBACK_AFTER_RETRIES="1"
587
657
  ```
588
658
 
589
659
  或者本地 `.env` 文件
@@ -637,6 +707,27 @@ MODEL_CLIENT_CIRCUIT_BREAKER_THRESHOLD=5
637
707
 
638
708
  # 熔断器恢复超时(秒,熔断后多久尝试恢复,默认 60)
639
709
  MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT=60
710
+
711
+
712
+ # ========================
713
+ # 🚀 快速降级配置(可选,优化体验)
714
+ # ========================
715
+
716
+ # 是否启用快速降级功能(默认 true)
717
+ # 启用后,gRPC 请求失败时会立即尝试 HTTP 降级,而不是等待所有重试完成
718
+ MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
719
+
720
+ # 降级前的最大 gRPC 重试次数(默认 1)
721
+ # 对于非立即降级的错误,重试指定次数后才尝试降级
722
+ MODEL_CLIENT_FALLBACK_AFTER_RETRIES=1
723
+
724
+ # 立即降级的错误类型(逗号分隔,默认网络相关错误)
725
+ # 这些错误类型会在第一次失败后立即尝试降级
726
+ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
727
+
728
+ # 永不降级的错误类型(逗号分隔,默认认证相关错误)
729
+ # 这些错误类型不会触发降级,通常是客户端问题而非服务不可用
730
+ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
640
731
  ```
641
732
 
642
733
  加载后,初始化时无需传参:
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="tamar-model-client",
5
- version="0.1.26",
5
+ version="0.1.28",
6
6
  description="A Python SDK for interacting with the Model Manager gRPC service",
7
7
  author="Oscar Ou",
8
8
  author_email="oscar.ou@tamaredge.ai",
@@ -19,6 +19,8 @@ setup(
19
19
  "nest_asyncio",
20
20
  "openai",
21
21
  "google-genai",
22
+ "requests>=2.25.0", # HTTP降级功能(同步)
23
+ "aiohttp>=3.7.0", # HTTP降级功能(异步)
22
24
  ],
23
25
  long_description=open("README.md", encoding="utf-8").read(),
24
26
  long_description_content_type="text/markdown",
@@ -32,8 +32,11 @@ from grpc import RpcError
32
32
  from .core import (
33
33
  generate_request_id,
34
34
  set_request_id,
35
+ set_origin_request_id,
35
36
  get_protected_logger,
36
- MAX_MESSAGE_LENGTH, get_request_id
37
+ MAX_MESSAGE_LENGTH,
38
+ get_request_id,
39
+ RequestIdManager
37
40
  )
38
41
  from .core.base_client import BaseClient
39
42
  from .core.request_builder import RequestBuilder
@@ -102,12 +105,18 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
102
105
  self._last_channel_error_time = None
103
106
  self._channel_lock = asyncio.Lock() # 异步锁
104
107
 
108
+ # === Request ID 管理 ===
109
+ self._request_id_manager = RequestIdManager()
110
+
105
111
  # === 增强的重试处理器 ===
106
112
  self.retry_handler = EnhancedRetryHandler(
107
113
  max_retries=self.max_retries,
108
114
  base_delay=self.retry_delay
109
115
  )
110
116
 
117
+ # 设置client引用,用于快速降级
118
+ self.retry_handler.error_handler.client = self
119
+
111
120
  # 注册退出时的清理函数
112
121
  atexit.register(self._cleanup_atexit)
113
122
 
@@ -734,7 +743,12 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
734
743
  if self.resilient_enabled and self.circuit_breaker and self.circuit_breaker.is_open:
735
744
  if self.http_fallback_url:
736
745
  logger.warning("🔻 Circuit breaker is OPEN, using HTTP fallback")
737
- return await self._invoke_http_fallback(model_request, timeout, request_id)
746
+ # 在这里还没有计算origin_request_id,所以先计算
747
+ temp_origin_request_id = None
748
+ temp_request_id = request_id
749
+ if request_id:
750
+ temp_request_id, temp_origin_request_id = self._request_id_manager.get_composite_id(request_id)
751
+ return await self._invoke_http_fallback(model_request, timeout, temp_request_id, temp_origin_request_id)
738
752
 
739
753
  await self._ensure_initialized()
740
754
 
@@ -744,10 +758,24 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
744
758
  "user_id": model_request.user_context.user_id or ""
745
759
  }
746
760
 
747
- if not request_id:
761
+ # 处理 request_id
762
+ origin_request_id = None
763
+ if request_id:
764
+ # 用户提供了 request_id,生成组合 ID
765
+ request_id, origin_request_id = self._request_id_manager.get_composite_id(request_id)
766
+ else:
767
+ # 没有提供,生成新的
748
768
  request_id = generate_request_id()
769
+
749
770
  set_request_id(request_id)
750
- metadata = self._build_auth_metadata(request_id)
771
+ if origin_request_id:
772
+ set_origin_request_id(origin_request_id)
773
+ metadata = self._build_auth_metadata(request_id, origin_request_id)
774
+
775
+ # 构建日志数据
776
+ log_data = ResponseHandler.build_log_data(model_request)
777
+ if origin_request_id:
778
+ log_data['origin_request_id'] = origin_request_id
751
779
 
752
780
  # 记录开始日志
753
781
  start_time = time.time()
@@ -756,7 +784,7 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
756
784
  extra={
757
785
  "log_type": "request",
758
786
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
759
- "data": ResponseHandler.build_log_data(model_request)
787
+ "data": log_data
760
788
  })
761
789
 
762
790
  try:
@@ -789,18 +817,34 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
789
817
  # 对于流式响应,直接返回带日志记录的包装器
790
818
  return self._stream_with_logging(request, metadata, invoke_timeout, start_time, model_request)
791
819
  else:
792
- result = await self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
820
+ # 存储model_request和origin_request_id供重试方法使用
821
+ self._current_model_request = model_request
822
+ self._current_origin_request_id = origin_request_id
823
+ try:
824
+ result = await self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
825
+ finally:
826
+ # 清理临时存储
827
+ if hasattr(self, '_current_model_request'):
828
+ delattr(self, '_current_model_request')
829
+ if hasattr(self, '_current_origin_request_id'):
830
+ delattr(self, '_current_origin_request_id')
793
831
 
794
832
  # 记录非流式响应的成功日志
795
833
  duration = time.time() - start_time
796
834
  content_length = len(result.content) if result.content else 0
835
+
836
+ # 构建响应日志数据
837
+ response_log_data = ResponseHandler.build_log_data(model_request, result)
838
+ if origin_request_id:
839
+ response_log_data['origin_request_id'] = origin_request_id
840
+
797
841
  logger.info(
798
842
  f"✅ Request completed | content_length: {content_length}",
799
843
  extra={
800
844
  "log_type": "response",
801
845
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
802
846
  "duration": duration,
803
- "data": ResponseHandler.build_log_data(model_request, result)
847
+ "data": response_log_data
804
848
  }
805
849
  )
806
850
 
@@ -813,31 +857,29 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
813
857
  except (ConnectionError, grpc.RpcError) as e:
814
858
  duration = time.time() - start_time
815
859
  error_message = f"❌ Invoke gRPC failed: {str(e)}"
860
+
861
+ # 构建错误日志数据
862
+ error_log_data = ResponseHandler.build_log_data(model_request, error=e)
863
+ if origin_request_id:
864
+ error_log_data['origin_request_id'] = origin_request_id
865
+
816
866
  logger.error(error_message, exc_info=True,
817
867
  extra={
818
868
  "log_type": "response",
819
869
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
820
870
  "duration": duration,
821
- "data": ResponseHandler.build_log_data(
822
- model_request,
823
- error=e
824
- )
871
+ "data": error_log_data
825
872
  })
826
873
 
827
874
  # 记录 channel 错误
828
875
  if isinstance(e, grpc.RpcError):
829
876
  self._record_channel_error(e)
830
877
 
831
- # 记录失败并尝试降级(如果启用了熔断)
878
+ # 记录失败(如果启用了熔断)
832
879
  if self.resilient_enabled and self.circuit_breaker:
833
880
  # 将错误码传递给熔断器,用于智能失败统计
834
881
  error_code = e.code() if hasattr(e, 'code') else None
835
882
  self.circuit_breaker.record_failure(error_code)
836
-
837
- # 如果可以降级,则降级
838
- if self.http_fallback_url and self.circuit_breaker.should_fallback():
839
- logger.warning(f"🔻 gRPC failed, falling back to HTTP: {str(e)}")
840
- return await self._invoke_http_fallback(model_request, timeout, request_id)
841
883
 
842
884
  raise e
843
885
  except Exception as e:
@@ -867,6 +909,17 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
867
909
  Returns:
868
910
  BatchModelResponse: 批量请求的结果
869
911
  """
912
+ # 如果启用了熔断且熔断器打开,直接走 HTTP
913
+ if self.resilient_enabled and self.circuit_breaker and self.circuit_breaker.is_open:
914
+ if self.http_fallback_url:
915
+ logger.warning("🔻 Circuit breaker is OPEN, using HTTP fallback for batch request")
916
+ # 在这里还没有计算origin_request_id,所以先计算
917
+ temp_origin_request_id = None
918
+ temp_request_id = request_id
919
+ if request_id:
920
+ temp_request_id, temp_origin_request_id = self._request_id_manager.get_composite_id(request_id)
921
+ return await self._invoke_batch_http_fallback(batch_request_model, timeout, temp_request_id, temp_origin_request_id)
922
+
870
923
  await self._ensure_initialized()
871
924
 
872
925
  if not self.default_payload:
@@ -875,10 +928,29 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
875
928
  "user_id": batch_request_model.user_context.user_id or ""
876
929
  }
877
930
 
878
- if not request_id:
931
+ # 处理 request_id
932
+ origin_request_id = None
933
+ if request_id:
934
+ # 用户提供了 request_id,生成组合 ID
935
+ request_id, origin_request_id = self._request_id_manager.get_composite_id(request_id)
936
+ else:
937
+ # 没有提供,生成新的
879
938
  request_id = generate_request_id()
939
+
880
940
  set_request_id(request_id)
881
- metadata = self._build_auth_metadata(request_id)
941
+ if origin_request_id:
942
+ set_origin_request_id(origin_request_id)
943
+ metadata = self._build_auth_metadata(request_id, origin_request_id)
944
+
945
+ # 构建日志数据
946
+ batch_log_data = {
947
+ "batch_size": len(batch_request_model.items),
948
+ "org_id": batch_request_model.user_context.org_id,
949
+ "user_id": batch_request_model.user_context.user_id,
950
+ "client_type": batch_request_model.user_context.client_type
951
+ }
952
+ if origin_request_id:
953
+ batch_log_data['origin_request_id'] = origin_request_id
882
954
 
883
955
  # 记录开始日志
884
956
  start_time = time.time()
@@ -887,12 +959,7 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
887
959
  extra={
888
960
  "log_type": "request",
889
961
  "uri": "/batch_invoke",
890
- "data": {
891
- "batch_size": len(batch_request_model.items),
892
- "org_id": batch_request_model.user_context.org_id,
893
- "user_id": batch_request_model.user_context.user_id,
894
- "client_type": batch_request_model.user_context.client_type
895
- }
962
+ "data": batch_log_data
896
963
  })
897
964
 
898
965
  try:
@@ -919,6 +986,11 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
919
986
 
920
987
  try:
921
988
  invoke_timeout = timeout or self.default_invoke_timeout
989
+
990
+ # 保存批量请求信息用于降级
991
+ self._current_batch_request = batch_request_model
992
+ self._current_origin_request_id = origin_request_id
993
+
922
994
  batch_response = await self._retry_request(
923
995
  self.stub.BatchInvoke,
924
996
  batch_request,
@@ -101,9 +101,12 @@ class CircuitBreaker:
101
101
  logger.warning(
102
102
  f"🔻 Circuit breaker OPENED after {self.failure_count} failures",
103
103
  extra={
104
- "failure_count": self.failure_count,
105
- "threshold": self.failure_threshold,
106
- "trigger_error": error_code.name if error_code else "unknown"
104
+ "log_type": "info",
105
+ "data": {
106
+ "failure_count": self.failure_count,
107
+ "threshold": self.failure_threshold,
108
+ "trigger_error": error_code.name if error_code else "unknown"
109
+ }
107
110
  }
108
111
  )
109
112
 
@@ -10,7 +10,9 @@ from .utils import (
10
10
  remove_none_from_dict,
11
11
  generate_request_id,
12
12
  set_request_id,
13
- get_request_id
13
+ get_request_id,
14
+ set_origin_request_id,
15
+ get_origin_request_id
14
16
  )
15
17
 
16
18
  from .logging_setup import (
@@ -22,6 +24,8 @@ from .logging_setup import (
22
24
  MAX_MESSAGE_LENGTH
23
25
  )
24
26
 
27
+ from .request_id_manager import RequestIdManager
28
+
25
29
  __all__ = [
26
30
  # Utils
27
31
  'is_effective_value',
@@ -30,6 +34,8 @@ __all__ = [
30
34
  'generate_request_id',
31
35
  'set_request_id',
32
36
  'get_request_id',
37
+ 'set_origin_request_id',
38
+ 'get_origin_request_id',
33
39
  # Logging
34
40
  'setup_logger',
35
41
  'RequestIdFilter',
@@ -37,4 +43,6 @@ __all__ = [
37
43
  'get_protected_logger',
38
44
  'reset_logger_config',
39
45
  'MAX_MESSAGE_LENGTH',
46
+ # Request ID Management
47
+ 'RequestIdManager',
40
48
  ]