tamar-model-client 0.1.28__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tamar_model_client/async_client.py +71 -42
- tamar_model_client/auth.py +31 -2
- tamar_model_client/core/base_client.py +29 -11
- tamar_model_client/core/http_fallback.py +101 -17
- tamar_model_client/error_handler.py +8 -6
- tamar_model_client/json_formatter.py +9 -0
- tamar_model_client/sync_client.py +59 -24
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/METADATA +496 -7
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/RECORD +13 -12
- tests/test_circuit_breaker.py +269 -0
- tests/test_google_azure_final.py +589 -5
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/WHEEL +0 -0
- {tamar_model_client-0.1.28.dist-info → tamar_model_client-0.2.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tamar-model-client
|
3
|
-
Version: 0.1.28
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: A Python SDK for interacting with the Model Manager gRPC service
|
5
5
|
Home-page: http://gitlab.tamaredge.top/project-tap/AgentOS/model-manager-client
|
6
6
|
Author: Oscar Ou
|
@@ -71,9 +71,11 @@ Dynamic: summary
|
|
71
71
|
- 🚀 **快速降级** 失败立即降级,最大化成功率
|
72
72
|
- 🔐 **JWT 认证** 安全可靠
|
73
73
|
- 📊 **使用量追踪** Token 统计与成本计算
|
74
|
-
- 🆔 **请求追踪** 唯一 request_id
|
75
|
-
- ⚠️ **完善错误处理**
|
74
|
+
- 🆔 **请求追踪** 唯一 request_id 和 origin_request_id 全链路追踪
|
75
|
+
- ⚠️ **完善错误处理** 详细错误信息和异常堆栈追踪
|
76
76
|
- ✅ **类型安全** Pydantic v2 验证
|
77
|
+
- 📦 **批量降级** HTTP 降级支持批量请求
|
78
|
+
- 🔍 **结构化日志** JSON 格式日志便于监控分析
|
77
79
|
|
78
80
|
### 🚀 高性能设计
|
79
81
|
- 🔗 **gRPC 通信** HTTP/2 长连接
|
@@ -114,10 +116,20 @@ tamar_model_client/
|
|
114
116
|
│ ├── providers.py # AI 服务商(OpenAI, Google, Azure...)
|
115
117
|
│ ├── invoke.py # 调用类型(generation, images...)
|
116
118
|
│ └── channel.py # 服务通道(openai, vertexai...)
|
119
|
+
├── 📁 core/ # 核心功能模块
|
120
|
+
│ ├── base_client.py # 客户端基类(熔断、降级、配置)
|
121
|
+
│ ├── http_fallback.py # HTTP 降级功能(支持批量请求)
|
122
|
+
│ ├── request_builder.py # 请求构建器
|
123
|
+
│ ├── response_handler.py # 响应处理器
|
124
|
+
│ ├── logging_setup.py # 结构化日志配置
|
125
|
+
│ └── utils.py # 请求ID管理和工具函数
|
117
126
|
├── 📄 sync_client.py # 同步客户端 TamarModelClient
|
118
127
|
├── 📄 async_client.py # 异步客户端 AsyncTamarModelClient
|
128
|
+
├── 📄 error_handler.py # 增强错误处理和重试策略
|
129
|
+
├── 📄 circuit_breaker.py # 熔断器实现
|
119
130
|
├── 📄 exceptions.py # 异常层级定义
|
120
131
|
├── 📄 auth.py # JWT 认证管理
|
132
|
+
├── 📄 json_formatter.py # JSON 日志格式化器
|
121
133
|
└── 📄 utils.py # 工具函数
|
122
134
|
```
|
123
135
|
|
@@ -507,6 +519,305 @@ response = client.invoke(
|
|
507
519
|
)
|
508
520
|
```
|
509
521
|
|
522
|
+
### 🔄 错误处理最佳实践
|
523
|
+
|
524
|
+
SDK 提供了完善的异常体系,便于精确处理不同类型的错误:
|
525
|
+
|
526
|
+
```python
|
527
|
+
from tamar_model_client import TamarModelClient
|
528
|
+
from tamar_model_client.exceptions import (
|
529
|
+
TamarModelException,
|
530
|
+
NetworkException,
|
531
|
+
AuthenticationException,
|
532
|
+
RateLimitException,
|
533
|
+
ProviderException,
|
534
|
+
TimeoutException
|
535
|
+
)
|
536
|
+
|
537
|
+
client = TamarModelClient()
|
538
|
+
|
539
|
+
try:
|
540
|
+
response = client.invoke(request)
|
541
|
+
except TimeoutException as e:
|
542
|
+
# 处理超时错误
|
543
|
+
logger.warning(f"请求超时: {e.message}, request_id: {e.request_id}")
|
544
|
+
# 可以重试或使用更快的模型
|
545
|
+
except RateLimitException as e:
|
546
|
+
# 处理限流错误
|
547
|
+
logger.error(f"触发限流: {e.message}")
|
548
|
+
# 等待一段时间后重试
|
549
|
+
time.sleep(60)
|
550
|
+
except AuthenticationException as e:
|
551
|
+
# 处理认证错误
|
552
|
+
logger.error(f"认证失败: {e.message}")
|
553
|
+
# 检查 JWT 配置
|
554
|
+
except NetworkException as e:
|
555
|
+
# 处理网络错误(已自动重试后仍失败)
|
556
|
+
logger.error(f"网络错误: {e.message}")
|
557
|
+
# 可能需要检查网络连接或服务状态
|
558
|
+
except ProviderException as e:
|
559
|
+
# 处理提供商特定错误
|
560
|
+
logger.error(f"提供商错误: {e.message}")
|
561
|
+
# 根据错误码进行特定处理
|
562
|
+
if "insufficient_quota" in str(e):
|
563
|
+
# 切换到其他提供商
|
564
|
+
pass
|
565
|
+
except TamarModelException as e:
|
566
|
+
# 处理其他所有模型相关错误
|
567
|
+
logger.error(f"模型错误: {e.message}")
|
568
|
+
logger.error(f"错误上下文: {e.error_context}")
|
569
|
+
```
|
570
|
+
|
571
|
+
### 🔀 多提供商无缝切换
|
572
|
+
|
573
|
+
轻松实现提供商之间的切换和降级:
|
574
|
+
|
575
|
+
```python
|
576
|
+
from tamar_model_client import TamarModelClient
|
577
|
+
from tamar_model_client.schemas import ModelRequest, UserContext
|
578
|
+
from tamar_model_client.enums import ProviderType
|
579
|
+
from tamar_model_client.exceptions import ProviderException, RateLimitException
|
580
|
+
|
581
|
+
client = TamarModelClient()
|
582
|
+
|
583
|
+
# 定义提供商优先级
|
584
|
+
providers = [
|
585
|
+
(ProviderType.OPENAI, "gpt-4"),
|
586
|
+
(ProviderType.GOOGLE, "gemini-pro"),
|
587
|
+
(ProviderType.AZURE, "gpt-4o-mini")
|
588
|
+
]
|
589
|
+
|
590
|
+
user_context = UserContext(
|
591
|
+
user_id="test_user",
|
592
|
+
org_id="test_org",
|
593
|
+
client_type="python-sdk"
|
594
|
+
)
|
595
|
+
|
596
|
+
# 尝试多个提供商直到成功
|
597
|
+
for provider, model in providers:
|
598
|
+
try:
|
599
|
+
request = ModelRequest(
|
600
|
+
provider=provider,
|
601
|
+
model=model,
|
602
|
+
messages=[{"role": "user", "content": "Hello"}] if provider != ProviderType.GOOGLE else None,
|
603
|
+
contents=[{"role": "user", "parts": [{"text": "Hello"}]}] if provider == ProviderType.GOOGLE else None,
|
604
|
+
user_context=user_context
|
605
|
+
)
|
606
|
+
|
607
|
+
response = client.invoke(request)
|
608
|
+
print(f"成功使用 {provider.value} - {model}")
|
609
|
+
print(f"响应: {response.content}")
|
610
|
+
break
|
611
|
+
|
612
|
+
except (ProviderException, RateLimitException) as e:
|
613
|
+
logger.warning(f"{provider.value} 失败: {e.message}")
|
614
|
+
continue
|
615
|
+
```
|
616
|
+
|
617
|
+
### 🎯 请求上下文管理
|
618
|
+
|
619
|
+
使用上下文管理器确保资源正确释放:
|
620
|
+
|
621
|
+
```python
|
622
|
+
from tamar_model_client import TamarModelClient, AsyncTamarModelClient
|
623
|
+
import asyncio
|
624
|
+
|
625
|
+
# 同步客户端上下文管理器
|
626
|
+
with TamarModelClient() as client:
|
627
|
+
response = client.invoke(request)
|
628
|
+
print(response.content)
|
629
|
+
# 自动调用 client.close()
|
630
|
+
|
631
|
+
# 异步客户端上下文管理器
|
632
|
+
async def async_example():
|
633
|
+
async with AsyncTamarModelClient() as client:
|
634
|
+
response = await client.invoke(request)
|
635
|
+
print(response.content)
|
636
|
+
# 自动调用 await client.close()
|
637
|
+
|
638
|
+
asyncio.run(async_example())
|
639
|
+
```
|
640
|
+
|
641
|
+
### ⏱️ 超时控制
|
642
|
+
|
643
|
+
通过环境变量或代码控制请求超时:
|
644
|
+
|
645
|
+
```python
|
646
|
+
import os
|
647
|
+
from tamar_model_client import TamarModelClient
|
648
|
+
|
649
|
+
# 方式一:环境变量设置全局超时
|
650
|
+
os.environ['MODEL_MANAGER_SERVER_GRPC_TIMEOUT'] = '30' # 30秒超时
|
651
|
+
|
652
|
+
# 方式二:创建客户端时设置
|
653
|
+
client = TamarModelClient(
|
654
|
+
server_address="localhost:50051",
|
655
|
+
timeout=30.0 # 30秒超时
|
656
|
+
)
|
657
|
+
|
658
|
+
# 处理超时
|
659
|
+
try:
|
660
|
+
response = client.invoke(request)
|
661
|
+
except TimeoutException as e:
|
662
|
+
logger.error(f"请求超时: {e.message}")
|
663
|
+
# 可以尝试更小的模型或减少 max_tokens
|
664
|
+
```
|
665
|
+
|
666
|
+
### 📊 性能监控与指标
|
667
|
+
|
668
|
+
获取详细的性能指标和使用统计:
|
669
|
+
|
670
|
+
```python
|
671
|
+
from tamar_model_client import TamarModelClient
|
672
|
+
import time
|
673
|
+
|
674
|
+
client = TamarModelClient()
|
675
|
+
|
676
|
+
# 监控单次请求性能
|
677
|
+
start_time = time.time()
|
678
|
+
response = client.invoke(request)
|
679
|
+
latency = time.time() - start_time
|
680
|
+
|
681
|
+
print(f"请求延迟: {latency:.2f}秒")
|
682
|
+
print(f"Request ID: {response.request_id}")
|
683
|
+
if response.usage:
|
684
|
+
print(f"输入 Tokens: {response.usage.prompt_tokens}")
|
685
|
+
print(f"输出 Tokens: {response.usage.completion_tokens}")
|
686
|
+
print(f"总 Tokens: {response.usage.total_tokens}")
|
687
|
+
print(f"预估成本: ${response.usage.total_cost:.4f}")
|
688
|
+
|
689
|
+
# 获取熔断器指标
|
690
|
+
metrics = client.get_resilient_metrics()
|
691
|
+
if metrics:
|
692
|
+
print(f"\n熔断器状态:")
|
693
|
+
print(f"- 状态: {metrics['circuit_state']}")
|
694
|
+
print(f"- 失败次数: {metrics['failure_count']}")
|
695
|
+
print(f"- 上次失败: {metrics['last_failure_time']}")
|
696
|
+
print(f"- HTTP降级地址: {metrics['http_fallback_url']}")
|
697
|
+
```
|
698
|
+
|
699
|
+
### 🔧 自定义配置示例
|
700
|
+
|
701
|
+
灵活的配置选项满足不同场景需求:
|
702
|
+
|
703
|
+
```python
|
704
|
+
from tamar_model_client import TamarModelClient
|
705
|
+
|
706
|
+
# 完整配置示例
|
707
|
+
client = TamarModelClient(
|
708
|
+
# 服务器配置
|
709
|
+
server_address="grpc.example.com:443",
|
710
|
+
use_tls=True,
|
711
|
+
default_authority="grpc.example.com",
|
712
|
+
|
713
|
+
# 认证配置
|
714
|
+
jwt_secret_key="your-secret-key",
|
715
|
+
jwt_expiration=3600, # 1小时过期
|
716
|
+
|
717
|
+
# 重试配置
|
718
|
+
max_retries=5,
|
719
|
+
retry_delay=1.0,
|
720
|
+
|
721
|
+
# 超时配置
|
722
|
+
timeout=60.0,
|
723
|
+
|
724
|
+
# 熔断降级配置
|
725
|
+
resilient_enabled=True,
|
726
|
+
http_fallback_url="https://backup.example.com",
|
727
|
+
circuit_breaker_threshold=3,
|
728
|
+
circuit_breaker_timeout=30
|
729
|
+
)
|
730
|
+
```
|
731
|
+
|
732
|
+
### 🔐 安全最佳实践
|
733
|
+
|
734
|
+
确保 SDK 使用的安全性:
|
735
|
+
|
736
|
+
```python
|
737
|
+
import os
|
738
|
+
from tamar_model_client import TamarModelClient
|
739
|
+
|
740
|
+
# 1. 使用环境变量存储敏感信息
|
741
|
+
os.environ['MODEL_MANAGER_SERVER_JWT_SECRET_KEY'] = os.environ['JWT_SECRET']  # 未设置 JWT_SECRET 时会直接抛出 KeyError
|
742
|
+
|
743
|
+
# 2. 启用 TLS 加密
|
744
|
+
client = TamarModelClient(
|
745
|
+
server_address="grpc.example.com:443",
|
746
|
+
use_tls=True
|
747
|
+
)
|
748
|
+
|
749
|
+
# 3. 最小权限原则 - 只请求需要的数据
|
750
|
+
request = ModelRequest(
|
751
|
+
provider=ProviderType.OPENAI,
|
752
|
+
model="gpt-3.5-turbo",
|
753
|
+
messages=[{"role": "user", "content": "分析这段文本"}],
|
754
|
+
user_context=UserContext(
|
755
|
+
user_id="limited_user",
|
756
|
+
org_id="restricted_org",
|
757
|
+
client_type="analysis-service"
|
758
|
+
),
|
759
|
+
max_tokens=100 # 限制输出长度
|
760
|
+
)
|
761
|
+
|
762
|
+
# 4. 审计日志
|
763
|
+
response = client.invoke(request)
|
764
|
+
logger.info(f"AI请求审计: user={request.user_context.user_id}, model={request.model}, request_id={response.request_id}")
|
765
|
+
```
|
766
|
+
|
767
|
+
### 🚀 并发请求优化
|
768
|
+
|
769
|
+
高效处理大量并发请求:
|
770
|
+
|
771
|
+
```python
|
772
|
+
import asyncio
|
773
|
+
from tamar_model_client import AsyncTamarModelClient
|
774
|
+
from tamar_model_client.schemas import ModelRequest, UserContext
|
775
|
+
|
776
|
+
async def process_batch_async(questions: list[str]):
|
777
|
+
"""异步并发处理多个问题"""
|
778
|
+
async with AsyncTamarModelClient() as client:
|
779
|
+
tasks = []
|
780
|
+
|
781
|
+
for i, question in enumerate(questions):
|
782
|
+
request = ModelRequest(
|
783
|
+
provider=ProviderType.OPENAI,
|
784
|
+
model="gpt-3.5-turbo",
|
785
|
+
messages=[{"role": "user", "content": question}],
|
786
|
+
user_context=UserContext(
|
787
|
+
user_id="batch_user",
|
788
|
+
org_id="test_org",
|
789
|
+
client_type="batch-processor"
|
790
|
+
)
|
791
|
+
)
|
792
|
+
|
793
|
+
# 创建异步任务
|
794
|
+
task = asyncio.create_task(client.invoke(request))
|
795
|
+
tasks.append((i, task))
|
796
|
+
|
797
|
+
# 并发执行所有请求
|
798
|
+
results = []
|
799
|
+
for i, task in tasks:
|
800
|
+
try:
|
801
|
+
response = await task
|
802
|
+
results.append((i, response.content))
|
803
|
+
except Exception as e:
|
804
|
+
results.append((i, f"Error: {str(e)}"))
|
805
|
+
|
806
|
+
return results
|
807
|
+
|
808
|
+
# 使用示例
|
809
|
+
questions = [
|
810
|
+
"什么是人工智能?",
|
811
|
+
"解释机器学习的原理",
|
812
|
+
"深度学习和机器学习的区别",
|
813
|
+
"什么是神经网络?"
|
814
|
+
]
|
815
|
+
|
816
|
+
results = asyncio.run(process_batch_async(questions))
|
817
|
+
for i, content in results:
|
818
|
+
print(f"问题 {i+1} 的回答: {content[:100]}...")
|
819
|
+
```
|
820
|
+
|
510
821
|
## 🛠️ 高级功能
|
511
822
|
|
512
823
|
### 🔥 使用场景和最佳实践
|
@@ -620,7 +931,19 @@ gRPC请求 → 失败 → 立即HTTP降级 (或重试1次后降级)
|
|
620
931
|
|
621
932
|
- **立即降级错误**:`UNAVAILABLE`, `DEADLINE_EXCEEDED`, `CANCELLED` (网络问题)
|
622
933
|
- **延迟降级错误**:其他错误重试指定次数后降级
|
623
|
-
- **永不降级错误**:`UNAUTHENTICATED`, `PERMISSION_DENIED` (客户端问题)
|
934
|
+
- **永不降级错误**:`UNAUTHENTICATED`, `PERMISSION_DENIED`, `INVALID_ARGUMENT` (客户端问题)
|
935
|
+
|
936
|
+
#### 批量请求降级支持
|
937
|
+
|
938
|
+
快速降级同时支持单个请求和批量请求:
|
939
|
+
|
940
|
+
```python
|
941
|
+
# 单个请求降级
|
942
|
+
response = client.invoke(request) # 自动降级到 /v1/invoke
|
943
|
+
|
944
|
+
# 批量请求降级
|
945
|
+
batch_response = client.invoke_batch(batch_request) # 自动降级到 /v1/batch-invoke
|
946
|
+
```
|
624
947
|
|
625
948
|
#### 使用示例
|
626
949
|
|
@@ -641,7 +964,7 @@ response = client.invoke(request)
|
|
641
964
|
#### 配置选项详解
|
642
965
|
|
643
966
|
```bash
|
644
|
-
#
|
967
|
+
# 启用快速降级(默认 false,建议开启)
|
645
968
|
MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
|
646
969
|
|
647
970
|
# 非立即降级的错误,重试多少次后降级(默认1次)
|
@@ -654,6 +977,88 @@ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
|
|
654
977
|
MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
|
655
978
|
```
|
656
979
|
|
980
|
+
### 🔍 请求追踪与监控
|
981
|
+
|
982
|
+
SDK 提供了完善的请求追踪功能,便于问题排查和性能监控:
|
983
|
+
|
984
|
+
#### 请求 ID 追踪
|
985
|
+
|
986
|
+
每个请求都会自动生成唯一的 `request_id`,用于追踪单次请求:
|
987
|
+
|
988
|
+
```python
|
989
|
+
from tamar_model_client import TamarModelClient
|
990
|
+
from tamar_model_client.core import generate_request_id, set_request_id
|
991
|
+
|
992
|
+
# 自动生成 request_id
|
993
|
+
response = client.invoke(request)
|
994
|
+
print(f"Request ID: {response.request_id}")
|
995
|
+
|
996
|
+
# 手动设置 request_id
|
997
|
+
custom_request_id = generate_request_id()
|
998
|
+
set_request_id(custom_request_id)
|
999
|
+
response = client.invoke(request)
|
1000
|
+
```
|
1001
|
+
|
1002
|
+
#### 原始请求 ID 追踪
|
1003
|
+
|
1004
|
+
对于需要跨多个服务调用的场景,可以使用 `origin_request_id` 进行全链路追踪:
|
1005
|
+
|
1006
|
+
```python
|
1007
|
+
from tamar_model_client.core import set_origin_request_id
|
1008
|
+
|
1009
|
+
# 设置原始请求 ID(通常来自上游服务)
|
1010
|
+
set_origin_request_id("user-provided-id-123")
|
1011
|
+
|
1012
|
+
# 所有后续请求都会携带这个 origin_request_id
|
1013
|
+
response = client.invoke(request)
|
1014
|
+
```
|
1015
|
+
|
1016
|
+
#### 结构化日志
|
1017
|
+
|
1018
|
+
启用 JSON 日志格式后,每条日志都包含完整的追踪信息:
|
1019
|
+
|
1020
|
+
```json
|
1021
|
+
{
|
1022
|
+
"timestamp": "2025-07-03T14:40:32.729313",
|
1023
|
+
"level": "INFO",
|
1024
|
+
"type": "request",
|
1025
|
+
"uri": "/invoke/openai/chat",
|
1026
|
+
"request_id": "448a64f4-3bb0-467c-af15-d4181d0ac499",
|
1027
|
+
"data": {
|
1028
|
+
"origin_request_id": "user-provided-id-123",
|
1029
|
+
"provider": "openai",
|
1030
|
+
"model": "gpt-4",
|
1031
|
+
"stream": false
|
1032
|
+
},
|
1033
|
+
"message": "🚀 Invoke request started"
|
1034
|
+
}
|
1035
|
+
```
|
1036
|
+
|
1037
|
+
#### 错误追踪
|
1038
|
+
|
1039
|
+
错误日志包含异常堆栈和完整上下文:
|
1040
|
+
|
1041
|
+
```json
|
1042
|
+
{
|
1043
|
+
"timestamp": "2025-07-03T14:40:35.123456",
|
1044
|
+
"level": "ERROR",
|
1045
|
+
"type": "response",
|
1046
|
+
"request_id": "448a64f4-3bb0-467c-af15-d4181d0ac499",
|
1047
|
+
"data": {
|
1048
|
+
"origin_request_id": "user-provided-id-123",
|
1049
|
+
"error_code": "DEADLINE_EXCEEDED",
|
1050
|
+
"error_message": "Request timeout after 30 seconds",
|
1051
|
+
"retry_count": 2,
|
1052
|
+
"fallback_attempted": true
|
1053
|
+
},
|
1054
|
+
"exception": {
|
1055
|
+
"type": "TimeoutException",
|
1056
|
+
"message": "Request timeout after 30 seconds",
|
1057
|
+
"traceback": ["Traceback (most recent call last):", "..."]
|
1058
|
+
}
|
1059
|
+
}
|
1060
|
+
```
|
1061
|
+
|
657
1062
|
### ⚠️ 注意事项
|
658
1063
|
|
659
1064
|
1. **参数说明**
|
@@ -666,9 +1071,11 @@ MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARG
|
|
666
1071
|
- 如需多实例,务必调用 `client.close()` 释放资源
|
667
1072
|
|
668
1073
|
3. **错误处理**
|
669
|
-
- 所有错误包含 `request_id`
|
1074
|
+
- 所有错误包含 `request_id` 和 `origin_request_id` 用于全链路问题追踪
|
670
1075
|
- 网络错误会自动重试(指数退避)
|
671
1076
|
- 提供商错误保留原始错误信息
|
1077
|
+
- 支持异常堆栈追踪,便于问题排查
|
1078
|
+
- 结构化 JSON 日志格式,便于监控系统集成
|
672
1079
|
|
673
1080
|
## ⚙️ 环境变量配置(推荐)
|
674
1081
|
|
@@ -745,7 +1152,7 @@ MODEL_CLIENT_CIRCUIT_BREAKER_TIMEOUT=60
|
|
745
1152
|
# 🚀 快速降级配置(可选,优化体验)
|
746
1153
|
# ========================
|
747
1154
|
|
748
|
-
# 是否启用快速降级功能(默认
|
1155
|
+
# 是否启用快速降级功能(默认 false,建议开启)
|
749
1156
|
# 启用后,gRPC 请求失败时会立即尝试 HTTP 降级,而不是等待所有重试完成
|
750
1157
|
MODEL_CLIENT_FAST_FALLBACK_ENABLED=true
|
751
1158
|
|
@@ -760,6 +1167,18 @@ MODEL_CLIENT_IMMEDIATE_FALLBACK_ERRORS=UNAVAILABLE,DEADLINE_EXCEEDED,CANCELLED
|
|
760
1167
|
# 永不降级的错误类型(逗号分隔,默认为认证、权限及参数校验相关错误)
|
761
1168
|
# 这些错误类型不会触发降级,通常是客户端问题而非服务不可用
|
762
1169
|
MODEL_CLIENT_NEVER_FALLBACK_ERRORS=UNAUTHENTICATED,PERMISSION_DENIED,INVALID_ARGUMENT
|
1170
|
+
|
1171
|
+
|
1172
|
+
# ========================
|
1173
|
+
# 🔍 日志与监控配置(可选)
|
1174
|
+
# ========================
|
1175
|
+
|
1176
|
+
# 启用结构化 JSON 日志格式(默认 false,建议开启)
|
1177
|
+
# 启用后日志将以 JSON 格式输出,便于监控系统集成
|
1178
|
+
MODEL_CLIENT_ENABLE_JSON_LOGGING=true
|
1179
|
+
|
1180
|
+
# 日志级别设置(DEBUG, INFO, WARNING, ERROR,默认 INFO)
|
1181
|
+
MODEL_CLIENT_LOG_LEVEL=INFO
|
763
1182
|
```
|
764
1183
|
|
765
1184
|
加载后,初始化时无需传参:
|
@@ -833,6 +1252,76 @@ twine upload dist/*
|
|
833
1252
|
- **并发支持**: 1000+ 并发请求
|
834
1253
|
- **连接复用**: HTTP/2 多路复用
|
835
1254
|
- **自动重试**: 指数退避,最多 5 次
|
1255
|
+
- **降级时间**: 快速降级 < 2 秒内完成
|
1256
|
+
- **熔断恢复**: 自动恢复检测,60 秒周期
|
1257
|
+
|
1258
|
+
## 🔧 故障排除
|
1259
|
+
|
1260
|
+
### 常见问题
|
1261
|
+
|
1262
|
+
#### 1. gRPC 连接失败
|
1263
|
+
```bash
|
1264
|
+
# 错误: failed to connect to all addresses
|
1265
|
+
# 解决方案: 检查服务地址和网络连接
|
1266
|
+
export MODEL_MANAGER_SERVER_ADDRESS="correct-host:port"
|
1267
|
+
```
|
1268
|
+
|
1269
|
+
#### 2. JWT 认证失败
|
1270
|
+
```bash
|
1271
|
+
# 错误: UNAUTHENTICATED
|
1272
|
+
# 解决方案: 检查 JWT 密钥或令牌
|
1273
|
+
export MODEL_MANAGER_SERVER_JWT_SECRET_KEY="your-secret-key"
|
1274
|
+
```
|
1275
|
+
|
1276
|
+
#### 3. HTTP 降级失败
|
1277
|
+
```bash
|
1278
|
+
# 错误: HTTP fallback URL not configured
|
1279
|
+
# 解决方案: 配置 HTTP 降级地址
|
1280
|
+
export MODEL_CLIENT_HTTP_FALLBACK_URL="http://backup-server:8080"
|
1281
|
+
```
|
1282
|
+
|
1283
|
+
#### 4. 依赖包缺失
|
1284
|
+
```bash
|
1285
|
+
# 错误: aiohttp library is not installed
|
1286
|
+
# 解决方案: 安装 HTTP 客户端依赖
|
1287
|
+
pip install aiohttp requests
|
1288
|
+
```
|
1289
|
+
|
1290
|
+
### 调试技巧
|
1291
|
+
|
1292
|
+
#### 启用详细日志
|
1293
|
+
```python
|
1294
|
+
import logging
|
1295
|
+
logging.basicConfig(level=logging.DEBUG)
|
1296
|
+
|
1297
|
+
# 或使用环境变量
|
1298
|
+
# MODEL_CLIENT_LOG_LEVEL=DEBUG
|
1299
|
+
```
|
1300
|
+
|
1301
|
+
#### 检查熔断器状态
|
1302
|
+
```python
|
1303
|
+
client = TamarModelClient()
|
1304
|
+
metrics = client.get_resilient_metrics()
|
1305
|
+
print(f"Circuit state: {metrics.get('circuit_state')}")
|
1306
|
+
print(f"Failure count: {metrics.get('failure_count')}")
|
1307
|
+
```
|
1308
|
+
|
1309
|
+
#### 追踪请求流程
|
1310
|
+
```python
|
1311
|
+
from tamar_model_client.core import set_origin_request_id
|
1312
|
+
set_origin_request_id("debug-trace-001")
|
1313
|
+
|
1314
|
+
# 在日志中搜索这个 ID 可以看到完整请求流程
|
1315
|
+
response = client.invoke(request)
|
1316
|
+
```
|
1317
|
+
|
1318
|
+
### 性能优化建议
|
1319
|
+
|
1320
|
+
1. **使用单例客户端**:避免频繁创建客户端实例
|
1321
|
+
2. **启用快速降级**:减少用户感知的错误延迟
|
1322
|
+
3. **合理设置超时**:根据业务需求调整超时时间
|
1323
|
+
4. **监控熔断状态**:及时发现服务问题
|
1324
|
+
5. **使用批量 API**:提高批量处理效率
|
836
1325
|
|
837
1326
|
## 🤝 支持与贡献
|
838
1327
|
|
@@ -1,16 +1,16 @@
|
|
1
1
|
tamar_model_client/__init__.py,sha256=4DEIUGlLTeiaECjJQbGYik7C0JO6hHwwfbLYpYpMdzg,444
|
2
|
-
tamar_model_client/async_client.py,sha256=
|
3
|
-
tamar_model_client/auth.py,sha256=
|
2
|
+
tamar_model_client/async_client.py,sha256=047a3Ts6Qe2Wcs4xyxtG71kvYngTuSHFzB3V8D97_ec,46431
|
3
|
+
tamar_model_client/auth.py,sha256=DrtnFpG0ZKFUnTnV_Y-FuLRiC2kobcgg0W5Gr1ywg1k,1398
|
4
4
|
tamar_model_client/circuit_breaker.py,sha256=Y3AVp7WzVYU-ubcmovKsJ8DRJbbO4G7vdZgSjnwcWJQ,5550
|
5
|
-
tamar_model_client/error_handler.py,sha256=
|
5
|
+
tamar_model_client/error_handler.py,sha256=y7EipcqkXbCecSAOsnoSP3SH7hvZSNF_NUHooTi3hP0,18364
|
6
6
|
tamar_model_client/exceptions.py,sha256=EOr4JMYI7hVszRvNYJ1JqsUNpVmd16T2KpJ0MkFTsUE,13073
|
7
|
-
tamar_model_client/json_formatter.py,sha256=
|
7
|
+
tamar_model_client/json_formatter.py,sha256=XT8XPMKKM2M22tuYR2e1rvWHcpz3UD9iLLgGPsGOjCI,2410
|
8
8
|
tamar_model_client/logging_icons.py,sha256=MRTZ1Xvkep9ce_jdltj54_XZUXvIpQ95soRNmLdJ4qw,1837
|
9
|
-
tamar_model_client/sync_client.py,sha256=
|
9
|
+
tamar_model_client/sync_client.py,sha256=FbyjuyDRiXklSS_l5h5fwNxvABI-hpLGiIWAXqhPHoI,53760
|
10
10
|
tamar_model_client/utils.py,sha256=Kn6pFz9GEC96H4eejEax66AkzvsrXI3WCSDtgDjnVTI,5238
|
11
11
|
tamar_model_client/core/__init__.py,sha256=RMiZjV1S4csWPLxB_JfdOea8fYPz97Oj3humQSBw1OI,1054
|
12
|
-
tamar_model_client/core/base_client.py,sha256=
|
13
|
-
tamar_model_client/core/http_fallback.py,sha256=
|
12
|
+
tamar_model_client/core/base_client.py,sha256=spb4zjDuPczqnXNlDcIq_bDQ09TOpxeeuX7IxpTS_38,13859
|
13
|
+
tamar_model_client/core/http_fallback.py,sha256=2N7-N_TZrtffDjuv9s3-CD8Xy7qw9AuI5xeWGUnGQ0w,22217
|
14
14
|
tamar_model_client/core/logging_setup.py,sha256=-MXzTR4Ax50H16cbq1jCXbxgayf5fZ0U3o0--fMmxD8,6692
|
15
15
|
tamar_model_client/core/request_builder.py,sha256=yi8iy2Ps2m4d1YwIFiQLRxTvxQxgEGV576aXnNYRl7E,8507
|
16
16
|
tamar_model_client/core/request_id_manager.py,sha256=S-Mliaby9zN_bx-B85FvVnttal-w0skkjy2ZvWoQ5vw,3689
|
@@ -28,10 +28,11 @@ tamar_model_client/schemas/inputs.py,sha256=dz1m8NbUIxA99JXZc8WlyzbKpDuz1lEzx3Vg
|
|
28
28
|
tamar_model_client/schemas/outputs.py,sha256=M_fcqUtXPJnfiLabHlyA8BorlC5pYkf5KLjXO1ysKIQ,1031
|
29
29
|
tests/__init__.py,sha256=kbmImddLDwdqlkkmkyKtl4bQy_ipe-R8eskpaBylU9w,38
|
30
30
|
tests/stream_hanging_analysis.py,sha256=W3W48IhQbNAR6-xvMpoWZvnWOnr56CTaH4-aORNBuD4,14807
|
31
|
-
tests/
|
31
|
+
tests/test_circuit_breaker.py,sha256=nhEBnyXFjIYjRWlUdu7Z9PnPq48ypbBK6fxN6deHedw,12172
|
32
|
+
tests/test_google_azure_final.py,sha256=Cx2lfnoj48_7pUjpCYbrx6OLJF4cI79McV24_EYt_8s,55093
|
32
33
|
tests/test_logging_issue.py,sha256=JTMbotfHpAEPMBj73pOwxPn-Zn4QVQJX6scMz48FRDQ,2427
|
33
34
|
tests/test_simple.py,sha256=Xf0U-J9_xn_LzUsmYu06suK0_7DrPeko8OHoHldsNxE,7169
|
34
|
-
tamar_model_client-0.
|
35
|
-
tamar_model_client-0.
|
36
|
-
tamar_model_client-0.
|
37
|
-
tamar_model_client-0.
|
35
|
+
tamar_model_client-0.2.0.dist-info/METADATA,sha256=600WDQi8qejbb3n9NFevzPo3EuIo8EaBWJxt07zoSpg,41309
|
36
|
+
tamar_model_client-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
37
|
+
tamar_model_client-0.2.0.dist-info/top_level.txt,sha256=f1I-S8iWN-cgv4gB8gxRg9jJOTJMumvm4oGKVPfGg6A,25
|
38
|
+
tamar_model_client-0.2.0.dist-info/RECORD,,
|