tamar-model-client 0.1.8__tar.gz → 0.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/PKG-INFO +10 -3
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/README.md +7 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/setup.py +3 -3
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/async_client.py +154 -55
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/generated/model_service_pb2.py +3 -3
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/generated/model_service_pb2_grpc.py +1 -1
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/schemas/inputs.py +7 -2
- tamar_model_client-0.1.15/tamar_model_client/sync_client.py +509 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/PKG-INFO +10 -3
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/requires.txt +2 -2
- tamar_model_client-0.1.8/tamar_model_client/sync_client.py +0 -111
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/setup.cfg +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/__init__.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/auth.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/enums/__init__.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/enums/channel.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/enums/invoke.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/enums/providers.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/exceptions.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/generated/__init__.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/schemas/__init__.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/schemas/outputs.py +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/SOURCES.txt +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/dependency_links.txt +0 -0
- {tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/top_level.txt +0 -0

{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tamar-model-client
-Version: 0.1.8
+Version: 0.1.15
 Summary: A Python SDK for interacting with the Model Manager gRPC service
 Home-page: http://gitlab.tamaredge.top/project-tap/AgentOS/model-manager-client
 Author: Oscar Ou
@@ -11,8 +11,8 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: grpcio
-Requires-Dist: grpcio-tools
+Requires-Dist: grpcio~=1.67.1
+Requires-Dist: grpcio-tools~=1.67.1
 Requires-Dist: pydantic
 Requires-Dist: PyJWT
 Requires-Dist: nest_asyncio
@@ -528,6 +528,13 @@ pip install -e .
 python make_grpc.py
 ```
 
+### Deploy to pip
+```bash
+python setup.py sdist bdist_wheel
+twine check dist/*
+
+```
+
 ## License
 
 MIT License

{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="tamar-model-client",
-    version="0.1.8",
+    version="0.1.15",
     description="A Python SDK for interacting with the Model Manager gRPC service",
     author="Oscar Ou",
     author_email="oscar.ou@tamaredge.ai",
@@ -12,8 +12,8 @@ setup(
         "tamar_model_client": ["generated/*.py"],  # include the generated gRPC files
     },
     install_requires=[
-        "grpcio",
-        "grpcio-tools",
+        "grpcio~=1.67.1",
+        "grpcio-tools~=1.67.1",
         "pydantic",
         "PyJWT",
         "nest_asyncio",

{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/async_client.py
@@ -4,6 +4,8 @@ import base64
 import json
 import logging
 import os
+import uuid
+from contextvars import ContextVar
 
 import grpc
 from typing import Optional, AsyncIterator, Union, Iterable
@@ -13,21 +15,44 @@ from pydantic import BaseModel
 
 from .auth import JWTAuthHandler
 from .enums import ProviderType, InvokeType
-from .exceptions import ConnectionError
+from .exceptions import ConnectionError
 from .schemas import ModelRequest, ModelResponse, BatchModelRequest, BatchModelResponse
 from .generated import model_service_pb2, model_service_pb2_grpc
 from .schemas.inputs import GoogleGenAiInput, OpenAIResponsesInput, OpenAIChatCompletionsInput, \
     GoogleVertexAIImagesInput, OpenAIImagesInput
 
-if not logging.getLogger().hasHandlers():
-    # configure the log format
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s [%(levelname)s] %(message)s",
-    )
-
 logger = logging.getLogger(__name__)
 
+# manage the request ID with contextvars
+_request_id: ContextVar[str] = ContextVar('request_id', default='-')
+
+
+class RequestIdFilter(logging.Filter):
+    """Custom log filter that adds the request_id to log records."""
+
+    def filter(self, record):
+        # fetch the current request_id from the ContextVar
+        record.request_id = _request_id.get()
+        return True
+
+
+if not logger.hasHandlers():
+    # create a log handler that writes to the console
+    console_handler = logging.StreamHandler()
+
+    # set the log format
+    formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(request_id)s] %(message)s')
+    console_handler.setFormatter(formatter)
+
+    # attach the handler to this logger
+    logger.addHandler(console_handler)
+
+    # set the log level
+    logger.setLevel(logging.INFO)
+
+    # add the custom RequestIdFilter to the logger
+    logger.addFilter(RequestIdFilter())
+
 MAX_MESSAGE_LENGTH = 2 ** 31 - 1  # for 32-bit systems
 
 
@@ -97,6 +122,16 @@ def remove_none_from_dict(data: Any) -> Any:
     return data
 
 
+def generate_request_id():
+    """Generate a unique request_id."""
+    return str(uuid.uuid4())
+
+
+def set_request_id(request_id: str):
+    """Set the request_id for the current request."""
+    _request_id.set(request_id)
+
+
 class AsyncTamarModelClient:
     def __init__(
             self,
@@ -105,8 +140,8 @@ class AsyncTamarModelClient:
             jwt_token: Optional[str] = None,
             default_payload: Optional[dict] = None,
             token_expires_in: int = 3600,
-            max_retries: int = 3,
-            retry_delay: float = 1.0,
+            max_retries: Optional[int] = None,  # maximum number of retries
+            retry_delay: Optional[float] = None,  # initial retry delay (seconds)
     ):
         # server address
         self.server_address = server_address or os.getenv("MODEL_MANAGER_SERVER_ADDRESS")
@@ -137,12 +172,45 @@ class AsyncTamarModelClient:
         self._closed = False
         atexit.register(self._safe_sync_close)  # register automatic close at process exit
 
-    def _build_auth_metadata(self) -> list:
+    async def _retry_request(self, func, *args, **kwargs):
+        retry_count = 0
+        while retry_count < self.max_retries:
+            try:
+                return await func(*args, **kwargs)
+            except (grpc.aio.AioRpcError, grpc.RpcError) as e:
+                # retry with exponential backoff when the call was cancelled
+                if isinstance(e, grpc.aio.AioRpcError) and e.code() == grpc.StatusCode.CANCELLED:
+                    retry_count += 1
+                    logger.warning(f"❌ RPC cancelled, retrying {retry_count}/{self.max_retries}...")
+                    if retry_count < self.max_retries:
+                        delay = self.retry_delay * (2 ** (retry_count - 1))
+                        await asyncio.sleep(delay)
+                    else:
+                        logger.error("❌ Max retry reached for CANCELLED")
+                        raise
+                # other RPC error types, e.g. transient connection problems or server timeouts
+                elif isinstance(e, grpc.RpcError) and e.code() in {grpc.StatusCode.UNAVAILABLE,
+                                                                   grpc.StatusCode.DEADLINE_EXCEEDED}:
+                    retry_count += 1
+                    logger.warning(f"❌ gRPC error {e.code()}, retrying {retry_count}/{self.max_retries}...")
+                    if retry_count < self.max_retries:
+                        delay = self.retry_delay * (2 ** (retry_count - 1))
+                        await asyncio.sleep(delay)
+                    else:
+                        logger.error(f"❌ Max retry reached for {e.code()}")
+                        raise
+                else:
+                    logger.error(f"❌ Non-retryable gRPC error: {e}", exc_info=True)
+                    raise
+
+    def _build_auth_metadata(self, request_id: str) -> list:
         # if not self.jwt_token and self.jwt_handler:
         # changed: generate a fresh token on every request
+        metadata = [("x-request-id", request_id)]  # add the request_id to the headers
         if self.jwt_handler:
             self.jwt_token = self.jwt_handler.encode_token(self.default_payload, expires_in=self.token_expires_in)
-
+            metadata.append(("authorization", f"Bearer {self.jwt_token}"))
+        return metadata
 
     async def _ensure_initialized(self):
         """Initialize the gRPC channel, with TLS and retry support."""
@@ -153,6 +221,7 @@ class AsyncTamarModelClient:
         options = [
            ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
             ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
+            ('grpc.keepalive_permit_without_calls', True)  # keep the connection alive even with no active calls
         ]
         if self.default_authority:
             options.append(("grpc.default_authority", self.default_authority))
@@ -178,14 +247,15 @@ class AsyncTamarModelClient:
                 logger.info(f"✅ gRPC channel initialized to {self.server_address}")
                 return
             except grpc.FutureTimeoutError as e:
-                logger.
+                logger.error(f"❌ gRPC channel initialization timed out: {str(e)}", exc_info=True)
             except grpc.RpcError as e:
-                logger.
+                logger.error(f"❌ gRPC channel initialization failed: {str(e)}", exc_info=True)
             except Exception as e:
-                logger.
+                logger.error(f"❌ Unexpected error during channel initialization: {str(e)}", exc_info=True)
 
             retry_count += 1
             if retry_count > self.max_retries:
+                logger.error(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.", exc_info=True)
                 raise ConnectionError(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.")
 
             # exponential backoff: delay = retry_delay * (2 ^ (retry_count - 1))
@@ -193,28 +263,38 @@ class AsyncTamarModelClient:
             logger.info(f"🚀 Retrying connection (attempt {retry_count}/{self.max_retries}) after {delay:.2f}s delay...")
             await asyncio.sleep(delay)
 
+    async def _stream_inner(self, model_request, metadata, invoke_timeout) -> AsyncIterator[ModelResponse]:
+        """Inner function to handle the actual streaming gRPC call."""
+        async for response in self.stub.Invoke(model_request, metadata=metadata, timeout=invoke_timeout):
+            yield ModelResponse(
+                content=response.content,
+                usage=json.loads(response.usage) if response.usage else None,
+                raw_response=json.loads(response.raw_response) if response.raw_response else None,
+                error=response.error or None,
+            )
+
     async def _stream(self, model_request, metadata, invoke_timeout) -> AsyncIterator[ModelResponse]:
-        … (old lines 197-206 were not captured in this diff view)
-        except Exception as e:
-            raise ValidationError(f"Invalid input: {str(e)}")
+        return await self._retry_request(self._stream_inner, model_request, metadata, invoke_timeout)
+
+    async def _invoke_request(self, request, metadata, invoke_timeout):
+        async for response in self.stub.Invoke(request, metadata=metadata, timeout=invoke_timeout):
+            return ModelResponse(
+                content=response.content,
+                usage=json.loads(response.usage) if response.usage else None,
+                error=response.error or None,
+                request_id=response.request_id if response.request_id else None,
+            )
 
-    async def invoke(self, model_request: ModelRequest, timeout: Optional[float] = None
+    async def invoke(self, model_request: ModelRequest, timeout: Optional[float] = None,
+                     request_id: Optional[str] = None) -> Union[
         ModelResponse, AsyncIterator[ModelResponse]]:
         """
         Generic model invocation method.
 
         Args:
             model_request: a ModelRequest object containing the request parameters.
-
+            timeout: Optional[float]
+            request_id: Optional[str]
         Yields:
             ModelResponse: a streaming or non-streaming model response
 
@@ -230,6 +310,15 @@ class AsyncTamarModelClient:
             "user_id": model_request.user_context.user_id or ""
         }
 
+        if not request_id:
+            request_id = generate_request_id()  # generate a new request_id
+        set_request_id(request_id)  # set the request_id for the current request
+        metadata = self._build_auth_metadata(request_id)  # add the request_id to the request headers
+
+        # log the start of the request
+        logger.info(
+            f"🔵 Request Start | request_id: {request_id} | provider: {model_request.provider} | invoke_type: {model_request.invoke_type} | model_request: {model_request}")
+
         # dynamically decide which input fields to use based on provider/invoke_type
         try:
             # select the set of fields to validate
@@ -281,23 +370,23 @@ class AsyncTamarModelClient:
         except Exception as e:
             raise ValueError(f"Failed to build request: {str(e)}") from e
 
-        … (old lines 284-300 were not captured in this diff view)
+        try:
+            invoke_timeout = timeout or self.default_invoke_timeout
+            if model_request.stream:
+                return await self._stream(request, metadata, invoke_timeout)
+            else:
+                return await self._retry_request(self._invoke_request, request, metadata, invoke_timeout)
+        except grpc.RpcError as e:
+            error_message = f"❌ Invoke gRPC failed: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+        except Exception as e:
+            error_message = f"❌ Invoke other error: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+
+    async def invoke_batch(self, batch_request_model: BatchModelRequest, timeout: Optional[float] = None,
+                           request_id: Optional[str] = None) -> \
             BatchModelResponse:
         """
         Batch model invocation interface
@@ -305,10 +394,11 @@ class AsyncTamarModelClient:
         Args:
             batch_request_model: multiple BatchModelRequest inputs
             timeout: invocation timeout, in seconds
-
+            request_id: request id
         Returns:
             BatchModelResponse: the result of the batch request
         """
+
         await self._ensure_initialized()
 
         if not self.default_payload:
@@ -317,7 +407,14 @@ class AsyncTamarModelClient:
             "user_id": batch_request_model.user_context.user_id or ""
         }
 
-
+        if not request_id:
+            request_id = generate_request_id()  # generate a new request_id
+        set_request_id(request_id)  # set the request_id for the current request
+        metadata = self._build_auth_metadata(request_id)  # add the request_id to the request headers
+
+        # log the start of the request
+        logger.info(
+            f"🔵 Batch Request Start | request_id: {request_id} | batch_size: {len(batch_request_model.items)} | batch_request_model: {batch_request_model}")
 
         # build the batch request
         items = []
@@ -378,11 +475,8 @@ class AsyncTamarModelClient:
             invoke_timeout = timeout or self.default_invoke_timeout
 
             # call the gRPC interface
-            response = await self.stub.BatchInvoke(
-                model_service_pb2.ModelRequest(items=items),
-                timeout=invoke_timeout,
-                metadata=metadata
-            )
+            response = await self._retry_request(self.stub.BatchInvoke, model_service_pb2.ModelRequest(items=items),
+                                                 timeout=invoke_timeout, metadata=metadata)
 
             result = []
             for res_item in response.items:
@@ -398,14 +492,19 @@ class AsyncTamarModelClient:
                 responses=result
             )
         except grpc.RpcError as e:
-            … (old line 401 was not captured in this diff view)
+            error_message = f"❌ BatchInvoke gRPC failed: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+        except Exception as e:
+            error_message = f"❌ BatchInvoke other error: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
 
     async def close(self):
         """Close the gRPC channel."""
         if self.channel and not self._closed:
             await self.channel.close()
             self._closed = True
-            await self.channel.close()
             logger.info("✅ gRPC channel closed")
 
     def _safe_sync_close(self):

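Taken together, the async-client changes add per-request tracing (an `x-request-id` header plus a `[request_id]` field in every log line) and retrying with exponential backoff. A minimal usage sketch follows; the top-level `AsyncTamarModelClient` export, the `UserContext` import location, and the `messages` input field are illustrative assumptions not shown in this diff:

```python
import asyncio

from tamar_model_client import AsyncTamarModelClient               # assumed export path
from tamar_model_client.enums import ProviderType, InvokeType
from tamar_model_client.schemas import ModelRequest, UserContext   # UserContext location assumed


async def main():
    # max_retries / retry_delay are now Optional; when left as None they fall back to
    # MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES / MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY.
    client = AsyncTamarModelClient(
        server_address="localhost:50051",
        jwt_secret_key="dev-secret",
        max_retries=5,
        retry_delay=0.5,
    )

    request = ModelRequest(
        provider=ProviderType.OPENAI,
        invoke_type=InvokeType.CHAT_COMPLETIONS,
        stream=False,
        user_context=UserContext(org_id="org-1", user_id="u-1", client_type="sdk"),
        messages=[{"role": "user", "content": "hello"}],  # hypothetical input field
    )

    # If request_id is omitted, the client generates a uuid4; either way it is sent
    # as the x-request-id metadata entry and stamped into every log line.
    response = await client.invoke(request, timeout=30.0, request_id="trace-123")
    print(response.content)

    await client.close()


asyncio.run(main())
```
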
{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/generated/model_service_pb2.py
@@ -2,7 +2,7 @@
 # Generated by the protocol buffer compiler. DO NOT EDIT!
 # NO CHECKED-IN PROTOBUF GENCODE
 # source: model_service.proto
-# Protobuf Python Version: 5.
+# Protobuf Python Version: 5.27.2
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
@@ -12,8 +12,8 @@ from google.protobuf.internal import builder as _builder
 _runtime_version.ValidateProtobufRuntimeVersion(
     _runtime_version.Domain.PUBLIC,
     5,
-    … (old minor version not captured)
-    … (old patch version not captured)
+    27,
+    2,
     '',
     'model_service.proto'
 )

{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client/schemas/inputs.py
@@ -127,11 +127,16 @@ class OpenAIChatCompletionsInput(BaseModel):
 
 class OpenAIImagesInput(BaseModel):
     prompt: str
+    background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN
     model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN
+    moderation: Optional[Literal["low", "auto"]] | NotGiven = NOT_GIVEN
     n: Optional[int] | NotGiven = NOT_GIVEN
+    output_compression: Optional[int] | NotGiven = NOT_GIVEN
+    output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN
     quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN
     response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN
-    size: Optional[Literal[
+    size: Optional[Literal[
+        "auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN
     style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN
     user: str | NotGiven = NOT_GIVEN
     extra_headers: Headers | None = None
@@ -301,7 +306,7 @@ class BatchModelRequestItem(ModelRequestInput):
     def validate_by_provider_and_invoke_type(self) -> "BatchModelRequestItem":
         """Dynamically validate the concrete input-model fields based on provider and invoke_type."""
         # dynamically collect the allowed fields
-        base_allowed = {"provider", "channel", "invoke_type", "user_context"}
+        base_allowed = {"provider", "channel", "invoke_type", "user_context", "custom_id"}
         google_allowed = base_allowed | set(GoogleGenAiInput.model_fields.keys())
         openai_responses_allowed = base_allowed | set(OpenAIResponsesInput.model_fields.keys())
         openai_chat_allowed = base_allowed | set(OpenAIChatCompletionsInput.model_fields.keys())
tamar_model_client-0.1.15/tamar_model_client/sync_client.py (new file)
@@ -0,0 +1,509 @@
+import base64
+import json
+import logging
+import os
+import time
+import uuid
+import grpc
+from typing import Optional, Union, Iterable, Iterator
+from contextvars import ContextVar
+
+from openai import NOT_GIVEN
+from pydantic import BaseModel
+
+from .auth import JWTAuthHandler
+from .enums import ProviderType, InvokeType
+from .exceptions import ConnectionError
+from .generated import model_service_pb2, model_service_pb2_grpc
+from .schemas import BatchModelResponse, ModelResponse
+from .schemas.inputs import GoogleGenAiInput, GoogleVertexAIImagesInput, OpenAIResponsesInput, \
+    OpenAIChatCompletionsInput, OpenAIImagesInput, BatchModelRequest, ModelRequest
+
+logger = logging.getLogger(__name__)
+
+_request_id: ContextVar[str] = ContextVar('request_id', default='-')
+
+
+class RequestIdFilter(logging.Filter):
+    """Custom log filter that adds the request_id to log records."""
+
+    def filter(self, record):
+        # fetch the current request_id from the ContextVar
+        record.request_id = _request_id.get()
+        return True
+
+
+if not logger.hasHandlers():
+    # create a log handler that writes to the console
+    console_handler = logging.StreamHandler()
+
+    # set the log format
+    formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(request_id)s] %(message)s')
+    console_handler.setFormatter(formatter)
+
+    # attach the handler to this logger
+    logger.addHandler(console_handler)
+
+    # set the log level
+    logger.setLevel(logging.INFO)
+
+    # add the custom RequestIdFilter to the logger
+    logger.addFilter(RequestIdFilter())
+
+MAX_MESSAGE_LENGTH = 2 ** 31 - 1  # for 32-bit systems
+
+
+def is_effective_value(value) -> bool:
+    """
+    Recursively decide whether value is a meaningful, effective value.
+    """
+    if value is None or value is NOT_GIVEN:
+        return False
+
+    if isinstance(value, str):
+        return value.strip() != ""
+
+    if isinstance(value, bytes):
+        return len(value) > 0
+
+    if isinstance(value, dict):
+        for v in value.values():
+            if is_effective_value(v):
+                return True
+        return False
+
+    if isinstance(value, list):
+        for item in value:
+            if is_effective_value(item):
+                return True
+        return False
+
+    return True  # other types (int/float/bool) count as effective as long as they are not None
+
+
+def serialize_value(value):
+    """Recursively process a single value, handling BaseModel, dict, list and bytes."""
+    if not is_effective_value(value):
+        return None
+    if isinstance(value, BaseModel):
+        return serialize_value(value.model_dump())
+    if hasattr(value, "dict") and callable(value.dict):
+        return serialize_value(value.dict())
+    if isinstance(value, dict):
+        return {k: serialize_value(v) for k, v in value.items()}
+    if isinstance(value, list) or (isinstance(value, Iterable) and not isinstance(value, (str, bytes))):
+        return [serialize_value(v) for v in value]
+    if isinstance(value, bytes):
+        return f"bytes:{base64.b64encode(value).decode('utf-8')}"
+    return value
+
+
+from typing import Any
+
+
+def remove_none_from_dict(data: Any) -> Any:
+    """
+    Walk dicts/lists and recursively drop fields whose value is None.
+    """
+    if isinstance(data, dict):
+        new_dict = {}
+        for key, value in data.items():
+            if value is None:
+                continue
+            cleaned_value = remove_none_from_dict(value)
+            new_dict[key] = cleaned_value
+        return new_dict
+    elif isinstance(data, list):
+        return [remove_none_from_dict(item) for item in data]
+    else:
+        return data
+
+
+def generate_request_id():
+    """Generate a unique request_id."""
+    return str(uuid.uuid4())
+
+
+def set_request_id(request_id: str):
+    """Set the request_id for the current request."""
+    _request_id.set(request_id)
+
+
+class TamarModelClient:
+    def __init__(
+            self,
+            server_address: Optional[str] = None,
+            jwt_secret_key: Optional[str] = None,
+            jwt_token: Optional[str] = None,
+            default_payload: Optional[dict] = None,
+            token_expires_in: int = 3600,
+            max_retries: Optional[int] = None,  # maximum number of retries
+            retry_delay: Optional[float] = None,  # initial retry delay (seconds)
+    ):
+        self.server_address = server_address or os.getenv("MODEL_MANAGER_SERVER_ADDRESS")
+        if not self.server_address:
+            raise ValueError("Server address must be provided via argument or environment variable.")
+        self.default_invoke_timeout = float(os.getenv("MODEL_MANAGER_SERVER_INVOKE_TIMEOUT", 30.0))
+
+        # JWT configuration
+        self.jwt_secret_key = jwt_secret_key or os.getenv("MODEL_MANAGER_SERVER_JWT_SECRET_KEY")
+        self.jwt_handler = JWTAuthHandler(self.jwt_secret_key)
+        self.jwt_token = jwt_token  # token supplied by the caller (optional)
+        self.default_payload = default_payload
+        self.token_expires_in = token_expires_in
+
+        # === TLS/Authority configuration ===
+        self.use_tls = os.getenv("MODEL_MANAGER_SERVER_GRPC_USE_TLS", "true").lower() == "true"
+        self.default_authority = os.getenv("MODEL_MANAGER_SERVER_GRPC_DEFAULT_AUTHORITY")
+
+        # === retry configuration ===
+        self.max_retries = max_retries if max_retries is not None else int(
+            os.getenv("MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES", 3))
+        self.retry_delay = retry_delay if retry_delay is not None else float(
+            os.getenv("MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY", 1.0))
+
+        # === gRPC channel state ===
+        self.channel: Optional[grpc.Channel] = None
+        self.stub: Optional[model_service_pb2_grpc.ModelServiceStub] = None
+        self._closed = False
+
+    def _retry_request(self, func, *args, **kwargs):
+        retry_count = 0
+        while retry_count < self.max_retries:
+            try:
+                return func(*args, **kwargs)
+            except (grpc.RpcError) as e:
+                if e.code() in {grpc.StatusCode.UNAVAILABLE, grpc.StatusCode.DEADLINE_EXCEEDED}:
+                    retry_count += 1
+                    logger.error(f"❌ gRPC error {e.code()}, retrying {retry_count}/{self.max_retries}...")
+                    if retry_count < self.max_retries:
+                        delay = self.retry_delay * (2 ** (retry_count - 1))
+                        time.sleep(delay)
+                    else:
+                        logger.error(f"❌ Max retry reached for {e.code()}")
+                        raise
+                else:
+                    logger.error(f"❌ Non-retryable gRPC error: {e}", exc_info=True)
+                    raise
+
+    def _build_auth_metadata(self, request_id: str) -> list:
+        metadata = [("x-request-id", request_id)]  # add the request_id to the headers
+        if self.jwt_handler:
+            self.jwt_token = self.jwt_handler.encode_token(self.default_payload, expires_in=self.token_expires_in)
+            metadata.append(("authorization", f"Bearer {self.jwt_token}"))
+        return metadata
+
+    def _ensure_initialized(self):
+        """Initialize the gRPC channel, with TLS and retry support."""
+        if self.channel and self.stub:
+            return
+
+        retry_count = 0
+        options = [
+            ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
+            ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
+            ('grpc.keepalive_permit_without_calls', True)  # keep the connection alive even with no active calls
+        ]
+        if self.default_authority:
+            options.append(("grpc.default_authority", self.default_authority))
+
+        while retry_count <= self.max_retries:
+            try:
+                if self.use_tls:
+                    credentials = grpc.ssl_channel_credentials()
+                    self.channel = grpc.secure_channel(
+                        self.server_address,
+                        credentials,
+                        options=options
+                    )
+                    logger.info("🔐 Using secure gRPC channel (TLS enabled)")
+                else:
+                    self.channel = grpc.insecure_channel(
+                        self.server_address,
+                        options=options
+                    )
+                    logger.info("🔓 Using insecure gRPC channel (TLS disabled)")
+
+                # Wait for the channel to be ready (synchronously)
+                grpc.channel_ready_future(self.channel).result()  # This is blocking in sync mode
+
+                self.stub = model_service_pb2_grpc.ModelServiceStub(self.channel)
+                logger.info(f"✅ gRPC channel initialized to {self.server_address}")
+                return
+            except grpc.FutureTimeoutError as e:
+                logger.error(f"❌ gRPC channel initialization timed out: {str(e)}", exc_info=True)
+            except grpc.RpcError as e:
+                logger.error(f"❌ gRPC channel initialization failed: {str(e)}", exc_info=True)
+            except Exception as e:
+                logger.error(f"❌ Unexpected error during channel initialization: {str(e)}", exc_info=True)
+
+            retry_count += 1
+            if retry_count > self.max_retries:
+                logger.error(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.", exc_info=True)
+                raise ConnectionError(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.")
+
+            # exponential backoff: delay = retry_delay * (2 ^ (retry_count - 1))
+            delay = self.retry_delay * (2 ** (retry_count - 1))
+            logger.info(f"🚀 Retrying connection (attempt {retry_count}/{self.max_retries}) after {delay:.2f}s delay...")
+            time.sleep(delay)  # Blocking sleep in sync version
+
+    def _stream_inner(self, model_request, metadata, invoke_timeout) -> Iterator[ModelResponse]:
+        """Inner function to handle the actual streaming gRPC call."""
+        response = self.stub.Invoke(model_request, metadata=metadata, timeout=invoke_timeout)
+        for res in response:
+            yield ModelResponse(
+                content=res.content,
+                usage=json.loads(res.usage) if res.usage else None,
+                raw_response=json.loads(res.raw_response) if res.raw_response else None,
+                error=res.error or None,
+            )
+
+    def _stream(self, model_request, metadata, invoke_timeout) -> Iterator[ModelResponse]:
+        return self._retry_request(self._stream_inner, model_request, metadata, invoke_timeout)
+
+    def _invoke_request(self, request, metadata, invoke_timeout):
+        response = self.stub.Invoke(request, metadata=metadata, timeout=invoke_timeout)
+        for response in response:
+            return ModelResponse(
+                content=response.content,
+                usage=json.loads(response.usage) if response.usage else None,
+                error=response.error or None,
+                request_id=response.request_id if response.request_id else None,
+            )
+
+    def invoke(self, model_request: ModelRequest, timeout: Optional[float] = None, request_id: Optional[str] = None) -> \
+            Union[ModelResponse, Iterator[ModelResponse]]:
+        """
+        Generic model invocation method.
+
+        Args:
+            model_request: a ModelRequest object containing the request parameters.
+            timeout: Optional[float]
+            request_id: Optional[str]
+        Yields:
+            ModelResponse: a streaming or non-streaming model response
+
+        Raises:
+            ValidationError: input validation failed.
+            ConnectionError: failed to connect to the server.
+        """
+        self._ensure_initialized()
+
+        if not self.default_payload:
+            self.default_payload = {
+                "org_id": model_request.user_context.org_id or "",
+                "user_id": model_request.user_context.user_id or ""
+            }
+
+        if not request_id:
+            request_id = generate_request_id()  # generate a new request_id
+        set_request_id(request_id)  # set the request_id for the current request
+        metadata = self._build_auth_metadata(request_id)  # add the request_id to the request headers
+
+        # log the start of the request
+        logger.info(
+            f"🔵 Request Start | request_id: {request_id} | provider: {model_request.provider} | invoke_type: {model_request.invoke_type} | model_request: {model_request}")
+
+        # dynamically decide which input fields to use based on provider/invoke_type
+        try:
+            # select the set of fields to validate
+            # dynamic dispatch logic
+            match (model_request.provider, model_request.invoke_type):
+                case (ProviderType.GOOGLE, InvokeType.GENERATION):
+                    allowed_fields = GoogleGenAiInput.model_fields.keys()
+                case (ProviderType.GOOGLE, InvokeType.IMAGE_GENERATION):
+                    allowed_fields = GoogleVertexAIImagesInput.model_fields.keys()
+                case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.RESPONSES | InvokeType.GENERATION):
+                    allowed_fields = OpenAIResponsesInput.model_fields.keys()
+                case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.CHAT_COMPLETIONS):
+                    allowed_fields = OpenAIChatCompletionsInput.model_fields.keys()
+                case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_GENERATION):
+                    allowed_fields = OpenAIImagesInput.model_fields.keys()
+                case _:
+                    raise ValueError(
+                        f"Unsupported provider/invoke_type combination: {model_request.provider} + {model_request.invoke_type}")
+
+            # convert the ModelRequest to a dict, keeping only base + allowed fields
+            model_request_dict = model_request.model_dump(exclude_unset=True)
+
+            grpc_request_kwargs = {}
+            for field in allowed_fields:
+                if field in model_request_dict:
+                    value = model_request_dict[field]
+
+                    # skip ineffective values
+                    if not is_effective_value(value):
+                        continue
+
+                    # serialize types gRPC does not support
+                    grpc_request_kwargs[field] = serialize_value(value)
+
+            # clean up grpc_request_kwargs after serialization
+            grpc_request_kwargs = remove_none_from_dict(grpc_request_kwargs)
+
+            request = model_service_pb2.ModelRequestItem(
+                provider=model_request.provider.value,
+                channel=model_request.channel.value,
+                invoke_type=model_request.invoke_type.value,
+                stream=model_request.stream or False,
+                org_id=model_request.user_context.org_id or "",
+                user_id=model_request.user_context.user_id or "",
+                client_type=model_request.user_context.client_type or "",
+                extra=grpc_request_kwargs
+            )
+
+        except Exception as e:
+            raise ValueError(f"Failed to build request: {str(e)}") from e
+
+        try:
+            invoke_timeout = timeout or self.default_invoke_timeout
+            if model_request.stream:
+                return self._stream(request, metadata, invoke_timeout)
+            else:
+                return self._retry_request(self._invoke_request, request, metadata, invoke_timeout)
+        except grpc.RpcError as e:
+            error_message = f"❌ Invoke gRPC failed: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+        except Exception as e:
+            error_message = f"❌ Invoke other error: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+
+    def invoke_batch(self, batch_request_model: BatchModelRequest, timeout: Optional[float] = None,
+                     request_id: Optional[str] = None) -> BatchModelResponse:
+        """
+        Batch model invocation interface
+
+        Args:
+            batch_request_model: multiple BatchModelRequest inputs
+            timeout: invocation timeout, in seconds
+            request_id: request id
+        Returns:
+            BatchModelResponse: the result of the batch request
+        """
+
+        self._ensure_initialized()
+
+        if not self.default_payload:
+            self.default_payload = {
+                "org_id": batch_request_model.user_context.org_id or "",
+                "user_id": batch_request_model.user_context.user_id or ""
+            }
+
+        if not request_id:
+            request_id = generate_request_id()  # generate a new request_id
+        set_request_id(request_id)  # set the request_id for the current request
+        metadata = self._build_auth_metadata(request_id)  # add the request_id to the request headers
+
+        # log the start of the request
+        logger.info(
+            f"🔵 Batch Request Start | request_id: {request_id} | batch_size: {len(batch_request_model.items)} | batch_request_model: {batch_request_model}")
+
+        # build the batch request
+        items = []
+        for model_request_item in batch_request_model.items:
+            # dynamically decide which input fields to use based on provider/invoke_type
+            try:
+                match (model_request_item.provider, model_request_item.invoke_type):
+                    case (ProviderType.GOOGLE, InvokeType.GENERATION):
+                        allowed_fields = GoogleGenAiInput.model_fields.keys()
+                    case (ProviderType.GOOGLE, InvokeType.IMAGE_GENERATION):
+                        allowed_fields = GoogleVertexAIImagesInput.model_fields.keys()
+                    case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.RESPONSES | InvokeType.GENERATION):
+                        allowed_fields = OpenAIResponsesInput.model_fields.keys()
+                    case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.CHAT_COMPLETIONS):
+                        allowed_fields = OpenAIChatCompletionsInput.model_fields.keys()
+                    case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_GENERATION):
+                        allowed_fields = OpenAIImagesInput.model_fields.keys()
+                    case _:
+                        raise ValueError(
+                            f"Unsupported provider/invoke_type combination: {model_request_item.provider} + {model_request_item.invoke_type}")
+
+                # convert the ModelRequest to a dict, keeping only base + allowed fields
+                model_request_dict = model_request_item.model_dump(exclude_unset=True)
+
+                grpc_request_kwargs = {}
+                for field in allowed_fields:
+                    if field in model_request_dict:
+                        value = model_request_dict[field]
+
+                        # skip ineffective values
+                        if not is_effective_value(value):
+                            continue
+
+                        # serialize types gRPC does not support
+                        grpc_request_kwargs[field] = serialize_value(value)
+
+                # clean up grpc_request_kwargs after serialization
+                grpc_request_kwargs = remove_none_from_dict(grpc_request_kwargs)
+
+                items.append(model_service_pb2.ModelRequestItem(
+                    provider=model_request_item.provider.value,
+                    channel=model_request_item.channel.value,
+                    invoke_type=model_request_item.invoke_type.value,
+                    stream=model_request_item.stream or False,
+                    custom_id=model_request_item.custom_id or "",
+                    priority=model_request_item.priority or 1,
+                    org_id=batch_request_model.user_context.org_id or "",
+                    user_id=batch_request_model.user_context.user_id or "",
+                    client_type=batch_request_model.user_context.client_type or "",
+                    extra=grpc_request_kwargs,
+                ))
+
+            except Exception as e:
+                raise ValueError(f"Failed to build request: {str(e)}, item={model_request_item.custom_id}") from e
+
+        try:
+            # timeout handling
+            invoke_timeout = timeout or self.default_invoke_timeout
+
+            # call the gRPC interface
+            response = self._retry_request(self.stub.BatchInvoke, model_service_pb2.ModelRequest(items=items),
+                                           timeout=invoke_timeout, metadata=metadata)
+
+            result = []
+            for res_item in response.items:
+                result.append(ModelResponse(
+                    content=res_item.content,
+                    usage=json.loads(res_item.usage) if res_item.usage else None,
+                    raw_response=json.loads(res_item.raw_response) if res_item.raw_response else None,
+                    error=res_item.error or None,
+                    custom_id=res_item.custom_id if res_item.custom_id else None
+                ))
+            return BatchModelResponse(
+                request_id=response.request_id if response.request_id else None,
+                responses=result
+            )
+        except grpc.RpcError as e:
+            error_message = f"❌ BatchInvoke gRPC failed: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+        except Exception as e:
+            error_message = f"❌ BatchInvoke other error: {str(e)}"
+            logger.error(error_message, exc_info=True)
+            raise e
+
+    def close(self):
+        """Close the gRPC channel."""
+        if self.channel and not self._closed:
+            self.channel.close()
+            self._closed = True
+            logger.info("✅ gRPC channel closed")
+
+    def _safe_sync_close(self):
+        """Automatically close the channel at process exit (event-loop compatible)."""
+        if self.channel and not self._closed:
+            try:
+                self.close()  # call close directly
+            except Exception as e:
+                logger.error(f"❌ gRPC channel close failed at exit: {e}")
+
+    def __enter__(self):
+        """Synchronously initialize the connection."""
+        self._ensure_initialized()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Synchronously close the connection."""
+        self.close()
{tamar_model_client-0.1.8 → tamar_model_client-0.1.15}/tamar_model_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tamar-model-client
-Version: 0.1.8
+Version: 0.1.15
 Summary: A Python SDK for interacting with the Model Manager gRPC service
 Home-page: http://gitlab.tamaredge.top/project-tap/AgentOS/model-manager-client
 Author: Oscar Ou
@@ -11,8 +11,8 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: grpcio
-Requires-Dist: grpcio-tools
+Requires-Dist: grpcio~=1.67.1
+Requires-Dist: grpcio-tools~=1.67.1
 Requires-Dist: pydantic
 Requires-Dist: PyJWT
 Requires-Dist: nest_asyncio
@@ -528,6 +528,13 @@ pip install -e .
 python make_grpc.py
 ```
 
+### Deploy to pip
+```bash
+python setup.py sdist bdist_wheel
+twine check dist/*
+
+```
+
 ## License
 
 MIT License

tamar_model_client-0.1.8/tamar_model_client/sync_client.py (removed)
@@ -1,111 +0,0 @@
-import asyncio
-import atexit
-import logging
-from typing import Optional, Union, Iterator
-
-from .async_client import AsyncTamarModelClient
-from .schemas import ModelRequest, BatchModelRequest, ModelResponse, BatchModelResponse
-
-logger = logging.getLogger(__name__)
-
-
-class TamarModelClient:
-    """
-    Synchronous model manager client for non-async environments (e.g. Flask, Django, scripts).
-    Wraps AsyncTamarModelClient internally and handles event-loop compatibility.
-    """
-    _loop: Optional[asyncio.AbstractEventLoop] = None
-
-    def __init__(
-            self,
-            server_address: Optional[str] = None,
-            jwt_secret_key: Optional[str] = None,
-            jwt_token: Optional[str] = None,
-            default_payload: Optional[dict] = None,
-            token_expires_in: int = 3600,
-            max_retries: int = 3,
-            retry_delay: float = 1.0,
-    ):
-        # initialize the global event loop, created only once
-        if not TamarModelClient._loop:
-            try:
-                TamarModelClient._loop = asyncio.get_running_loop()
-            except RuntimeError:
-                TamarModelClient._loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(TamarModelClient._loop)
-
-        self._loop = TamarModelClient._loop
-
-        self._async_client = AsyncTamarModelClient(
-            server_address=server_address,
-            jwt_secret_key=jwt_secret_key,
-            jwt_token=jwt_token,
-            default_payload=default_payload,
-            token_expires_in=token_expires_in,
-            max_retries=max_retries,
-            retry_delay=retry_delay,
-        )
-        atexit.register(self._safe_sync_close)
-
-    def invoke(self, model_request: ModelRequest, timeout: Optional[float] = None) -> Union[
-        ModelResponse, Iterator[ModelResponse]]:
-        """
-        Synchronously invoke a single model task
-        """
-        if model_request.stream:
-            async def stream():
-                async for r in await self._async_client.invoke(model_request, timeout=timeout):
-                    yield r
-
-            return self._sync_wrap_async_generator(stream())
-        return self._run_async(self._async_client.invoke(model_request, timeout=timeout))
-
-    def invoke_batch(self, batch_model_request: BatchModelRequest,
-                     timeout: Optional[float] = None) -> BatchModelResponse:
-        """
-        Synchronously invoke a batch of model tasks
-        """
-        return self._run_async(self._async_client.invoke_batch(batch_model_request, timeout=timeout))
-
-    def close(self):
-        """Manually close the gRPC channel."""
-        self._run_async(self._async_client.close())
-
-    def _safe_sync_close(self):
-        """Automatically close at exit."""
-        try:
-            self._run_async(self._async_client.close())
-            logger.info("✅ gRPC channel closed at exit")
-        except Exception as e:
-            logger.warning(f"❌ gRPC channel close failed at exit: {e}")
-
-    def _run_async(self, coro):
-        """Run a coroutine, compatible with an already-running event loop."""
-        try:
-            loop = asyncio.get_running_loop()
-            import nest_asyncio
-            nest_asyncio.apply()
-            return loop.run_until_complete(coro)
-        except RuntimeError:
-            return self._loop.run_until_complete(coro)
-
-    def _sync_wrap_async_generator(self, async_gen_func):
-        """
-        Convert an async generator into a sync generator, yielding items one at a time.
-        """
-        loop = self._loop
-
-        # the async generator object
-        agen = async_gen_func
-
-        class SyncGenerator:
-            def __iter__(self_inner):
-                return self_inner
-
-            def __next__(self_inner):
-                try:
-                    return loop.run_until_complete(agen.__anext__())
-                except StopAsyncIteration:
-                    raise StopIteration
-
-        return SyncGenerator()

The remaining files listed above with +0 -0 were renamed from tamar_model_client-0.1.8 to tamar_model_client-0.1.15 without content changes.