tamar-model-client 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,215 +1,152 @@
1
- import base64
1
+ """
2
+ Tamar Model Client 同步客户端实现
3
+
4
+ 本模块实现了同步的 gRPC 客户端,用于与 Model Manager Server 进行通信。
5
+ 提供了与异步客户端相同的功能,但使用同步 API,适合在同步环境中使用。
6
+
7
+ 主要功能:
8
+ - 同步 gRPC 通信
9
+ - JWT 认证
10
+ - 自动重试和错误处理
11
+ - 连接池管理
12
+ - 详细的日志记录
13
+
14
+ 使用示例:
15
+ with TamarModelClient() as client:
16
+ request = ModelRequest(...)
17
+ response = client.invoke(request)
18
+
19
+ 注意:对于需要高并发的场景,建议使用 AsyncTamarModelClient
20
+ """
21
+
2
22
  import json
3
23
  import logging
4
- import os
24
+ import random
5
25
  import time
6
- import uuid
7
- import grpc
8
- from typing import Optional, Union, Iterable, Iterator
9
- from contextvars import ContextVar
26
+ from typing import Optional, Union, Iterator
10
27
 
11
- from openai import NOT_GIVEN
12
- from pydantic import BaseModel
28
+ import grpc
13
29
 
14
- from .auth import JWTAuthHandler
15
- from .enums import ProviderType, InvokeType
16
- from .exceptions import ConnectionError
30
+ from .core import (
31
+ generate_request_id,
32
+ set_request_id,
33
+ setup_logger,
34
+ MAX_MESSAGE_LENGTH
35
+ )
36
+ from .core.base_client import BaseClient
37
+ from .core.request_builder import RequestBuilder
38
+ from .core.response_handler import ResponseHandler
39
+ from .exceptions import ConnectionError, TamarModelException, is_retryable_error
17
40
  from .generated import model_service_pb2, model_service_pb2_grpc
18
41
  from .schemas import BatchModelResponse, ModelResponse
19
- from .schemas.inputs import GoogleGenAiInput, GoogleVertexAIImagesInput, OpenAIResponsesInput, \
20
- OpenAIChatCompletionsInput, OpenAIImagesInput, OpenAIImagesEditInput, BatchModelRequest, ModelRequest
21
- from .json_formatter import JSONFormatter
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
- _request_id: ContextVar[str] = ContextVar('request_id', default='-')
26
-
27
-
28
- class RequestIdFilter(logging.Filter):
29
- """自定义日志过滤器,向日志中添加 request_id"""
30
-
31
- def filter(self, record):
32
- # 从 ContextVar 中获取当前的 request_id
33
- record.request_id = _request_id.get()
34
- return True
35
-
36
-
37
- if not logger.hasHandlers():
38
- # 创建日志处理器,输出到控制台
39
- console_handler = logging.StreamHandler()
40
-
41
- # 使用 JSON 格式化器
42
- formatter = JSONFormatter()
43
- console_handler.setFormatter(formatter)
44
-
45
- # 为当前记录器添加处理器
46
- logger.addHandler(console_handler)
47
-
48
- # 设置日志级别
49
- logger.setLevel(logging.INFO)
50
-
51
- # 将自定义的 RequestIdFilter 添加到 logger 中
52
- logger.addFilter(RequestIdFilter())
42
+ from .schemas.inputs import BatchModelRequest, ModelRequest
53
43
 
54
- MAX_MESSAGE_LENGTH = 2 ** 31 - 1 # 对于32位系统
44
+ # 配置日志记录器
45
+ logger = setup_logger(__name__)
55
46
 
56
47
 
57
- def is_effective_value(value) -> bool:
48
+ class TamarModelClient(BaseClient):
58
49
  """
59
- 递归判断value是否是有意义的有效值
60
- """
61
- if value is None or value is NOT_GIVEN:
62
- return False
63
-
64
- if isinstance(value, str):
65
- return value.strip() != ""
66
-
67
- if isinstance(value, bytes):
68
- return len(value) > 0
69
-
70
- if isinstance(value, dict):
71
- for v in value.values():
72
- if is_effective_value(v):
73
- return True
74
- return False
75
-
76
- if isinstance(value, list):
77
- for item in value:
78
- if is_effective_value(item):
79
- return True
80
- return False
81
-
82
- return True # 其他类型(int/float/bool)只要不是None就算有效
83
-
84
-
85
- def serialize_value(value):
86
- """递归处理单个值,处理BaseModel, dict, list, bytes"""
87
- if not is_effective_value(value):
88
- return None
89
- if isinstance(value, BaseModel):
90
- return serialize_value(value.model_dump())
91
- if hasattr(value, "dict") and callable(value.dict):
92
- return serialize_value(value.dict())
93
- if isinstance(value, dict):
94
- return {k: serialize_value(v) for k, v in value.items()}
95
- if isinstance(value, list) or (isinstance(value, Iterable) and not isinstance(value, (str, bytes))):
96
- return [serialize_value(v) for v in value]
97
- if isinstance(value, bytes):
98
- return f"bytes:{base64.b64encode(value).decode('utf-8')}"
99
- return value
100
-
101
-
102
- from typing import Any
103
-
104
-
105
- def remove_none_from_dict(data: Any) -> Any:
106
- """
107
- 遍历 dict/list,递归删除 value 为 None 的字段
50
+ Tamar Model Client 同步客户端
51
+
52
+ 提供与 Model Manager Server 的同步通信能力,支持:
53
+ - 单个和批量模型调用
54
+ - 流式和非流式响应
55
+ - 自动重试和错误恢复
56
+ - JWT 认证
57
+ - 连接池管理
58
+
59
+ 使用示例:
60
+ # 基本用法
61
+ client = TamarModelClient()
62
+ client.connect()
63
+
64
+ request = ModelRequest(...)
65
+ response = client.invoke(request)
66
+
67
+ # 上下文管理器用法(推荐)
68
+ with TamarModelClient() as client:
69
+ response = client.invoke(request)
70
+
71
+ 环境变量配置:
72
+ MODEL_MANAGER_SERVER_ADDRESS: gRPC 服务器地址
73
+ MODEL_MANAGER_SERVER_JWT_SECRET_KEY: JWT 密钥
74
+ MODEL_MANAGER_SERVER_GRPC_USE_TLS: 是否使用 TLS
75
+ MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES: 最大重试次数
76
+ MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY: 重试延迟
108
77
  """
109
- if isinstance(data, dict):
110
- new_dict = {}
111
- for key, value in data.items():
112
- if value is None:
113
- continue
114
- cleaned_value = remove_none_from_dict(value)
115
- new_dict[key] = cleaned_value
116
- return new_dict
117
- elif isinstance(data, list):
118
- return [remove_none_from_dict(item) for item in data]
119
- else:
120
- return data
121
-
122
-
123
- def generate_request_id():
124
- """生成一个唯一的request_id"""
125
- return str(uuid.uuid4())
126
-
127
-
128
- def set_request_id(request_id: str):
129
- """设置当前请求的 request_id"""
130
- _request_id.set(request_id)
131
-
132
-
133
- class TamarModelClient:
134
- def __init__(
135
- self,
136
- server_address: Optional[str] = None,
137
- jwt_secret_key: Optional[str] = None,
138
- jwt_token: Optional[str] = None,
139
- default_payload: Optional[dict] = None,
140
- token_expires_in: int = 3600,
141
- max_retries: Optional[int] = None, # 最大重试次数
142
- retry_delay: Optional[float] = None, # 初始重试延迟(秒)
143
- ):
144
- self.server_address = server_address or os.getenv("MODEL_MANAGER_SERVER_ADDRESS")
145
- if not self.server_address:
146
- raise ValueError("Server address must be provided via argument or environment variable.")
147
- self.default_invoke_timeout = float(os.getenv("MODEL_MANAGER_SERVER_INVOKE_TIMEOUT", 30.0))
148
-
149
- # JWT 配置
150
- self.jwt_secret_key = jwt_secret_key or os.getenv("MODEL_MANAGER_SERVER_JWT_SECRET_KEY")
151
- self.jwt_handler = JWTAuthHandler(self.jwt_secret_key)
152
- self.jwt_token = jwt_token # 用户传入的 Token(可选)
153
- self.default_payload = default_payload
154
- self.token_expires_in = token_expires_in
155
-
156
- # === TLS/Authority 配置 ===
157
- self.use_tls = os.getenv("MODEL_MANAGER_SERVER_GRPC_USE_TLS", "true").lower() == "true"
158
- self.default_authority = os.getenv("MODEL_MANAGER_SERVER_GRPC_DEFAULT_AUTHORITY")
159
-
160
- # === 重试配置 ===
161
- self.max_retries = max_retries if max_retries is not None else int(
162
- os.getenv("MODEL_MANAGER_SERVER_GRPC_MAX_RETRIES", 3))
163
- self.retry_delay = retry_delay if retry_delay is not None else float(
164
- os.getenv("MODEL_MANAGER_SERVER_GRPC_RETRY_DELAY", 1.0))
165
-
166
- # === gRPC 通道相关 ===
78
+
79
+ def __init__(self, **kwargs):
80
+ """
81
+ 初始化同步客户端
82
+
83
+ 参数继承自 BaseClient,包括:
84
+ - server_address: gRPC 服务器地址
85
+ - jwt_secret_key: JWT 签名密钥
86
+ - jwt_token: 预生成的 JWT 令牌
87
+ - default_payload: JWT 令牌的默认载荷
88
+ - token_expires_in: JWT 令牌过期时间
89
+ - max_retries: 最大重试次数
90
+ - retry_delay: 初始重试延迟
91
+ """
92
+ super().__init__(logger_name=__name__, **kwargs)
93
+
94
+ # === gRPC 通道和连接管理 ===
167
95
  self.channel: Optional[grpc.Channel] = None
168
96
  self.stub: Optional[model_service_pb2_grpc.ModelServiceStub] = None
169
- self._closed = False
170
97
 
171
- def _retry_request(self, func, *args, **kwargs):
172
- retry_count = 0
173
- while retry_count < self.max_retries:
174
- try:
175
- return func(*args, **kwargs)
176
- except (grpc.RpcError) as e:
177
- if e.code() in {grpc.StatusCode.UNAVAILABLE, grpc.StatusCode.DEADLINE_EXCEEDED}:
178
- retry_count += 1
179
- logger.info(f"❌ gRPC error {e.code()}, retrying {retry_count}/{self.max_retries}...",
180
- extra={"log_type": "info", "data": {"retry_count": retry_count, "max_retries": self.max_retries, "error_code": str(e.code())}})
181
- if retry_count < self.max_retries:
182
- delay = self.retry_delay * (2 ** (retry_count - 1))
183
- time.sleep(delay)
184
- else:
185
- logger.error(f"❌ Max retry reached for {e.code()}",
186
- extra={"log_type": "info", "data": {"error_code": str(e.code()), "max_retries_reached": True}})
187
- raise
188
- else:
189
- logger.error(f"❌ Non-retryable gRPC error: {e}", exc_info=True,
190
- extra={"log_type": "info", "data": {"error_code": str(e.code()) if hasattr(e, 'code') else None, "retryable": False}})
191
- raise
98
+ def close(self):
99
+ """
100
+ 关闭客户端连接
101
+
102
+ 优雅地关闭 gRPC 通道并清理资源。
103
+ 建议在程序结束前调用此方法,或使用上下文管理器自动管理。
104
+ """
105
+ if self.channel and not self._closed:
106
+ self.channel.close()
107
+ self._closed = True
108
+ logger.info("🔒 gRPC channel closed",
109
+ extra={"log_type": "info", "data": {"status": "closed"}})
192
110
 
193
- def _build_auth_metadata(self, request_id: str) -> list:
194
- metadata = [("x-request-id", request_id)] # 将 request_id 添加到 headers
195
- if self.jwt_handler:
196
- self.jwt_token = self.jwt_handler.encode_token(self.default_payload, expires_in=self.token_expires_in)
197
- metadata.append(("authorization", f"Bearer {self.jwt_token}"))
198
- return metadata
111
+ def __enter__(self):
112
+ """上下文管理器入口"""
113
+ self.connect()
114
+ return self
115
+
116
+ def __exit__(self, exc_type, exc_val, exc_tb):
117
+ """上下文管理器出口"""
118
+ self.close()
119
+
120
+ def connect(self):
121
+ """
122
+ 显式连接到服务器
123
+
124
+ 建立与 gRPC 服务器的连接。通常不需要手动调用,
125
+ 因为 invoke 方法会自动确保连接已建立。
126
+ """
127
+ self._ensure_initialized()
199
128
 
200
129
  def _ensure_initialized(self):
201
- """初始化 gRPC 通道,支持 TLS 与重试机制"""
130
+ """
131
+ 初始化gRPC通道
132
+
133
+ 确保gRPC通道和存根已正确初始化。如果初始化失败,
134
+ 会进行重试,支持TLS配置和完整的keepalive选项。
135
+
136
+ 连接配置包括:
137
+ - 消息大小限制
138
+ - Keepalive设置(30秒ping间隔,10秒超时)
139
+ - 连接生命周期管理(1小时最大连接时间)
140
+ - 性能优化选项(带宽探测、内置重试)
141
+
142
+ Raises:
143
+ ConnectionError: 当达到最大重试次数仍无法连接时
144
+ """
202
145
  if self.channel and self.stub:
203
146
  return
204
147
 
205
148
  retry_count = 0
206
- options = [
207
- ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
208
- ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
209
- ('grpc.keepalive_permit_without_calls', True) # 即使没有活跃请求也保持连接
210
- ]
211
- if self.default_authority:
212
- options.append(("grpc.default_authority", self.default_authority))
149
+ options = self.build_channel_options()
213
150
 
214
151
  while retry_count <= self.max_retries:
215
152
  try:
@@ -221,61 +158,211 @@ class TamarModelClient:
221
158
  options=options
222
159
  )
223
160
  logger.info("🔐 Using secure gRPC channel (TLS enabled)",
224
- extra={"log_type": "info", "data": {"tls_enabled": True, "server_address": self.server_address}})
161
+ extra={"log_type": "info",
162
+ "data": {"tls_enabled": True, "server_address": self.server_address}})
225
163
  else:
226
164
  self.channel = grpc.insecure_channel(
227
165
  self.server_address,
228
166
  options=options
229
167
  )
230
168
  logger.info("🔓 Using insecure gRPC channel (TLS disabled)",
231
- extra={"log_type": "info", "data": {"tls_enabled": False, "server_address": self.server_address}})
232
-
233
- # Wait for the channel to be ready (synchronously)
234
- grpc.channel_ready_future(self.channel).result() # This is blocking in sync mode
235
-
169
+ extra={"log_type": "info",
170
+ "data": {"tls_enabled": False, "server_address": self.server_address}})
171
+
172
+ # 等待通道就绪
173
+ grpc.channel_ready_future(self.channel).result(timeout=10)
236
174
  self.stub = model_service_pb2_grpc.ModelServiceStub(self.channel)
237
175
  logger.info(f"✅ gRPC channel initialized to {self.server_address}",
238
- extra={"log_type": "info", "data": {"status": "success", "server_address": self.server_address}})
176
+ extra={"log_type": "info",
177
+ "data": {"status": "success", "server_address": self.server_address}})
239
178
  return
179
+
240
180
  except grpc.FutureTimeoutError as e:
241
181
  logger.error(f"❌ gRPC channel initialization timed out: {str(e)}", exc_info=True,
242
- extra={"log_type": "info", "data": {"error_type": "timeout", "server_address": self.server_address}})
182
+ extra={"log_type": "info",
183
+ "data": {"error_type": "timeout", "server_address": self.server_address}})
243
184
  except grpc.RpcError as e:
244
185
  logger.error(f"❌ gRPC channel initialization failed: {str(e)}", exc_info=True,
245
- extra={"log_type": "info", "data": {"error_type": "rpc_error", "server_address": self.server_address}})
186
+ extra={"log_type": "info",
187
+ "data": {"error_type": "grpc_error", "server_address": self.server_address}})
246
188
  except Exception as e:
247
- logger.error(f"❌ Unexpected error during channel initialization: {str(e)}", exc_info=True,
248
- extra={"log_type": "info", "data": {"error_type": "unexpected", "server_address": self.server_address}})
249
-
189
+ logger.error(f"❌ Unexpected error during gRPC channel initialization: {str(e)}", exc_info=True,
190
+ extra={"log_type": "info",
191
+ "data": {"error_type": "unknown", "server_address": self.server_address}})
192
+
250
193
  retry_count += 1
251
- if retry_count > self.max_retries:
252
- logger.error(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.", exc_info=True,
253
- extra={"log_type": "info", "data": {"max_retries_reached": True, "server_address": self.server_address}})
254
- raise ConnectionError(f"❌ Failed to initialize gRPC channel after {self.max_retries} retries.")
194
+ if retry_count <= self.max_retries:
195
+ time.sleep(self.retry_delay * retry_count)
255
196
 
256
- # 指数退避:延迟时间 = retry_delay * (2 ^ (retry_count - 1))
257
- delay = self.retry_delay * (2 ** (retry_count - 1))
258
- logger.info(f"🚀 Retrying connection (attempt {retry_count}/{self.max_retries}) after {delay:.2f}s delay...",
259
- extra={"log_type": "info", "data": {"retry_count": retry_count, "max_retries": self.max_retries, "delay": delay}})
260
- time.sleep(delay) # Blocking sleep in sync version
197
+ raise ConnectionError(f"Failed to connect to {self.server_address} after {self.max_retries} retries")
198
+
199
+ def _retry_request(self, func, *args, **kwargs):
200
+ """
201
+ 使用增强的错误处理器进行重试(同步版本)
202
+ """
203
+ # 构建请求上下文
204
+ context = {
205
+ 'method': func.__name__ if hasattr(func, '__name__') else 'unknown',
206
+ 'client_version': 'sync',
207
+ }
208
+
209
+ last_exception = None
210
+
211
+ for attempt in range(self.max_retries + 1):
212
+ try:
213
+ context['retry_count'] = attempt
214
+ return func(*args, **kwargs)
215
+
216
+ except grpc.RpcError as e:
217
+ # 使用新的错误处理逻辑
218
+ context['retry_count'] = attempt
219
+
220
+ # 判断是否可以重试
221
+ if not is_retryable_error(e.code()) or attempt >= self.max_retries:
222
+ # 不可重试或已达到最大重试次数
223
+ last_exception = self.error_handler.handle_error(e, context)
224
+ break
225
+
226
+ # 记录重试日志
227
+ logger.warning(
228
+ f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}",
229
+ extra=context
230
+ )
231
+
232
+ # 执行退避等待
233
+ if attempt < self.max_retries:
234
+ delay = self._calculate_backoff(attempt)
235
+ time.sleep(delay)
236
+
237
+ last_exception = self.error_handler.handle_error(e, context)
238
+
239
+ except Exception as e:
240
+ # 非 gRPC 错误,直接包装抛出
241
+ context['retry_count'] = attempt
242
+ last_exception = TamarModelException(str(e))
243
+ break
244
+
245
+ # 抛出最后的异常
246
+ if last_exception:
247
+ raise last_exception
248
+ else:
249
+ raise TamarModelException("Unknown error occurred")
250
+
251
+ def _calculate_backoff(self, attempt: int) -> float:
252
+ """计算退避时间"""
253
+ max_delay = 60.0
254
+ jitter_factor = 0.1
255
+
256
+ delay = min(self.retry_delay * (2 ** attempt), max_delay)
257
+ jitter = random.uniform(0, delay * jitter_factor)
258
+ return delay + jitter
259
+
260
+ def _retry_request_stream(self, func, *args, **kwargs):
261
+ """
262
+ 流式请求的重试逻辑(同步版本)
263
+
264
+ 对于流式响应,需要特殊的重试处理,因为流不能简单地重新执行。
265
+
266
+ Args:
267
+ func: 生成流的函数
268
+ *args: 函数参数
269
+ **kwargs: 函数关键字参数
270
+
271
+ Yields:
272
+ 流式响应的每个元素
273
+ """
274
+ last_exception = None
275
+
276
+ for attempt in range(self.max_retries + 1):
277
+ try:
278
+ # 尝试创建流
279
+ for item in func(*args, **kwargs):
280
+ yield item
281
+ return
282
+
283
+ except grpc.RpcError as e:
284
+ last_exception = e
285
+ if attempt < self.max_retries:
286
+ logger.warning(
287
+ f"Stream attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}",
288
+ extra={"retry_count": attempt, "error_code": str(e.code())}
289
+ )
290
+ time.sleep(self.retry_delay * (attempt + 1))
291
+ else:
292
+ break
293
+ except Exception as e:
294
+ raise TamarModelException(str(e)) from e
295
+
296
+ if last_exception:
297
+ raise self.error_handler.handle_error(last_exception, {"retry_count": self.max_retries})
298
+ else:
299
+ raise TamarModelException("Unknown streaming error occurred")
261
300
 
262
301
  def _stream(self, request, metadata, invoke_timeout) -> Iterator[ModelResponse]:
263
- for response in self.stub.Invoke(request, metadata=metadata, timeout=invoke_timeout):
264
- yield ModelResponse(
265
- content=response.content,
266
- usage=json.loads(response.usage) if response.usage else None,
267
- error=response.error or None,
268
- raw_response=json.loads(response.raw_response) if response.raw_response else None,
269
- request_id=response.request_id if response.request_id else None,
270
- )
271
-
272
- def _stream_with_logging(self, request, metadata, invoke_timeout, start_time, model_request) -> Iterator[ModelResponse]:
273
- """流式响应的包装器,用于记录完整的响应日志"""
302
+ """
303
+ 处理流式响应
304
+
305
+ Args:
306
+ request: gRPC 请求对象
307
+ metadata: 请求元数据
308
+ invoke_timeout: 总体超时时间
309
+
310
+ Yields:
311
+ ModelResponse: 流式响应的每个数据块
312
+
313
+ Raises:
314
+ TimeoutError: 当等待下一个数据块超时时
315
+ """
316
+ import threading
317
+ import queue
318
+
319
+ # 创建队列用于线程间通信
320
+ response_queue = queue.Queue()
321
+ exception_queue = queue.Queue()
322
+
323
+ def fetch_responses():
324
+ """在单独线程中获取流式响应"""
325
+ try:
326
+ for response in self.stub.Invoke(request, metadata=metadata, timeout=invoke_timeout):
327
+ response_queue.put(response)
328
+ response_queue.put(None) # 标记流结束
329
+ except Exception as e:
330
+ exception_queue.put(e)
331
+ response_queue.put(None)
332
+
333
+ # 启动响应获取线程
334
+ fetch_thread = threading.Thread(target=fetch_responses)
335
+ fetch_thread.daemon = True
336
+ fetch_thread.start()
337
+
338
+ chunk_timeout = 30.0 # 单个数据块的超时时间
339
+
340
+ while True:
341
+ # 检查是否有异常
342
+ if not exception_queue.empty():
343
+ raise exception_queue.get()
344
+
345
+ try:
346
+ # 等待下一个响应,带超时
347
+ response = response_queue.get(timeout=chunk_timeout)
348
+
349
+ if response is None:
350
+ # 流结束
351
+ break
352
+
353
+ yield ResponseHandler.build_model_response(response)
354
+
355
+ except queue.Empty:
356
+ raise TimeoutError(f"流式响应在等待下一个数据块时超时 ({chunk_timeout}s)")
357
+
358
+ def _stream_with_logging(self, request, metadata, invoke_timeout, start_time, model_request) -> Iterator[
359
+ ModelResponse]:
360
+ """流式响应的包装器,用于记录完整的响应日志并处理重试"""
274
361
  total_content = ""
275
362
  final_usage = None
276
363
  error_occurred = None
277
364
  chunk_count = 0
278
-
365
+
279
366
  try:
280
367
  for response in self._stream(request, metadata, invoke_timeout):
281
368
  chunk_count += 1
@@ -286,26 +373,46 @@ class TamarModelClient:
286
373
  if response.error:
287
374
  error_occurred = response.error
288
375
  yield response
289
-
290
- # 流式响应完成,记录成功日志
376
+
377
+ # 流式响应完成,记录日志
291
378
  duration = time.time() - start_time
292
- logger.info(
293
- f"✅ Stream completed successfully | chunks: {chunk_count}",
294
- extra={
295
- "log_type": "response",
296
- "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
297
- "duration": duration,
298
- "data": {
299
- "provider": model_request.provider.value,
300
- "invoke_type": model_request.invoke_type.value,
301
- "model": model_request.model,
302
- "stream": True,
303
- "chunks_count": chunk_count,
304
- "total_length": len(total_content),
305
- "usage": final_usage
379
+ if error_occurred:
380
+ # 流式响应中包含错误
381
+ logger.warning(
382
+ f"⚠️ Stream completed with errors | chunks: {chunk_count}",
383
+ extra={
384
+ "log_type": "response",
385
+ "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
386
+ "duration": duration,
387
+ "data": ResponseHandler.build_log_data(
388
+ model_request,
389
+ stream_stats={
390
+ "chunks_count": chunk_count,
391
+ "total_length": len(total_content),
392
+ "usage": final_usage,
393
+ "error": error_occurred
394
+ }
395
+ )
306
396
  }
307
- }
308
- )
397
+ )
398
+ else:
399
+ # 流式响应成功完成
400
+ logger.info(
401
+ f"✅ Stream completed successfully | chunks: {chunk_count}",
402
+ extra={
403
+ "log_type": "response",
404
+ "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
405
+ "duration": duration,
406
+ "data": ResponseHandler.build_log_data(
407
+ model_request,
408
+ stream_stats={
409
+ "chunks_count": chunk_count,
410
+ "total_length": len(total_content),
411
+ "usage": final_usage
412
+ }
413
+ )
414
+ }
415
+ )
309
416
  except Exception as e:
310
417
  # 流式响应出错,记录错误日志
311
418
  duration = time.time() - start_time
@@ -316,29 +423,23 @@ class TamarModelClient:
316
423
  "log_type": "response",
317
424
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
318
425
  "duration": duration,
319
- "data": {
320
- "provider": model_request.provider.value,
321
- "invoke_type": model_request.invoke_type.value,
322
- "model": model_request.model,
323
- "stream": True,
324
- "chunks_count": chunk_count,
325
- "error_type": type(e).__name__,
326
- "partial_content_length": len(total_content)
327
- }
426
+ "data": ResponseHandler.build_log_data(
427
+ model_request,
428
+ error=e,
429
+ stream_stats={
430
+ "chunks_count": chunk_count,
431
+ "partial_content_length": len(total_content)
432
+ }
433
+ )
328
434
  }
329
435
  )
330
436
  raise
331
437
 
332
438
  def _invoke_request(self, request, metadata, invoke_timeout):
439
+ """执行单个非流式请求"""
333
440
  response = self.stub.Invoke(request, metadata=metadata, timeout=invoke_timeout)
334
441
  for response in response:
335
- return ModelResponse(
336
- content=response.content,
337
- usage=json.loads(response.usage) if response.usage else None,
338
- error=response.error or None,
339
- raw_response=json.loads(response.raw_response) if response.raw_response else None,
340
- request_id=response.request_id if response.request_id else None,
341
- )
442
+ return ResponseHandler.build_model_response(response)
342
443
 
343
444
  def invoke(self, model_request: ModelRequest, timeout: Optional[float] = None, request_id: Optional[str] = None) -> \
344
445
  Union[ModelResponse, Iterator[ModelResponse]]:
@@ -365,9 +466,9 @@ class TamarModelClient:
365
466
  }
366
467
 
367
468
  if not request_id:
368
- request_id = generate_request_id() # 生成一个新的 request_id
369
- set_request_id(request_id) # 设置当前请求的 request_id
370
- metadata = self._build_auth_metadata(request_id) # 将 request_id 加入到请求头
469
+ request_id = generate_request_id()
470
+ set_request_id(request_id)
471
+ metadata = self._build_auth_metadata(request_id)
371
472
 
372
473
  # 记录开始日志
373
474
  start_time = time.time()
@@ -376,129 +477,85 @@ class TamarModelClient:
376
477
  extra={
377
478
  "log_type": "request",
378
479
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
379
- "data": {
380
- "provider": model_request.provider.value,
381
- "invoke_type": model_request.invoke_type.value,
382
- "model": model_request.model,
383
- "stream": model_request.stream,
384
- "org_id": model_request.user_context.org_id,
385
- "user_id": model_request.user_context.user_id,
386
- "client_type": model_request.user_context.client_type
387
- }
480
+ "data": ResponseHandler.build_log_data(model_request)
388
481
  })
389
482
 
390
- # 动态根据 provider/invoke_type 决定使用哪个 input 字段
391
483
  try:
392
- # 选择需要校验的字段集合
393
- # 动态分支逻辑
394
- match (model_request.provider, model_request.invoke_type):
395
- case (ProviderType.GOOGLE, InvokeType.GENERATION):
396
- allowed_fields = GoogleGenAiInput.model_fields.keys()
397
- case (ProviderType.GOOGLE, InvokeType.IMAGE_GENERATION):
398
- allowed_fields = GoogleVertexAIImagesInput.model_fields.keys()
399
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.RESPONSES | InvokeType.GENERATION):
400
- allowed_fields = OpenAIResponsesInput.model_fields.keys()
401
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.CHAT_COMPLETIONS):
402
- allowed_fields = OpenAIChatCompletionsInput.model_fields.keys()
403
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_GENERATION):
404
- allowed_fields = OpenAIImagesInput.model_fields.keys()
405
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_EDIT_GENERATION):
406
- allowed_fields = OpenAIImagesEditInput.model_fields.keys()
407
- case _:
408
- raise ValueError(
409
- f"Unsupported provider/invoke_type combination: {model_request.provider} + {model_request.invoke_type}")
410
-
411
- # 将 ModelRequest 转 dict,过滤只保留 base + allowed 的字段
412
- model_request_dict = model_request.model_dump(exclude_unset=True)
413
-
414
- grpc_request_kwargs = {}
415
- for field in allowed_fields:
416
- if field in model_request_dict:
417
- value = model_request_dict[field]
418
-
419
- # 跳过无效的值
420
- if not is_effective_value(value):
421
- continue
422
-
423
- # 序列化grpc不支持的类型
424
- grpc_request_kwargs[field] = serialize_value(value)
425
-
426
- # 清理 serialize后的 grpc_request_kwargs
427
- grpc_request_kwargs = remove_none_from_dict(grpc_request_kwargs)
428
-
429
- request = model_service_pb2.ModelRequestItem(
430
- provider=model_request.provider.value,
431
- channel=model_request.channel.value,
432
- invoke_type=model_request.invoke_type.value,
433
- stream=model_request.stream or False,
434
- org_id=model_request.user_context.org_id or "",
435
- user_id=model_request.user_context.user_id or "",
436
- client_type=model_request.user_context.client_type or "",
437
- extra=grpc_request_kwargs
438
- )
439
-
484
+ # 构建 gRPC 请求
485
+ request = RequestBuilder.build_single_request(model_request)
486
+
440
487
  except Exception as e:
488
+ duration = time.time() - start_time
489
+ logger.error(
490
+ f"❌ Request build failed: {str(e)}",
491
+ exc_info=True,
492
+ extra={
493
+ "log_type": "response",
494
+ "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
495
+ "duration": duration,
496
+ "data": {
497
+ "provider": model_request.provider.value,
498
+ "invoke_type": model_request.invoke_type.value,
499
+ "model": getattr(model_request, 'model', None),
500
+ "error_type": "build_error",
501
+ "error_message": str(e)
502
+ }
503
+ }
504
+ )
441
505
  raise ValueError(f"构建请求失败: {str(e)}") from e
442
506
 
443
507
  try:
444
508
  invoke_timeout = timeout or self.default_invoke_timeout
445
509
  if model_request.stream:
446
- # 对于流式响应,使用带日志记录的包装器
447
- return self._stream_with_logging(request, metadata, invoke_timeout, start_time, model_request)
510
+ # 对于流式响应,使用重试包装器
511
+ return self._retry_request_stream(
512
+ self._stream_with_logging,
513
+ request, metadata, invoke_timeout, start_time, model_request
514
+ )
448
515
  else:
449
516
  result = self._retry_request(self._invoke_request, request, metadata, invoke_timeout)
450
-
517
+
451
518
  # 记录非流式响应的成功日志
452
519
  duration = time.time() - start_time
520
+ content_length = len(result.content) if result.content else 0
453
521
  logger.info(
454
- f"✅ Request completed successfully",
522
+ f"✅ Request completed | content_length: {content_length}",
455
523
  extra={
456
524
  "log_type": "response",
457
525
  "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
458
526
  "duration": duration,
459
- "data": {
460
- "provider": model_request.provider.value,
461
- "invoke_type": model_request.invoke_type.value,
462
- "model": model_request.model,
463
- "stream": False,
464
- "content_length": len(result.content) if result.content else 0,
465
- "usage": result.usage
466
- }
527
+ "data": ResponseHandler.build_log_data(model_request, result)
467
528
  }
468
529
  )
469
530
  return result
531
+
470
532
  except grpc.RpcError as e:
471
533
  duration = time.time() - start_time
472
534
  error_message = f"❌ Invoke gRPC failed: {str(e)}"
473
535
  logger.error(error_message, exc_info=True,
474
- extra={
475
- "log_type": "response",
476
- "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
477
- "duration": duration,
478
- "data": {
479
- "error_type": "grpc_error",
480
- "error_code": str(e.code()) if hasattr(e, 'code') else None,
481
- "provider": model_request.provider.value,
482
- "invoke_type": model_request.invoke_type.value,
483
- "model": model_request.model
484
- }
485
- })
536
+ extra={
537
+ "log_type": "response",
538
+ "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
539
+ "duration": duration,
540
+ "data": ResponseHandler.build_log_data(
541
+ model_request,
542
+ error=e
543
+ )
544
+ })
486
545
  raise e
487
546
  except Exception as e:
488
547
  duration = time.time() - start_time
489
548
  error_message = f"❌ Invoke other error: {str(e)}"
490
549
  logger.error(error_message, exc_info=True,
491
- extra={
492
- "log_type": "response",
493
- "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
494
- "duration": duration,
495
- "data": {
496
- "error_type": "other_error",
497
- "provider": model_request.provider.value,
498
- "invoke_type": model_request.invoke_type.value,
499
- "model": model_request.model
500
- }
501
- })
550
+ extra={
551
+ "log_type": "response",
552
+ "uri": f"/invoke/{model_request.provider.value}/{model_request.invoke_type.value}",
553
+ "duration": duration,
554
+ "data": ResponseHandler.build_log_data(
555
+ model_request,
556
+ error=e
557
+ )
558
+ })
502
559
  raise e
503
560
 
504
561
  def invoke_batch(self, batch_request_model: BatchModelRequest, timeout: Optional[float] = None,
@@ -513,7 +570,6 @@ class TamarModelClient:
513
570
  Returns:
514
571
  BatchModelResponse: 批量请求的结果
515
572
  """
516
-
517
573
  self._ensure_initialized()
518
574
 
519
575
  if not self.default_payload:
@@ -523,9 +579,9 @@ class TamarModelClient:
523
579
  }
524
580
 
525
581
  if not request_id:
526
- request_id = generate_request_id() # 生成一个新的 request_id
527
- set_request_id(request_id) # 设置当前请求的 request_id
528
- metadata = self._build_auth_metadata(request_id) # 将 request_id 加入到请求头
582
+ request_id = generate_request_id()
583
+ set_request_id(request_id)
584
+ metadata = self._build_auth_metadata(request_id)
529
585
 
530
586
  # 记录开始日志
531
587
  start_time = time.time()
@@ -542,151 +598,83 @@ class TamarModelClient:
542
598
  }
543
599
  })
544
600
 
545
- # 构造批量请求
546
- items = []
547
- for model_request_item in batch_request_model.items:
548
- # 动态根据 provider/invoke_type 决定使用哪个 input 字段
549
- try:
550
- match (model_request_item.provider, model_request_item.invoke_type):
551
- case (ProviderType.GOOGLE, InvokeType.GENERATION):
552
- allowed_fields = GoogleGenAiInput.model_fields.keys()
553
- case (ProviderType.GOOGLE, InvokeType.IMAGE_GENERATION):
554
- allowed_fields = GoogleVertexAIImagesInput.model_fields.keys()
555
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.RESPONSES | InvokeType.GENERATION):
556
- allowed_fields = OpenAIResponsesInput.model_fields.keys()
557
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.CHAT_COMPLETIONS):
558
- allowed_fields = OpenAIChatCompletionsInput.model_fields.keys()
559
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_GENERATION):
560
- allowed_fields = OpenAIImagesInput.model_fields.keys()
561
- case ((ProviderType.OPENAI | ProviderType.AZURE), InvokeType.IMAGE_EDIT_GENERATION):
562
- allowed_fields = OpenAIImagesEditInput.model_fields.keys()
563
- case _:
564
- raise ValueError(
565
- f"Unsupported provider/invoke_type combination: {model_request_item.provider} + {model_request_item.invoke_type}")
566
-
567
- # 将 ModelRequest 转 dict,过滤只保留 base + allowed 的字段
568
- model_request_dict = model_request_item.model_dump(exclude_unset=True)
569
-
570
- grpc_request_kwargs = {}
571
- for field in allowed_fields:
572
- if field in model_request_dict:
573
- value = model_request_dict[field]
574
-
575
- # 跳过无效的值
576
- if not is_effective_value(value):
577
- continue
578
-
579
- # 序列化grpc不支持的类型
580
- grpc_request_kwargs[field] = serialize_value(value)
581
-
582
- # 清理 serialize后的 grpc_request_kwargs
583
- grpc_request_kwargs = remove_none_from_dict(grpc_request_kwargs)
584
-
585
- items.append(model_service_pb2.ModelRequestItem(
586
- provider=model_request_item.provider.value,
587
- channel=model_request_item.channel.value,
588
- invoke_type=model_request_item.invoke_type.value,
589
- stream=model_request_item.stream or False,
590
- custom_id=model_request_item.custom_id or "",
591
- priority=model_request_item.priority or 1,
592
- org_id=batch_request_model.user_context.org_id or "",
593
- user_id=batch_request_model.user_context.user_id or "",
594
- client_type=batch_request_model.user_context.client_type or "",
595
- extra=grpc_request_kwargs,
596
- ))
597
-
598
- except Exception as e:
599
- raise ValueError(f"构建请求失败: {str(e)},item={model_request_item.custom_id}") from e
601
+ try:
602
+ # 构建批量请求
603
+ batch_request = RequestBuilder.build_batch_request(batch_request_model)
604
+
605
+ except Exception as e:
606
+ duration = time.time() - start_time
607
+ logger.error(
608
+ f"❌ Batch request build failed: {str(e)}",
609
+ exc_info=True,
610
+ extra={
611
+ "log_type": "response",
612
+ "uri": "/batch_invoke",
613
+ "duration": duration,
614
+ "data": {
615
+ "batch_size": len(batch_request_model.items),
616
+ "error_type": "build_error",
617
+ "error_message": str(e)
618
+ }
619
+ }
620
+ )
621
+ raise ValueError(f"构建批量请求失败: {str(e)}") from e
600
622
 
601
623
  try:
602
- # 超时处理逻辑
603
624
  invoke_timeout = timeout or self.default_invoke_timeout
604
-
605
- # 调用 gRPC 接口
606
- response = self._retry_request(self.stub.BatchInvoke, model_service_pb2.ModelRequest(items=items),
607
- timeout=invoke_timeout, metadata=metadata)
608
-
609
- result = []
610
- for res_item in response.items:
611
- result.append(ModelResponse(
612
- content=res_item.content,
613
- usage=json.loads(res_item.usage) if res_item.usage else None,
614
- raw_response=json.loads(res_item.raw_response) if res_item.raw_response else None,
615
- error=res_item.error or None,
616
- custom_id=res_item.custom_id if res_item.custom_id else None
617
- ))
618
- batch_response = BatchModelResponse(
619
- request_id=response.request_id if response.request_id else None,
620
- responses=result
625
+ batch_response = self._retry_request(
626
+ self.stub.BatchInvoke,
627
+ batch_request,
628
+ metadata=metadata,
629
+ timeout=invoke_timeout
621
630
  )
622
-
631
+
632
+ # 构建响应对象
633
+ result = ResponseHandler.build_batch_response(batch_response)
634
+
623
635
  # 记录成功日志
624
636
  duration = time.time() - start_time
625
637
  logger.info(
626
- f"✅ Batch request completed successfully",
638
+ f"✅ Batch Request completed | batch_size: {len(result.responses)}",
627
639
  extra={
628
640
  "log_type": "response",
629
641
  "uri": "/batch_invoke",
630
642
  "duration": duration,
631
643
  "data": {
632
- "batch_size": len(batch_request_model.items),
633
- "responses_count": len(result)
644
+ "batch_size": len(result.responses),
645
+ "success_count": sum(1 for item in result.responses if not item.error),
646
+ "error_count": sum(1 for item in result.responses if item.error)
634
647
  }
635
- }
636
- )
637
- return batch_response
648
+ })
649
+
650
+ return result
651
+
638
652
  except grpc.RpcError as e:
639
653
  duration = time.time() - start_time
640
- error_message = f"❌ BatchInvoke gRPC failed: {str(e)}"
654
+ error_message = f"❌ Batch invoke gRPC failed: {str(e)}"
641
655
  logger.error(error_message, exc_info=True,
642
- extra={
643
- "log_type": "response",
644
- "uri": "/batch_invoke",
645
- "duration": duration,
646
- "data": {
647
- "error_type": "grpc_error",
648
- "error_code": str(e.code()) if hasattr(e, 'code') else None,
649
- "batch_size": len(batch_request_model.items)
650
- }
651
- })
656
+ extra={
657
+ "log_type": "response",
658
+ "uri": "/batch_invoke",
659
+ "duration": duration,
660
+ "data": {
661
+ "error_type": "grpc_error",
662
+ "error_code": str(e.code()) if hasattr(e, 'code') else None,
663
+ "batch_size": len(batch_request_model.items)
664
+ }
665
+ })
652
666
  raise e
653
667
  except Exception as e:
654
668
  duration = time.time() - start_time
655
- error_message = f"❌ BatchInvoke other error: {str(e)}"
669
+ error_message = f"❌ Batch invoke other error: {str(e)}"
656
670
  logger.error(error_message, exc_info=True,
657
- extra={
658
- "log_type": "response",
659
- "uri": "/batch_invoke",
660
- "duration": duration,
661
- "data": {
662
- "error_type": "other_error",
663
- "batch_size": len(batch_request_model.items)
664
- }
665
- })
666
- raise e
667
-
668
- def close(self):
669
- """关闭 gRPC 通道"""
670
- if self.channel and not self._closed:
671
- self.channel.close()
672
- self._closed = True
673
- logger.info("✅ gRPC channel closed",
674
- extra={"log_type": "info", "data": {"status": "success"}})
675
-
676
- def _safe_sync_close(self):
677
- """进程退出时自动关闭 channel(事件循环处理兼容)"""
678
- if self.channel and not self._closed:
679
- try:
680
- self.close() # 直接调用关闭方法
681
- except Exception as e:
682
- logger.error(f"❌ gRPC channel close failed at exit: {e}",
683
- extra={"log_type": "info", "data": {"status": "failed", "error": str(e)}})
684
-
685
- def __enter__(self):
686
- """同步初始化连接"""
687
- self._ensure_initialized()
688
- return self
689
-
690
- def __exit__(self, exc_type, exc_val, exc_tb):
691
- """同步关闭连接"""
692
- self.close()
671
+ extra={
672
+ "log_type": "response",
673
+ "uri": "/batch_invoke",
674
+ "duration": duration,
675
+ "data": {
676
+ "error_type": "other_error",
677
+ "batch_size": len(batch_request_model.items)
678
+ }
679
+ })
680
+ raise e