tamar-model-client 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@ from .core import (
     generate_request_id,
     set_request_id,
     get_protected_logger,
-    MAX_MESSAGE_LENGTH
+    MAX_MESSAGE_LENGTH, get_request_id
 )
 from .core.base_client import BaseClient
 from .core.request_builder import RequestBuilder
@@ -244,7 +244,16 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
         Raises:
             TamarModelException: when all retries have failed
         """
-        return await self.retry_handler.execute_with_retry(func, *args, **kwargs)
+        # Extract request_id from kwargs (if present) and remove it
+        request_id = kwargs.pop('request_id', None) or get_request_id()
+
+        # Build a context that carries the request_id
+        context = {
+            'method': func.__name__ if hasattr(func, '__name__') else 'unknown',
+            'client_version': 'async',
+            'request_id': request_id,
+        }
+        return await self.retry_handler.execute_with_retry(func, *args, context=context, **kwargs)
 
     async def _retry_request_stream(self, func, *args, **kwargs):
         """
@@ -260,10 +269,18 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
         Returns:
             AsyncIterator: streaming response iterator
         """
+        # Record the method start time
+        import time
+        method_start_time = time.time()
+
+        # Extract request_id from kwargs (if present) and remove it
+        request_id = kwargs.pop('request_id', None) or get_request_id()
+
         last_exception = None
         context = {
             'method': 'stream',
             'client_version': 'async',
+            'request_id': request_id,
         }
 
         for attempt in range(self.max_retries + 1):
@@ -283,10 +300,16 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                 error_context = ErrorContext(e, context)
                 error_code = e.code()
                 policy = get_retry_policy(error_code)
-                retryable = policy.get('retryable', False)
 
-                should_retry = False
-                if attempt < self.max_retries:
+                # Check the per-error max_attempts configuration first
+                # max_attempts is the maximum number of retries (excluding the initial request)
+                error_max_attempts = policy.get('max_attempts', self.max_retries)
+                if attempt >= error_max_attempts:
+                    should_retry = False
+                elif attempt >= self.max_retries:
+                    should_retry = False
+                else:
+                    retryable = policy.get('retryable', False)
                     if retryable == True:
                         should_retry = True
                     elif retryable == 'conditional':
@@ -295,8 +318,11 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                             should_retry = error_context.is_network_cancelled()
                         else:
                             should_retry = self._check_error_details_for_retry(e)
+                else:
+                    should_retry = False
 
                 if should_retry:
+                    current_duration = time.time() - method_start_time
                     log_data = {
                         "log_type": "info",
                         "request_id": context.get('request_id'),
@@ -305,7 +331,8 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                             "retry_count": attempt,
                             "max_retries": self.max_retries,
                             "method": "stream"
-                        }
+                        },
+                        "duration": current_duration
                     }
                     logger.warning(
                         f"Stream attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()} (will retry)",
@@ -317,6 +344,7 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                     await asyncio.sleep(delay)
                 else:
                     # Do not retry, or the maximum number of retries has been reached
+                    current_duration = time.time() - method_start_time
                     log_data = {
                         "log_type": "info",
                         "request_id": context.get('request_id'),
@@ -326,12 +354,14 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                             "max_retries": self.max_retries,
                             "method": "stream",
                             "will_retry": False
-                        }
+                        },
+                        "duration": current_duration
                     }
                     logger.error(
                         f"Stream failed: {e.code()} (no retry)",
                         extra=log_data
                     )
+                    context['duration'] = current_duration
                     last_exception = self.error_handler.handle_error(e, context)
                     break
 
@@ -454,7 +484,7 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
         chunk_count = 0
 
         # Obtain the stream generator via the retry logic
-        stream_generator = self._retry_request_stream(self._stream, request, metadata, invoke_timeout)
+        stream_generator = self._retry_request_stream(self._stream, request, metadata, invoke_timeout, request_id=get_request_id())
 
         try:
             async for response in stream_generator:
@@ -609,7 +639,7 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                 # For streaming responses, return the logging wrapper directly
                 return self._stream_with_logging(request, metadata, invoke_timeout, start_time, model_request)
             else:
-                result = await self._retry_request(self._invoke_request, request, metadata, invoke_timeout)
+                result = await self._retry_request(self._invoke_request, request, metadata, invoke_timeout, request_id=request_id)
 
                 # Log success for the non-streaming response
                 duration = time.time() - start_time
@@ -739,7 +769,8 @@ class AsyncTamarModelClient(BaseClient, AsyncHttpFallbackMixin):
                 self.stub.BatchInvoke,
                 batch_request,
                 metadata=metadata,
-                timeout=invoke_timeout
+                timeout=invoke_timeout,
+                request_id=request_id
             )
 
             # Build the response object
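
Taken together, the async-client hunks above let a request_id flow from invoke/BatchInvoke into the retry wrappers and, via the retry context, into every retry log line. Below is a minimal, self-contained sketch of that propagation pattern; the names (get_request_id, execute_with_retry, _retry_request) mirror the diff, but the bodies are simplified stand-ins rather than the package implementation.

```python
# Sketch only: simplified stand-ins for the names used in the diff above.
import asyncio
import contextvars

_request_id: contextvars.ContextVar[str] = contextvars.ContextVar("request_id", default="-")

def get_request_id() -> str:
    return _request_id.get()

async def execute_with_retry(func, *args, context=None, **kwargs):
    # The real handler retries on gRPC errors; here we only surface the context.
    context = context or {}
    print(f"calling {context.get('method')} with request_id={context.get('request_id')}")
    return await func(*args, **kwargs)

async def _retry_request(func, *args, **kwargs):
    # Pop request_id so it is not forwarded to the wrapped call itself.
    request_id = kwargs.pop("request_id", None) or get_request_id()
    context = {
        "method": getattr(func, "__name__", "unknown"),
        "client_version": "async",
        "request_id": request_id,
    }
    return await execute_with_retry(func, *args, context=context, **kwargs)

async def _invoke_request(payload):
    return {"ok": True, "payload": payload}

async def main():
    _request_id.set("req-123")
    print(await _retry_request(_invoke_request, {"q": "ping"}))

asyncio.run(main())
```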
@@ -11,6 +11,7 @@ import logging
 from typing import Optional, Dict, Any, Callable, Union
 from collections import defaultdict
 
+from .core import get_protected_logger
 from .exceptions import (
     ErrorContext, TamarModelException,
     NetworkException, ConnectionException, TimeoutException,
@@ -20,17 +21,16 @@ from .exceptions import (
     ERROR_CATEGORIES, RETRY_POLICY, ErrorStats
 )
 
-
-logger = logging.getLogger(__name__)
+logger = get_protected_logger(__name__)
 
 
 class GrpcErrorHandler:
     """Unified gRPC error handler"""
-
+
     def __init__(self, client_logger: Optional[logging.Logger] = None):
         self.logger = client_logger or logger
         self.error_stats = ErrorStats()
-
+
     def handle_error(self, error: Union[grpc.RpcError, Exception], context: dict) -> TamarModelException:
         """
         Unified error-handling flow:
@@ -41,7 +41,7 @@ class GrpcErrorHandler:
         5. Return the corresponding exception
         """
         error_context = ErrorContext(error, context)
-
+
         # Log the detailed error
         # Flatten the key error_context fields into the log's data field
         log_data = {
@@ -61,60 +61,64 @@ class GrpcErrorHandler:
                 "is_network_cancelled": error_context.is_network_cancelled() if error_context.error_code == grpc.StatusCode.CANCELLED else None
             }
         }
-
+
+        # If the context carries a duration, add it to the log
+        if 'duration' in context:
+            log_data['duration'] = context['duration']
+
         self.logger.error(
             f"gRPC Error occurred: {error_context.error_code.name if error_context.error_code else 'UNKNOWN'}",
             extra=log_data
         )
-
+
         # Update the error statistics
         if error_context.error_code:
             self.error_stats.record_error(error_context.error_code)
-
+
         # Return the appropriate exception for the error type
         return self._create_exception(error_context)
-
+
     def _create_exception(self, error_context: ErrorContext) -> TamarModelException:
         """Create the appropriate exception from the error context"""
         error_code = error_context.error_code
-
+
         if not error_code:
             return TamarModelException(error_context)
-
+
         # Authentication-related errors
         if error_code in ERROR_CATEGORIES['AUTH']:
             if error_code == grpc.StatusCode.UNAUTHENTICATED:
                 return TokenExpiredException(error_context)
             else:
                 return PermissionDeniedException(error_context)
-
+
         # Network-related errors
         elif error_code in ERROR_CATEGORIES['NETWORK']:
             if error_code == grpc.StatusCode.DEADLINE_EXCEEDED:
                 return TimeoutException(error_context)
             else:
                 return ConnectionException(error_context)
-
+
         # Validation-related errors
         elif error_code in ERROR_CATEGORIES['VALIDATION']:
             return InvalidParameterException(error_context)
-
+
         # Resource-related errors
         elif error_code == grpc.StatusCode.RESOURCE_EXHAUSTED:
             return RateLimitException(error_context)
-
+
         # Provider-related errors
         elif error_code in ERROR_CATEGORIES['PROVIDER']:
             return ProviderException(error_context)
-
+
         # Default error
         else:
             return TamarModelException(error_context)
-
+
     def get_error_stats(self) -> Dict[str, Any]:
         """Get the error statistics"""
         return self.error_stats.get_stats()
-
+
     def reset_stats(self):
         """Reset the error statistics"""
         self.error_stats.reset()
@@ -122,60 +126,60 @@ class GrpcErrorHandler:
 
 class ErrorRecoveryStrategy:
     """Error recovery strategy"""
-
+
     RECOVERY_ACTIONS = {
         'refresh_token': 'handle_token_refresh',
         'reconnect': 'handle_reconnect',
         'backoff': 'handle_backoff',
         'circuit_break': 'handle_circuit_break',
     }
-
+
     def __init__(self, client):
         self.client = client
-
+
     async def recover_from_error(self, error_context: ErrorContext):
         """Execute a recovery action based on the error type"""
         if not error_context.error_code:
             return
-
+
         policy = RETRY_POLICY.get(error_context.error_code, {})
-
+
         if action := policy.get('action'):
             if action in self.RECOVERY_ACTIONS:
                 handler = getattr(self, self.RECOVERY_ACTIONS[action])
                 await handler(error_context)
-
+
     async def handle_token_refresh(self, error_context: ErrorContext):
         """Handle token refresh"""
         self.client.logger.info("Attempting to refresh JWT token")
         # The client is expected to implement a _refresh_jwt_token method
         if hasattr(self.client, '_refresh_jwt_token'):
             await self.client._refresh_jwt_token()
-
+
     async def handle_reconnect(self, error_context: ErrorContext):
         """Handle reconnection"""
         self.client.logger.info("Attempting to reconnect channel")
         # The client is expected to implement a _reconnect_channel method
         if hasattr(self.client, '_reconnect_channel'):
             await self.client._reconnect_channel()
-
+
     async def handle_backoff(self, error_context: ErrorContext):
         """Handle backoff waiting"""
         wait_time = self._calculate_backoff(error_context.retry_count)
         await asyncio.sleep(wait_time)
-
+
     async def handle_circuit_break(self, error_context: ErrorContext):
         """Handle circuit breaking"""
         self.client.logger.warning("Circuit breaker activated")
         # Circuit-breaker logic can be implemented here
         pass
-
+
     def _calculate_backoff(self, retry_count: int) -> float:
         """Calculate the backoff delay"""
         base_delay = 1.0
         max_delay = 60.0
         jitter_factor = 0.1
-
+
         delay = min(base_delay * (2 ** retry_count), max_delay)
         jitter = random.uniform(0, delay * jitter_factor)
         return delay + jitter
@@ -183,18 +187,18 @@ class ErrorRecoveryStrategy:
 
 class EnhancedRetryHandler:
     """Enhanced retry handler"""
-
+
     def __init__(self, max_retries: int = 3, base_delay: float = 1.0):
         self.max_retries = max_retries
         self.base_delay = base_delay
         self.error_handler = GrpcErrorHandler()
-
+
     async def execute_with_retry(
-        self,
-        func: Callable,
-        *args,
-        context: Optional[Dict[str, Any]] = None,
-        **kwargs
+            self,
+            func: Callable,
+            *args,
+            context: Optional[Dict[str, Any]] = None,
+            **kwargs
     ):
         """
         Execute a function and handle retries
@@ -211,24 +215,33 @@
         Raises:
             TamarModelException: the wrapped exception
         """
+        # Record the start time
+        import time
+        method_start_time = time.time()
+
         context = context or {}
         last_exception = None
-
+
         for attempt in range(self.max_retries + 1):
             try:
                 context['retry_count'] = attempt
                 return await func(*args, **kwargs)
-
+
             except (grpc.RpcError, grpc.aio.AioRpcError) as e:
                 # Create the error context
                 error_context = ErrorContext(e, context)
-
+
                 # Determine whether a retry is possible
                 if not self._should_retry(e, attempt):
                     # Not retryable, or the maximum number of retries has been reached
+                    current_duration = time.time() - method_start_time
+                    context['duration'] = current_duration
                     last_exception = self.error_handler.handle_error(e, context)
                     break
-
+
+                # Compute the elapsed time so far
+                current_duration = time.time() - method_start_time
+
                 # Log the retry
                 log_data = {
                     "log_type": "info",
@@ -241,20 +254,22 @@
                         "category": error_context._get_error_category(),
                         "is_retryable": True,  # since we are retrying, it is retryable
                         "method": error_context.method
-                    }
+                    },
+                    "duration": current_duration
                 }
                 logger.warning(
                     f"Attempt {attempt + 1}/{self.max_retries + 1} failed: {e.code()}",
                     extra=log_data
                 )
-
+
                 # Perform the backoff wait
                 if attempt < self.max_retries:
                     delay = self._calculate_backoff(attempt)
                     await asyncio.sleep(delay)
-
+
+                context['duration'] = current_duration
                 last_exception = self.error_handler.handle_error(e, context)
-
+
             except Exception as e:
                 # Non-gRPC error: wrap it and raise directly
                 context['retry_count'] = attempt
@@ -262,21 +277,28 @@
                 error_context.error_message = str(e)
                 last_exception = TamarModelException(error_context)
                 break
-
+
         # Raise the last exception
         if last_exception:
             raise last_exception
         else:
             raise TamarModelException("Unknown error occurred")
-
+
     def _should_retry(self, error: grpc.RpcError, attempt: int) -> bool:
         """Decide whether a retry should happen"""
-        if attempt >= self.max_retries:
-            return False
-
         error_code = error.code()
         policy = RETRY_POLICY.get(error_code, {})
-
+
+        # Check the per-error max_attempts configuration first
+        # max_attempts is the maximum number of retries (excluding the initial request)
+        error_max_attempts = policy.get('max_attempts', self.max_retries)
+        if attempt >= error_max_attempts:
+            return False
+
+        # Then check the global max_retries
+        if attempt >= self.max_retries:
+            return False
+
         # Check the basic retry policy
         retryable = policy.get('retryable', False)
         if retryable == False:
@@ -286,30 +308,30 @@
         elif retryable == 'conditional':
             # Conditional retry: the error details need to be checked
             return self._check_conditional_retry(error)
-
+
         return False
-
+
     def _check_conditional_retry(self, error: grpc.RpcError) -> bool:
         """Check whether a conditional retry applies"""
         error_message = error.details().lower() if error.details() else ""
-
+
         # Some retryable internal error patterns
         retryable_patterns = [
-            'temporary', 'timeout', 'unavailable',
+            'temporary', 'timeout', 'unavailable',
            'connection', 'network', 'try again'
         ]
-
+
         for pattern in retryable_patterns:
             if pattern in error_message:
                 return True
-
+
         return False
-
+
     def _calculate_backoff(self, attempt: int) -> float:
         """Calculate the backoff delay"""
         max_delay = 60.0
         jitter_factor = 0.1
-
+
         delay = min(self.base_delay * (2 ** attempt), max_delay)
         jitter = random.uniform(0, delay * jitter_factor)
-        return delay + jitter
+        return delay + jitter
@@ -77,7 +77,7 @@ RETRY_POLICY = {
     grpc.StatusCode.CANCELLED: {
         'retryable': True,
         'backoff': 'linear',  # linear backoff; network problems usually do not need exponential backoff
-        'max_attempts': 2,  # limit the number of retries to avoid excessive retrying
+        'max_attempts': 2,  # maximum number of retries (excluding the initial request), so 3 attempts in total
         'check_details': False  # do not inspect the details; retry uniformly
     },
     grpc.StatusCode.ABORTED: {
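
The reworded comment, together with the reworked _should_retry above, pins down the counting convention: max_attempts caps retries only, and the global max_retries still applies on top of it. A small, hedged sketch of that arithmetic (a standalone helper, not the package's code):

```python
# Standalone sketch of the retry-count arithmetic described above.
# `attempt` is 0 when the initial request fails, 1 when the first retry fails, and so on.
def may_retry(attempt: int, policy_max_attempts: int, global_max_retries: int,
              retryable: bool) -> bool:
    if attempt >= policy_max_attempts:   # per-error cap: counts retries only
        return False
    if attempt >= global_max_retries:    # global cap still applies
        return False
    return retryable

# CANCELLED with max_attempts=2 under a global max_retries=3:
# attempts 0 and 1 may be retried, attempt 2 may not -> at most 3 calls in total.
assert [may_retry(a, 2, 3, True) for a in range(4)] == [True, True, False, False]
```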
@@ -184,6 +184,37 @@ class ErrorContext:
             'DATA': '数据损坏或丢失,请检查输入数据',
         }
         return suggestions.get(self._get_error_category(), '未知错误,请联系技术支持')
+
+    def is_network_cancelled(self) -> bool:
+        """
+        Determine whether a CANCELLED error was caused by a network interruption
+
+        Returns:
+            bool: True if the CANCELLED error was caused by a network interruption
+        """
+        if self.error_code != grpc.StatusCode.CANCELLED:
+            return False
+
+        # Check whether the error message contains network-related keywords
+        error_msg = (self.error_message or '').lower()
+        debug_msg = (self.error_debug_string or '').lower()
+
+        network_patterns = [
+            'connection reset',
+            'connection refused',
+            'connection closed',
+            'network unreachable',
+            'broken pipe',
+            'socket closed',
+            'eof',
+            'transport'
+        ]
+
+        for pattern in network_patterns:
+            if pattern in error_msg or pattern in debug_msg:
+                return True
+
+        return False
 
 
 # ===== Exception class hierarchy =====
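
The new is_network_cancelled() helper decides whether a CANCELLED status should be treated as a transient network failure by substring-matching the error text. A standalone sketch of that check follows; the pattern list mirrors the diff, while the function and its signature are illustrative only (the real method reads error_message and error_debug_string from the ErrorContext).

```python
# Sketch of the keyword check performed by is_network_cancelled() above.
NETWORK_PATTERNS = (
    "connection reset", "connection refused", "connection closed",
    "network unreachable", "broken pipe", "socket closed", "eof", "transport",
)

def looks_like_network_cancel(error_message: str, debug_string: str = "") -> bool:
    # Case-insensitive substring match over both message fields.
    text = f"{error_message} {debug_string}".lower()
    return any(pattern in text for pattern in NETWORK_PATTERNS)

print(looks_like_network_cancel("Connection reset by peer"))      # True
print(looks_like_network_cancel("client cancelled the request"))  # False
```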