tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,266 @@
1
+ """Production wrapper for cross-endpoint function routing."""
2
+
3
+ import logging
4
+ from typing import Any, Callable, Dict, Optional
5
+
6
+ from tetra_rp.core.resources.serverless import ServerlessResource
7
+
8
+ from .exceptions import RemoteExecutionError
9
+ from .serialization import serialize_args, serialize_kwargs
10
+ from .service_registry import ServiceRegistry
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class ProductionWrapper:
    """Decide, per call, whether a stubbed function runs here or elsewhere.

    Each intercepted call is looked up in the service registry.  When the
    registry raises ``ValueError`` for the name, or returns ``None`` as the
    owning resource, the original stub callable is awaited unchanged.  Any
    other result is treated as a remote resource and the call is forwarded
    through that resource's ``run_sync``.
    """

    def __init__(self, service_registry: ServiceRegistry):
        """Keep a reference to the registry used for routing.

        Args:
            service_registry: Registry queried for the resource that owns
                each function or class name.
        """
        self.service_registry = service_registry

    async def wrap_function_execution(
        self,
        original_stub_func: Callable,
        func: Callable,
        dependencies: Optional[list],
        system_dependencies: Optional[list],
        accelerate_downloads: bool,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Run a decorated function locally or on the endpoint that owns it.

        Args:
            original_stub_func: The stub callable to fall back to for local
                execution.
            func: The decorated function; its ``__name__`` is the routing key.
            dependencies: Pip dependencies, passed through on local execution.
            system_dependencies: System dependencies, passed through on local
                execution.
            accelerate_downloads: Download acceleration flag, passed through
                on local execution.
            *args: Positional arguments for ``func``.
            **kwargs: Keyword arguments for ``func``.

        Returns:
            Whatever the local stub or the remote endpoint produced.

        Raises:
            RemoteExecutionError: If the remote endpoint reports an error.
            Exception: Anything raised by the local stub or the registry.
        """
        function_name = func.__name__

        async def run_locally() -> Any:
            # Both "unknown to the manifest" and "owned by this endpoint"
            # end up here, with the original arguments untouched.
            return await original_stub_func(
                func,
                dependencies,
                system_dependencies,
                accelerate_downloads,
                *args,
                **kwargs,
            )

        # The registry must have its manifest before it can answer lookups.
        await self.service_registry._ensure_manifest_loaded()

        try:
            resource = await self.service_registry.get_resource_for_function(
                function_name
            )
        except ValueError as e:
            # The registry signals an unknown name with ValueError.
            logger.debug(
                f"Function {function_name} not in manifest: {e}, executing locally"
            )
            return await run_locally()

        if resource is not None:
            logger.debug(f"Routing function {function_name} to remote endpoint")
            return await self._execute_remote(
                resource,
                function_name,
                args,
                kwargs,
                execution_type="function",
            )

        # A None resource means the function belongs to this endpoint.
        logger.debug(f"Executing local function: {function_name}")
        return await run_locally()

    async def wrap_class_method_execution(
        self,
        original_method_func: Callable,
        request: Any,
    ) -> Any:
        """Run a class-method request locally or on the owning endpoint.

        Args:
            original_method_func: Callable awaited with ``request`` for local
                execution.
            request: Request object; its ``class_name`` attribute is the
                routing key.

        Returns:
            Whatever the local callable or the remote endpoint produced.

        Raises:
            RemoteExecutionError: If the remote endpoint reports an error.
            Exception: Anything raised by the local callable or the registry.
        """
        await self.service_registry._ensure_manifest_loaded()

        # Read via getattr, so a request without the attribute (a plain dict,
        # for instance) yields None and is executed locally.
        class_name = getattr(request, "class_name", None)
        if not class_name:
            return await original_method_func(request)

        try:
            resource = await self.service_registry.get_resource_for_function(class_name)
        except ValueError:
            logger.debug(f"Class {class_name} not in manifest, executing locally")
            return await original_method_func(request)

        if resource is None:
            logger.debug(f"Executing local class method: {class_name}")
            return await original_method_func(request)

        logger.debug(f"Routing class {class_name} to remote endpoint")

        # NOTE(review): the "input" section built from the request is passed
        # on as keyword arguments, so _execute_remote serializes each of its
        # fields and nests them under its own "kwargs" key. Confirm that the
        # receiving handler expects this shape.
        class_input = self._build_class_payload(request).get("input", {})
        return await self._execute_remote(
            resource,
            class_name,
            (),
            class_input,
            execution_type="class",
        )

    async def _execute_remote(
        self,
        resource: ServerlessResource,
        function_name: str,
        args: tuple,
        kwargs: dict,
        execution_type: str = "function",
    ) -> Any:
        """Send one call to a remote resource and unwrap its result.

        Args:
            resource: Resource whose ``run_sync`` performs the call.
            function_name: Name of the function or class to execute.
            args: Positional arguments, serialized before sending.
            kwargs: Keyword arguments, serialized before sending.
            execution_type: "function" or "class".

        Returns:
            The ``output`` attribute of the remote result.

        Raises:
            RemoteExecutionError: If the remote result has a truthy ``error``.
        """
        # Everything travels under a single "input" key.
        envelope = {
            "input": {
                "function_name": function_name,
                "execution_type": execution_type,
                "args": serialize_args(args),
                "kwargs": serialize_kwargs(kwargs),
            }
        }

        response = await resource.run_sync(envelope)

        if not response.error:
            return response.output

        raise RemoteExecutionError(
            f"Remote execution of {function_name} failed: {response.error}"
        )

    def _build_class_payload(self, request: Any) -> Dict[str, Any]:
        """Translate a class-execution request into a payload dict.

        Args:
            request: Either a dict, or an object exposing ``model_dump``.
                Anything else is treated as carrying no data.

        Returns:
            A dict with a single "input" entry describing the class call.
        """
        if isinstance(request, dict):
            data = request
        elif hasattr(request, "model_dump"):
            # Unset (None) fields are dropped, so the defaults below apply.
            data = request.model_dump(exclude_none=True)
        else:
            data = {}

        class_call = {
            "function_name": data.get("class_name"),
            "execution_type": "class",
            "args": data.get("args", []),
            "kwargs": data.get("kwargs", {}),
            "method_name": data.get("method_name"),
        }
        return {"input": class_call}
234
+
235
+
236
# Shared ProductionWrapper; stays None until the first call below.
_wrapper_instance: Optional[ProductionWrapper] = None


def create_production_wrapper(
    service_registry: Optional[ServiceRegistry] = None,
) -> ProductionWrapper:
    """Return the shared ProductionWrapper, building it on first use.

    Args:
        service_registry: Registry handed to the wrapper when it has to be
            built; a fresh ``ServiceRegistry()`` is created if omitted.
            Has no effect once a wrapper already exists.

    Returns:
        The module-level ProductionWrapper instance.
    """
    global _wrapper_instance

    if _wrapper_instance is not None:
        return _wrapper_instance

    registry = ServiceRegistry() if service_registry is None else service_registry
    _wrapper_instance = ProductionWrapper(registry)
    return _wrapper_instance
261
+
262
+
263
def reset_wrapper() -> None:
    """Forget the shared ProductionWrapper.

    Clears the module-level instance so that a fresh wrapper is built the
    next time one is requested.  Mainly useful in tests.
    """
    global _wrapper_instance
    _wrapper_instance = None
@@ -0,0 +1,149 @@
1
+ """Centralized configuration for reliability features."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from enum import Enum
6
+ from typing import Optional
7
+
8
+
9
class LoadBalancerStrategy(Enum):
    """Named policies for picking an endpoint.

    Each member's value is the lowercase string form of the policy, which
    allows lookup by value, e.g. ``LoadBalancerStrategy("round_robin")``.
    """

    ROUND_ROBIN = "round_robin"
    LEAST_CONNECTIONS = "least_connections"
    RANDOM = "random"
15
+
16
+
17
@dataclass
class CircuitBreakerConfig:
    """Settings for the circuit breaker."""

    # Master switch for the breaker.
    enabled: bool = True
    # Number of failures before the circuit opens.
    failure_threshold: int = 5
    # Number of successes needed to close the circuit again.
    success_threshold: int = 2
    # Seconds to wait before the circuit becomes half-open.
    timeout_seconds: int = 60
    # NOTE(review): presumably the size of the window of recent calls that
    # is inspected; confirm against the circuit breaker implementation.
    window_size: int = 10
26
+
27
+
28
@dataclass
class LoadBalancerConfig:
    """Settings for the load balancer."""

    # Load balancing is off unless explicitly turned on.
    enabled: bool = False
    # Endpoint selection policy; round-robin unless overridden.
    strategy: LoadBalancerStrategy = LoadBalancerStrategy.ROUND_ROBIN
34
+
35
+
36
@dataclass
class RetryConfig:
    """Settings for retrying failed calls with exponential backoff."""

    # Master switch for retrying.
    enabled: bool = True
    # Upper bound on the number of attempts.
    max_attempts: int = 3
    # Base delay for the backoff calculation.
    base_delay: float = 0.5
    # Ceiling for the backoff delay.
    max_delay: float = 10.0
    # Jitter factor applied to the delay.
    jitter: float = 0.2
    # Exception types that qualify for a retry.
    retryable_exceptions: tuple = field(
        default_factory=lambda: (TimeoutError, ConnectionError)
    )
    # HTTP status codes that qualify for a retry; built per instance so
    # that instances never share one mutable set.
    retryable_status_codes: set = field(
        default_factory=lambda: {408, 429, 500, 502, 503, 504}
    )
51
+
52
+
53
@dataclass
class MetricsConfig:
    """Settings for metrics collection."""

    # Master switch for metrics.
    enabled: bool = True
    # Namespace string associated with the metrics.
    namespace: str = "tetra.metrics"
59
+
60
+
61
@dataclass
class ReliabilityConfig:
    """Aggregate of all reliability-related settings.

    Each section defaults to a freshly constructed instance of its own
    config class; ``from_env`` builds the sections from environment
    variables instead.
    """

    circuit_breaker: CircuitBreakerConfig = field(default_factory=CircuitBreakerConfig)
    load_balancer: LoadBalancerConfig = field(default_factory=LoadBalancerConfig)
    retry: RetryConfig = field(default_factory=RetryConfig)
    metrics: MetricsConfig = field(default_factory=MetricsConfig)

    @staticmethod
    def _env_flag(name: str, default: bool) -> bool:
        """Read a boolean environment variable.

        Only the word "true" (any letter case, surrounding whitespace
        ignored) turns the flag on; every other value turns it off.

        Args:
            name: Environment variable name.
            default: Value used when the variable is not set.

        Returns:
            The parsed flag.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        return raw.strip().lower() == "true"

    @staticmethod
    def _env_number(name: str, default: float, cast: type):
        """Read a numeric environment variable.

        Args:
            name: Environment variable name.
            default: Value used when the variable is not set.
            cast: ``int`` or ``float``; applied to the raw string.

        Returns:
            The converted value, or ``default`` when the variable is unset.

        Raises:
            ValueError: If the variable is set but cannot be converted.  The
                message names the offending variable.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return cast(raw)
        except ValueError as exc:
            raise ValueError(f"Invalid value for {name}: {raw!r}") from exc

    @classmethod
    def from_env(cls) -> "ReliabilityConfig":
        """Load configuration from environment variables.

        Environment variables:
        - TETRA_CIRCUIT_BREAKER_ENABLED: Enable circuit breaker (default: true)
        - TETRA_CB_FAILURE_THRESHOLD: Failures before opening (default: 5)
        - TETRA_CB_SUCCESS_THRESHOLD: Successes to close (default: 2)
        - TETRA_CB_TIMEOUT_SECONDS: Time before half-open (default: 60)
        - TETRA_LOAD_BALANCER_ENABLED: Enable load balancer (default: false)
        - TETRA_LB_STRATEGY: Load balancer strategy (default: round_robin)
        - TETRA_RETRY_ENABLED: Enable retry (default: true)
        - TETRA_RETRY_MAX_ATTEMPTS: Max retry attempts (default: 3)
        - TETRA_RETRY_BASE_DELAY: Base delay for backoff (default: 0.5)
        - TETRA_METRICS_ENABLED: Enable metrics (default: true)

        Settings without a variable above (circuit breaker window size,
        retry max delay, jitter, retryable exceptions and status codes,
        metrics namespace) keep their dataclass defaults.

        Returns:
            ReliabilityConfig initialized from environment variables.

        Raises:
            ValueError: If a numeric variable holds a non-numeric value.
        """
        circuit_breaker = CircuitBreakerConfig(
            enabled=cls._env_flag("TETRA_CIRCUIT_BREAKER_ENABLED", True),
            failure_threshold=cls._env_number("TETRA_CB_FAILURE_THRESHOLD", 5, int),
            success_threshold=cls._env_number("TETRA_CB_SUCCESS_THRESHOLD", 2, int),
            timeout_seconds=cls._env_number("TETRA_CB_TIMEOUT_SECONDS", 60, int),
        )

        # An unrecognised strategy name is deliberately not an error: the
        # configuration quietly falls back to round-robin.
        strategy_name = os.getenv("TETRA_LB_STRATEGY", "round_robin").strip().lower()
        try:
            strategy = LoadBalancerStrategy(strategy_name)
        except ValueError:
            strategy = LoadBalancerStrategy.ROUND_ROBIN

        load_balancer = LoadBalancerConfig(
            enabled=cls._env_flag("TETRA_LOAD_BALANCER_ENABLED", False),
            strategy=strategy,
        )

        retry = RetryConfig(
            enabled=cls._env_flag("TETRA_RETRY_ENABLED", True),
            max_attempts=cls._env_number("TETRA_RETRY_MAX_ATTEMPTS", 3, int),
            base_delay=cls._env_number("TETRA_RETRY_BASE_DELAY", 0.5, float),
        )

        metrics = MetricsConfig(
            enabled=cls._env_flag("TETRA_METRICS_ENABLED", True),
        )

        return cls(
            circuit_breaker=circuit_breaker,
            load_balancer=load_balancer,
            retry=retry,
            metrics=metrics,
        )
124
+
125
+
126
# Process-wide configuration; stays None until first requested or set.
_config: Optional[ReliabilityConfig] = None


def get_reliability_config() -> ReliabilityConfig:
    """Return the global reliability configuration.

    The configuration is read from the environment the first time it is
    needed and reused on later calls.

    Returns:
        The shared ReliabilityConfig instance.
    """
    global _config
    if _config is not None:
        return _config

    _config = ReliabilityConfig.from_env()
    return _config
140
+
141
+
142
def set_reliability_config(config: ReliabilityConfig) -> None:
    """Replace the global reliability configuration.

    Intended for tests that need a specific configuration in place.

    Args:
        config: Configuration to install as the global instance.
    """
    global _config
    _config = config
@@ -0,0 +1,118 @@
1
+ """Retry logic with exponential backoff for failed remote calls."""
2
+
3
+ import asyncio
4
+ import logging
5
+ from typing import Any, Callable, Optional, Set, Tuple, Type
6
+
7
+ from tetra_rp.core.utils.backoff import get_backoff_delay
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class RetryExhaustedError(Exception):
    """Signals that the maximum number of retry attempts was used up."""
16
+
17
+
18
async def retry_with_backoff(
    func: Callable[..., Any],
    max_attempts: int = 3,
    base_delay: float = 0.5,
    max_delay: float = 10.0,
    jitter: float = 0.2,
    retryable_exceptions: Optional[Tuple[Type[Exception], ...]] = None,
    retryable_status_codes: Optional[Set[int]] = None,
    circuit_breaker: Optional[Any] = None,
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Execute async function with retry and exponential backoff.

    Note that ``*args`` comes after the eight configurable parameters, so
    positional arguments for ``func`` can only be supplied once all of those
    have been given positionally; otherwise pass them as keyword arguments.

    Args:
        func: Async function to execute
        max_attempts: Maximum number of attempts (default: 3)
        base_delay: Base delay between retries in seconds (default: 0.5)
        max_delay: Maximum delay between retries (default: 10.0)
        jitter: Jitter factor (0.0-1.0) to add randomness (default: 0.2)
        retryable_exceptions: Tuple of exception types to retry on
            (default: (asyncio.TimeoutError, ConnectionError))
        retryable_status_codes: Set of HTTP status codes to retry on
            (default: {408, 429, 500, 502, 503, 504}). Only consulted for
            retryable exceptions that carry a ``status_code`` attribute.
        circuit_breaker: Optional circuit breaker checked before each attempt
        *args: Positional arguments for func
        **kwargs: Keyword arguments for func

    Returns:
        Result from successful function call

    Raises:
        RetryExhaustedError: If max attempts exceeded (also when
            ``max_attempts`` is zero or negative, in which case ``func`` is
            never called)
        RuntimeError: If the circuit breaker is open before an attempt
        Exception: If non-retryable exception occurs
    """
    if retryable_exceptions is None:
        retryable_exceptions = (asyncio.TimeoutError, ConnectionError)

    if retryable_status_codes is None:
        retryable_status_codes = {408, 429, 500, 502, 503, 504}

    # functools.partial objects and callable instances have no __name__.
    # Reading it blindly inside the except handler would raise
    # AttributeError and hide the real failure.
    func_name = getattr(func, "__name__", None) or repr(func)

    open_state = None
    if circuit_breaker is not None:
        # Imported only when a breaker is supplied, once rather than on
        # every attempt.
        from tetra_rp.runtime.circuit_breaker import CircuitState

        open_state = CircuitState.OPEN

    for attempt in range(max_attempts):
        try:
            # Raised inside the try so it goes through the same
            # retryable/non-retryable classification as any other error.
            if circuit_breaker is not None and circuit_breaker.get_state() == open_state:
                raise RuntimeError(
                    f"Circuit breaker OPEN, skipping retry attempt {attempt + 1}"
                )

            result = await func(*args, **kwargs)

            if attempt > 0:
                logger.info(f"Retry succeeded on attempt {attempt + 1}/{max_attempts}")

            return result

        except Exception as e:
            # Anything outside the retryable types propagates unchanged.
            if not isinstance(e, retryable_exceptions):
                logger.debug(
                    f"Non-retryable exception in {func_name}: {type(e).__name__}"
                )
                raise

            # A retryable exception type can still be vetoed by its status.
            if hasattr(e, "status_code"):
                if e.status_code not in retryable_status_codes:  # type: ignore
                    logger.debug(
                        f"Non-retryable status code {e.status_code} in {func_name}"
                    )
                    raise

            # The final attempt has failed: report exhaustion, keeping the
            # last error as the cause.
            if attempt >= max_attempts - 1:
                logger.warning(
                    f"Max retries ({max_attempts}) exhausted for {func_name}"
                )
                raise RetryExhaustedError(
                    f"Failed after {max_attempts} attempts: {e}"
                ) from e

            delay = get_backoff_delay(attempt, base_delay, max_delay, jitter=jitter)
            logger.debug(
                f"Retry {attempt + 1}/{max_attempts} for {func_name} "
                f"after {delay:.2f}s"
            )
            await asyncio.sleep(delay)

    # Only reachable when the loop body never ran (max_attempts <= 0): every
    # iteration either returns, raises, or continues to the next attempt.
    raise RetryExhaustedError(f"Failed after {max_attempts} attempts")
@@ -0,0 +1,124 @@
1
+ """Shared serialization utilities for cloudpickle + base64 encoding."""
2
+
3
+ import base64
4
+ from typing import Any, Dict, List
5
+
6
+ import cloudpickle
7
+
8
+ from .exceptions import SerializationError
9
+
10
+
11
def serialize_arg(arg: Any) -> str:
    """Turn one value into a base64 text form of its cloudpickle dump.

    Args:
        arg: Value to serialize.

    Returns:
        The cloudpickle bytes of ``arg``, base64-encoded and decoded to a
        UTF-8 string.

    Raises:
        SerializationError: If pickling or encoding fails for any reason.
    """
    try:
        pickled = cloudpickle.dumps(arg)
        encoded = base64.b64encode(pickled)
        return encoded.decode("utf-8")
    except Exception as exc:
        raise SerializationError(f"Failed to serialize argument: {exc}") from exc
27
+
28
+
29
def serialize_args(args: tuple) -> List[str]:
    """Serialize every positional argument, preserving order.

    Args:
        args: Positional arguments to serialize.

    Returns:
        One base64-encoded string per argument.

    Raises:
        SerializationError: If any argument cannot be serialized, or if
            iterating ``args`` fails.
    """
    encoded: List[str] = []
    try:
        for item in args:
            encoded.append(serialize_arg(item))
    except SerializationError:
        # Already the right type; pass it through without re-wrapping.
        raise
    except Exception as exc:
        raise SerializationError(f"Failed to serialize args: {exc}") from exc
    return encoded
47
+
48
+
49
def serialize_kwargs(kwargs: dict) -> Dict[str, str]:
    """Serialize every keyword argument value, leaving the keys as they are.

    Args:
        kwargs: Keyword arguments to serialize.

    Returns:
        A dict with the same keys and base64-encoded serialized values.

    Raises:
        SerializationError: If any value cannot be serialized, or if reading
            the items of ``kwargs`` fails.
    """
    encoded: Dict[str, str] = {}
    try:
        for key, value in kwargs.items():
            encoded[key] = serialize_arg(value)
    except SerializationError:
        # Already the right type; pass it through without re-wrapping.
        raise
    except Exception as exc:
        raise SerializationError(f"Failed to serialize kwargs: {exc}") from exc
    return encoded
67
+
68
+
69
def deserialize_arg(arg_b64: str) -> Any:
    """Rebuild one value from its base64-encoded cloudpickle form.

    Args:
        arg_b64: Base64 text produced from a cloudpickle dump.

    Returns:
        The reconstructed value.

    Raises:
        SerializationError: If decoding or unpickling fails for any reason.
    """
    try:
        pickled = base64.b64decode(arg_b64)
        # SECURITY: unpickling can execute arbitrary code contained in the
        # payload, so this must only be fed data from a trusted sender.
        return cloudpickle.loads(pickled)
    except Exception as exc:
        raise SerializationError(f"Failed to deserialize argument: {exc}") from exc
85
+
86
+
87
def deserialize_args(args_b64: List[str]) -> List[Any]:
    """Rebuild every positional argument, preserving order.

    Args:
        args_b64: Base64-encoded serialized arguments.

    Returns:
        The reconstructed values, one per input string.

    Raises:
        SerializationError: If any entry cannot be deserialized, or if
            iterating ``args_b64`` fails.
    """
    decoded: List[Any] = []
    try:
        for item in args_b64:
            decoded.append(deserialize_arg(item))
    except SerializationError:
        # Already the right type; pass it through without re-wrapping.
        raise
    except Exception as exc:
        raise SerializationError(f"Failed to deserialize args: {exc}") from exc
    return decoded
105
+
106
+
107
def deserialize_kwargs(kwargs_b64: Dict[str, str]) -> Dict[str, Any]:
    """Rebuild every keyword argument value, leaving the keys as they are.

    Args:
        kwargs_b64: Dict whose values are base64-encoded serialized data.

    Returns:
        A dict with the same keys and reconstructed values.

    Raises:
        SerializationError: If any value cannot be deserialized, or if
            reading the items of ``kwargs_b64`` fails.
    """
    decoded: Dict[str, Any] = {}
    try:
        for key, value in kwargs_b64.items():
            decoded[key] = deserialize_arg(value)
    except SerializationError:
        # Already the right type; pass it through without re-wrapping.
        raise
    except Exception as exc:
        raise SerializationError(f"Failed to deserialize kwargs: {exc}") from exc
    return decoded