tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97):
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
tetra_rp/core/resources/live_serverless.py
@@ -1,32 +1,169 @@
 # Ship serverless code as you write it. No builds, no deploys — just run.
 import os
+
 from pydantic import model_validator
+
+from .load_balancer_sls_resource import (
+    CpuLoadBalancerSlsResource,
+    LoadBalancerSlsResource,
+)
 from .serverless import ServerlessEndpoint
+from .serverless_cpu import CpuServerlessEndpoint
 
+TETRA_IMAGE_TAG = os.environ.get("TETRA_IMAGE_TAG", "latest")
+TETRA_GPU_IMAGE = os.environ.get(
+    "TETRA_GPU_IMAGE", f"runpod/tetra-rp:{TETRA_IMAGE_TAG}"
+)
+TETRA_CPU_IMAGE = os.environ.get(
+    "TETRA_CPU_IMAGE", f"runpod/tetra-rp-cpu:{TETRA_IMAGE_TAG}"
+)
+TETRA_LB_IMAGE = os.environ.get(
+    "TETRA_LB_IMAGE", f"runpod/tetra-rp-lb:{TETRA_IMAGE_TAG}"
+)
+TETRA_CPU_LB_IMAGE = os.environ.get(
+    "TETRA_CPU_LB_IMAGE", f"runpod/tetra-rp-lb-cpu:{TETRA_IMAGE_TAG}"
+)
 
-TETRA_GPU_IMAGE = os.environ.get("TETRA_GPU_IMAGE", "runpod/tetra-rp:dev")
-TETRA_CPU_IMAGE = os.environ.get("TETRA_CPU_IMAGE", "runpod/tetra-rp-cpu:dev")
 
+class LiveServerlessMixin:
+    """Common mixin for live serverless endpoints that locks the image."""
 
-class LiveServerless(ServerlessEndpoint):
-    @model_validator(mode="before")
-    @classmethod
-    def set_live_serverless_template(cls, data: dict):
-        """Set default templates for Live Serverless. This can't be changed."""
-        # Always set imageName based on instanceIds presence
-        data["imageName"] = (
-            TETRA_CPU_IMAGE if data.get("instanceIds") else TETRA_GPU_IMAGE
-        )
-        return data
+    @property
+    def _live_image(self) -> str:
+        """Override in subclasses to specify the locked image."""
+        raise NotImplementedError("Subclasses must define _live_image")
 
     @property
     def imageName(self):
-        # Lock imageName to always reflect instanceIds
-        return (
-            TETRA_CPU_IMAGE if getattr(self, "instanceIds", None) else TETRA_GPU_IMAGE
-        )
+        # Lock imageName to specific image
+        return self._live_image
 
     @imageName.setter
     def imageName(self, value):
         # Prevent manual setting of imageName
         pass
+
+
+class LiveServerless(LiveServerlessMixin, ServerlessEndpoint):
+    """GPU-only live serverless endpoint."""
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_GPU_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_serverless_template(cls, data: dict):
+        """Set default GPU image for Live Serverless."""
+        data["imageName"] = TETRA_GPU_IMAGE
+        return data
+
+
+class CpuLiveServerless(LiveServerlessMixin, CpuServerlessEndpoint):
+    """CPU-only live serverless endpoint with automatic disk sizing."""
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_CPU_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_serverless_template(cls, data: dict):
+        """Set default CPU image for Live Serverless."""
+        data["imageName"] = TETRA_CPU_IMAGE
+        return data
+
+
+class LiveLoadBalancer(LiveServerlessMixin, LoadBalancerSlsResource):
+    """Live load-balanced endpoint for local development and testing.
+
+    Similar to LiveServerless but for HTTP-based load-balanced endpoints.
+    Enables local testing of @remote decorated functions with LB endpoints
+    before deploying to production.
+
+    Features:
+    - Locks to Tetra LB image (tetra-rp-lb)
+    - Direct HTTP execution (not queue-based)
+    - Local development with flash run
+    - Same @remote decorator pattern as LoadBalancerSlsResource
+
+    Usage:
+        from tetra_rp import LiveLoadBalancer, remote
+
+        api = LiveLoadBalancer(name="api-service")
+
+        @remote(api, method="POST", path="/api/process")
+        async def process_data(x: int, y: int):
+            return {"result": x + y}
+
+        # Test locally
+        result = await process_data(5, 3)
+
+    Local Development Flow:
+        1. Create LiveLoadBalancer with routing
+        2. Decorate functions with @remote(lb_resource, method=..., path=...)
+        3. Run with `flash run` to start local endpoint
+        4. Call functions directly in tests or scripts
+        5. Deploy to production with `flash build` and `flash deploy`
+
+    Note:
+        The endpoint_url is configured by the Flash runtime when the
+        endpoint is deployed locally. For true local testing without
+        deployment, use the functions directly or mock the HTTP layer.
+    """
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_LB_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_lb_template(cls, data: dict):
+        """Set default image for Live Load-Balanced endpoint."""
+        data["imageName"] = TETRA_LB_IMAGE
+        return data
+
+
+class CpuLiveLoadBalancer(LiveServerlessMixin, CpuLoadBalancerSlsResource):
+    """CPU-only live load-balanced endpoint for local development and testing.
+
+    Similar to LiveLoadBalancer but configured for CPU instances with
+    automatic disk sizing and validation.
+
+    Features:
+    - Locks to CPU Tetra LB image (tetra-rp-lb-cpu)
+    - CPU instance support with automatic disk sizing
+    - Direct HTTP execution (not queue-based)
+    - Local development with flash run
+    - Same @remote decorator pattern as CpuLoadBalancerSlsResource
+
+    Usage:
+        from tetra_rp import CpuLiveLoadBalancer, remote
+
+        api = CpuLiveLoadBalancer(name="api-service")
+
+        @remote(api, method="POST", path="/api/process")
+        async def process_data(x: int, y: int):
+            return {"result": x + y}
+
+        # Test locally
+        result = await process_data(5, 3)
+
+    Local Development Flow:
+        1. Create CpuLiveLoadBalancer with routing
+        2. Decorate functions with @remote(lb_resource, method=..., path=...)
+        3. Run with `flash run` to start local endpoint
+        4. Call functions directly in tests or scripts
+        5. Deploy to production with `flash build` and `flash deploy`
+    """
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_CPU_LB_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_cpu_lb_template(cls, data: dict):
+        """Set default CPU image for Live Load-Balanced endpoint."""
+        data["imageName"] = TETRA_CPU_LB_IMAGE
+        return data
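
Net effect of this hunk: each live endpoint class now locks itself to one image, and the tag for all four images is overridable in one place via `TETRA_IMAGE_TAG`, which is read once at import time. A minimal sketch of how that behaves, assuming `LiveServerless` and `CpuLiveServerless` are re-exported from the package root the way `LiveLoadBalancer` is in the docstring above; the `0.24.0` tag is a hypothetical value:

```python
import os

# TETRA_IMAGE_TAG is read when tetra_rp is imported, so set it first.
os.environ["TETRA_IMAGE_TAG"] = "0.24.0"  # hypothetical release tag

from tetra_rp import CpuLiveServerless, LiveServerless  # assumed re-exports

gpu = LiveServerless(name="gpu-worker")
cpu = CpuLiveServerless(name="cpu-worker")

print(gpu.imageName)  # runpod/tetra-rp:0.24.0
print(cpu.imageName)  # runpod/tetra-rp-cpu:0.24.0

# The imageName setter is a deliberate no-op, so this is silently ignored:
gpu.imageName = "someone/else:latest"
print(gpu.imageName)  # still runpod/tetra-rp:0.24.0
```

This replaces the 0.6.0 behavior, where a single `LiveServerless` class guessed CPU vs. GPU from the presence of `instanceIds` and the image tag was hardcoded to `:dev`.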
tetra_rp/core/resources/load_balancer_sls_resource.py (new file)
@@ -0,0 +1,440 @@
+"""
+LoadBalancerSlsResource - Resource type for RunPod Load-Balanced Serverless endpoints.
+
+Load-balanced endpoints expose HTTP servers directly to clients without the queue-based
+processing model of standard serverless endpoints. They're ideal for REST APIs, webhooks,
+and real-time communication patterns.
+
+Key differences from standard serverless (QB):
+- Requests route directly to healthy workers via HTTP
+- No automatic retries (client responsible)
+- Lower latency but less fault tolerance
+- Requires HTTP application, not a function handler
+- Health checks via /ping endpoint
+"""
+
+import asyncio
+import logging
+from typing import List, Optional
+
+from pydantic import model_validator
+
+from tetra_rp.core.utils.http import get_authenticated_httpx_client
+from .constants import ENDPOINT_DOMAIN
+from .cpu import CpuInstanceType
+from .serverless import ServerlessResource, ServerlessType, ServerlessScalerType
+from .serverless_cpu import CpuEndpointMixin
+
+log = logging.getLogger(__name__)
+
+# Configuration constants
+DEFAULT_HEALTH_CHECK_RETRIES = 10
+DEFAULT_HEALTH_CHECK_INTERVAL = 5  # seconds between retries
+DEFAULT_PING_REQUEST_TIMEOUT = (
+    15.0  # seconds (load-balanced workers need time for cold starts)
+)
+HEALTHY_STATUS_CODES = (200, 204)
+
+
+class LoadBalancerSlsResource(ServerlessResource):
+    """
+    Resource configuration for RunPod Load-Balanced Serverless endpoints.
+
+    Load-balanced endpoints expose HTTP servers directly, making them suitable for:
+    - REST APIs
+    - WebSocket servers
+    - Real-time streaming
+    - Custom HTTP protocols
+
+    Configuration example:
+        mothership = LoadBalancerSlsResource(
+            name="mothership",
+            imageName="my-mothership:latest",
+            env={"FLASH_APP": "my_app"},
+            workersMin=1,
+            workersMax=3,
+        )
+        await mothership.deploy()
+    """
+
+    # Override default type to LB
+    type: Optional[ServerlessType] = ServerlessType.LB
+
+    def __init__(self, **data):
+        """Initialize LoadBalancerSlsResource with LB-specific defaults."""
+        # Ensure type is always LB
+        data["type"] = ServerlessType.LB
+
+        # LB endpoints shouldn't use queue-based scaling
+        if "scalerType" not in data:
+            data["scalerType"] = ServerlessScalerType.REQUEST_COUNT
+
+        super().__init__(**data)
+
+    @model_validator(mode="after")
+    def set_serverless_template(self):
+        """Create template from imageName if not provided.
+
+        Must run after sync_input_fields to ensure all input fields are synced.
+        """
+        if not any([self.imageName, self.template, self.templateId]):
+            raise ValueError(
+                "Either imageName, template, or templateId must be provided"
+            )
+
+        if not self.templateId and not self.template:
+            self.template = self._create_new_template()
+        elif self.template:
+            self._configure_existing_template()
+
+        return self
+
+    @property
+    def endpoint_url(self) -> str:
+        """Get the endpoint URL for load-balanced endpoints.
+
+        Load-balanced endpoints use a different URL format than standard
+        serverless endpoints. They use: https://{endpoint_id}.{ENDPOINT_DOMAIN}
+
+        Returns:
+            The endpoint URL for health checks and direct HTTP requests
+
+        Raises:
+            ValueError: If endpoint ID not set
+        """
+        if not self.id:
+            raise ValueError("Endpoint ID not set. Cannot determine endpoint URL.")
+        return f"https://{self.id}.{ENDPOINT_DOMAIN}"
+
+    def _validate_lb_configuration(self) -> None:
+        """
+        Validate LB-specific configuration constraints.
+
+        Raises:
+            ValueError: If configuration violates LB requirements
+        """
+        # LB must use REQUEST_COUNT scaler, not QUEUE_DELAY
+        if self.scalerType == ServerlessScalerType.QUEUE_DELAY:
+            raise ValueError(
+                f"LoadBalancerSlsResource requires REQUEST_COUNT scaler, "
+                f"not {self.scalerType.value}. "
+                "Load-balanced endpoints don't support queue-based scaling."
+            )
+
+        # Type must always be LB
+        if self.type != ServerlessType.LB:
+            raise ValueError(
+                f"LoadBalancerSlsResource type must be LB, got {self.type.value}"
+            )
+
+    async def is_deployed_async(self) -> bool:
+        """
+        Check if LB endpoint is deployed and /ping endpoint is responding.
+
+        For LB endpoints, we verify:
+        1. Endpoint ID exists (created in RunPod)
+        2. /ping endpoint returns 200 or 204
+        3. Endpoint is in healthy state
+
+        Returns:
+            True if endpoint is deployed and healthy, False otherwise
+        """
+        try:
+            if not self.id:
+                return False
+
+            # Use async health check for LB endpoints
+            return await self._check_ping_endpoint()
+
+        except Exception as e:
+            log.debug(f"Error checking {self}: {e}")
+            return False
+
+    async def _check_ping_endpoint(self) -> bool:
+        """
+        Check if /ping endpoint is accessible and healthy.
+
+        RunPod load-balancer endpoints require a /ping endpoint that returns:
+        - 200 OK: Worker is healthy and ready
+        - 204 No Content: Worker is initializing
+        - Other status: Worker is unhealthy
+
+        Returns:
+            True if /ping endpoint responds with 200 or 204
+        """
+        try:
+            if not self.id:
+                return False
+
+            ping_url = f"{self.endpoint_url}/ping"
+
+            async with get_authenticated_httpx_client(
+                timeout=DEFAULT_PING_REQUEST_TIMEOUT
+            ) as client:
+                response = await client.get(ping_url)
+                return response.status_code in HEALTHY_STATUS_CODES
+        except Exception as e:
+            log.debug(f"Ping check failed for {self.name}: {e}")
+            return False
+
+    async def _wait_for_health(
+        self,
+        max_retries: int = DEFAULT_HEALTH_CHECK_RETRIES,
+        retry_interval: int = DEFAULT_HEALTH_CHECK_INTERVAL,
+    ) -> bool:
+        """
+        Poll /ping endpoint until endpoint is healthy or timeout.
+
+        Args:
+            max_retries: Number of health check attempts
+            retry_interval: Seconds between health check attempts
+
+        Returns:
+            True if endpoint became healthy, False if timeout
+
+        Raises:
+            ValueError: If endpoint ID not set
+        """
+        if not self.id:
+            raise ValueError("Cannot wait for health: endpoint not deployed")
+
+        log.info(
+            f"Waiting for LB endpoint {self.name} ({self.id}) to become healthy... "
+            f"(max {max_retries} retries, {retry_interval}s interval)"
+        )
+
+        for attempt in range(max_retries):
+            try:
+                if await self._check_ping_endpoint():
+                    log.info(
+                        f"LB endpoint {self.name} is healthy (attempt {attempt + 1})"
+                    )
+                    return True
+
+                log.debug(
+                    f"Health check attempt {attempt + 1}/{max_retries} - "
+                    f"endpoint not ready yet"
+                )
+
+            except Exception as e:
+                log.debug(f"Health check attempt {attempt + 1} failed: {e}")
+
+            # Wait before next attempt (except on last attempt)
+            if attempt < max_retries - 1:
+                await asyncio.sleep(retry_interval)
+
+        log.error(
+            f"LB endpoint {self.name} failed to become healthy after "
+            f"{max_retries} attempts"
+        )
+        return False
+
+    async def _do_deploy(self) -> "LoadBalancerSlsResource":
+        """
+        Deploy LB endpoint and wait for health.
+
+        Deployment flow:
+        1. Validate LB configuration
+        2. Call parent deploy (creates endpoint in RunPod)
+        3. Poll /ping endpoint until healthy or timeout
+        4. Return deployed resource
+
+        Returns:
+            Deployed LoadBalancerSlsResource instance
+
+        Raises:
+            ValueError: If LB configuration invalid or deployment fails
+            TimeoutError: If /ping endpoint doesn't respond in time
+        """
+        # Validate before deploying
+        self._validate_lb_configuration()
+
+        # Check if already deployed
+        if self.is_deployed():
+            log.debug(f"{self} already deployed")
+            return self
+
+        try:
+            # Mark this endpoint as a mothership (triggers auto-provisioning on boot)
+            if self.env is None:
+                self.env = {}
+            self.env["FLASH_IS_MOTHERSHIP"] = "true"
+
+            # Call parent deploy (creates endpoint via RunPod API)
+            log.info(f"Deploying LB endpoint {self.name}...")
+            deployed = await super()._do_deploy()
+
+            # Wait for /ping endpoint to become available
+            timeout_seconds = (
+                DEFAULT_HEALTH_CHECK_RETRIES * DEFAULT_HEALTH_CHECK_INTERVAL
+            )
+            log.info(
+                f"Endpoint created, waiting for /ping to respond "
+                f"({timeout_seconds}s timeout)..."
+            )
+
+            healthy = await self._wait_for_health(
+                max_retries=DEFAULT_HEALTH_CHECK_RETRIES,
+                retry_interval=DEFAULT_HEALTH_CHECK_INTERVAL,
+            )
+
+            if not healthy:
+                raise TimeoutError(
+                    f"LB endpoint {self.name} ({deployed.id}) failed to become "
+                    f"healthy within {timeout_seconds}s"
+                )
+
+            log.info(f"LB endpoint {self.name} ({deployed.id}) deployed and healthy")
+            return deployed
+
+        except Exception as e:
+            log.error(f"Failed to deploy LB endpoint {self.name}: {e}")
+            raise
+
+    def is_deployed(self) -> bool:
+        """
+        Override is_deployed to use async health check.
+
+        Note: This is a synchronous wrapper around the async health check.
+        Prefer is_deployed_async() in async contexts.
+
+        Returns:
+            True if endpoint is deployed and /ping responds
+        """
+        if not self.id:
+            return False
+
+        try:
+            # Try the RunPod SDK health check (works for basic connectivity)
+            response = self.endpoint.health()
+            return response is not None
+        except Exception as e:
+            log.debug(f"RunPod health check failed for {self.name}: {e}")
+            return False
+
+
+class CpuLoadBalancerSlsResource(CpuEndpointMixin, LoadBalancerSlsResource):
+    """CPU-only load-balanced endpoint with automatic disk sizing.
+
+    Similar to LoadBalancerSlsResource but configured for CPU instances
+    instead of GPUs. Inherits CPU-specific functionality from CpuEndpointMixin
+    for automatic disk sizing and validation.
+
+    Defaults to CPU_ANY instance type if not specified.
+
+    Implementation Note - Field List Coupling:
+        This class overrides config_hash() with a CPU-specific field list instead of
+        inheriting the base ServerlessResource implementation. This is intentional to
+        exclude GPU fields while maintaining drift detection for CPU-specific fields.
+
+        When adding new fields to ServerlessResource:
+        1. Evaluate if the field applies to CPU endpoints
+        2. If yes, add it to the cpu_fields set in config_hash()
+        3. If it's API-assigned, verify it's in ServerlessResource.RUNTIME_FIELDS
+        4. Test drift detection with new field changes
+
+    Configuration example:
+        mothership = CpuLoadBalancerSlsResource(
+            name="mothership",
+            imageName="my-mothership:latest",
+            env={"FLASH_APP": "my_app"},
+            instanceIds=[CpuInstanceType.CPU3G_1_4],
+            workersMin=1,
+            workersMax=3,
+        )
+        await mothership.deploy()
+    """
+
+    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
+
+    # CPU endpoints exclude GPU-specific fields from API payload
+    # This prevents the RunPod GraphQL API from rejecting CPU endpoints with GPU-specific fields
+    _input_only = {
+        "id",
+        "cudaVersions",
+        "datacenter",
+        "env",
+        "flashboot",
+        "flashEnvironmentId",
+        "gpus",
+        "gpuIds",
+        "gpuCount",
+        "allowedCudaVersions",
+        "imageName",
+        "networkVolume",
+    }
+
+    def _setup_cpu_template(self) -> None:
+        """Setup template with CPU-appropriate disk sizing."""
+        if not any([self.imageName, self.template, self.templateId]):
+            raise ValueError(
+                "Either imageName, template, or templateId must be provided"
+            )
+
+        if not self.templateId and not self.template:
+            self.template = self._create_new_template()
+        elif self.template:
+            self._configure_existing_template()
+
+        # Apply CPU-specific disk sizing
+        if self.template:
+            self._apply_cpu_disk_sizing(self.template)
+
+    @model_validator(mode="after")
+    def set_serverless_template(self):
+        """Create template from imageName if not provided.
+
+        Overrides parent to call _sync_cpu_fields first to ensure GPU defaults
+        are overridden for CPU endpoints.
+        """
+        # Sync CPU-specific fields first (override GPU defaults)
+        self._sync_cpu_fields()
+
+        # Setup template with validation and creation
+        self._setup_cpu_template()
+
+        return self
+
+    @property
+    def config_hash(self) -> str:
+        """Get hash excluding GPU fields and runtime fields.
+
+        CPU load-balanced endpoints only hash CPU-relevant fields:
+        - Instance types (instanceIds)
+        - Scaling parameters (workers, scaler)
+        - Deployment type (type, locations)
+        - Environment variables (env)
+
+        Excludes:
+        - GPU fields (to avoid false drift)
+        - Runtime fields (template, templateId, aiKey, etc.)
+        """
+        import hashlib
+        import json
+
+        # CPU-relevant fields for drift detection
+        cpu_fields = {
+            "datacenter",
+            "env",
+            "flashboot",
+            "flashEnvironmentId",
+            "imageName",
+            "networkVolume",
+            "instanceIds",  # CPU-specific
+            "workersMin",  # Scaling
+            "workersMax",
+            "scalerType",
+            "scalerValue",
+            "type",  # LB vs QB
+            "idleTimeout",
+            "executionTimeoutMs",
+            "locations",
+        }
+
+        config_dict = self.model_dump(
+            exclude_none=True, include=cpu_fields, mode="json"
+        )
+
+        config_str = json.dumps(config_dict, sort_keys=True)
+        hash_obj = hashlib.md5(f"{self.__class__.__name__}:{config_str}".encode())
+        return hash_obj.hexdigest()
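
The new file gives LB endpoints a create-then-probe lifecycle: `_do_deploy` validates the LB constraints, creates the endpoint through the parent class, then polls `https://{id}.{ENDPOINT_DOMAIN}/ping` up to 10 times at 5-second intervals (a 50-second budget) before reporting success. A caller-side sketch, adapted from the configuration example in the docstring above; it assumes the public `deploy()` on the base resource dispatches to `_do_deploy`, as that example implies, and that RunPod credentials are already configured in the environment:

```python
import asyncio

from tetra_rp.core.resources.load_balancer_sls_resource import (
    LoadBalancerSlsResource,
)


async def main() -> None:
    api = LoadBalancerSlsResource(
        name="mothership",
        imageName="my-mothership:latest",  # image must serve HTTP and answer /ping
        env={"FLASH_APP": "my_app"},
        workersMin=1,
        workersMax=3,
    )
    try:
        # Creates the endpoint, then polls /ping (10 tries x 5 s budget).
        deployed = await api.deploy()
        print(f"Healthy at {deployed.endpoint_url}")
    except TimeoutError:
        # _do_deploy raises TimeoutError when /ping never returns 200/204 in time.
        print("Endpoint was created but never became healthy")


asyncio.run(main())
```

Note the trade-off stated in the module docstring: unlike queue-based endpoints, a load-balanced endpoint does no automatic retries, so callers of the deployed HTTP service are responsible for their own retry policy.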