tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/__init__.py +109 -19
- tetra_rp/cli/commands/__init__.py +1 -0
- tetra_rp/cli/commands/apps.py +143 -0
- tetra_rp/cli/commands/build.py +1082 -0
- tetra_rp/cli/commands/build_utils/__init__.py +1 -0
- tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
- tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
- tetra_rp/cli/commands/build_utils/manifest.py +430 -0
- tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
- tetra_rp/cli/commands/build_utils/scanner.py +596 -0
- tetra_rp/cli/commands/deploy.py +580 -0
- tetra_rp/cli/commands/init.py +123 -0
- tetra_rp/cli/commands/resource.py +108 -0
- tetra_rp/cli/commands/run.py +296 -0
- tetra_rp/cli/commands/test_mothership.py +458 -0
- tetra_rp/cli/commands/undeploy.py +533 -0
- tetra_rp/cli/main.py +97 -0
- tetra_rp/cli/utils/__init__.py +1 -0
- tetra_rp/cli/utils/app.py +15 -0
- tetra_rp/cli/utils/conda.py +127 -0
- tetra_rp/cli/utils/deployment.py +530 -0
- tetra_rp/cli/utils/ignore.py +143 -0
- tetra_rp/cli/utils/skeleton.py +184 -0
- tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
- tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
- tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
- tetra_rp/cli/utils/skeleton_template/README.md +263 -0
- tetra_rp/cli/utils/skeleton_template/main.py +44 -0
- tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
- tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
- tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
- tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
- tetra_rp/client.py +136 -33
- tetra_rp/config.py +29 -0
- tetra_rp/core/api/runpod.py +591 -39
- tetra_rp/core/deployment.py +232 -0
- tetra_rp/core/discovery.py +425 -0
- tetra_rp/core/exceptions.py +50 -0
- tetra_rp/core/resources/__init__.py +27 -9
- tetra_rp/core/resources/app.py +738 -0
- tetra_rp/core/resources/base.py +139 -4
- tetra_rp/core/resources/constants.py +21 -0
- tetra_rp/core/resources/cpu.py +115 -13
- tetra_rp/core/resources/gpu.py +182 -16
- tetra_rp/core/resources/live_serverless.py +153 -16
- tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
- tetra_rp/core/resources/network_volume.py +126 -31
- tetra_rp/core/resources/resource_manager.py +436 -35
- tetra_rp/core/resources/serverless.py +537 -120
- tetra_rp/core/resources/serverless_cpu.py +201 -0
- tetra_rp/core/resources/template.py +1 -59
- tetra_rp/core/utils/constants.py +10 -0
- tetra_rp/core/utils/file_lock.py +260 -0
- tetra_rp/core/utils/http.py +67 -0
- tetra_rp/core/utils/lru_cache.py +75 -0
- tetra_rp/core/utils/singleton.py +36 -1
- tetra_rp/core/validation.py +44 -0
- tetra_rp/execute_class.py +301 -0
- tetra_rp/protos/remote_execution.py +98 -9
- tetra_rp/runtime/__init__.py +1 -0
- tetra_rp/runtime/circuit_breaker.py +274 -0
- tetra_rp/runtime/config.py +12 -0
- tetra_rp/runtime/exceptions.py +49 -0
- tetra_rp/runtime/generic_handler.py +206 -0
- tetra_rp/runtime/lb_handler.py +189 -0
- tetra_rp/runtime/load_balancer.py +160 -0
- tetra_rp/runtime/manifest_fetcher.py +192 -0
- tetra_rp/runtime/metrics.py +325 -0
- tetra_rp/runtime/models.py +73 -0
- tetra_rp/runtime/mothership_provisioner.py +512 -0
- tetra_rp/runtime/production_wrapper.py +266 -0
- tetra_rp/runtime/reliability_config.py +149 -0
- tetra_rp/runtime/retry_manager.py +118 -0
- tetra_rp/runtime/serialization.py +124 -0
- tetra_rp/runtime/service_registry.py +346 -0
- tetra_rp/runtime/state_manager_client.py +248 -0
- tetra_rp/stubs/live_serverless.py +35 -17
- tetra_rp/stubs/load_balancer_sls.py +357 -0
- tetra_rp/stubs/registry.py +145 -19
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
- tetra_rp-0.24.0.dist-info/RECORD +99 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
- tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
- tetra_rp/core/pool/cluster_manager.py +0 -177
- tetra_rp/core/pool/dataclass.py +0 -18
- tetra_rp/core/pool/ex.py +0 -38
- tetra_rp/core/pool/job.py +0 -22
- tetra_rp/core/pool/worker.py +0 -19
- tetra_rp/core/resources/utils.py +0 -50
- tetra_rp/core/utils/json.py +0 -33
- tetra_rp-0.6.0.dist-info/RECORD +0 -39
- /tetra_rp/{core/pool → cli}/__init__.py +0 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -1,32 +1,169 @@
|
|
|
1
1
|
# Ship serverless code as you write it. No builds, no deploys — just run.
|
|
2
2
|
import os
|
|
3
|
+
|
|
3
4
|
from pydantic import model_validator
|
|
5
|
+
|
|
6
|
+
from .load_balancer_sls_resource import (
|
|
7
|
+
CpuLoadBalancerSlsResource,
|
|
8
|
+
LoadBalancerSlsResource,
|
|
9
|
+
)
|
|
4
10
|
from .serverless import ServerlessEndpoint
|
|
11
|
+
from .serverless_cpu import CpuServerlessEndpoint
|
|
5
12
|
|
|
13
|
+
# Tag appended to every default Tetra image name below.
# Override with the TETRA_IMAGE_TAG environment variable.
TETRA_IMAGE_TAG = os.environ.get("TETRA_IMAGE_TAG", "latest")


def _image_from_env(env_var: str, repository: str) -> str:
    """Return the image named by *env_var*, else ``<repository>:<TETRA_IMAGE_TAG>``."""
    return os.environ.get(env_var, f"{repository}:{TETRA_IMAGE_TAG}")


# Each image can be overridden individually through its own environment
# variable; otherwise it falls back to the repository at TETRA_IMAGE_TAG.
TETRA_GPU_IMAGE = _image_from_env("TETRA_GPU_IMAGE", "runpod/tetra-rp")
TETRA_CPU_IMAGE = _image_from_env("TETRA_CPU_IMAGE", "runpod/tetra-rp-cpu")
TETRA_LB_IMAGE = _image_from_env("TETRA_LB_IMAGE", "runpod/tetra-rp-lb")
TETRA_CPU_LB_IMAGE = _image_from_env("TETRA_CPU_LB_IMAGE", "runpod/tetra-rp-lb-cpu")
|
|
6
26
|
|
|
7
|
-
TETRA_GPU_IMAGE = os.environ.get("TETRA_GPU_IMAGE", "runpod/tetra-rp:dev")
|
|
8
|
-
TETRA_CPU_IMAGE = os.environ.get("TETRA_CPU_IMAGE", "runpod/tetra-rp-cpu:dev")
|
|
9
27
|
|
|
28
|
+
class LiveServerlessMixin:
|
|
29
|
+
"""Common mixin for live serverless endpoints that locks the image."""
|
|
10
30
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
"""Set default templates for Live Serverless. This can't be changed."""
|
|
16
|
-
# Always set imageName based on instanceIds presence
|
|
17
|
-
data["imageName"] = (
|
|
18
|
-
TETRA_CPU_IMAGE if data.get("instanceIds") else TETRA_GPU_IMAGE
|
|
19
|
-
)
|
|
20
|
-
return data
|
|
31
|
+
@property
|
|
32
|
+
def _live_image(self) -> str:
|
|
33
|
+
"""Override in subclasses to specify the locked image."""
|
|
34
|
+
raise NotImplementedError("Subclasses must define _live_image")
|
|
21
35
|
|
|
22
36
|
@property
|
|
23
37
|
def imageName(self):
|
|
24
|
-
# Lock imageName to
|
|
25
|
-
return
|
|
26
|
-
TETRA_CPU_IMAGE if getattr(self, "instanceIds", None) else TETRA_GPU_IMAGE
|
|
27
|
-
)
|
|
38
|
+
# Lock imageName to specific image
|
|
39
|
+
return self._live_image
|
|
28
40
|
|
|
29
41
|
@imageName.setter
|
|
30
42
|
def imageName(self, value):
|
|
31
43
|
# Prevent manual setting of imageName
|
|
32
44
|
pass
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class LiveServerless(LiveServerlessMixin, ServerlessEndpoint):
    """GPU-only live serverless endpoint.

    The image is pinned to ``TETRA_GPU_IMAGE``: the validator below forces it
    into the input data, and ``LiveServerlessMixin.imageName`` always returns
    ``_live_image`` while ignoring assignments.
    """

    @property
    def _live_image(self) -> str:
        """Return the GPU image this endpoint is locked to."""
        return TETRA_GPU_IMAGE

    @model_validator(mode="before")
    @classmethod
    def set_live_serverless_template(cls, data: dict):
        """Set default GPU image for Live Serverless.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            A copy of ``data`` with ``imageName`` forced to
            ``TETRA_GPU_IMAGE``. Non-dict input is returned untouched so
            pydantic can handle (or reject) it itself.
        """
        # "before" validators receive the raw input, which is not guaranteed
        # to be a dict; item assignment on anything else would raise TypeError.
        if not isinstance(data, dict):
            return data
        # Build a new mapping instead of writing into the caller's dict
        # (e.g. one passed to model_validate()).
        return {**data, "imageName": TETRA_GPU_IMAGE}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class CpuLiveServerless(LiveServerlessMixin, CpuServerlessEndpoint):
    """CPU-only live serverless endpoint with automatic disk sizing.

    The image is pinned to ``TETRA_CPU_IMAGE``: the validator below forces it
    into the input data, and ``LiveServerlessMixin.imageName`` always returns
    ``_live_image`` while ignoring assignments.
    """

    @property
    def _live_image(self) -> str:
        """Return the CPU image this endpoint is locked to."""
        return TETRA_CPU_IMAGE

    @model_validator(mode="before")
    @classmethod
    def set_live_serverless_template(cls, data: dict):
        """Set default CPU image for Live Serverless.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            A copy of ``data`` with ``imageName`` forced to
            ``TETRA_CPU_IMAGE``. Non-dict input is returned untouched so
            pydantic can handle (or reject) it itself.
        """
        # "before" validators receive the raw input, which is not guaranteed
        # to be a dict; item assignment on anything else would raise TypeError.
        if not isinstance(data, dict):
            return data
        # Build a new mapping instead of writing into the caller's dict.
        return {**data, "imageName": TETRA_CPU_IMAGE}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class LiveLoadBalancer(LiveServerlessMixin, LoadBalancerSlsResource):
    """Live load-balanced endpoint for local development and testing.

    Similar to LiveServerless but for HTTP-based load-balanced endpoints.
    Enables local testing of @remote decorated functions with LB endpoints
    before deploying to production.

    Features:
        - Locks to Tetra LB image (tetra-rp-lb)
        - Direct HTTP execution (not queue-based)
        - Local development with flash run
        - Same @remote decorator pattern as LoadBalancerSlsResource

    Usage:
        from tetra_rp import LiveLoadBalancer, remote

        api = LiveLoadBalancer(name="api-service")

        @remote(api, method="POST", path="/api/process")
        async def process_data(x: int, y: int):
            return {"result": x + y}

        # Test locally
        result = await process_data(5, 3)

    Local Development Flow:
        1. Create LiveLoadBalancer with routing
        2. Decorate functions with @remote(lb_resource, method=..., path=...)
        3. Run with `flash run` to start local endpoint
        4. Call functions directly in tests or scripts
        5. Deploy to production with `flash build` and `flash deploy`

    Note:
        The endpoint_url is configured by the Flash runtime when the
        endpoint is deployed locally. For true local testing without
        deployment, use the functions directly or mock the HTTP layer.
    """

    @property
    def _live_image(self) -> str:
        """Return the load-balancer image this endpoint is locked to."""
        return TETRA_LB_IMAGE

    @model_validator(mode="before")
    @classmethod
    def set_live_lb_template(cls, data: dict):
        """Set default image for Live Load-Balanced endpoint.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            A copy of ``data`` with ``imageName`` forced to
            ``TETRA_LB_IMAGE``. Non-dict input is returned untouched so
            pydantic can handle (or reject) it itself.
        """
        # "before" validators receive the raw input, which is not guaranteed
        # to be a dict; item assignment on anything else would raise TypeError.
        if not isinstance(data, dict):
            return data
        # Build a new mapping instead of writing into the caller's dict.
        return {**data, "imageName": TETRA_LB_IMAGE}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class CpuLiveLoadBalancer(LiveServerlessMixin, CpuLoadBalancerSlsResource):
    """CPU-only live load-balanced endpoint for local development and testing.

    Similar to LiveLoadBalancer but configured for CPU instances with
    automatic disk sizing and validation.

    Features:
        - Locks to CPU Tetra LB image (tetra-rp-lb-cpu)
        - CPU instance support with automatic disk sizing
        - Direct HTTP execution (not queue-based)
        - Local development with flash run
        - Same @remote decorator pattern as CpuLoadBalancerSlsResource

    Usage:
        from tetra_rp import CpuLiveLoadBalancer, remote

        api = CpuLiveLoadBalancer(name="api-service")

        @remote(api, method="POST", path="/api/process")
        async def process_data(x: int, y: int):
            return {"result": x + y}

        # Test locally
        result = await process_data(5, 3)

    Local Development Flow:
        1. Create CpuLiveLoadBalancer with routing
        2. Decorate functions with @remote(lb_resource, method=..., path=...)
        3. Run with `flash run` to start local endpoint
        4. Call functions directly in tests or scripts
        5. Deploy to production with `flash build` and `flash deploy`
    """

    @property
    def _live_image(self) -> str:
        """Return the CPU load-balancer image this endpoint is locked to."""
        return TETRA_CPU_LB_IMAGE

    @model_validator(mode="before")
    @classmethod
    def set_live_cpu_lb_template(cls, data: dict):
        """Set default CPU image for Live Load-Balanced endpoint.

        Args:
            data: Raw input handed to the model before field validation.

        Returns:
            A copy of ``data`` with ``imageName`` forced to
            ``TETRA_CPU_LB_IMAGE``. Non-dict input is returned untouched so
            pydantic can handle (or reject) it itself.
        """
        # "before" validators receive the raw input, which is not guaranteed
        # to be a dict; item assignment on anything else would raise TypeError.
        if not isinstance(data, dict):
            return data
        # Build a new mapping instead of writing into the caller's dict.
        return {**data, "imageName": TETRA_CPU_LB_IMAGE}
|
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LoadBalancerSlsResource - Resource type for RunPod Load-Balanced Serverless endpoints.
|
|
3
|
+
|
|
4
|
+
Load-balanced endpoints expose HTTP servers directly to clients without the queue-based
|
|
5
|
+
processing model of standard serverless endpoints. They're ideal for REST APIs, webhooks,
|
|
6
|
+
and real-time communication patterns.
|
|
7
|
+
|
|
8
|
+
Key differences from standard serverless (QB):
|
|
9
|
+
- Requests route directly to healthy workers via HTTP
|
|
10
|
+
- No automatic retries (client responsible)
|
|
11
|
+
- Lower latency but less fault tolerance
|
|
12
|
+
- Requires HTTP application, not a function handler
|
|
13
|
+
- Health checks via /ping endpoint
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
from typing import List, Optional
|
|
19
|
+
|
|
20
|
+
from pydantic import model_validator
|
|
21
|
+
|
|
22
|
+
from tetra_rp.core.utils.http import get_authenticated_httpx_client
|
|
23
|
+
from .constants import ENDPOINT_DOMAIN
|
|
24
|
+
from .cpu import CpuInstanceType
|
|
25
|
+
from .serverless import ServerlessResource, ServerlessType, ServerlessScalerType
|
|
26
|
+
from .serverless_cpu import CpuEndpointMixin
|
|
27
|
+
|
|
28
|
+
log = logging.getLogger(__name__)

# Health-check tuning used while waiting for a load-balanced endpoint.
# Number of /ping polling attempts made by default.
DEFAULT_HEALTH_CHECK_RETRIES = 10
# Seconds to sleep between two polling attempts.
DEFAULT_HEALTH_CHECK_INTERVAL = 5
# Per-request timeout in seconds (load-balanced workers need time for cold starts).
DEFAULT_PING_REQUEST_TIMEOUT = 15.0
# HTTP status codes from /ping that count as a healthy endpoint.
HEALTHY_STATUS_CODES = (200, 204)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class LoadBalancerSlsResource(ServerlessResource):
    """
    Resource configuration for RunPod Load-Balanced Serverless endpoints.

    Load-balanced endpoints expose HTTP servers directly, making them suitable for:
    - REST APIs
    - WebSocket servers
    - Real-time streaming
    - Custom HTTP protocols

    Configuration example:
        mothership = LoadBalancerSlsResource(
            name="mothership",
            imageName="my-mothership:latest",
            env={"FLASH_APP": "my_app"},
            workersMin=1,
            workersMax=3,
        )
        await mothership.deploy()
    """

    # Override default type to LB
    type: Optional[ServerlessType] = ServerlessType.LB

    def __init__(self, **data):
        """Initialize LoadBalancerSlsResource with LB-specific defaults.

        Args:
            **data: Field values for the resource. ``type`` is always
                overwritten with ``ServerlessType.LB``; ``scalerType`` falls
                back to ``REQUEST_COUNT`` only when the caller did not pass it.
        """
        # Ensure type is always LB, whatever the caller supplied
        data["type"] = ServerlessType.LB

        # LB endpoints shouldn't use queue-based scaling
        data.setdefault("scalerType", ServerlessScalerType.REQUEST_COUNT)

        super().__init__(**data)

    @model_validator(mode="after")
    def set_serverless_template(self):
        """Create template from imageName if not provided.

        Must run after sync_input_fields to ensure all input fields are synced.

        Returns:
            This instance, with ``template`` created or configured.

        Raises:
            ValueError: If none of imageName, template or templateId is set.
        """
        if not any([self.imageName, self.template, self.templateId]):
            raise ValueError(
                "Either imageName, template, or templateId must be provided"
            )

        if not self.templateId and not self.template:
            self.template = self._create_new_template()
        elif self.template:
            self._configure_existing_template()

        return self

    @property
    def endpoint_url(self) -> str:
        """Get the endpoint URL for load-balanced endpoints.

        Load-balanced endpoints use a different URL format than standard
        serverless endpoints. They use: https://{endpoint_id}.{ENDPOINT_DOMAIN}

        Returns:
            The endpoint URL for health checks and direct HTTP requests

        Raises:
            ValueError: If endpoint ID not set
        """
        if not self.id:
            raise ValueError("Endpoint ID not set. Cannot determine endpoint URL.")
        return f"https://{self.id}.{ENDPOINT_DOMAIN}"

    def _validate_lb_configuration(self) -> None:
        """
        Validate LB-specific configuration constraints.

        Raises:
            ValueError: If configuration violates LB requirements
        """
        # LB must use REQUEST_COUNT scaler, not QUEUE_DELAY
        if self.scalerType == ServerlessScalerType.QUEUE_DELAY:
            raise ValueError(
                f"LoadBalancerSlsResource requires REQUEST_COUNT scaler, "
                f"not {self.scalerType.value}. "
                "Load-balanced endpoints don't support queue-based scaling."
            )

        # Type must always be LB
        if self.type != ServerlessType.LB:
            # ``type`` is Optional: fall back to the raw value so a None type
            # still produces the documented ValueError, not AttributeError.
            actual_type = getattr(self.type, "value", self.type)
            raise ValueError(
                f"LoadBalancerSlsResource type must be LB, got {actual_type}"
            )

    async def is_deployed_async(self) -> bool:
        """
        Check if LB endpoint is deployed and /ping endpoint is responding.

        The endpoint counts as deployed when it has an ID and its /ping
        endpoint answers with one of ``HEALTHY_STATUS_CODES``.

        Returns:
            True if endpoint is deployed and healthy, False otherwise
            (including when the check itself raises).
        """
        try:
            if not self.id:
                return False

            # Use async health check for LB endpoints
            return await self._check_ping_endpoint()

        except Exception as e:
            log.debug(f"Error checking {self}: {e}")
            return False

    async def _check_ping_endpoint(self) -> bool:
        """
        Check if /ping endpoint is accessible and healthy.

        RunPod load-balancer endpoints require a /ping endpoint that returns:
        - 200 OK: Worker is healthy and ready
        - 204 No Content: Worker is initializing
        - Other status: Worker is unhealthy

        NOTE(review): 204 is described as "initializing" yet is accepted as
        healthy through HEALTHY_STATUS_CODES; confirm this is intended.

        Returns:
            True if /ping endpoint responds with 200 or 204; False when the
            endpoint has no ID or the request fails for any reason.
        """
        try:
            if not self.id:
                return False

            ping_url = f"{self.endpoint_url}/ping"

            async with get_authenticated_httpx_client(
                timeout=DEFAULT_PING_REQUEST_TIMEOUT
            ) as client:
                response = await client.get(ping_url)
                return response.status_code in HEALTHY_STATUS_CODES
        except Exception as e:
            # Best-effort probe: any failure simply means "not healthy yet".
            log.debug(f"Ping check failed for {self.name}: {e}")
            return False

    async def _wait_for_health(
        self,
        max_retries: int = DEFAULT_HEALTH_CHECK_RETRIES,
        retry_interval: int = DEFAULT_HEALTH_CHECK_INTERVAL,
    ) -> bool:
        """
        Poll /ping endpoint until endpoint is healthy or timeout.

        Args:
            max_retries: Number of health check attempts
            retry_interval: Seconds between health check attempts

        Returns:
            True if endpoint became healthy, False if timeout

        Raises:
            ValueError: If endpoint ID not set
        """
        if not self.id:
            raise ValueError("Cannot wait for health: endpoint not deployed")

        log.info(
            f"Waiting for LB endpoint {self.name} ({self.id}) to become healthy... "
            f"(max {max_retries} retries, {retry_interval}s interval)"
        )

        for attempt in range(max_retries):
            try:
                if await self._check_ping_endpoint():
                    log.info(
                        f"LB endpoint {self.name} is healthy (attempt {attempt + 1})"
                    )
                    return True

                log.debug(
                    f"Health check attempt {attempt + 1}/{max_retries} - "
                    f"endpoint not ready yet"
                )

            except Exception as e:
                log.debug(f"Health check attempt {attempt + 1} failed: {e}")

            # Wait before next attempt (except on last attempt)
            if attempt < max_retries - 1:
                await asyncio.sleep(retry_interval)

        log.error(
            f"LB endpoint {self.name} failed to become healthy after "
            f"{max_retries} attempts"
        )
        return False

    async def _do_deploy(self) -> "LoadBalancerSlsResource":
        """
        Deploy LB endpoint and wait for health.

        Deployment flow:
        1. Validate LB configuration
        2. Return early if the endpoint is already deployed
        3. Set FLASH_IS_MOTHERSHIP=true in env and call parent deploy
        4. Poll /ping endpoint until healthy or timeout
        5. Return deployed resource

        Returns:
            Deployed LoadBalancerSlsResource instance

        Raises:
            ValueError: If LB configuration invalid or deployment fails
            TimeoutError: If /ping endpoint doesn't respond in time
        """
        # Validate before deploying
        self._validate_lb_configuration()

        # Check if already deployed
        if self.is_deployed():
            log.debug(f"{self} already deployed")
            return self

        try:
            # Mark this endpoint as a mothership (triggers auto-provisioning on boot)
            if self.env is None:
                self.env = {}
            self.env["FLASH_IS_MOTHERSHIP"] = "true"

            # Call parent deploy (creates endpoint via RunPod API)
            log.info(f"Deploying LB endpoint {self.name}...")
            deployed = await super()._do_deploy()

            # Wait for /ping endpoint to become available
            timeout_seconds = (
                DEFAULT_HEALTH_CHECK_RETRIES * DEFAULT_HEALTH_CHECK_INTERVAL
            )
            log.info(
                f"Endpoint created, waiting for /ping to respond "
                f"({timeout_seconds}s timeout)..."
            )

            # The parent may hand back a fresh instance carrying the
            # API-assigned id while ``self.id`` stays unset; polling ``self``
            # would then raise ValueError. Poll whichever object knows the id.
            health_target = (
                deployed
                if isinstance(deployed, LoadBalancerSlsResource) and deployed.id
                else self
            )
            healthy = await health_target._wait_for_health(
                max_retries=DEFAULT_HEALTH_CHECK_RETRIES,
                retry_interval=DEFAULT_HEALTH_CHECK_INTERVAL,
            )

            if not healthy:
                raise TimeoutError(
                    f"LB endpoint {self.name} ({deployed.id}) failed to become "
                    f"healthy within {timeout_seconds}s"
                )

            log.info(f"LB endpoint {self.name} ({deployed.id}) deployed and healthy")
            return deployed

        except Exception as e:
            # Log for visibility, then let the caller see the original error.
            log.error(f"Failed to deploy LB endpoint {self.name}: {e}")
            raise

    def is_deployed(self) -> bool:
        """
        Synchronously check whether the endpoint is deployed.

        Unlike is_deployed_async(), this does not probe /ping: it calls the
        RunPod SDK health check on ``self.endpoint`` and treats any non-None
        response as deployed.

        Returns:
            True if the endpoint has an ID and the SDK health call returns a
            response; False if there is no ID or the call raises.
        """
        if not self.id:
            return False

        try:
            # Try the RunPod SDK health check (works for basic connectivity)
            response = self.endpoint.health()
            return response is not None
        except Exception as e:
            log.debug(f"RunPod health check failed for {self.name}: {e}")
            return False
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class CpuLoadBalancerSlsResource(CpuEndpointMixin, LoadBalancerSlsResource):
    """CPU-only load-balanced endpoint with automatic disk sizing.

    Similar to LoadBalancerSlsResource but configured for CPU instances
    instead of GPUs. Inherits CPU-specific functionality from CpuEndpointMixin
    for automatic disk sizing and validation.

    ``instanceIds`` defaults to ``[CpuInstanceType.CPU3G_2_8]`` if not specified.

    Implementation Note - Field List Coupling:
        This class overrides config_hash() with a CPU-specific field list instead of
        inheriting the base ServerlessResource implementation. This is intentional to
        exclude GPU fields while maintaining drift detection for CPU-specific fields.

        When adding new fields to ServerlessResource:
        1. Evaluate if the field applies to CPU endpoints
        2. If yes, add it to the cpu_fields set in config_hash()
        3. If it's API-assigned, verify it's in ServerlessResource.RUNTIME_FIELDS
        4. Test drift detection with new field changes

    Configuration example:
        mothership = CpuLoadBalancerSlsResource(
            name="mothership",
            imageName="my-mothership:latest",
            env={"FLASH_APP": "my_app"},
            instanceIds=[CpuInstanceType.CPU3G_1_4],
            workersMin=1,
            workersMax=3,
        )
        await mothership.deploy()
    """

    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]

    # CPU endpoints exclude GPU-specific fields from API payload
    # This prevents the RunPod GraphQL API from rejecting CPU endpoints with GPU-specific fields
    _input_only = {
        "id",
        "cudaVersions",
        "datacenter",
        "env",
        "flashboot",
        "flashEnvironmentId",
        "gpus",
        "gpuIds",
        "gpuCount",
        "allowedCudaVersions",
        "imageName",
        "networkVolume",
    }

    def _setup_cpu_template(self) -> None:
        """Setup template with CPU-appropriate disk sizing.

        Raises:
            ValueError: If none of imageName, template or templateId is set.
        """
        if not any([self.imageName, self.template, self.templateId]):
            raise ValueError(
                "Either imageName, template, or templateId must be provided"
            )

        if not self.templateId and not self.template:
            self.template = self._create_new_template()
        elif self.template:
            self._configure_existing_template()

        # Apply CPU-specific disk sizing (skipped when only templateId is set,
        # because there is no template object to adjust)
        if self.template:
            self._apply_cpu_disk_sizing(self.template)

    @model_validator(mode="after")
    def set_serverless_template(self):
        """Create template from imageName if not provided.

        Overrides parent to call _sync_cpu_fields first to ensure GPU defaults
        are overridden for CPU endpoints.

        Returns:
            This instance, after CPU field sync and template setup.
        """
        # Sync CPU-specific fields first (override GPU defaults)
        self._sync_cpu_fields()

        # Setup template with validation and creation
        self._setup_cpu_template()

        return self

    @property
    def config_hash(self) -> str:
        """Get hash excluding GPU fields and runtime fields.

        CPU load-balanced endpoints only hash CPU-relevant fields:
        - Instance types (instanceIds)
        - Scaling parameters (workers, scaler)
        - Deployment type (type, locations)
        - Environment variables (env)

        Excludes:
        - GPU fields (to avoid false drift)
        - Runtime fields (template, templateId, aiKey, etc.)

        Returns:
            Hex MD5 digest of ``"<ClassName>:<sorted JSON of the fields>"``.
            The digest is a change-detection fingerprint, not a security hash.
        """
        import hashlib
        import json

        # CPU-relevant fields for drift detection
        cpu_fields = {
            "datacenter",
            "env",
            "flashboot",
            "flashEnvironmentId",
            "imageName",
            "networkVolume",
            "instanceIds",  # CPU-specific
            "workersMin",  # Scaling
            "workersMax",
            "scalerType",
            "scalerValue",
            "type",  # LB vs QB
            "idleTimeout",
            "executionTimeoutMs",
            "locations",
        }

        config_dict = self.model_dump(
            exclude_none=True, include=cpu_fields, mode="json"
        )

        # sort_keys keeps the serialization (and so the hash) independent of
        # field ordering; the class name prefix separates resource types.
        config_str = json.dumps(config_dict, sort_keys=True)
        hash_obj = hashlib.md5(f"{self.__class__.__name__}:{config_str}".encode())
        return hash_obj.hexdigest()
|