tetra_rp-0.17.1-py3-none-any.whl
This diff represents the content of a publicly available package version as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
Potentially problematic release: this version of tetra-rp has been flagged as possibly problematic.
- tetra_rp/__init__.py +43 -0
- tetra_rp/cli/__init__.py +0 -0
- tetra_rp/cli/commands/__init__.py +1 -0
- tetra_rp/cli/commands/build.py +534 -0
- tetra_rp/cli/commands/deploy.py +370 -0
- tetra_rp/cli/commands/init.py +119 -0
- tetra_rp/cli/commands/resource.py +191 -0
- tetra_rp/cli/commands/run.py +100 -0
- tetra_rp/cli/main.py +85 -0
- tetra_rp/cli/utils/__init__.py +1 -0
- tetra_rp/cli/utils/conda.py +127 -0
- tetra_rp/cli/utils/deployment.py +172 -0
- tetra_rp/cli/utils/ignore.py +139 -0
- tetra_rp/cli/utils/skeleton.py +184 -0
- tetra_rp/cli/utils/skeleton_template/.env.example +3 -0
- tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
- tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
- tetra_rp/cli/utils/skeleton_template/README.md +256 -0
- tetra_rp/cli/utils/skeleton_template/main.py +43 -0
- tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
- tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +20 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +38 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +20 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +62 -0
- tetra_rp/client.py +128 -0
- tetra_rp/config.py +29 -0
- tetra_rp/core/__init__.py +0 -0
- tetra_rp/core/api/__init__.py +6 -0
- tetra_rp/core/api/runpod.py +319 -0
- tetra_rp/core/exceptions.py +50 -0
- tetra_rp/core/resources/__init__.py +37 -0
- tetra_rp/core/resources/base.py +47 -0
- tetra_rp/core/resources/cloud.py +4 -0
- tetra_rp/core/resources/constants.py +4 -0
- tetra_rp/core/resources/cpu.py +146 -0
- tetra_rp/core/resources/environment.py +41 -0
- tetra_rp/core/resources/gpu.py +68 -0
- tetra_rp/core/resources/live_serverless.py +62 -0
- tetra_rp/core/resources/network_volume.py +148 -0
- tetra_rp/core/resources/resource_manager.py +145 -0
- tetra_rp/core/resources/serverless.py +463 -0
- tetra_rp/core/resources/serverless_cpu.py +162 -0
- tetra_rp/core/resources/template.py +94 -0
- tetra_rp/core/resources/utils.py +50 -0
- tetra_rp/core/utils/__init__.py +0 -0
- tetra_rp/core/utils/backoff.py +43 -0
- tetra_rp/core/utils/constants.py +10 -0
- tetra_rp/core/utils/file_lock.py +260 -0
- tetra_rp/core/utils/json.py +33 -0
- tetra_rp/core/utils/lru_cache.py +75 -0
- tetra_rp/core/utils/singleton.py +21 -0
- tetra_rp/core/validation.py +44 -0
- tetra_rp/execute_class.py +319 -0
- tetra_rp/logger.py +34 -0
- tetra_rp/protos/__init__.py +0 -0
- tetra_rp/protos/remote_execution.py +148 -0
- tetra_rp/stubs/__init__.py +5 -0
- tetra_rp/stubs/live_serverless.py +155 -0
- tetra_rp/stubs/registry.py +117 -0
- tetra_rp/stubs/serverless.py +30 -0
- tetra_rp-0.17.1.dist-info/METADATA +976 -0
- tetra_rp-0.17.1.dist-info/RECORD +66 -0
- tetra_rp-0.17.1.dist-info/WHEEL +5 -0
- tetra_rp-0.17.1.dist-info/entry_points.txt +2 -0
- tetra_rp-0.17.1.dist-info/top_level.txt +1 -0
tetra_rp/core/resources/serverless.py
@@ -0,0 +1,463 @@
+import asyncio
+import logging
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from pydantic import (
+    BaseModel,
+    Field,
+    field_serializer,
+    field_validator,
+    model_validator,
+)
+from runpod.endpoint.runner import Job
+
+from ..api.runpod import RunpodGraphQLClient
+from ..utils.backoff import get_backoff_delay
+from .base import DeployableResource
+from .cloud import runpod
+from .constants import CONSOLE_URL
+from .environment import EnvironmentVars
+from .gpu import GpuGroup
+from .network_volume import NetworkVolume, DataCenter
+from .template import KeyValuePair, PodTemplate
+
+
+# Environment variables are loaded from the .env file
+def get_env_vars() -> Dict[str, str]:
+    """
+    Returns the environment variables from the .env file.
+    {
+        "KEY": "VALUE",
+    }
+    """
+    env_vars = EnvironmentVars()
+    return env_vars.get_env()
+
+
+log = logging.getLogger(__name__)
+
+
+class ServerlessScalerType(Enum):
+    QUEUE_DELAY = "QUEUE_DELAY"
+    REQUEST_COUNT = "REQUEST_COUNT"
+
+
+class ServerlessType(Enum):
+    """
+    Serverless endpoint execution model.
+
+    QB (Queue-based): Traditional queue processing with automatic retries.
+        Requests are placed in queue and processed sequentially.
+        JSON input/output only. Higher latency but built-in error recovery.
+
+    LB (Load-balancer): Direct HTTP routing to healthy workers.
+        Supports custom HTTP endpoints and any data format.
+        Lower latency but no automatic retries.
+    """
+
+    QB = "QB"
+    LB = "LB"
+
+
+class CudaVersion(Enum):
+    V11_8 = "11.8"
+    V12_0 = "12.0"
+    V12_1 = "12.1"
+    V12_2 = "12.2"
+    V12_3 = "12.3"
+    V12_4 = "12.4"
+    V12_5 = "12.5"
+    V12_6 = "12.6"
+    V12_7 = "12.7"
+    V12_8 = "12.8"
+
+
+class ServerlessResource(DeployableResource):
+    """
+    Base class for GPU serverless resource
+    """
+
+    _input_only = {
+        "id",
+        "cudaVersions",
+        "datacenter",
+        "env",
+        "gpus",
+        "flashboot",
+        "imageName",
+        "networkVolume",
+    }
+
+    # === Input-only Fields ===
+    cudaVersions: Optional[List[CudaVersion]] = []  # for allowedCudaVersions
+    env: Optional[Dict[str, str]] = Field(default_factory=get_env_vars)
+    flashboot: Optional[bool] = True
+    gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
+    imageName: Optional[str] = ""  # for template.imageName
+    networkVolume: Optional[NetworkVolume] = None
+    datacenter: DataCenter = Field(default=DataCenter.EU_RO_1)
+
+    # === Input Fields ===
+    executionTimeoutMs: Optional[int] = None
+    gpuCount: Optional[int] = 1
+    idleTimeout: Optional[int] = 5
+    locations: Optional[str] = None
+    name: str
+    networkVolumeId: Optional[str] = None
+    scalerType: Optional[ServerlessScalerType] = ServerlessScalerType.QUEUE_DELAY
+    scalerValue: Optional[int] = 4
+    templateId: Optional[str] = None
+    type: Optional[ServerlessType] = None
+    workersMax: Optional[int] = 3
+    workersMin: Optional[int] = 0
+    workersPFBTarget: Optional[int] = None
+
+    # === Runtime Fields ===
+    activeBuildid: Optional[str] = None
+    aiKey: Optional[str] = None
+    allowedCudaVersions: Optional[str] = None
+    computeType: Optional[str] = None
+    createdAt: Optional[str] = None  # TODO: use datetime
+    gpuIds: Optional[str] = ""
+    hubRelease: Optional[str] = None
+    repo: Optional[str] = None
+    template: Optional[PodTemplate] = None
+    userId: Optional[str] = None
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}:{self.id}"
+
+    @property
+    def url(self) -> str:
+        if not self.id:
+            raise ValueError("Missing self.id")
+        return CONSOLE_URL % self.id
+
+    @property
+    def endpoint(self) -> runpod.Endpoint:
+        """
+        Returns the Runpod endpoint object for this serverless resource.
+        """
+        if not self.id:
+            raise ValueError("Missing self.id")
+        return runpod.Endpoint(self.id)
+
+    @property
+    def endpoint_url(self) -> str:
+        base_url = self.endpoint.rp_client.endpoint_url_base
+        return f"{base_url}/{self.id}"
+
+    @field_serializer("scalerType")
+    def serialize_scaler_type(
+        self, value: Optional[ServerlessScalerType]
+    ) -> Optional[str]:
+        """Convert ServerlessScalerType enum to string."""
+        return value.value if value is not None else None
+
+    @field_serializer("type")
+    def serialize_type(self, value: Optional[ServerlessType]) -> Optional[str]:
+        """Convert ServerlessType enum to string."""
+        return value.value if value is not None else None
+
+    @field_validator("gpus")
+    @classmethod
+    def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
+        """Expand ANY to all GPU groups"""
+        if value == [GpuGroup.ANY]:
+            return GpuGroup.all()
+        return value
+
+    @model_validator(mode="after")
+    def sync_input_fields(self):
+        """Sync between temporary inputs and exported fields"""
+        if self.flashboot:
+            self.name += "-fb"
+
+        # Sync datacenter to locations field for API
+        if not self.locations:
+            self.locations = self.datacenter.value
+
+        # Validate datacenter consistency between endpoint and network volume
+        if self.networkVolume and self.networkVolume.dataCenterId != self.datacenter:
+            raise ValueError(
+                f"Network volume datacenter ({self.networkVolume.dataCenterId.value}) "
+                f"must match endpoint datacenter ({self.datacenter.value})"
+            )
+
+        if self.networkVolume and self.networkVolume.is_created:
+            # Volume already exists, use its ID
+            self.networkVolumeId = self.networkVolume.id
+
+        self._sync_input_fields_gpu()
+
+        return self
+
+    def _sync_input_fields_gpu(self):
+        # GPU-specific fields
+        if self.gpus:
+            # Convert gpus list to gpuIds string
+            self.gpuIds = ",".join(gpu.value for gpu in self.gpus)
+        elif self.gpuIds:
+            # Convert gpuIds string to gpus list (from backend responses)
+            gpu_values = [v.strip() for v in self.gpuIds.split(",") if v.strip()]
+            self.gpus = [GpuGroup(value) for value in gpu_values]
+
+        if self.cudaVersions:
+            # Convert cudaVersions list to allowedCudaVersions string
+            self.allowedCudaVersions = ",".join(v.value for v in self.cudaVersions)
+        elif self.allowedCudaVersions:
+            # Convert allowedCudaVersions string to cudaVersions list (from backend responses)
+            version_values = [
+                v.strip() for v in self.allowedCudaVersions.split(",") if v.strip()
+            ]
+            self.cudaVersions = [CudaVersion(value) for value in version_values]
+
+        return self
+
+    async def _ensure_network_volume_deployed(self) -> None:
+        """
+        Ensures network volume is deployed and ready if one is specified.
+        Updates networkVolumeId with the deployed volume ID.
+        """
+        if self.networkVolumeId:
+            return
+
+        if self.networkVolume:
+            deployedNetworkVolume = await self.networkVolume.deploy()
+            self.networkVolumeId = deployedNetworkVolume.id
+
+    def is_deployed(self) -> bool:
+        """
+        Checks if the serverless resource is deployed and available.
+        """
+        try:
+            if not self.id:
+                return False
+
+            response = self.endpoint.health()
+            return response is not None
+        except Exception as e:
+            log.error(f"Error checking {self}: {e}")
+            return False
+
+    async def deploy(self) -> "DeployableResource":
+        """
+        Deploys the serverless resource using the provided configuration.
+        Returns a DeployableResource object.
+        """
+        try:
+            # If the resource is already deployed, return it
+            if self.is_deployed():
+                log.debug(f"{self} exists")
+                return self
+
+            # NEW: Ensure network volume is deployed first
+            await self._ensure_network_volume_deployed()
+
+            async with RunpodGraphQLClient() as client:
+                payload = self.model_dump(exclude=self._input_only, exclude_none=True)
+                result = await client.create_endpoint(payload)
+
+            if endpoint := self.__class__(**result):
+                return endpoint
+
+            raise ValueError("Deployment failed, no endpoint was returned.")
+
+        except Exception as e:
+            log.error(f"{self} failed to deploy: {e}")
+            raise
+
+    async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
+        """
+        Executes a serverless endpoint request with the payload.
+        Returns a JobOutput object.
+        """
+        if not self.id:
+            raise ValueError("Serverless is not deployed")
+
+        def _fetch_job():
+            return self.endpoint.rp_client.post(
+                f"{self.id}/runsync", payload, timeout=60
+            )
+
+        try:
+            # log.debug(f"[{self}] Payload: {payload}")
+
+            log.info(f"{self} | API /run_sync")
+            response = await asyncio.to_thread(_fetch_job)
+            return JobOutput(**response)
+
+        except Exception as e:
+            health = await asyncio.to_thread(self.endpoint.health)
+            health = ServerlessHealth(**health)
+            log.info(f"{self} | Health {health.workers.status}")
+            log.error(f"{self} | Exception: {e}")
+            raise
+
+    async def run(self, payload: Dict[str, Any]) -> "JobOutput":
+        """
+        Executes a serverless endpoint async request with the payload.
+        Returns a JobOutput object.
+        """
+        if not self.id:
+            raise ValueError("Serverless is not deployed")
+
+        job: Optional[Job] = None
+
+        try:
+            # log.debug(f"[{self}] Payload: {payload}")
+
+            # Create a job using the endpoint
+            log.info(f"{self} | API /run")
+            job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
+
+            log_subgroup = f"Job:{job.job_id}"
+
+            log.info(f"{self} | Started {log_subgroup}")
+
+            current_pace = 0
+            attempt = 0
+            job_status = Status.UNKNOWN
+            last_status = job_status
+
+            # Poll for job status
+            while True:
+                await asyncio.sleep(current_pace)
+
+                # Check job status
+                job_status = await asyncio.to_thread(job.status)
+
+                if last_status == job_status:
+                    # nothing changed, increase the gap
+                    attempt += 1
+                    indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
+                    if indicator:
+                        log.info(f"{log_subgroup} | {indicator}")
+                else:
+                    # status changed, reset the gap
+                    log.info(f"{log_subgroup} | Status: {job_status}")
+                    attempt = 0
+
+                last_status = job_status
+
+                # Adjust polling pace appropriately
+                current_pace = get_backoff_delay(attempt)
+
+                if job_status in ("COMPLETED", "FAILED", "CANCELLED"):
+                    response = await asyncio.to_thread(job._fetch_job)
+                    return JobOutput(**response)
+
+        except Exception as e:
+            if job and job.job_id:
+                log.info(f"{self} | Cancelling job {job.job_id}")
+                await asyncio.to_thread(job.cancel)
+
+            log.error(f"{self} | Exception: {e}")
+            raise
+
+
+class ServerlessEndpoint(ServerlessResource):
+    """
+    Represents a serverless endpoint distinct from a live serverless.
+    Inherits from ServerlessResource.
+    """
+
+    def _create_new_template(self) -> PodTemplate:
+        """Create a new PodTemplate with standard configuration."""
+        return PodTemplate(
+            name=self.resource_id,
+            imageName=self.imageName,
+            env=KeyValuePair.from_dict(self.env or get_env_vars()),
+        )
+
+    def _configure_existing_template(self) -> None:
+        """Configure an existing template with necessary overrides."""
+        if self.template is None:
+            return
+
+        self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+        if self.imageName:
+            self.template.imageName = self.imageName
+        if self.env:
+            self.template.env = KeyValuePair.from_dict(self.env)
+
+    @model_validator(mode="after")
+    def set_serverless_template(self):
+        if not any([self.imageName, self.template, self.templateId]):
+            raise ValueError(
+                "Either imageName, template, or templateId must be provided"
+            )
+
+        if not self.templateId and not self.template:
+            self.template = self._create_new_template()
+        elif self.template:
+            self._configure_existing_template()
+
+        return self
+
+
+class JobOutput(BaseModel):
+    id: str
+    workerId: str
+    status: str
+    delayTime: int
+    executionTime: int
+    output: Optional[Any] = None
+    error: Optional[str] = ""
+
+    def model_post_init(self, _: Any) -> None:
+        log_group = f"Worker:{self.workerId}"
+        log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
+        log.info(f"{log_group} | Execution Time: {self.executionTime} ms")
+
+
+class Status(str, Enum):
+    READY = "READY"
+    INITIALIZING = "INITIALIZING"
+    THROTTLED = "THROTTLED"
+    UNHEALTHY = "UNHEALTHY"
+    UNKNOWN = "UNKNOWN"
+
+
+class WorkersHealth(BaseModel):
+    idle: int
+    initializing: int
+    ready: int
+    running: int
+    throttled: int
+    unhealthy: int
+
+    @property
+    def status(self) -> Status:
+        if self.ready or self.idle or self.running:
+            return Status.READY
+
+        if self.initializing:
+            return Status.INITIALIZING
+
+        if self.throttled:
+            return Status.THROTTLED
+
+        if self.unhealthy:
+            return Status.UNHEALTHY
+
+        return Status.UNKNOWN
+
+
+class JobsHealth(BaseModel):
+    completed: int
+    failed: int
+    inProgress: int
+    inQueue: int
+    retried: int
+
+
+class ServerlessHealth(BaseModel):
+    workers: WorkersHealth
+    jobs: JobsHealth
+
+    @property
+    def is_ready(self) -> bool:
+        return self.workers.status == Status.READY
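For orientation, a minimal usage sketch of the classes above. It is not part of the package: the endpoint name, image, and payload shape are illustrative assumptions, and it presumes a Runpod API key is configured in the environment as the package's client code expects.

import asyncio

from tetra_rp.core.resources.serverless import ServerlessEndpoint


async def main() -> None:
    # set_serverless_template() requires imageName, template, or templateId.
    endpoint = ServerlessEndpoint(
        name="demo-endpoint",  # "-fb" is appended because flashboot defaults to True
        imageName="runpod/example-worker:dev",  # hypothetical image
        workersMin=0,
        workersMax=3,
    )

    # deploy() returns early if the endpoint already answers health checks;
    # otherwise it deploys any attached network volume and creates the endpoint.
    deployed = await endpoint.deploy()

    # run() queues a job, then polls job.status() with get_backoff_delay()
    # pacing until the job reaches COMPLETED, FAILED, or CANCELLED.
    result = await deployed.run({"input": {"prompt": "hello"}})  # payload shape assumed
    print(result.status, result.output)


asyncio.run(main())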
tetra_rp/core/resources/serverless_cpu.py
@@ -0,0 +1,162 @@
+"""
+CPU-specific serverless endpoint classes.
+
+This module contains all CPU-related serverless functionality, separate from GPU serverless.
+"""
+
+from typing import List, Optional
+
+from pydantic import field_serializer, model_validator, field_validator
+
+from .cpu import (
+    CpuInstanceType,
+    CPU_INSTANCE_DISK_LIMITS,
+    get_max_disk_size_for_instances,
+)
+from .serverless import ServerlessEndpoint, get_env_vars
+from .template import KeyValuePair, PodTemplate
+
+
+class CpuEndpointMixin:
+    """Mixin class that provides CPU-specific functionality for serverless endpoints."""
+
+    instanceIds: Optional[List[CpuInstanceType]]
+
+    def _is_cpu_endpoint(self) -> bool:
+        """Check if this is a CPU endpoint (has instanceIds)."""
+        return (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+    def _get_cpu_container_disk_size(self) -> Optional[int]:
+        """Get the appropriate container disk size for CPU instances."""
+        if not self._is_cpu_endpoint():
+            return None
+        return get_max_disk_size_for_instances(self.instanceIds)
+
+    def _apply_cpu_disk_sizing(self, template: PodTemplate) -> None:
+        """Apply CPU disk sizing to a template if it's using the default size."""
+        if not self._is_cpu_endpoint():
+            return
+
+        # Only auto-size if template is using the default value
+        default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+        if template.containerDiskInGb == default_disk_size:
+            cpu_disk_size = self._get_cpu_container_disk_size()
+            if cpu_disk_size is not None:
+                template.containerDiskInGb = cpu_disk_size
+
+    def validate_cpu_container_disk_size(self) -> None:
+        """
+        Validate that container disk size doesn't exceed limits for CPU instances.
+
+        Raises:
+            ValueError: If container disk size exceeds the limit for any CPU instance
+        """
+        if (
+            not self._is_cpu_endpoint()
+            or not hasattr(self, "template")
+            or not self.template
+            or not self.template.containerDiskInGb
+        ):
+            return
+
+        max_allowed_disk_size = self._get_cpu_container_disk_size()
+        if max_allowed_disk_size is None:
+            return
+
+        if self.template.containerDiskInGb > max_allowed_disk_size:
+            instance_limits = []
+            for instance_type in self.instanceIds:
+                limit = CPU_INSTANCE_DISK_LIMITS[instance_type]
+                instance_limits.append(f"{instance_type.value}: max {limit}GB")
+
+            raise ValueError(
+                f"Container disk size {self.template.containerDiskInGb}GB exceeds the maximum "
+                f"allowed for CPU instances. Instance limits: {', '.join(instance_limits)}. "
+                f"Maximum allowed: {max_allowed_disk_size}GB"
+            )
+
+    def _sync_cpu_fields(self):
+        """Sync CPU-specific fields, overriding GPU defaults."""
+        # Override GPU-specific fields for CPU
+        if hasattr(self, "gpuCount"):
+            self.gpuCount = 0
+        if hasattr(self, "allowedCudaVersions"):
+            self.allowedCudaVersions = ""
+        if hasattr(self, "gpuIds"):
+            self.gpuIds = ""
+
+    @field_serializer("instanceIds")
+    def serialize_instance_ids(
+        self, value: Optional[List[CpuInstanceType]]
+    ) -> Optional[List[str]]:
+        """Convert CpuInstanceType enums to strings."""
+        if value is None:
+            return None
+        return [item.value if hasattr(item, "value") else str(item) for item in value]
+
+
+class CpuServerlessEndpoint(CpuEndpointMixin, ServerlessEndpoint):
+    """
+    CPU-only serverless endpoint with automatic disk sizing and validation.
+    Represents a CPU-only serverless endpoint distinct from a live serverless.
+    """
+
+    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.ANY]
+
+    def _create_new_template(self) -> PodTemplate:
+        """Create a new PodTemplate with CPU-appropriate disk sizing."""
+        template = PodTemplate(
+            name=self.resource_id,
+            imageName=self.imageName,
+            env=KeyValuePair.from_dict(self.env or get_env_vars()),
+        )
+        # Apply CPU-specific disk sizing
+        self._apply_cpu_disk_sizing(template)
+        return template
+
+    def _configure_existing_template(self) -> None:
+        """Configure an existing template with necessary overrides and CPU sizing."""
+        if self.template is None:
+            return
+
+        self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+        if self.imageName:
+            self.template.imageName = self.imageName
+        if self.env:
+            self.template.env = KeyValuePair.from_dict(self.env)
+
+        # Apply CPU-specific disk sizing
+        self._apply_cpu_disk_sizing(self.template)
+
+    @field_validator("instanceIds")
+    @classmethod
+    def validate_cpus(cls, value: List[CpuInstanceType]) -> List[CpuInstanceType]:
+        """Expand ANY to all CPU instance types"""
+        if value == [CpuInstanceType.ANY]:
+            return CpuInstanceType.all()
+        return value
+
+    @model_validator(mode="after")
+    def set_serverless_template(self):
+        # Sync CPU-specific fields first
+        self._sync_cpu_fields()
+
+        if not any([self.imageName, self.template, self.templateId]):
+            raise ValueError(
+                "Either imageName, template, or templateId must be provided"
+            )
+
+        if not self.templateId and not self.template:
+            self.template = self._create_new_template()
+        elif self.template:
+            self._configure_existing_template()
+
+        # Validate container disk size for CPU instances
+        self.validate_cpu_container_disk_size()
+
+        return self
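A matching sketch for the CPU variant defined above. Again illustrative rather than authoritative: the image name is hypothetical, and the concrete disk limits come from CPU_INSTANCE_DISK_LIMITS in tetra_rp/core/resources/cpu.py, which is not shown in this diff.

from tetra_rp.core.resources.cpu import CpuInstanceType
from tetra_rp.core.resources.serverless_cpu import CpuServerlessEndpoint

endpoint = CpuServerlessEndpoint(
    name="demo-cpu-endpoint",
    imageName="runpod/example-cpu-worker:dev",  # hypothetical image
    instanceIds=[CpuInstanceType.ANY],  # the validator expands ANY to all instance types
)

# _sync_cpu_fields() overrides the GPU defaults inherited from ServerlessResource.
print(endpoint.gpuCount)  # 0

# The auto-created template is sized via get_max_disk_size_for_instances();
# a custom template whose containerDiskInGb exceeds the per-instance limit
# raises ValueError from validate_cpu_container_disk_size() instead.
print(endpoint.template.containerDiskInGb)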