tetra-rp 0.17.1 (tetra_rp-0.17.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tetra-rp might be problematic.

Files changed (66)
  1. tetra_rp/__init__.py +43 -0
  2. tetra_rp/cli/__init__.py +0 -0
  3. tetra_rp/cli/commands/__init__.py +1 -0
  4. tetra_rp/cli/commands/build.py +534 -0
  5. tetra_rp/cli/commands/deploy.py +370 -0
  6. tetra_rp/cli/commands/init.py +119 -0
  7. tetra_rp/cli/commands/resource.py +191 -0
  8. tetra_rp/cli/commands/run.py +100 -0
  9. tetra_rp/cli/main.py +85 -0
  10. tetra_rp/cli/utils/__init__.py +1 -0
  11. tetra_rp/cli/utils/conda.py +127 -0
  12. tetra_rp/cli/utils/deployment.py +172 -0
  13. tetra_rp/cli/utils/ignore.py +139 -0
  14. tetra_rp/cli/utils/skeleton.py +184 -0
  15. tetra_rp/cli/utils/skeleton_template/.env.example +3 -0
  16. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  17. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  18. tetra_rp/cli/utils/skeleton_template/README.md +256 -0
  19. tetra_rp/cli/utils/skeleton_template/main.py +43 -0
  20. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  21. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  22. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +20 -0
  23. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +38 -0
  24. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +20 -0
  25. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +62 -0
  26. tetra_rp/client.py +128 -0
  27. tetra_rp/config.py +29 -0
  28. tetra_rp/core/__init__.py +0 -0
  29. tetra_rp/core/api/__init__.py +6 -0
  30. tetra_rp/core/api/runpod.py +319 -0
  31. tetra_rp/core/exceptions.py +50 -0
  32. tetra_rp/core/resources/__init__.py +37 -0
  33. tetra_rp/core/resources/base.py +47 -0
  34. tetra_rp/core/resources/cloud.py +4 -0
  35. tetra_rp/core/resources/constants.py +4 -0
  36. tetra_rp/core/resources/cpu.py +146 -0
  37. tetra_rp/core/resources/environment.py +41 -0
  38. tetra_rp/core/resources/gpu.py +68 -0
  39. tetra_rp/core/resources/live_serverless.py +62 -0
  40. tetra_rp/core/resources/network_volume.py +148 -0
  41. tetra_rp/core/resources/resource_manager.py +145 -0
  42. tetra_rp/core/resources/serverless.py +463 -0
  43. tetra_rp/core/resources/serverless_cpu.py +162 -0
  44. tetra_rp/core/resources/template.py +94 -0
  45. tetra_rp/core/resources/utils.py +50 -0
  46. tetra_rp/core/utils/__init__.py +0 -0
  47. tetra_rp/core/utils/backoff.py +43 -0
  48. tetra_rp/core/utils/constants.py +10 -0
  49. tetra_rp/core/utils/file_lock.py +260 -0
  50. tetra_rp/core/utils/json.py +33 -0
  51. tetra_rp/core/utils/lru_cache.py +75 -0
  52. tetra_rp/core/utils/singleton.py +21 -0
  53. tetra_rp/core/validation.py +44 -0
  54. tetra_rp/execute_class.py +319 -0
  55. tetra_rp/logger.py +34 -0
  56. tetra_rp/protos/__init__.py +0 -0
  57. tetra_rp/protos/remote_execution.py +148 -0
  58. tetra_rp/stubs/__init__.py +5 -0
  59. tetra_rp/stubs/live_serverless.py +155 -0
  60. tetra_rp/stubs/registry.py +117 -0
  61. tetra_rp/stubs/serverless.py +30 -0
  62. tetra_rp-0.17.1.dist-info/METADATA +976 -0
  63. tetra_rp-0.17.1.dist-info/RECORD +66 -0
  64. tetra_rp-0.17.1.dist-info/WHEEL +5 -0
  65. tetra_rp-0.17.1.dist-info/entry_points.txt +2 -0
  66. tetra_rp-0.17.1.dist-info/top_level.txt +1 -0
tetra_rp/core/resources/serverless.py
@@ -0,0 +1,463 @@
+ import asyncio
+ import logging
+ from enum import Enum
+ from typing import Any, Dict, List, Optional
+
+ from pydantic import (
+     BaseModel,
+     Field,
+     field_serializer,
+     field_validator,
+     model_validator,
+ )
+ from runpod.endpoint.runner import Job
+
+ from ..api.runpod import RunpodGraphQLClient
+ from ..utils.backoff import get_backoff_delay
+ from .base import DeployableResource
+ from .cloud import runpod
+ from .constants import CONSOLE_URL
+ from .environment import EnvironmentVars
+ from .gpu import GpuGroup
+ from .network_volume import NetworkVolume, DataCenter
+ from .template import KeyValuePair, PodTemplate
+
+
+ # Environment variables are loaded from the .env file
+ def get_env_vars() -> Dict[str, str]:
+     """
+     Returns the environment variables from the .env file, e.g.:
+     {
+         "KEY": "VALUE",
+     }
+     """
+     env_vars = EnvironmentVars()
+     return env_vars.get_env()
+
+
+ log = logging.getLogger(__name__)
+
+
+ class ServerlessScalerType(Enum):
+     QUEUE_DELAY = "QUEUE_DELAY"
+     REQUEST_COUNT = "REQUEST_COUNT"
+
+
+ class ServerlessType(Enum):
+     """
+     Serverless endpoint execution model.
+
+     QB (Queue-based): Traditional queue processing with automatic retries.
+         Requests are placed in a queue and processed sequentially.
+         JSON input/output only. Higher latency but built-in error recovery.
+
+     LB (Load-balancer): Direct HTTP routing to healthy workers.
+         Supports custom HTTP endpoints and any data format.
+         Lower latency but no automatic retries.
+     """
+
+     QB = "QB"
+     LB = "LB"
+
+
+ class CudaVersion(Enum):
+     V11_8 = "11.8"
+     V12_0 = "12.0"
+     V12_1 = "12.1"
+     V12_2 = "12.2"
+     V12_3 = "12.3"
+     V12_4 = "12.4"
+     V12_5 = "12.5"
+     V12_6 = "12.6"
+     V12_7 = "12.7"
+     V12_8 = "12.8"
+
+
+ class ServerlessResource(DeployableResource):
+     """
+     Base class for GPU serverless resources.
+     """
+
+     _input_only = {
+         "id",
+         "cudaVersions",
+         "datacenter",
+         "env",
+         "gpus",
+         "flashboot",
+         "imageName",
+         "networkVolume",
+     }
+
+     # === Input-only Fields ===
+     cudaVersions: Optional[List[CudaVersion]] = []  # for allowedCudaVersions
+     env: Optional[Dict[str, str]] = Field(default_factory=get_env_vars)
+     flashboot: Optional[bool] = True
+     gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
+     imageName: Optional[str] = ""  # for template.imageName
+     networkVolume: Optional[NetworkVolume] = None
+     datacenter: DataCenter = Field(default=DataCenter.EU_RO_1)
+
+     # === Input Fields ===
+     executionTimeoutMs: Optional[int] = None
+     gpuCount: Optional[int] = 1
+     idleTimeout: Optional[int] = 5
+     locations: Optional[str] = None
+     name: str
+     networkVolumeId: Optional[str] = None
+     scalerType: Optional[ServerlessScalerType] = ServerlessScalerType.QUEUE_DELAY
+     scalerValue: Optional[int] = 4
+     templateId: Optional[str] = None
+     type: Optional[ServerlessType] = None
+     workersMax: Optional[int] = 3
+     workersMin: Optional[int] = 0
+     workersPFBTarget: Optional[int] = None
+
+     # === Runtime Fields ===
+     activeBuildid: Optional[str] = None
+     aiKey: Optional[str] = None
+     allowedCudaVersions: Optional[str] = None
+     computeType: Optional[str] = None
+     createdAt: Optional[str] = None  # TODO: use datetime
+     gpuIds: Optional[str] = ""
+     hubRelease: Optional[str] = None
+     repo: Optional[str] = None
+     template: Optional[PodTemplate] = None
+     userId: Optional[str] = None
+
+     def __str__(self) -> str:
+         return f"{self.__class__.__name__}:{self.id}"
+
+     @property
+     def url(self) -> str:
+         if not self.id:
+             raise ValueError("Missing self.id")
+         return CONSOLE_URL % self.id
+
+     @property
+     def endpoint(self) -> runpod.Endpoint:
+         """
+         Returns the Runpod endpoint object for this serverless resource.
+         """
+         if not self.id:
+             raise ValueError("Missing self.id")
+         return runpod.Endpoint(self.id)
+
+     @property
+     def endpoint_url(self) -> str:
+         base_url = self.endpoint.rp_client.endpoint_url_base
+         return f"{base_url}/{self.id}"
+
+     @field_serializer("scalerType")
+     def serialize_scaler_type(
+         self, value: Optional[ServerlessScalerType]
+     ) -> Optional[str]:
+         """Convert ServerlessScalerType enum to string."""
+         return value.value if value is not None else None
+
+     @field_serializer("type")
+     def serialize_type(self, value: Optional[ServerlessType]) -> Optional[str]:
+         """Convert ServerlessType enum to string."""
+         return value.value if value is not None else None
+
+     @field_validator("gpus")
+     @classmethod
+     def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
+         """Expand ANY to all GPU groups."""
+         if value == [GpuGroup.ANY]:
+             return GpuGroup.all()
+         return value
+
+     @model_validator(mode="after")
+     def sync_input_fields(self):
+         """Sync between temporary inputs and exported fields."""
+         if self.flashboot:
+             self.name += "-fb"
+
+         # Sync datacenter to locations field for API
+         if not self.locations:
+             self.locations = self.datacenter.value
+
+         # Validate datacenter consistency between endpoint and network volume
+         if self.networkVolume and self.networkVolume.dataCenterId != self.datacenter:
+             raise ValueError(
+                 f"Network volume datacenter ({self.networkVolume.dataCenterId.value}) "
+                 f"must match endpoint datacenter ({self.datacenter.value})"
+             )
+
+         if self.networkVolume and self.networkVolume.is_created:
+             # Volume already exists, use its ID
+             self.networkVolumeId = self.networkVolume.id
+
+         self._sync_input_fields_gpu()
+
+         return self
+
+     def _sync_input_fields_gpu(self):
+         # GPU-specific fields
+         if self.gpus:
+             # Convert gpus list to gpuIds string
+             self.gpuIds = ",".join(gpu.value for gpu in self.gpus)
+         elif self.gpuIds:
+             # Convert gpuIds string to gpus list (from backend responses)
+             gpu_values = [v.strip() for v in self.gpuIds.split(",") if v.strip()]
+             self.gpus = [GpuGroup(value) for value in gpu_values]
+
+         if self.cudaVersions:
+             # Convert cudaVersions list to allowedCudaVersions string
+             self.allowedCudaVersions = ",".join(v.value for v in self.cudaVersions)
+         elif self.allowedCudaVersions:
+             # Convert allowedCudaVersions string to cudaVersions list (from backend responses)
+             version_values = [
+                 v.strip() for v in self.allowedCudaVersions.split(",") if v.strip()
+             ]
+             self.cudaVersions = [CudaVersion(value) for value in version_values]
+
+         return self
+
+     async def _ensure_network_volume_deployed(self) -> None:
+         """
+         Ensures the network volume is deployed and ready if one is specified.
+         Updates networkVolumeId with the deployed volume ID.
+         """
+         if self.networkVolumeId:
+             return
+
+         if self.networkVolume:
+             deployedNetworkVolume = await self.networkVolume.deploy()
+             self.networkVolumeId = deployedNetworkVolume.id
+
+     def is_deployed(self) -> bool:
+         """
+         Checks if the serverless resource is deployed and available.
+         """
+         try:
+             if not self.id:
+                 return False
+
+             response = self.endpoint.health()
+             return response is not None
+         except Exception as e:
+             log.error(f"Error checking {self}: {e}")
+             return False
+
+     async def deploy(self) -> "DeployableResource":
+         """
+         Deploys the serverless resource using the provided configuration.
+         Returns a DeployableResource object.
+         """
+         try:
+             # If the resource is already deployed, return it
+             if self.is_deployed():
+                 log.debug(f"{self} exists")
+                 return self
+
+             # Ensure the network volume is deployed first
+             await self._ensure_network_volume_deployed()
+
+             async with RunpodGraphQLClient() as client:
+                 payload = self.model_dump(exclude=self._input_only, exclude_none=True)
+                 result = await client.create_endpoint(payload)
+
+             if endpoint := self.__class__(**result):
+                 return endpoint
+
+             raise ValueError("Deployment failed, no endpoint was returned.")
+
+         except Exception as e:
+             log.error(f"{self} failed to deploy: {e}")
+             raise
+
+     async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
+         """
+         Executes a serverless endpoint request with the payload.
+         Returns a JobOutput object.
+         """
+         if not self.id:
+             raise ValueError("Serverless is not deployed")
+
+         def _fetch_job():
+             return self.endpoint.rp_client.post(
+                 f"{self.id}/runsync", payload, timeout=60
+             )
+
+         try:
+             # log.debug(f"[{self}] Payload: {payload}")
+
+             log.info(f"{self} | API /run_sync")
+             response = await asyncio.to_thread(_fetch_job)
+             return JobOutput(**response)
+
+         except Exception as e:
+             health = await asyncio.to_thread(self.endpoint.health)
+             health = ServerlessHealth(**health)
+             log.info(f"{self} | Health {health.workers.status}")
+             log.error(f"{self} | Exception: {e}")
+             raise
+
+     async def run(self, payload: Dict[str, Any]) -> "JobOutput":
+         """
+         Executes a serverless endpoint async request with the payload.
+         Returns a JobOutput object.
+         """
+         if not self.id:
+             raise ValueError("Serverless is not deployed")
+
+         job: Optional[Job] = None
+
+         try:
+             # log.debug(f"[{self}] Payload: {payload}")
+
+             # Create a job using the endpoint
+             log.info(f"{self} | API /run")
+             job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
+
+             log_subgroup = f"Job:{job.job_id}"
+
+             log.info(f"{self} | Started {log_subgroup}")
+
+             current_pace = 0
+             attempt = 0
+             job_status = Status.UNKNOWN
+             last_status = job_status
+
+             # Poll for job status
+             while True:
+                 await asyncio.sleep(current_pace)
+
+                 # Check job status
+                 job_status = await asyncio.to_thread(job.status)
+
+                 if last_status == job_status:
+                     # nothing changed, increase the gap
+                     attempt += 1
+                     indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
+                     if indicator:
+                         log.info(f"{log_subgroup} | {indicator}")
+                 else:
+                     # status changed, reset the gap
+                     log.info(f"{log_subgroup} | Status: {job_status}")
+                     attempt = 0
+
+                 last_status = job_status
+
+                 # Adjust polling pace appropriately
+                 current_pace = get_backoff_delay(attempt)
+
+                 if job_status in ("COMPLETED", "FAILED", "CANCELLED"):
+                     response = await asyncio.to_thread(job._fetch_job)
+                     return JobOutput(**response)
+
+         except Exception as e:
+             if job and job.job_id:
+                 log.info(f"{self} | Cancelling job {job.job_id}")
+                 await asyncio.to_thread(job.cancel)
+
+             log.error(f"{self} | Exception: {e}")
+             raise
+
+
+ class ServerlessEndpoint(ServerlessResource):
+     """
+     Represents a serverless endpoint distinct from a live serverless.
+     Inherits from ServerlessResource.
+     """
+
+     def _create_new_template(self) -> PodTemplate:
+         """Create a new PodTemplate with standard configuration."""
+         return PodTemplate(
+             name=self.resource_id,
+             imageName=self.imageName,
+             env=KeyValuePair.from_dict(self.env or get_env_vars()),
+         )
+
+     def _configure_existing_template(self) -> None:
+         """Configure an existing template with necessary overrides."""
+         if self.template is None:
+             return
+
+         self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+         if self.imageName:
+             self.template.imageName = self.imageName
+         if self.env:
+             self.template.env = KeyValuePair.from_dict(self.env)
+
+     @model_validator(mode="after")
+     def set_serverless_template(self):
+         if not any([self.imageName, self.template, self.templateId]):
+             raise ValueError(
+                 "Either imageName, template, or templateId must be provided"
+             )
+
+         if not self.templateId and not self.template:
+             self.template = self._create_new_template()
+         elif self.template:
+             self._configure_existing_template()
+
+         return self
+
+
+ class JobOutput(BaseModel):
+     id: str
+     workerId: str
+     status: str
+     delayTime: int
+     executionTime: int
+     output: Optional[Any] = None
+     error: Optional[str] = ""
+
+     def model_post_init(self, _: Any) -> None:
+         log_group = f"Worker:{self.workerId}"
+         log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
+         log.info(f"{log_group} | Execution Time: {self.executionTime} ms")
+
+
+ class Status(str, Enum):
+     READY = "READY"
+     INITIALIZING = "INITIALIZING"
+     THROTTLED = "THROTTLED"
+     UNHEALTHY = "UNHEALTHY"
+     UNKNOWN = "UNKNOWN"
+
+
+ class WorkersHealth(BaseModel):
+     idle: int
+     initializing: int
+     ready: int
+     running: int
+     throttled: int
+     unhealthy: int
+
+     @property
+     def status(self) -> Status:
+         if self.ready or self.idle or self.running:
+             return Status.READY
+
+         if self.initializing:
+             return Status.INITIALIZING
+
+         if self.throttled:
+             return Status.THROTTLED
+
+         if self.unhealthy:
+             return Status.UNHEALTHY
+
+         return Status.UNKNOWN
+
+
+ class JobsHealth(BaseModel):
+     completed: int
+     failed: int
+     inProgress: int
+     inQueue: int
+     retried: int
+
+
+ class ServerlessHealth(BaseModel):
+     workers: WorkersHealth
+     jobs: JobsHealth
+
+     @property
+     def is_ready(self) -> bool:
+         return self.workers.status == Status.READY
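
For orientation, here is how the pieces above fit together at call time: deploy() is idempotent (it returns early when is_deployed() reports a healthy endpoint, otherwise it creates one through RunpodGraphQLClient), and run() submits a job and then polls job.status() at a pace set by get_backoff_delay() until the job reaches COMPLETED, FAILED, or CANCELLED. A minimal sketch, not part of the package itself; the endpoint name, image, and payload are hypothetical, and it assumes Runpod credentials are configured in the environment:

import asyncio

from tetra_rp.core.resources.serverless import ServerlessEndpoint

async def main():
    # Hypothetical name and image; flashboot (on by default) appends "-fb" to the name.
    endpoint = ServerlessEndpoint(
        name="demo-endpoint",
        imageName="runpod/serverless-hello-world:latest",
    )

    # Creates the endpoint only if it is not already deployed.
    deployed = await endpoint.deploy()

    # Submits the job, then polls with backoff until a terminal status.
    result = await deployed.run({"input": {"prompt": "hello"}})
    print(result.status, result.output)

asyncio.run(main())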
tetra_rp/core/resources/serverless_cpu.py
@@ -0,0 +1,162 @@
+ """
+ CPU-specific serverless endpoint classes.
+
+ This module contains all CPU-related serverless functionality, separate from GPU serverless.
+ """
+
+ from typing import List, Optional
+
+ from pydantic import field_serializer, model_validator, field_validator
+
+ from .cpu import (
+     CpuInstanceType,
+     CPU_INSTANCE_DISK_LIMITS,
+     get_max_disk_size_for_instances,
+ )
+ from .serverless import ServerlessEndpoint, get_env_vars
+ from .template import KeyValuePair, PodTemplate
+
+
+ class CpuEndpointMixin:
+     """Mixin class that provides CPU-specific functionality for serverless endpoints."""
+
+     instanceIds: Optional[List[CpuInstanceType]]
+
+     def _is_cpu_endpoint(self) -> bool:
+         """Check if this is a CPU endpoint (has instanceIds)."""
+         return (
+             hasattr(self, "instanceIds")
+             and self.instanceIds is not None
+             and len(self.instanceIds) > 0
+         )
+
+     def _get_cpu_container_disk_size(self) -> Optional[int]:
+         """Get the appropriate container disk size for CPU instances."""
+         if not self._is_cpu_endpoint():
+             return None
+         return get_max_disk_size_for_instances(self.instanceIds)
+
+     def _apply_cpu_disk_sizing(self, template: PodTemplate) -> None:
+         """Apply CPU disk sizing to a template if it's using the default size."""
+         if not self._is_cpu_endpoint():
+             return
+
+         # Only auto-size if template is using the default value
+         default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+         if template.containerDiskInGb == default_disk_size:
+             cpu_disk_size = self._get_cpu_container_disk_size()
+             if cpu_disk_size is not None:
+                 template.containerDiskInGb = cpu_disk_size
+
+     def validate_cpu_container_disk_size(self) -> None:
+         """
+         Validate that container disk size doesn't exceed limits for CPU instances.
+
+         Raises:
+             ValueError: If container disk size exceeds the limit for any CPU instance
+         """
+         if (
+             not self._is_cpu_endpoint()
+             or not hasattr(self, "template")
+             or not self.template
+             or not self.template.containerDiskInGb
+         ):
+             return
+
+         max_allowed_disk_size = self._get_cpu_container_disk_size()
+         if max_allowed_disk_size is None:
+             return
+
+         if self.template.containerDiskInGb > max_allowed_disk_size:
+             instance_limits = []
+             for instance_type in self.instanceIds:
+                 limit = CPU_INSTANCE_DISK_LIMITS[instance_type]
+                 instance_limits.append(f"{instance_type.value}: max {limit}GB")
+
+             raise ValueError(
+                 f"Container disk size {self.template.containerDiskInGb}GB exceeds the maximum "
+                 f"allowed for CPU instances. Instance limits: {', '.join(instance_limits)}. "
+                 f"Maximum allowed: {max_allowed_disk_size}GB"
+             )
+
+     def _sync_cpu_fields(self):
+         """Sync CPU-specific fields, overriding GPU defaults."""
+         # Override GPU-specific fields for CPU
+         if hasattr(self, "gpuCount"):
+             self.gpuCount = 0
+         if hasattr(self, "allowedCudaVersions"):
+             self.allowedCudaVersions = ""
+         if hasattr(self, "gpuIds"):
+             self.gpuIds = ""
+
+     @field_serializer("instanceIds")
+     def serialize_instance_ids(
+         self, value: Optional[List[CpuInstanceType]]
+     ) -> Optional[List[str]]:
+         """Convert CpuInstanceType enums to strings."""
+         if value is None:
+             return None
+         return [item.value if hasattr(item, "value") else str(item) for item in value]
+
+
+
102
+ class CpuServerlessEndpoint(CpuEndpointMixin, ServerlessEndpoint):
103
+ """
104
+ CPU-only serverless endpoint with automatic disk sizing and validation.
105
+ Represents a CPU-only serverless endpoint distinct from a live serverless.
106
+ """
107
+
108
+ instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.ANY]
109
+
110
+ def _create_new_template(self) -> PodTemplate:
111
+ """Create a new PodTemplate with CPU-appropriate disk sizing."""
112
+ template = PodTemplate(
113
+ name=self.resource_id,
114
+ imageName=self.imageName,
115
+ env=KeyValuePair.from_dict(self.env or get_env_vars()),
116
+ )
117
+ # Apply CPU-specific disk sizing
118
+ self._apply_cpu_disk_sizing(template)
119
+ return template
120
+
121
+ def _configure_existing_template(self) -> None:
122
+ """Configure an existing template with necessary overrides and CPU sizing."""
123
+ if self.template is None:
124
+ return
125
+
126
+ self.template.name = f"{self.resource_id}__{self.template.resource_id}"
127
+
128
+ if self.imageName:
129
+ self.template.imageName = self.imageName
130
+ if self.env:
131
+ self.template.env = KeyValuePair.from_dict(self.env)
132
+
133
+ # Apply CPU-specific disk sizing
134
+ self._apply_cpu_disk_sizing(self.template)
135
+
136
+ @field_validator("instanceIds")
137
+ @classmethod
138
+ def validate_cpus(cls, value: List[CpuInstanceType]) -> List[CpuInstanceType]:
139
+ """Expand ANY to all GPU groups"""
140
+ if value == [CpuInstanceType.ANY]:
141
+ return CpuInstanceType.all()
142
+ return value
143
+
144
+ @model_validator(mode="after")
145
+ def set_serverless_template(self):
146
+ # Sync CPU-specific fields first
147
+ self._sync_cpu_fields()
148
+
149
+ if not any([self.imageName, self.template, self.templateId]):
150
+ raise ValueError(
151
+ "Either imageName, template, or templateId must be provided"
152
+ )
153
+
154
+ if not self.templateId and not self.template:
155
+ self.template = self._create_new_template()
156
+ elif self.template:
157
+ self._configure_existing_template()
158
+
159
+ # Validate container disk size for CPU instances
160
+ self.validate_cpu_container_disk_size()
161
+
162
+ return self
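
The CPU variant follows the same pattern, with instanceIds in place of gpus: ANY expands to every CPU instance type, GPU fields are zeroed out during validation, and the template's containerDiskInGb is auto-sized and then validated against CPU_INSTANCE_DISK_LIMITS. A minimal sketch under the same assumptions as above (hypothetical name and image):

from tetra_rp.core.resources.cpu import CpuInstanceType
from tetra_rp.core.resources.serverless_cpu import CpuServerlessEndpoint

# Hypothetical configuration; ANY is expanded to CpuInstanceType.all() by validate_cpus.
cpu_endpoint = CpuServerlessEndpoint(
    name="demo-cpu-endpoint",
    imageName="runpod/serverless-hello-world:latest",
    instanceIds=[CpuInstanceType.ANY],
)

# _sync_cpu_fields() overrides the GPU defaults during model validation.
print(cpu_endpoint.gpuCount, repr(cpu_endpoint.gpuIds))  # expected: 0 ''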