tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
--- a/tetra_rp/core/resources/serverless.py
+++ b/tetra_rp/core/resources/serverless.py
@@ -1,27 +1,33 @@
 import asyncio
 import logging
-from typing import Any, Dict, List, Optional
+import os
 from enum import Enum
+from typing import Any, ClassVar, Dict, List, Optional, Set
+
 from pydantic import (
+    BaseModel,
+    Field,
     field_serializer,
     field_validator,
     model_validator,
-    BaseModel,
-    Field,
 )
-
 from runpod.endpoint.runner import Job
 
 from ..api.runpod import RunpodGraphQLClient
 from ..utils.backoff import get_backoff_delay
-
-from .cloud import runpod
 from .base import DeployableResource
-from .template import PodTemplate, KeyValuePair
-from .gpu import GpuGroup
-from .cpu import CpuInstanceType
-from .environment import EnvironmentVars
+from .cloud import runpod
 from .constants import CONSOLE_URL
+from .environment import EnvironmentVars
+from .cpu import CpuInstanceType
+from .gpu import GpuGroup, GpuType
+from .network_volume import NetworkVolume, DataCenter
+from .template import KeyValuePair, PodTemplate
+from .resource_manager import ResourceManager
+
+
+# Prefix applied to endpoint names during live provisioning
+LIVE_PREFIX = "live-"
 
 
 # Environment variables are loaded from the .env file
@@ -39,11 +45,36 @@ def get_env_vars() -> Dict[str, str]:
 log = logging.getLogger(__name__)
 
 
+def _is_prod_environment() -> bool:
+    env = os.getenv("RUNPOD_ENV")
+    if env:
+        return env.lower() == "prod"
+    api_base = os.getenv("RUNPOD_API_BASE_URL", "https://api.runpod.io")
+    return "api.runpod.io" in api_base or "api.runpod.ai" in api_base
+
+
 class ServerlessScalerType(Enum):
     QUEUE_DELAY = "QUEUE_DELAY"
     REQUEST_COUNT = "REQUEST_COUNT"
 
 
+class ServerlessType(Enum):
+    """
+    Serverless endpoint execution model.
+
+    QB (Queue-based): Traditional queue processing with automatic retries.
+        Requests are placed in queue and processed sequentially.
+        JSON input/output only. Higher latency but built-in error recovery.
+
+    LB (Load-balancer): Direct HTTP routing to healthy workers.
+        Supports custom HTTP endpoints and any data format.
+        Lower latency but no automatic retries.
+    """
+
+    QB = "QB"
+    LB = "LB"
+
+
 class CudaVersion(Enum):
     V11_8 = "11.8"
     V12_0 = "12.0"
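
The new ServerlessType enum backs the `type` field introduced in the next hunk (defaulting to QB). Usage sketch, not part of the diff; the import path mirrors this module's location, though the package may re-export these names elsewhere:

    from tetra_rp.core.resources.serverless import ServerlessEndpoint, ServerlessType

    # Default QB endpoint: queued requests, automatic retries, JSON-only I/O
    qb = ServerlessEndpoint(name="my-worker", imageName="my/image:latest")

    # LB endpoint: direct HTTP routing, lower latency, no automatic retries
    lb = ServerlessEndpoint(
        name="my-api",
        imageName="my/image:latest",
        type=ServerlessType.LB,
    )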
@@ -62,34 +93,85 @@ class ServerlessResource(DeployableResource):
     Base class for GPU serverless resource
     """
 
-    _input_only = {"id", "cudaVersions", "env", "gpus", "flashboot", "imageName"}
+    _input_only = {
+        "id",
+        "cudaVersions",
+        "datacenter",
+        "env",
+        "gpus",
+        "flashboot",
+        "flashEnvironmentId",
+        "imageName",
+        "networkVolume",
+    }
+
+    _hashed_fields = {
+        "datacenter",
+        "env",
+        "gpuIds",
+        "executionTimeoutMs",
+        "gpuCount",
+        "locations",
+        "name",
+        "networkVolumeId",
+        "scalerType",
+        "scalerValue",
+        "workersMax",
+        "workersMin",
+        "workersPFBTarget",
+        "allowedCudaVersions",
+        "type",
+    }
+
+    # Fields assigned by API that shouldn't affect drift detection
+    # When adding new fields to ServerlessResource, evaluate if they are:
+    # 1. User-specified (include in hash)
+    # 2. API-assigned/runtime (add to RUNTIME_FIELDS)
+    # 3. Dynamic identifiers (already excluded via "id")
+    RUNTIME_FIELDS: ClassVar[Set[str]] = {
+        "template",
+        "templateId",
+        "aiKey",
+        "userId",
+        "createdAt",
+        "activeBuildid",
+        "computeType",
+        "hubRelease",
+        "repo",
+    }
+
+    EXCLUDED_HASH_FIELDS: ClassVar[Set[str]] = {"id"}
 
     # === Input-only Fields ===
     cudaVersions: Optional[List[CudaVersion]] = []  # for allowedCudaVersions
     env: Optional[Dict[str, str]] = Field(default_factory=get_env_vars)
     flashboot: Optional[bool] = True
-    gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
+    gpus: Optional[List[GpuGroup | GpuType]] = [GpuGroup.ANY]  # for gpuIds
     imageName: Optional[str] = ""  # for template.imageName
+    networkVolume: Optional[NetworkVolume] = None
+    datacenter: DataCenter = Field(default=DataCenter.EU_RO_1)
 
     # === Input Fields ===
-    executionTimeoutMs: Optional[int] = None
+    executionTimeoutMs: Optional[int] = 0
     gpuCount: Optional[int] = 1
     idleTimeout: Optional[int] = 5
     instanceIds: Optional[List[CpuInstanceType]] = None
     locations: Optional[str] = None
     name: str
     networkVolumeId: Optional[str] = None
+    flashEnvironmentId: Optional[str] = None
     scalerType: Optional[ServerlessScalerType] = ServerlessScalerType.QUEUE_DELAY
     scalerValue: Optional[int] = 4
     templateId: Optional[str] = None
+    type: Optional[ServerlessType] = ServerlessType.QB
    workersMax: Optional[int] = 3
     workersMin: Optional[int] = 0
-    workersPFBTarget: Optional[int] = None
+    workersPFBTarget: Optional[int] = 0
 
     # === Runtime Fields ===
     activeBuildid: Optional[str] = None
     aiKey: Optional[str] = None
-    allowedCudaVersions: Optional[str] = None
+    allowedCudaVersions: Optional[str] = ""
     computeType: Optional[str] = None
     createdAt: Optional[str] = None  # TODO: use datetime
     gpuIds: Optional[str] = ""
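
The networkVolume and datacenter fields above are cross-checked in sync_input_fields (next hunk): the volume's dataCenterId must match the endpoint's datacenter, and an already-created volume contributes its ID directly. A sketch of the pairing; NetworkVolume's full constructor is not shown in this diff, so the name argument is an assumption:

    from tetra_rp.core.resources.network_volume import DataCenter, NetworkVolume
    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    volume = NetworkVolume(name="model-cache", dataCenterId=DataCenter.EU_RO_1)  # name arg assumed

    endpoint = ServerlessEndpoint(
        name="vol-worker",
        imageName="my/image:latest",
        datacenter=DataCenter.EU_RO_1,  # must match volume.dataCenterId
        networkVolume=volume,  # deployed automatically before the endpoint
    )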
@@ -116,51 +198,252 @@ class ServerlessResource(DeployableResource):
             raise ValueError("Missing self.id")
         return runpod.Endpoint(self.id)
 
+    @property
+    def endpoint_url(self) -> str:
+        base_url = self.endpoint.rp_client.endpoint_url_base
+        return f"{base_url}/{self.id}"
+
     @field_serializer("scalerType")
     def serialize_scaler_type(
         self, value: Optional[ServerlessScalerType]
     ) -> Optional[str]:
-        """Convert ServerlessScalerType enum to string."""
-        return value.value if value is not None else None
+        """Convert ServerlessScalerType enum to string.
+
+        Handles both enum instances and pre-stringified values that may occur
+        during nested model serialization or when values are already deserialized.
+        """
+        if value is None:
+            return None
+        return value.value if isinstance(value, ServerlessScalerType) else value
+
+    @field_serializer("type")
+    def serialize_type(self, value: Optional[ServerlessType]) -> Optional[str]:
+        """Convert ServerlessType enum to string.
 
-    @field_serializer("instanceIds")
-    def serialize_instance_ids(self, value: List[CpuInstanceType]) -> List[str]:
-        """Convert CpuInstanceType enums to strings."""
-        return [item.value if hasattr(item, "value") else str(item) for item in value]
+        Handles both enum instances and pre-stringified values that may occur
+        during nested model serialization or when values are already deserialized.
+        """
+        if value is None:
+            return None
+        return value.value if isinstance(value, ServerlessType) else value
 
     @field_validator("gpus")
     @classmethod
-    def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
+    def validate_gpus(cls, value: List[GpuGroup | GpuType]) -> List[GpuGroup | GpuType]:
         """Expand ANY to all GPU groups"""
-        if value == [GpuGroup.ANY]:
+        if not value:
+            return value
+        if GpuGroup.ANY in value or GpuType.ANY in value:
             return GpuGroup.all()
         return value
 
+    @property
+    def config_hash(self) -> str:
+        """Get config hash excluding env and runtime-assigned fields.
+
+        Prevents false drift from:
+        - Dynamic env vars computed at runtime
+        - Runtime-assigned fields (template, templateId, aiKey, userId, etc.)
+
+        Only hashes user-specified configuration, not server-assigned state.
+        """
+        import hashlib
+        import json
+
+        resource_type = self.__class__.__name__
+
+        # Exclude runtime fields, env, and id from hash
+        exclude_fields = (
+            self.__class__.RUNTIME_FIELDS | self.__class__.EXCLUDED_HASH_FIELDS
+        )
+        config_dict = self.model_dump(
+            exclude_none=True, exclude=exclude_fields, mode="json"
+        )
+
+        # Convert to JSON string for hashing
+        config_str = json.dumps(config_dict, sort_keys=True)
+        hash_obj = hashlib.md5(f"{resource_type}:{config_str}".encode())
+        hash_value = hash_obj.hexdigest()
+
+        return hash_value
+
     @model_validator(mode="after")
     def sync_input_fields(self):
-        """Sync between temporary inputs and exported fields"""
-        if self.flashboot:
+        """Sync between temporary inputs and exported fields.
+
+        Idempotent: Can be called multiple times safely without changing the result.
+        """
+        # Prepend live- prefix for live provisioning context
+        # Must happen BEFORE flashboot suffix to get: live-my-endpoint-fb
+        is_live_provisioning = (
+            os.getenv("FLASH_IS_LIVE_PROVISIONING", "").lower() == "true"
+        )
+
+        if is_live_provisioning:
+            # Remove existing live- prefixes for idempotency
+            while self.name.startswith(LIVE_PREFIX):
+                self.name = self.name[len(LIVE_PREFIX) :]
+            # Add prefix once
+            self.name = f"{LIVE_PREFIX}{self.name}"
+
+        if self.flashboot and not self.name.endswith("-fb"):
+            # Remove all trailing '-fb' suffixes, then add one
+            while self.name.endswith("-fb"):
+                self.name = self.name[:-3]
             self.name += "-fb"
 
-        if self.instanceIds:
-            return self._sync_input_fields_cpu()
-        else:
-            return self._sync_input_fields_gpu()
+        # Sync datacenter to locations field for API (only if not already set)
+        # Allow overrides in non-prod via env
+        env_locations = os.getenv("RUNPOD_DEFAULT_LOCATIONS")
+        env_datacenter = os.getenv("RUNPOD_DEFAULT_DATACENTER")
+        if env_locations:
+            self.locations = env_locations
+        elif not self.locations:
+            if env_datacenter:
+                try:
+                    self.locations = DataCenter(env_datacenter).value
+                except ValueError:
+                    self.locations = env_datacenter
+            elif _is_prod_environment():
+                self.locations = self.datacenter.value
+
+        # Validate datacenter consistency between endpoint and network volume
+        if self.networkVolume and self.networkVolume.dataCenterId != self.datacenter:
+            raise ValueError(
+                f"Network volume datacenter ({self.networkVolume.dataCenterId.value}) "
+                f"must match endpoint datacenter ({self.datacenter.value})"
+            )
+
+        if self.networkVolume and self.networkVolume.is_created:
+            # Volume already exists, use its ID
+            self.networkVolumeId = self.networkVolume.id
+
+        self._sync_input_fields_gpu()
+
+        return self
+
+    def _has_cpu_instances(self) -> bool:
+        """Check if endpoint has CPU instances configured.
+
+        Returns:
+            True if instanceIds field is present and non-empty, False otherwise.
+        """
+        return (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+    def _get_cpu_disk_limit(self) -> Optional[int]:
+        """Calculate max disk size for CPU instances.
+
+        Returns:
+            Maximum allowed disk size in GB, or None if no CPU instances.
+        """
+        if not self._has_cpu_instances():
+            return None
+
+        from .cpu import get_max_disk_size_for_instances
+
+        return get_max_disk_size_for_instances(self.instanceIds)
+
+    def _apply_smart_disk_sizing(self, template: PodTemplate) -> None:
+        """Apply smart disk sizing based on instance type detection.
+
+        If CPU instances are detected and using the default disk size,
+        auto-sizes the disk to the CPU instance limit.
+
+        Args:
+            template: PodTemplate to configure.
+        """
+        cpu_limit = self._get_cpu_disk_limit()
+
+        if cpu_limit is None:
+            return  # No CPU instances, keep default
+
+        # Auto-size if using default value
+        default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+        if template.containerDiskInGb == default_disk_size:
+            log.info(
+                f"Auto-sizing containerDiskInGb from {default_disk_size}GB "
+                f"to {cpu_limit}GB (CPU instance limit)"
+            )
+            template.containerDiskInGb = cpu_limit
+
+    def _validate_cpu_disk_size(self) -> None:
+        """Validate disk size doesn't exceed CPU instance limits.
+
+        Raises:
+            ValueError: If disk size exceeds CPU instance limits.
+        """
+        cpu_limit = self._get_cpu_disk_limit()
+
+        if cpu_limit is None:
+            return  # No CPU instances, no validation needed
+
+        if not self.template or not self.template.containerDiskInGb:
+            return
+
+        if self.template.containerDiskInGb > cpu_limit:
+            from .cpu import CPU_INSTANCE_DISK_LIMITS
+
+            instance_limits = [
+                f"{inst.value}: max {CPU_INSTANCE_DISK_LIMITS[inst]}GB"
+                for inst in self.instanceIds
+            ]
+
+            raise ValueError(
+                f"Container disk size {self.template.containerDiskInGb}GB exceeds "
+                f"the maximum allowed for CPU instances. "
+                f"Instance limits: {', '.join(instance_limits)}. "
+                f"Maximum allowed: {cpu_limit}GB. "
+                f"Consider using CpuServerlessEndpoint or CpuLiveServerless classes "
+                f"for CPU-only deployments."
+            )
+
+    def _create_new_template(self) -> PodTemplate:
+        """Create a new PodTemplate with standard configuration."""
+        return PodTemplate(
+            name=self.resource_id,
+            imageName=self.imageName,
+            env=KeyValuePair.from_dict(self.env or get_env_vars()),
+        )
+
+    def _configure_existing_template(self) -> None:
+        """Configure an existing template with necessary overrides."""
+        if self.template is None:
+            return
+
+        self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+        if self.imageName:
+            self.template.imageName = self.imageName
+        if self.env:
+            self.template.env = KeyValuePair.from_dict(self.env)
+
+    async def _sync_graphql_object_with_inputs(
+        self, returned_endpoint: "ServerlessResource"
+    ):
+        for _input_field in self._input_only or set():
+            if getattr(self, _input_field) is not None:
+                # sync input only fields stripped from gql request back to endpoint
+                setattr(returned_endpoint, _input_field, getattr(self, _input_field))
+
+        return returned_endpoint
 
     def _sync_input_fields_gpu(self):
-        # GPU-specific fields
-        if self.gpus:
+        # GPU-specific fields (idempotent - only set if not already set)
+        if self.gpus and not self.gpuIds:
             # Convert gpus list to gpuIds string
-            self.gpuIds = ",".join(gpu.value for gpu in self.gpus)
-        elif self.gpuIds:
+            self.gpuIds = GpuGroup.to_gpu_ids_str(self.gpus)
+        elif self.gpuIds and not self.gpus:
             # Convert gpuIds string to gpus list (from backend responses)
-            gpu_values = [v.strip() for v in self.gpuIds.split(",") if v.strip()]
-            self.gpus = [GpuGroup(value) for value in gpu_values]
+            self.gpus = GpuGroup.from_gpu_ids_str(self.gpuIds)
 
-        if self.cudaVersions:
+        if self.cudaVersions and not self.allowedCudaVersions:
             # Convert cudaVersions list to allowedCudaVersions string
             self.allowedCudaVersions = ",".join(v.value for v in self.cudaVersions)
-        elif self.allowedCudaVersions:
+        elif self.allowedCudaVersions and not self.cudaVersions:
             # Convert allowedCudaVersions string to cudaVersions list (from backend responses)
             version_values = [
                 v.strip() for v in self.allowedCudaVersions.split(",") if v.strip()
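
How config_hash plays out in practice, as a sketch: within one process (so get_env_vars returns the same dict), two identical user configs hash equally, while changing a user-specified field such as imageName registers as drift; runtime-assigned fields like template and templateId never affect the hash:

    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    v1 = ServerlessEndpoint(name="svc", imageName="my/image:v1")
    v1_again = ServerlessEndpoint(name="svc", imageName="my/image:v1")
    v2 = ServerlessEndpoint(name="svc", imageName="my/image:v2")

    assert v1.config_hash == v1_again.config_hash  # same user config, same hash
    assert v1.config_hash != v2.config_hash  # imageName change counts as drift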
@@ -169,13 +452,17 @@ class ServerlessResource(DeployableResource):
 
         return self
 
-    def _sync_input_fields_cpu(self):
-        # Override GPU-specific fields for CPU
-        self.gpuCount = 0
-        self.allowedCudaVersions = ""
-        self.gpuIds = ""
+    async def _ensure_network_volume_deployed(self) -> None:
+        """
+        Ensures network volume is deployed and ready if one is specified.
+        Updates networkVolumeId with the deployed volume ID.
+        """
+        if self.networkVolumeId:
+            return
 
-        return self
+        if self.networkVolume:
+            deployedNetworkVolume = await self.networkVolume.deploy()
+            self.networkVolumeId = deployedNetworkVolume.id
 
     def is_deployed(self) -> bool:
         """
@@ -191,7 +478,13 @@ class ServerlessResource(DeployableResource):
             log.error(f"Error checking {self}: {e}")
             return False
 
-    async def deploy(self) -> "DeployableResource":
+    def _payload_exclude(self) -> Set[str]:
+        # flashEnvironmentId is input-only but must be sent when provided
+        exclude_fields = set(self._input_only or set())
+        exclude_fields.discard("flashEnvironmentId")
+        return exclude_fields
+
+    async def _do_deploy(self) -> "DeployableResource":
         """
         Deploys the serverless resource using the provided configuration.
         Returns a DeployableResource object.
@@ -202,11 +495,18 @@ class ServerlessResource(DeployableResource):
                 log.debug(f"{self} exists")
                 return self
 
+            # NEW: Ensure network volume is deployed first
+            await self._ensure_network_volume_deployed()
+
             async with RunpodGraphQLClient() as client:
-                payload = self.model_dump(exclude=self._input_only, exclude_none=True)
-                result = await client.create_endpoint(payload)
+                payload = self.model_dump(
+                    exclude=self._payload_exclude(), exclude_none=True, mode="json"
+                )
+                result = await client.save_endpoint(payload)
 
             if endpoint := self.__class__(**result):
+                endpoint = await self._sync_graphql_object_with_inputs(endpoint)
+                self.id = endpoint.id
                 return endpoint
 
             raise ValueError("Deployment failed, no endpoint was returned.")
@@ -215,61 +515,170 @@ class ServerlessResource(DeployableResource):
             log.error(f"{self} failed to deploy: {e}")
             raise
 
-    async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
-        """
-        Asynchronously checks if the serverless resource is ready to handle
-        requests by polling its health endpoint.
+    async def update(self, new_config: "ServerlessResource") -> "ServerlessResource":
+        """Update existing endpoint with new configuration.
+
+        Uses saveEndpoint mutation which handles both version-triggering and
+        rolling changes. Version-triggering changes (GPU, template, volumes)
+        automatically increment version and trigger worker recreation server-side.
 
         Args:
-            give_up_threshold (int, optional): The maximum number of polling
-                attempts before giving up and raising an error. Defaults to 10.
+            new_config: New configuration to apply
 
         Returns:
-            bool: True if the serverless resource is ready for requests.
+            Updated ServerlessResource instance
 
         Raises:
-            ValueError: If the serverless resource is not deployed.
-            RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
-                after exceeding the give_up_threshold.
+            ValueError: If endpoint not deployed or update fails
         """
-        if not self.is_deployed():
-            raise ValueError("Serverless is not deployed")
+        if not self.id:
+            raise ValueError("Cannot update: endpoint not deployed")
+
+        try:
+            # Log if version-triggering changes detected (informational only)
+            if self._has_structural_changes(new_config):
+                log.info(
+                    f"{self.name}: Version-triggering changes detected. "
+                    "Server will increment version and recreate workers."
+                )
+            else:
+                log.info(f"Updating endpoint '{self.name}' (ID: {self.id})")
 
-        log.debug(f"{self} | API /health")
+            # Ensure network volume is deployed if specified
+            await new_config._ensure_network_volume_deployed()
 
-        current_pace = 0
-        attempt = 0
+            async with RunpodGraphQLClient() as client:
+                # Include the endpoint ID to trigger update
+                payload = new_config.model_dump(
+                    exclude=new_config._payload_exclude(),
+                    exclude_none=True,
+                    mode="json",
+                )
+                payload["id"] = self.id  # Critical: include ID for update
 
-        # Poll for health status
-        while True:
-            await asyncio.sleep(current_pace)
+                result = await client.save_endpoint(payload)
 
-            health = await asyncio.to_thread(self.endpoint.health)
-            health = ServerlessHealth(**health)
+            if updated := self.__class__(**result):
+                log.info(f"Successfully updated endpoint '{self.name}' (ID: {self.id})")
+                return updated
+
+            raise ValueError("Update failed, no endpoint was returned.")
+
+        except Exception as e:
+            log.error(f"Failed to update {self.name}: {e}")
+            raise
+
+    def _has_structural_changes(self, new_config: "ServerlessResource") -> bool:
+        """Check if config changes are version-triggering.
 
-            if health.is_ready:
+        Version-triggering changes cause server-side version increment and
+        worker recreation:
+        - Image changes (imageName via templateId)
+        - GPU configuration (gpus, gpuIds, allowedCudaVersions, gpuCount)
+        - Hardware allocation (instanceIds, locations)
+        - Storage changes (networkVolumeId)
+        - Flashboot toggle
+
+        Rolling changes (no version increment):
+        - Worker scaling (workersMin, workersMax)
+        - Scaler configuration (scalerType, scalerValue)
+        - Timeout values (idleTimeout, executionTimeoutMs)
+        - Environment variables (env)
+
+        Note: This method is now informational for logging. The actual
+        version-triggering logic runs server-side when saveEndpoint is called.
+
+        Runtime fields (template, templateId, aiKey, userId) are excluded
+        to prevent false positives when comparing deployed vs new config.
+
+        Args:
+            new_config: New configuration to compare against
+
+        Returns:
+            True if version-triggering changes detected (workers will be recreated)
+        """
+        structural_fields = [
+            "gpus",
+            "gpuIds",
+            "imageName",
+            "flashboot",
+            "allowedCudaVersions",
+            "cudaVersions",
+            "instanceIds",
+        ]
+
+        for field in structural_fields:
+            old_val = getattr(self, field, None)
+            new_val = getattr(new_config, field, None)
+
+            # Handle list comparison
+            if isinstance(old_val, list) and isinstance(new_val, list):
+                if sorted(str(v) for v in old_val) != sorted(str(v) for v in new_val):
+                    log.debug(f"Structural change in '{field}': {old_val} → {new_val}")
+                    return True
+            # Handle other types
+            elif old_val != new_val:
+                log.debug(f"Structural change in '{field}': {old_val} → {new_val}")
                 return True
-            else:
-                # nothing changed, increase the gap
-                attempt += 1
-                indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
-                if indicator:
-                    log.info(f"{self} | {indicator}")
-
-                status = health.workers.status
-                if status in [
-                    Status.THROTTLED,
-                    Status.UNHEALTHY,
-                    Status.UNKNOWN,
-                ]:
-                    log.debug(f"{self} | Health {status.value}")
-
-                    if attempt >= give_up_threshold:
-                        # Give up
-                        raise RuntimeError(f"Health {status.value}")
-
-                    # Adjust polling pace appropriately
-                    current_pace = get_backoff_delay(attempt)
+
+        return False
+
+    async def deploy(self) -> "DeployableResource":
+        resource_manager = ResourceManager()
+        resource = await resource_manager.get_or_deploy_resource(self)
+        # hydrate the id onto the resource so it's usable when this is called directly
+        # on a config
+        self.id = resource.id
+        return self
+
+    async def _do_undeploy(self) -> bool:
+        """
+        Undeploys (deletes) the serverless endpoint.
+
+        If deletion fails, verifies the endpoint still exists. If not, treats it as
+        successfully undeployed (handles cases where endpoint was deleted externally).
+
+        Returns:
+            True if successfully undeployed or endpoint doesn't exist, False otherwise
+        """
+        if not self.id:
+            log.warning(f"{self} has no endpoint ID, cannot undeploy")
+            return False
+
+        try:
+            async with RunpodGraphQLClient() as client:
+                result = await client.delete_endpoint(self.id)
+                success = result.get("success", False)
+
+                if success:
+                    log.info(f"{self} successfully undeployed")
+                    return True
+                else:
+                    log.error(f"{self} failed to undeploy")
+                    return False
+
+        except Exception as e:
+            log.error(f"{self} failed to undeploy: {e}")
+
+            # Deletion failed. Check if endpoint still exists.
+            # If it doesn't exist, treat as successful cleanup (orphaned endpoint).
+            try:
+                async with RunpodGraphQLClient() as client:
+                    if not await client.endpoint_exists(self.id):
+                        log.info(
+                            f"{self} no longer exists on RunPod, removing from cache"
+                        )
+                        return True
+            except Exception as check_error:
+                log.warning(f"Could not verify endpoint existence: {check_error}")
+
+            return False
+
+    async def undeploy(self) -> Dict[str, Any]:
+        resource_manager = ResourceManager()
+        result = await resource_manager.undeploy_resource(self.resource_id)
+        log.debug(f"undeployment result: {result}")
+        return result
 
     async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
         """
@@ -285,10 +694,7 @@ class ServerlessResource(DeployableResource):
         )
 
         try:
-            # log.debug(f"[{log_group}] Payload: {payload}")
-
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
+            # log.debug(f"[{self}] Payload: {payload}")
 
             log.info(f"{self} | API /run_sync")
             response = await asyncio.to_thread(_fetch_job)
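
run_sync (and run, below) no longer gate on is_ready_for_requests; requests go straight to the deployed endpoint, and job polling paces itself with get_backoff_delay. Calling sketch; the {"input": ...} payload shape follows the usual RunPod serverless convention and ultimately depends on your handler:

    result = await endpoint.run_sync({"input": {"prompt": "hello"}})
    print(result.output, result.executionTime)  # JobOutput fields, logged in model_post_init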
@@ -314,9 +720,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{self}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             # Create a job using the endpoint
             log.info(f"{self} | API /run")
             job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
@@ -334,9 +737,8 @@ class ServerlessResource(DeployableResource):
         while True:
             await asyncio.sleep(current_pace)
 
-            if await self.is_ready_for_requests():
-                # Check job status
-                job_status = await asyncio.to_thread(job.status)
+            # Check job status
+            job_status = await asyncio.to_thread(job.status)
 
             if last_status == job_status:
                 # nothing changed, increase the gap
@@ -373,38 +775,53 @@ class ServerlessEndpoint(ServerlessResource):
     Inherits from ServerlessResource.
     """
 
+    @model_validator(mode="after")
+    def validate_instance_mutual_exclusivity(self):
+        """Ensure gpuIds and instanceIds are mutually exclusive.
+
+        When instanceIds is specified, clears GPU configuration since CPU and GPU
+        are mutually exclusive resources. Prevents mixing GPU and CPU configurations.
+        """
+        has_cpu = (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+        if has_cpu:
+            # Clear GPU configuration if CPU instances are specified
+            # This makes CPU intent explicit
+            self.gpus = []
+            self.gpuIds = ""
+            self.gpuCount = 0
+
+        return self
+
     @model_validator(mode="after")
     def set_serverless_template(self):
+        """Create template from imageName if not provided.
+
+        Must run after sync_input_fields to ensure all input fields are synced.
+        Applies smart disk sizing and validates configuration.
+        """
         if not any([self.imageName, self.template, self.templateId]):
             raise ValueError(
                 "Either imageName, template, or templateId must be provided"
             )
 
         if not self.templateId and not self.template:
-            self.template = PodTemplate(
-                name=self.resource_id,
-                imageName=self.imageName,
-                env=KeyValuePair.from_dict(self.env or get_env_vars()),
-            )
-
+            self.template = self._create_new_template()
+            # Apply smart disk sizing to new template
+            self._apply_smart_disk_sizing(self.template)
         elif self.template:
-            self.template.name = f"{self.resource_id}__{self.template.resource_id}"
-            if self.imageName:
-                self.template.imageName = self.imageName
-            if self.env:
-                self.template.env = KeyValuePair.from_dict(self.env)
-
-        return self
+            self._configure_existing_template()
+            # Apply smart disk sizing to existing template
+            self._apply_smart_disk_sizing(self.template)
 
+        # Validate CPU disk size if applicable
+        self._validate_cpu_disk_size()
 
-class CpuServerlessEndpoint(ServerlessEndpoint):
-    """
-    Convenience class for CPU serverless endpoint.
-    Represents a CPU-only serverless endpoint distinct from a live serverless.
-    Inherits from ServerlessEndpoint.
-    """
-
-    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
+        return self
 
 
 class JobOutput(BaseModel):
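
With CpuServerlessEndpoint dropped from this module (CPU endpoints appear to move to the new serverless_cpu.py listed above), the validator added here makes CPU intent explicit. Sketch, assuming base-class validators run before this one:

    from tetra_rp.core.resources.cpu import CpuInstanceType
    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    cpu_endpoint = ServerlessEndpoint(
        name="cpu-svc",
        imageName="my/image:latest",
        instanceIds=[CpuInstanceType.CPU3G_2_8],
    )
    # validate_instance_mutual_exclusivity clears the GPU configuration:
    assert cpu_endpoint.gpuCount == 0 and cpu_endpoint.gpuIds == ""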
@@ -416,7 +833,7 @@ class JobOutput(BaseModel):
     output: Optional[Any] = None
     error: Optional[str] = ""
 
-    def model_post_init(self, __context):
+    def model_post_init(self, _: Any) -> None:
         log_group = f"Worker:{self.workerId}"
         log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
         log.info(f"{log_group} | Execution Time: {self.executionTime} ms")