tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
--- a/tetra_rp/core/resources/serverless.py
+++ b/tetra_rp/core/resources/serverless.py
@@ -1,27 +1,33 @@
 import asyncio
 import logging
-from typing import Any, Dict, List, Optional
+import os
 from enum import Enum
+from typing import Any, ClassVar, Dict, List, Optional, Set
+
 from pydantic import (
+    BaseModel,
+    Field,
     field_serializer,
     field_validator,
     model_validator,
-    BaseModel,
-    Field,
 )
-
 from runpod.endpoint.runner import Job
 
 from ..api.runpod import RunpodGraphQLClient
 from ..utils.backoff import get_backoff_delay
-
-from .cloud import runpod
 from .base import DeployableResource
-from .template import PodTemplate, KeyValuePair
-from .gpu import GpuGroup
-from .cpu import CpuInstanceType
-from .environment import EnvironmentVars
+from .cloud import runpod
 from .constants import CONSOLE_URL
+from .environment import EnvironmentVars
+from .cpu import CpuInstanceType
+from .gpu import GpuGroup, GpuType
+from .network_volume import NetworkVolume, DataCenter
+from .template import KeyValuePair, PodTemplate
+from .resource_manager import ResourceManager
+
+
+# Prefix applied to endpoint names during live provisioning
+LIVE_PREFIX = "live-"
 
 
 # Environment variables are loaded from the .env file
@@ -39,11 +45,36 @@ def get_env_vars() -> Dict[str, str]:
 log = logging.getLogger(__name__)
 
 
+def _is_prod_environment() -> bool:
+    env = os.getenv("RUNPOD_ENV")
+    if env:
+        return env.lower() == "prod"
+    api_base = os.getenv("RUNPOD_API_BASE_URL", "https://api.runpod.io")
+    return "api.runpod.io" in api_base or "api.runpod.ai" in api_base
+
+
 class ServerlessScalerType(Enum):
     QUEUE_DELAY = "QUEUE_DELAY"
     REQUEST_COUNT = "REQUEST_COUNT"
 
 
+class ServerlessType(Enum):
+    """
+    Serverless endpoint execution model.
+
+    QB (Queue-based): Traditional queue processing with automatic retries.
+        Requests are placed in queue and processed sequentially.
+        JSON input/output only. Higher latency but built-in error recovery.
+
+    LB (Load-balancer): Direct HTTP routing to healthy workers.
+        Supports custom HTTP endpoints and any data format.
+        Lower latency but no automatic retries.
+    """
+
+    QB = "QB"
+    LB = "LB"
+
+
 class CudaVersion(Enum):
     V11_8 = "11.8"
     V12_0 = "12.0"
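
The new ServerlessType enum backs the `type` field introduced in the next hunk (defaulting to QB). Usage sketch, not part of the diff; the import path mirrors this module's location, though the package may re-export these names elsewhere:

    from tetra_rp.core.resources.serverless import ServerlessEndpoint, ServerlessType

    # Default QB endpoint: queued requests, automatic retries, JSON-only I/O
    qb = ServerlessEndpoint(name="my-worker", imageName="my/image:latest")

    # LB endpoint: direct HTTP routing, lower latency, no automatic retries
    lb = ServerlessEndpoint(
        name="my-api",
        imageName="my/image:latest",
        type=ServerlessType.LB,
    )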
@@ -62,34 +93,85 @@ class ServerlessResource(DeployableResource):
     Base class for GPU serverless resource
     """
 
-    _input_only = {"id", "cudaVersions", "env", "gpus", "flashboot", "imageName"}
+    _input_only = {
+        "id",
+        "cudaVersions",
+        "datacenter",
+        "env",
+        "gpus",
+        "flashboot",
+        "flashEnvironmentId",
+        "imageName",
+        "networkVolume",
+    }
+
+    _hashed_fields = {
+        "datacenter",
+        "env",
+        "gpuIds",
+        "executionTimeoutMs",
+        "gpuCount",
+        "locations",
+        "name",
+        "networkVolumeId",
+        "scalerType",
+        "scalerValue",
+        "workersMax",
+        "workersMin",
+        "workersPFBTarget",
+        "allowedCudaVersions",
+        "type",
+    }
+
+    # Fields assigned by API that shouldn't affect drift detection
+    # When adding new fields to ServerlessResource, evaluate if they are:
+    # 1. User-specified (include in hash)
+    # 2. API-assigned/runtime (add to RUNTIME_FIELDS)
+    # 3. Dynamic identifiers (already excluded via "id")
+    RUNTIME_FIELDS: ClassVar[Set[str]] = {
+        "template",
+        "templateId",
+        "aiKey",
+        "userId",
+        "createdAt",
+        "activeBuildid",
+        "computeType",
+        "hubRelease",
+        "repo",
+    }
+
+    EXCLUDED_HASH_FIELDS: ClassVar[Set[str]] = {"id"}
 
     # === Input-only Fields ===
     cudaVersions: Optional[List[CudaVersion]] = []  # for allowedCudaVersions
     env: Optional[Dict[str, str]] = Field(default_factory=get_env_vars)
     flashboot: Optional[bool] = True
-    gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
+    gpus: Optional[List[GpuGroup | GpuType]] = [GpuGroup.ANY]  # for gpuIds
     imageName: Optional[str] = ""  # for template.imageName
+    networkVolume: Optional[NetworkVolume] = None
+    datacenter: DataCenter = Field(default=DataCenter.EU_RO_1)
 
     # === Input Fields ===
-    executionTimeoutMs: Optional[int] = None
+    executionTimeoutMs: Optional[int] = 0
     gpuCount: Optional[int] = 1
     idleTimeout: Optional[int] = 5
     instanceIds: Optional[List[CpuInstanceType]] = None
     locations: Optional[str] = None
     name: str
     networkVolumeId: Optional[str] = None
+    flashEnvironmentId: Optional[str] = None
     scalerType: Optional[ServerlessScalerType] = ServerlessScalerType.QUEUE_DELAY
     scalerValue: Optional[int] = 4
     templateId: Optional[str] = None
+    type: Optional[ServerlessType] = ServerlessType.QB
    workersMax: Optional[int] = 3
     workersMin: Optional[int] = 0
-    workersPFBTarget: Optional[int] = None
+    workersPFBTarget: Optional[int] = 0
 
     # === Runtime Fields ===
     activeBuildid: Optional[str] = None
     aiKey: Optional[str] = None
-    allowedCudaVersions: Optional[str] = None
+    allowedCudaVersions: Optional[str] = ""
     computeType: Optional[str] = None
     createdAt: Optional[str] = None  # TODO: use datetime
     gpuIds: Optional[str] = ""
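
The networkVolume and datacenter fields above are cross-checked in sync_input_fields (next hunk): the volume's dataCenterId must match the endpoint's datacenter, and an already-created volume contributes its ID directly. A sketch of the pairing; NetworkVolume's full constructor is not shown in this diff, so the name argument is an assumption:

    from tetra_rp.core.resources.network_volume import DataCenter, NetworkVolume
    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    volume = NetworkVolume(name="model-cache", dataCenterId=DataCenter.EU_RO_1)  # name arg assumed

    endpoint = ServerlessEndpoint(
        name="vol-worker",
        imageName="my/image:latest",
        datacenter=DataCenter.EU_RO_1,  # must match volume.dataCenterId
        networkVolume=volume,  # deployed automatically before the endpoint
    )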
@@ -116,51 +198,252 @@ class ServerlessResource(DeployableResource):
             raise ValueError("Missing self.id")
         return runpod.Endpoint(self.id)
 
+    @property
+    def endpoint_url(self) -> str:
+        base_url = self.endpoint.rp_client.endpoint_url_base
+        return f"{base_url}/{self.id}"
+
     @field_serializer("scalerType")
     def serialize_scaler_type(
         self, value: Optional[ServerlessScalerType]
     ) -> Optional[str]:
-        """Convert ServerlessScalerType enum to string."""
-        return value.value if value is not None else None
+        """Convert ServerlessScalerType enum to string.
+
+        Handles both enum instances and pre-stringified values that may occur
+        during nested model serialization or when values are already deserialized.
+        """
+        if value is None:
+            return None
+        return value.value if isinstance(value, ServerlessScalerType) else value
+
+    @field_serializer("type")
+    def serialize_type(self, value: Optional[ServerlessType]) -> Optional[str]:
+        """Convert ServerlessType enum to string.
 
-    @field_serializer("instanceIds")
-    def serialize_instance_ids(self, value: List[CpuInstanceType]) -> List[str]:
-        """Convert CpuInstanceType enums to strings."""
-        return [item.value if hasattr(item, "value") else str(item) for item in value]
+        Handles both enum instances and pre-stringified values that may occur
+        during nested model serialization or when values are already deserialized.
+        """
+        if value is None:
+            return None
+        return value.value if isinstance(value, ServerlessType) else value
 
     @field_validator("gpus")
     @classmethod
-    def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
+    def validate_gpus(cls, value: List[GpuGroup | GpuType]) -> List[GpuGroup | GpuType]:
         """Expand ANY to all GPU groups"""
-        if value == [GpuGroup.ANY]:
+        if not value:
+            return value
+        if GpuGroup.ANY in value or GpuType.ANY in value:
             return GpuGroup.all()
         return value
 
+    @property
+    def config_hash(self) -> str:
+        """Get config hash excluding env and runtime-assigned fields.
+
+        Prevents false drift from:
+        - Dynamic env vars computed at runtime
+        - Runtime-assigned fields (template, templateId, aiKey, userId, etc.)
+
+        Only hashes user-specified configuration, not server-assigned state.
+        """
+        import hashlib
+        import json
+
+        resource_type = self.__class__.__name__
+
+        # Exclude runtime fields, env, and id from hash
+        exclude_fields = (
+            self.__class__.RUNTIME_FIELDS | self.__class__.EXCLUDED_HASH_FIELDS
+        )
+        config_dict = self.model_dump(
+            exclude_none=True, exclude=exclude_fields, mode="json"
+        )
+
+        # Convert to JSON string for hashing
+        config_str = json.dumps(config_dict, sort_keys=True)
+        hash_obj = hashlib.md5(f"{resource_type}:{config_str}".encode())
+        hash_value = hash_obj.hexdigest()
+
+        return hash_value
+
     @model_validator(mode="after")
     def sync_input_fields(self):
-        """Sync between temporary inputs and exported fields"""
-        if self.flashboot:
+        """Sync between temporary inputs and exported fields.
+
+        Idempotent: Can be called multiple times safely without changing the result.
+        """
+        # Prepend live- prefix for live provisioning context
+        # Must happen BEFORE flashboot suffix to get: live-my-endpoint-fb
+        is_live_provisioning = (
+            os.getenv("FLASH_IS_LIVE_PROVISIONING", "").lower() == "true"
+        )
+
+        if is_live_provisioning:
+            # Remove existing live- prefixes for idempotency
+            while self.name.startswith(LIVE_PREFIX):
+                self.name = self.name[len(LIVE_PREFIX) :]
+            # Add prefix once
+            self.name = f"{LIVE_PREFIX}{self.name}"
+
+        if self.flashboot and not self.name.endswith("-fb"):
+            # Remove all trailing '-fb' suffixes, then add one
+            while self.name.endswith("-fb"):
+                self.name = self.name[:-3]
             self.name += "-fb"
 
-        if self.instanceIds:
-            return self._sync_input_fields_cpu()
-        else:
-            return self._sync_input_fields_gpu()
+        # Sync datacenter to locations field for API (only if not already set)
+        # Allow overrides in non-prod via env
+        env_locations = os.getenv("RUNPOD_DEFAULT_LOCATIONS")
+        env_datacenter = os.getenv("RUNPOD_DEFAULT_DATACENTER")
+        if env_locations:
+            self.locations = env_locations
+        elif not self.locations:
+            if env_datacenter:
+                try:
+                    self.locations = DataCenter(env_datacenter).value
+                except ValueError:
+                    self.locations = env_datacenter
+            elif _is_prod_environment():
+                self.locations = self.datacenter.value
+
+        # Validate datacenter consistency between endpoint and network volume
+        if self.networkVolume and self.networkVolume.dataCenterId != self.datacenter:
+            raise ValueError(
+                f"Network volume datacenter ({self.networkVolume.dataCenterId.value}) "
+                f"must match endpoint datacenter ({self.datacenter.value})"
+            )
+
+        if self.networkVolume and self.networkVolume.is_created:
+            # Volume already exists, use its ID
+            self.networkVolumeId = self.networkVolume.id
+
+        self._sync_input_fields_gpu()
+
+        return self
+
+    def _has_cpu_instances(self) -> bool:
+        """Check if endpoint has CPU instances configured.
+
+        Returns:
+            True if instanceIds field is present and non-empty, False otherwise.
+        """
+        return (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+    def _get_cpu_disk_limit(self) -> Optional[int]:
+        """Calculate max disk size for CPU instances.
+
+        Returns:
+            Maximum allowed disk size in GB, or None if no CPU instances.
+        """
+        if not self._has_cpu_instances():
+            return None
+
+        from .cpu import get_max_disk_size_for_instances
+
+        return get_max_disk_size_for_instances(self.instanceIds)
+
+    def _apply_smart_disk_sizing(self, template: PodTemplate) -> None:
+        """Apply smart disk sizing based on instance type detection.
+
+        If CPU instances are detected and using the default disk size,
+        auto-sizes the disk to the CPU instance limit.
+
+        Args:
+            template: PodTemplate to configure.
+        """
+        cpu_limit = self._get_cpu_disk_limit()
+
+        if cpu_limit is None:
+            return  # No CPU instances, keep default
+
+        # Auto-size if using default value
+        default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+        if template.containerDiskInGb == default_disk_size:
+            log.info(
+                f"Auto-sizing containerDiskInGb from {default_disk_size}GB "
+                f"to {cpu_limit}GB (CPU instance limit)"
+            )
+            template.containerDiskInGb = cpu_limit
+
+    def _validate_cpu_disk_size(self) -> None:
+        """Validate disk size doesn't exceed CPU instance limits.
+
+        Raises:
+            ValueError: If disk size exceeds CPU instance limits.
+        """
+        cpu_limit = self._get_cpu_disk_limit()
+
+        if cpu_limit is None:
+            return  # No CPU instances, no validation needed
+
+        if not self.template or not self.template.containerDiskInGb:
+            return
+
+        if self.template.containerDiskInGb > cpu_limit:
+            from .cpu import CPU_INSTANCE_DISK_LIMITS
+
+            instance_limits = [
+                f"{inst.value}: max {CPU_INSTANCE_DISK_LIMITS[inst]}GB"
+                for inst in self.instanceIds
+            ]
+
+            raise ValueError(
+                f"Container disk size {self.template.containerDiskInGb}GB exceeds "
+                f"the maximum allowed for CPU instances. "
+                f"Instance limits: {', '.join(instance_limits)}. "
+                f"Maximum allowed: {cpu_limit}GB. "
+                f"Consider using CpuServerlessEndpoint or CpuLiveServerless classes "
+                f"for CPU-only deployments."
+            )
+
+    def _create_new_template(self) -> PodTemplate:
+        """Create a new PodTemplate with standard configuration."""
+        return PodTemplate(
+            name=self.resource_id,
+            imageName=self.imageName,
+            env=KeyValuePair.from_dict(self.env or get_env_vars()),
+        )
+
+    def _configure_existing_template(self) -> None:
+        """Configure an existing template with necessary overrides."""
+        if self.template is None:
+            return
+
+        self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+        if self.imageName:
+            self.template.imageName = self.imageName
+        if self.env:
+            self.template.env = KeyValuePair.from_dict(self.env)
+
+    async def _sync_graphql_object_with_inputs(
+        self, returned_endpoint: "ServerlessResource"
+    ):
+        for _input_field in self._input_only or set():
+            if getattr(self, _input_field) is not None:
+                # sync input only fields stripped from gql request back to endpoint
+                setattr(returned_endpoint, _input_field, getattr(self, _input_field))
+
+        return returned_endpoint
 
     def _sync_input_fields_gpu(self):
-        # GPU-specific fields
-        if self.gpus:
+        # GPU-specific fields (idempotent - only set if not already set)
+        if self.gpus and not self.gpuIds:
             # Convert gpus list to gpuIds string
-            self.gpuIds = ",".join(gpu.value for gpu in self.gpus)
-        elif self.gpuIds:
+            self.gpuIds = GpuGroup.to_gpu_ids_str(self.gpus)
+        elif self.gpuIds and not self.gpus:
             # Convert gpuIds string to gpus list (from backend responses)
-            gpu_values = [v.strip() for v in self.gpuIds.split(",") if v.strip()]
-            self.gpus = [GpuGroup(value) for value in gpu_values]
+            self.gpus = GpuGroup.from_gpu_ids_str(self.gpuIds)
 
-        if self.cudaVersions:
+        if self.cudaVersions and not self.allowedCudaVersions:
             # Convert cudaVersions list to allowedCudaVersions string
             self.allowedCudaVersions = ",".join(v.value for v in self.cudaVersions)
-        elif self.allowedCudaVersions:
+        elif self.allowedCudaVersions and not self.cudaVersions:
             # Convert allowedCudaVersions string to cudaVersions list (from backend responses)
             version_values = [
                 v.strip() for v in self.allowedCudaVersions.split(",") if v.strip()
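
How config_hash plays out in practice, as a sketch: within one process (so get_env_vars returns the same dict), two identical user configs hash equally, while changing a user-specified field such as imageName registers as drift; runtime-assigned fields like template and templateId never affect the hash:

    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    v1 = ServerlessEndpoint(name="svc", imageName="my/image:v1")
    v1_again = ServerlessEndpoint(name="svc", imageName="my/image:v1")
    v2 = ServerlessEndpoint(name="svc", imageName="my/image:v2")

    assert v1.config_hash == v1_again.config_hash  # same user config, same hash
    assert v1.config_hash != v2.config_hash  # imageName change counts as drift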
@@ -169,13 +452,17 @@ class ServerlessResource(DeployableResource):
 
         return self
 
-    def _sync_input_fields_cpu(self):
-        # Override GPU-specific fields for CPU
-        self.gpuCount = 0
-        self.allowedCudaVersions = ""
-        self.gpuIds = ""
+    async def _ensure_network_volume_deployed(self) -> None:
+        """
+        Ensures network volume is deployed and ready if one is specified.
+        Updates networkVolumeId with the deployed volume ID.
+        """
+        if self.networkVolumeId:
+            return
 
-        return self
+        if self.networkVolume:
+            deployedNetworkVolume = await self.networkVolume.deploy()
+            self.networkVolumeId = deployedNetworkVolume.id
 
     def is_deployed(self) -> bool:
         """
@@ -191,7 +478,13 @@ class ServerlessResource(DeployableResource):
             log.error(f"Error checking {self}: {e}")
             return False
 
-    async def deploy(self) -> "DeployableResource":
+    def _payload_exclude(self) -> Set[str]:
+        # flashEnvironmentId is input-only but must be sent when provided
+        exclude_fields = set(self._input_only or set())
+        exclude_fields.discard("flashEnvironmentId")
+        return exclude_fields
+
+    async def _do_deploy(self) -> "DeployableResource":
         """
         Deploys the serverless resource using the provided configuration.
         Returns a DeployableResource object.
@@ -202,11 +495,18 @@ class ServerlessResource(DeployableResource):
                 log.debug(f"{self} exists")
                 return self
 
+            # NEW: Ensure network volume is deployed first
+            await self._ensure_network_volume_deployed()
+
             async with RunpodGraphQLClient() as client:
-                payload = self.model_dump(exclude=self._input_only, exclude_none=True)
-                result = await client.create_endpoint(payload)
+                payload = self.model_dump(
+                    exclude=self._payload_exclude(), exclude_none=True, mode="json"
+                )
+                result = await client.save_endpoint(payload)
 
             if endpoint := self.__class__(**result):
+                endpoint = await self._sync_graphql_object_with_inputs(endpoint)
+                self.id = endpoint.id
                 return endpoint
 
             raise ValueError("Deployment failed, no endpoint was returned.")
@@ -215,61 +515,170 @@ class ServerlessResource(DeployableResource):
             log.error(f"{self} failed to deploy: {e}")
             raise
 
-    async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
-        """
-        Asynchronously checks if the serverless resource is ready to handle
-        requests by polling its health endpoint.
+    async def update(self, new_config: "ServerlessResource") -> "ServerlessResource":
+        """Update existing endpoint with new configuration.
+
+        Uses saveEndpoint mutation which handles both version-triggering and
+        rolling changes. Version-triggering changes (GPU, template, volumes)
+        automatically increment version and trigger worker recreation server-side.
 
         Args:
-            give_up_threshold (int, optional): The maximum number of polling
-                attempts before giving up and raising an error. Defaults to 10.
+            new_config: New configuration to apply
 
         Returns:
-            bool: True if the serverless resource is ready for requests.
+            Updated ServerlessResource instance
 
         Raises:
-            ValueError: If the serverless resource is not deployed.
-            RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
-                after exceeding the give_up_threshold.
+            ValueError: If endpoint not deployed or update fails
         """
-        if not self.is_deployed():
-            raise ValueError("Serverless is not deployed")
+        if not self.id:
+            raise ValueError("Cannot update: endpoint not deployed")
+
+        try:
+            # Log if version-triggering changes detected (informational only)
+            if self._has_structural_changes(new_config):
+                log.info(
+                    f"{self.name}: Version-triggering changes detected. "
+                    "Server will increment version and recreate workers."
+                )
+            else:
+                log.info(f"Updating endpoint '{self.name}' (ID: {self.id})")
 
-        log.debug(f"{self} | API /health")
+            # Ensure network volume is deployed if specified
+            await new_config._ensure_network_volume_deployed()
 
-        current_pace = 0
-        attempt = 0
+            async with RunpodGraphQLClient() as client:
+                # Include the endpoint ID to trigger update
+                payload = new_config.model_dump(
+                    exclude=new_config._payload_exclude(),
+                    exclude_none=True,
+                    mode="json",
+                )
+                payload["id"] = self.id  # Critical: include ID for update
 
-        # Poll for health status
-        while True:
-            await asyncio.sleep(current_pace)
+                result = await client.save_endpoint(payload)
 
-            health = await asyncio.to_thread(self.endpoint.health)
-            health = ServerlessHealth(**health)
+            if updated := self.__class__(**result):
+                log.info(f"Successfully updated endpoint '{self.name}' (ID: {self.id})")
+                return updated
+
+            raise ValueError("Update failed, no endpoint was returned.")
+
+        except Exception as e:
+            log.error(f"Failed to update {self.name}: {e}")
+            raise
+
+    def _has_structural_changes(self, new_config: "ServerlessResource") -> bool:
+        """Check if config changes are version-triggering.
 
-            if health.is_ready:
+        Version-triggering changes cause server-side version increment and
+        worker recreation:
+        - Image changes (imageName via templateId)
+        - GPU configuration (gpus, gpuIds, allowedCudaVersions, gpuCount)
+        - Hardware allocation (instanceIds, locations)
+        - Storage changes (networkVolumeId)
+        - Flashboot toggle
+
+        Rolling changes (no version increment):
+        - Worker scaling (workersMin, workersMax)
+        - Scaler configuration (scalerType, scalerValue)
+        - Timeout values (idleTimeout, executionTimeoutMs)
+        - Environment variables (env)
+
+        Note: This method is now informational for logging. The actual
+        version-triggering logic runs server-side when saveEndpoint is called.
+
+        Runtime fields (template, templateId, aiKey, userId) are excluded
+        to prevent false positives when comparing deployed vs new config.
+
+        Args:
+            new_config: New configuration to compare against
+
+        Returns:
+            True if version-triggering changes detected (workers will be recreated)
+        """
+        structural_fields = [
+            "gpus",
+            "gpuIds",
+            "imageName",
+            "flashboot",
+            "allowedCudaVersions",
+            "cudaVersions",
+            "instanceIds",
+        ]
+
+        for field in structural_fields:
+            old_val = getattr(self, field, None)
+            new_val = getattr(new_config, field, None)
+
+            # Handle list comparison
+            if isinstance(old_val, list) and isinstance(new_val, list):
+                if sorted(str(v) for v in old_val) != sorted(str(v) for v in new_val):
+                    log.debug(f"Structural change in '{field}': {old_val} → {new_val}")
+                    return True
+            # Handle other types
+            elif old_val != new_val:
+                log.debug(f"Structural change in '{field}': {old_val} → {new_val}")
                 return True
-            else:
-                # nothing changed, increase the gap
-                attempt += 1
-                indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
-                if indicator:
-                    log.info(f"{self} | {indicator}")
-
-                status = health.workers.status
-                if status in [
-                    Status.THROTTLED,
-                    Status.UNHEALTHY,
-                    Status.UNKNOWN,
-                ]:
-                    log.debug(f"{self} | Health {status.value}")
-
-                    if attempt >= give_up_threshold:
-                        # Give up
-                        raise RuntimeError(f"Health {status.value}")
-
-                    # Adjust polling pace appropriately
-                    current_pace = get_backoff_delay(attempt)
+
+        return False
+
+    async def deploy(self) -> "DeployableResource":
+        resource_manager = ResourceManager()
+        resource = await resource_manager.get_or_deploy_resource(self)
+        # hydrate the id onto the resource so it's usable when this is called directly
+        # on a config
+        self.id = resource.id
+        return self
+
+    async def _do_undeploy(self) -> bool:
+        """
+        Undeploys (deletes) the serverless endpoint.
+
+        If deletion fails, verifies the endpoint still exists. If not, treats it as
+        successfully undeployed (handles cases where endpoint was deleted externally).
+
+        Returns:
+            True if successfully undeployed or endpoint doesn't exist, False otherwise
+        """
+        if not self.id:
+            log.warning(f"{self} has no endpoint ID, cannot undeploy")
+            return False
+
+        try:
+            async with RunpodGraphQLClient() as client:
+                result = await client.delete_endpoint(self.id)
+                success = result.get("success", False)
+
+                if success:
+                    log.info(f"{self} successfully undeployed")
+                    return True
+                else:
+                    log.error(f"{self} failed to undeploy")
+                    return False
+
+        except Exception as e:
+            log.error(f"{self} failed to undeploy: {e}")
+
+            # Deletion failed. Check if endpoint still exists.
+            # If it doesn't exist, treat as successful cleanup (orphaned endpoint).
+            try:
+                async with RunpodGraphQLClient() as client:
+                    if not await client.endpoint_exists(self.id):
+                        log.info(
+                            f"{self} no longer exists on RunPod, removing from cache"
+                        )
+                        return True
+            except Exception as check_error:
+                log.warning(f"Could not verify endpoint existence: {check_error}")
+
+            return False
+
+    async def undeploy(self) -> Dict[str, Any]:
+        resource_manager = ResourceManager()
+        result = await resource_manager.undeploy_resource(self.resource_id)
+        log.debug(f"undeployment result: {result}")
+        return result
 
     async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
         """
@@ -285,10 +694,7 @@ class ServerlessResource(DeployableResource):
         )
 
         try:
-            # log.debug(f"[{log_group}] Payload: {payload}")
-
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
+            # log.debug(f"[{self}] Payload: {payload}")
 
             log.info(f"{self} | API /run_sync")
             response = await asyncio.to_thread(_fetch_job)
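
run_sync (and run, below) no longer gate on is_ready_for_requests; requests go straight to the deployed endpoint, and job polling paces itself with get_backoff_delay. Calling sketch; the {"input": ...} payload shape follows the usual RunPod serverless convention and ultimately depends on your handler:

    result = await endpoint.run_sync({"input": {"prompt": "hello"}})
    print(result.output, result.executionTime)  # JobOutput fields, logged in model_post_init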
@@ -314,9 +720,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{self}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             # Create a job using the endpoint
             log.info(f"{self} | API /run")
             job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
@@ -334,9 +737,8 @@ class ServerlessResource(DeployableResource):
         while True:
             await asyncio.sleep(current_pace)
 
-            if await self.is_ready_for_requests():
-                # Check job status
-                job_status = await asyncio.to_thread(job.status)
+            # Check job status
+            job_status = await asyncio.to_thread(job.status)
 
             if last_status == job_status:
                 # nothing changed, increase the gap
@@ -373,38 +775,53 @@ class ServerlessEndpoint(ServerlessResource):
     Inherits from ServerlessResource.
     """
 
+    @model_validator(mode="after")
+    def validate_instance_mutual_exclusivity(self):
+        """Ensure gpuIds and instanceIds are mutually exclusive.
+
+        When instanceIds is specified, clears GPU configuration since CPU and GPU
+        are mutually exclusive resources. Prevents mixing GPU and CPU configurations.
+        """
+        has_cpu = (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+        if has_cpu:
+            # Clear GPU configuration if CPU instances are specified
+            # This makes CPU intent explicit
+            self.gpus = []
+            self.gpuIds = ""
+            self.gpuCount = 0
+
+        return self
+
     @model_validator(mode="after")
     def set_serverless_template(self):
+        """Create template from imageName if not provided.
+
+        Must run after sync_input_fields to ensure all input fields are synced.
+        Applies smart disk sizing and validates configuration.
+        """
         if not any([self.imageName, self.template, self.templateId]):
             raise ValueError(
                 "Either imageName, template, or templateId must be provided"
             )
 
         if not self.templateId and not self.template:
-            self.template = PodTemplate(
-                name=self.resource_id,
-                imageName=self.imageName,
-                env=KeyValuePair.from_dict(self.env or get_env_vars()),
-            )
-
+            self.template = self._create_new_template()
+            # Apply smart disk sizing to new template
+            self._apply_smart_disk_sizing(self.template)
         elif self.template:
-            self.template.name = f"{self.resource_id}__{self.template.resource_id}"
-            if self.imageName:
-                self.template.imageName = self.imageName
-            if self.env:
-                self.template.env = KeyValuePair.from_dict(self.env)
-
-        return self
+            self._configure_existing_template()
+            # Apply smart disk sizing to existing template
+            self._apply_smart_disk_sizing(self.template)
 
+        # Validate CPU disk size if applicable
+        self._validate_cpu_disk_size()
 
-class CpuServerlessEndpoint(ServerlessEndpoint):
-    """
-    Convenience class for CPU serverless endpoint.
-    Represents a CPU-only serverless endpoint distinct from a live serverless.
-    Inherits from ServerlessEndpoint.
-    """
-
-    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
+        return self
 
 
 class JobOutput(BaseModel):
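
With CpuServerlessEndpoint dropped from this module (CPU endpoints appear to move to the new serverless_cpu.py listed above), the validator added here makes CPU intent explicit. Sketch, assuming base-class validators run before this one:

    from tetra_rp.core.resources.cpu import CpuInstanceType
    from tetra_rp.core.resources.serverless import ServerlessEndpoint

    cpu_endpoint = ServerlessEndpoint(
        name="cpu-svc",
        imageName="my/image:latest",
        instanceIds=[CpuInstanceType.CPU3G_2_8],
    )
    # validate_instance_mutual_exclusivity clears the GPU configuration:
    assert cpu_endpoint.gpuCount == 0 and cpu_endpoint.gpuIds == ""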
@@ -416,7 +833,7 @@ class JobOutput(BaseModel):
     output: Optional[Any] = None
     error: Optional[str] = ""
 
-    def model_post_init(self, __context):
+    def model_post_init(self, _: Any) -> None:
         log_group = f"Worker:{self.workerId}"
         log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
         log.info(f"{log_group} | Execution Time: {self.executionTime} ms")