tetra-rp 0.11.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tetra-rp might be problematic.

Files changed (47)
  1. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/PKG-INFO +1 -1
  2. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/pyproject.toml +1 -1
  3. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/__init__.py +2 -0
  4. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/__init__.py +3 -2
  5. tetra_rp-0.12.0/src/tetra_rp/core/resources/cpu.py +137 -0
  6. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/gpu.py +29 -14
  7. tetra_rp-0.12.0/src/tetra_rp/core/resources/live_serverless.py +62 -0
  8. tetra_rp-0.12.0/src/tetra_rp/core/resources/resource_manager.py +121 -0
  9. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/serverless.py +27 -46
  10. tetra_rp-0.12.0/src/tetra_rp/core/resources/serverless_cpu.py +154 -0
  11. tetra_rp-0.12.0/src/tetra_rp/core/utils/file_lock.py +260 -0
  12. tetra_rp-0.12.0/src/tetra_rp/core/utils/singleton.py +21 -0
  13. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/live_serverless.py +10 -6
  14. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/registry.py +27 -12
  15. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/PKG-INFO +1 -1
  16. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/SOURCES.txt +2 -0
  17. tetra_rp-0.11.0/src/tetra_rp/core/resources/cpu.py +0 -34
  18. tetra_rp-0.11.0/src/tetra_rp/core/resources/live_serverless.py +0 -36
  19. tetra_rp-0.11.0/src/tetra_rp/core/resources/resource_manager.py +0 -80
  20. tetra_rp-0.11.0/src/tetra_rp/core/utils/singleton.py +0 -7
  21. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/README.md +0 -0
  22. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/setup.cfg +0 -0
  23. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/client.py +0 -0
  24. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/__init__.py +0 -0
  25. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/api/__init__.py +0 -0
  26. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/api/runpod.py +0 -0
  27. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/base.py +0 -0
  28. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/cloud.py +0 -0
  29. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/constants.py +0 -0
  30. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/environment.py +0 -0
  31. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/network_volume.py +0 -0
  32. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/template.py +0 -0
  33. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/utils.py +0 -0
  34. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/__init__.py +0 -0
  35. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/backoff.py +0 -0
  36. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/constants.py +0 -0
  37. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/json.py +0 -0
  38. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/lru_cache.py +0 -0
  39. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/execute_class.py +0 -0
  40. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/logger.py +0 -0
  41. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/protos/__init__.py +0 -0
  42. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/protos/remote_execution.py +0 -0
  43. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/__init__.py +0 -0
  44. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/serverless.py +0 -0
  45. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/dependency_links.txt +0 -0
  46. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/requires.txt +0 -0
  47. {tetra_rp-0.11.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tetra_rp
3
- Version: 0.11.0
3
+ Version: 0.12.0
4
4
  Summary: A Python library for distributed inference and serving of machine learning models
5
5
  Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tetra_rp"
3
- version = "0.11.0"
3
+ version = "0.12.0"
4
4
  description = "A Python library for distributed inference and serving of machine learning models"
5
5
  authors = [
6
6
  { name = "Marut Pandya", email = "pandyamarut@gmail.com" },
@@ -13,6 +13,7 @@ from .client import remote # noqa: E402
13
13
  from .core.resources import ( # noqa: E402
14
14
  CpuServerlessEndpoint,
15
15
  CpuInstanceType,
16
+ CpuLiveServerless,
16
17
  CudaVersion,
17
18
  DataCenter,
18
19
  GpuGroup,
@@ -29,6 +30,7 @@ __all__ = [
29
30
  "remote",
30
31
  "CpuServerlessEndpoint",
31
32
  "CpuInstanceType",
33
+ "CpuLiveServerless",
32
34
  "CudaVersion",
33
35
  "DataCenter",
34
36
  "GpuGroup",
@@ -3,14 +3,14 @@ from .cloud import runpod
3
3
  from .cpu import CpuInstanceType
4
4
  from .gpu import GpuGroup, GpuType, GpuTypeDetail
5
5
  from .resource_manager import ResourceManager
6
- from .live_serverless import LiveServerless
6
+ from .live_serverless import LiveServerless, CpuLiveServerless
7
7
  from .serverless import (
8
- CpuServerlessEndpoint,
9
8
  ServerlessResource,
10
9
  ServerlessEndpoint,
11
10
  JobOutput,
12
11
  CudaVersion,
13
12
  )
13
+ from .serverless_cpu import CpuServerlessEndpoint
14
14
  from .template import PodTemplate
15
15
  from .network_volume import NetworkVolume, DataCenter
16
16
 
@@ -19,6 +19,7 @@ __all__ = [
19
19
  "runpod",
20
20
  "BaseResource",
21
21
  "CpuInstanceType",
22
+ "CpuLiveServerless",
22
23
  "CpuServerlessEndpoint",
23
24
  "CudaVersion",
24
25
  "DataCenter",
@@ -0,0 +1,137 @@
1
+ from enum import Enum
2
+ from typing import List, Optional
3
+
4
+
5
+ class CpuInstanceType(str, Enum):
6
+ """Valid CPU instance types.
7
+
8
+ Format: {generation}{type}-{vcpu}-{memory_gb}
9
+ Based on Runpod backend validation logic:
10
+ - memoryInGb = vcpuCount * flavor.ramMultiplier
11
+
12
+ RAM Multipliers (DEV environment):
13
+ - cpu3g: 4.0 (1 vCPU = 4GB, 2 vCPU = 8GB, etc.)
14
+ - cpu3c: 2.0 (1 vCPU = 2GB, 2 vCPU = 4GB, etc.)
15
+ - cpu5c: 2.0 (1 vCPU = 2GB, 2 vCPU = 4GB, etc.)
16
+ - cpu5g: Not available
17
+ """
18
+
19
+ # 3rd Generation General Purpose (RAM multiplier: 4.0)
20
+
21
+ CPU3G_1_4 = "cpu3g-1-4"
22
+ """1 vCPU, 4GB RAM, max 10GB container disk"""
23
+
24
+ CPU3G_2_8 = "cpu3g-2-8"
25
+ """2 vCPU, 8GB RAM, max 20GB container disk"""
26
+
27
+ CPU3G_4_16 = "cpu3g-4-16"
28
+ """4 vCPU, 16GB RAM, max 40GB container disk"""
29
+
30
+ CPU3G_8_32 = "cpu3g-8-32"
31
+ """8 vCPU, 32GB RAM, max 80GB container disk"""
32
+
33
+ # 3rd Generation Compute-Optimized (RAM multiplier: 2.0)
34
+
35
+ CPU3C_1_2 = "cpu3c-1-2"
36
+ """1 vCPU, 2GB RAM, max 10GB container disk"""
37
+
38
+ CPU3C_2_4 = "cpu3c-2-4"
39
+ """2 vCPU, 4GB RAM, max 20GB container disk"""
40
+
41
+ CPU3C_4_8 = "cpu3c-4-8"
42
+ """4 vCPU, 8GB RAM, max 40GB container disk"""
43
+
44
+ CPU3C_8_16 = "cpu3c-8-16"
45
+ """8 vCPU, 16GB RAM, max 80GB container disk"""
46
+
47
+ # 5th Generation Compute-Optimized (RAM multiplier: 2.0)
48
+
49
+ CPU5C_1_2 = "cpu5c-1-2"
50
+ """1 vCPU, 2GB RAM, max 15GB container disk"""
51
+
52
+ CPU5C_2_4 = "cpu5c-2-4"
53
+ """2 vCPU, 4GB RAM, max 30GB container disk"""
54
+
55
+ CPU5C_4_8 = "cpu5c-4-8"
56
+ """4 vCPU, 8GB RAM, max 60GB container disk"""
57
+
58
+ CPU5C_8_16 = "cpu5c-8-16"
59
+ """8 vCPU, 16GB RAM, max 120GB container disk"""
60
+
61
+
62
+ def calculate_max_disk_size(instance_type: CpuInstanceType) -> int:
63
+ """
64
+ Calculate the maximum container disk size for a CPU instance type.
65
+
66
+ Formula:
67
+ - CPU3G/CPU3C: vCPU count × 10GB
68
+ - CPU5C: vCPU count × 15GB
69
+
70
+ Args:
71
+ instance_type: CPU instance type enum
72
+
73
+ Returns:
74
+ Maximum container disk size in GB
75
+
76
+ Example:
77
+ >>> calculate_max_disk_size(CpuInstanceType.CPU3G_1_4)
78
+ 10
79
+ >>> calculate_max_disk_size(CpuInstanceType.CPU5C_2_4)
80
+ 30
81
+ """
82
+ # Parse the instance type string to extract vCPU count
83
+ # Format: "cpu{generation}{type}-{vcpu}-{memory}"
84
+ instance_str = instance_type.value
85
+ parts = instance_str.split("-")
86
+
87
+ if len(parts) != 3:
88
+ raise ValueError(f"Invalid instance type format: {instance_str}")
89
+
90
+ vcpu_count = int(parts[1])
91
+
92
+ # Determine disk multiplier based on generation
93
+ if instance_str.startswith("cpu5c"):
94
+ disk_multiplier = 15 # CPU5C: 15GB per vCPU
95
+ elif instance_str.startswith(("cpu3g", "cpu3c")):
96
+ disk_multiplier = 10 # CPU3G/CPU3C: 10GB per vCPU
97
+ else:
98
+ raise ValueError(f"Unknown CPU generation/type: {instance_str}")
99
+
100
+ return vcpu_count * disk_multiplier
101
+
102
+
103
+ # CPU Instance Type Disk Limits (calculated programmatically)
104
+ CPU_INSTANCE_DISK_LIMITS = {
105
+ instance_type: calculate_max_disk_size(instance_type)
106
+ for instance_type in CpuInstanceType
107
+ }
108
+
109
+
110
+ def get_max_disk_size_for_instances(
111
+ instance_types: Optional[List[CpuInstanceType]],
112
+ ) -> Optional[int]:
113
+ """
114
+ Calculate the maximum container disk size for a list of CPU instance types.
115
+
116
+ Returns the minimum disk limit across all instance types to ensure compatibility
117
+ with all specified instances.
118
+
119
+ Args:
120
+ instance_types: List of CPU instance types, or None
121
+
122
+ Returns:
123
+ Maximum allowed disk size in GB, or None if no CPU instances specified
124
+
125
+ Example:
126
+ >>> get_max_disk_size_for_instances([CpuInstanceType.CPU3G_1_4])
127
+ 10
128
+ >>> get_max_disk_size_for_instances([CpuInstanceType.CPU3G_1_4, CpuInstanceType.CPU3G_2_8])
129
+ 10
130
+ """
131
+ if not instance_types:
132
+ return None
133
+
134
+ disk_limits = [
135
+ CPU_INSTANCE_DISK_LIMITS[instance_type] for instance_type in instance_types
136
+ ]
137
+ return min(disk_limits)
@@ -32,20 +32,35 @@ class GpuTypeDetail(GpuType):
32
32
 
33
33
  # TODO: this should be fetched from an API
34
34
  class GpuGroup(Enum):
35
- ANY = "any" # "Any GPU"
36
- ADA_24 = "ADA_24" # "NVIDIA GeForce RTX 4090"
37
- ADA_32_PRO = "ADA_32_PRO" # "NVIDIA GeForce RTX 5090"
38
- ADA_48_PRO = (
39
- "ADA_48_PRO" # "NVIDIA RTX 6000 Ada Generation, NVIDIA L40, NVIDIA L40S"
40
- )
41
- ADA_80_PRO = (
42
- "ADA_80_PRO" # "NVIDIA H100 PCIe, NVIDIA H100 80GB HBM3, NVIDIA H100 NVL"
43
- )
44
- AMPERE_16 = "AMPERE_16" # "NVIDIA RTX A4000, NVIDIA RTX A4500, NVIDIA RTX 4000 Ada Generation, NVIDIA RTX 2000 Ada Generation"
45
- AMPERE_24 = "AMPERE_24" # "NVIDIA RTX A5000, NVIDIA L4, NVIDIA GeForce RTX 3090"
46
- AMPERE_48 = "AMPERE_48" # "NVIDIA A40, NVIDIA RTX A6000"
47
- AMPERE_80 = "AMPERE_80" # "NVIDIA A100 80GB PCIe, NVIDIA A100-SXM4-80GB"
48
- HOPPER_141 = "HOPPER_141" # "NVIDIA H200"
35
+ ANY = "any"
36
+ """Any GPU"""
37
+
38
+ ADA_24 = "ADA_24"
39
+ """NVIDIA GeForce RTX 4090"""
40
+
41
+ ADA_32_PRO = "ADA_32_PRO"
42
+ """NVIDIA GeForce RTX 5090"""
43
+
44
+ ADA_48_PRO = "ADA_48_PRO"
45
+ """NVIDIA RTX 6000 Ada Generation, NVIDIA L40, NVIDIA L40S"""
46
+
47
+ ADA_80_PRO = "ADA_80_PRO"
48
+ """NVIDIA H100 PCIe, NVIDIA H100 80GB HBM3, NVIDIA H100 NVL"""
49
+
50
+ AMPERE_16 = "AMPERE_16"
51
+ """NVIDIA RTX A4000, NVIDIA RTX A4500, NVIDIA RTX 4000 Ada Generation, NVIDIA RTX 2000 Ada Generation"""
52
+
53
+ AMPERE_24 = "AMPERE_24"
54
+ """NVIDIA RTX A5000, NVIDIA L4, NVIDIA GeForce RTX 3090"""
55
+
56
+ AMPERE_48 = "AMPERE_48"
57
+ """NVIDIA A40, NVIDIA RTX A6000"""
58
+
59
+ AMPERE_80 = "AMPERE_80"
60
+ """NVIDIA A100 80GB PCIe, NVIDIA A100-SXM4-80GB"""
61
+
62
+ HOPPER_141 = "HOPPER_141"
63
+ """NVIDIA H200"""
49
64
 
50
65
  @classmethod
51
66
  def all(cls) -> List["GpuGroup"]:
@@ -0,0 +1,62 @@
1
+ # Ship serverless code as you write it. No builds, no deploys — just run.
2
+ import os
3
+ from pydantic import model_validator
4
+ from .serverless import ServerlessEndpoint
5
+ from .serverless_cpu import CpuServerlessEndpoint
6
+
7
+ TETRA_IMAGE_TAG = os.environ.get("TETRA_IMAGE_TAG", "latest")
8
+ TETRA_GPU_IMAGE = os.environ.get(
9
+ "TETRA_GPU_IMAGE", f"runpod/tetra-rp:{TETRA_IMAGE_TAG}"
10
+ )
11
+ TETRA_CPU_IMAGE = os.environ.get(
12
+ "TETRA_CPU_IMAGE", f"runpod/tetra-rp-cpu:{TETRA_IMAGE_TAG}"
13
+ )
14
+
15
+
16
+ class LiveServerlessMixin:
17
+ """Common mixin for live serverless endpoints that locks the image."""
18
+
19
+ @property
20
+ def _live_image(self) -> str:
21
+ """Override in subclasses to specify the locked image."""
22
+ raise NotImplementedError("Subclasses must define _live_image")
23
+
24
+ @property
25
+ def imageName(self):
26
+ # Lock imageName to specific image
27
+ return self._live_image
28
+
29
+ @imageName.setter
30
+ def imageName(self, value):
31
+ # Prevent manual setting of imageName
32
+ pass
33
+
34
+
35
+ class LiveServerless(LiveServerlessMixin, ServerlessEndpoint):
36
+ """GPU-only live serverless endpoint."""
37
+
38
+ @property
39
+ def _live_image(self) -> str:
40
+ return TETRA_GPU_IMAGE
41
+
42
+ @model_validator(mode="before")
43
+ @classmethod
44
+ def set_live_serverless_template(cls, data: dict):
45
+ """Set default GPU image for Live Serverless."""
46
+ data["imageName"] = TETRA_GPU_IMAGE
47
+ return data
48
+
49
+
50
+ class CpuLiveServerless(LiveServerlessMixin, CpuServerlessEndpoint):
51
+ """CPU-only live serverless endpoint with automatic disk sizing."""
52
+
53
+ @property
54
+ def _live_image(self) -> str:
55
+ return TETRA_CPU_IMAGE
56
+
57
+ @model_validator(mode="before")
58
+ @classmethod
59
+ def set_live_serverless_template(cls, data: dict):
60
+ """Set default CPU image for Live Serverless."""
61
+ data["imageName"] = TETRA_CPU_IMAGE
62
+ return data
@@ -0,0 +1,121 @@
1
+ import asyncio
2
+ import cloudpickle
3
+ import logging
4
+ from typing import Dict, Optional
5
+ from pathlib import Path
6
+
7
+ from ..utils.singleton import SingletonMixin
8
+ from ..utils.file_lock import file_lock, FileLockError
9
+
10
+ from .base import DeployableResource
11
+
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+ # File to persist state of resources
16
+ RESOURCE_STATE_FILE = Path(".tetra_resources.pkl")
17
+
18
+
19
+ class ResourceManager(SingletonMixin):
20
+ """Manages dynamic provisioning and tracking of remote resources."""
21
+
22
+ # Class variables shared across all instances (singleton)
23
+ _resources: Dict[str, DeployableResource] = {}
24
+ _deployment_locks: Dict[str, asyncio.Lock] = {}
25
+ _global_lock: Optional[asyncio.Lock] = None # Will be initialized lazily
26
+ _lock_initialized = False
27
+
28
+ def __init__(self):
29
+ # Ensure async locks are initialized properly for the singleton instance
30
+ if not ResourceManager._lock_initialized:
31
+ ResourceManager._global_lock = asyncio.Lock()
32
+ ResourceManager._lock_initialized = True
33
+
34
+ if not self._resources:
35
+ self._load_resources()
36
+
37
+ def _load_resources(self) -> Dict[str, DeployableResource]:
38
+ """Load persisted resource information using cross-platform file locking."""
39
+ if RESOURCE_STATE_FILE.exists():
40
+ try:
41
+ with open(RESOURCE_STATE_FILE, "rb") as f:
42
+ # Acquire shared lock for reading (cross-platform)
43
+ with file_lock(f, exclusive=False):
44
+ self._resources = cloudpickle.load(f)
45
+ log.debug(f"Loaded saved resources from {RESOURCE_STATE_FILE}")
46
+ except (FileLockError, Exception) as e:
47
+ log.error(f"Failed to load resources from {RESOURCE_STATE_FILE}: {e}")
48
+ return self._resources
49
+
50
+ def _save_resources(self) -> None:
51
+ """Persist state of resources to disk using cross-platform file locking."""
52
+ try:
53
+ with open(RESOURCE_STATE_FILE, "wb") as f:
54
+ # Acquire exclusive lock for writing (cross-platform)
55
+ with file_lock(f, exclusive=True):
56
+ cloudpickle.dump(self._resources, f)
57
+ f.flush() # Ensure data is written to disk
58
+ log.debug(f"Saved resources in {RESOURCE_STATE_FILE}")
59
+ except (FileLockError, Exception) as e:
60
+ log.error(f"Failed to save resources to {RESOURCE_STATE_FILE}: {e}")
61
+ raise
62
+
63
+ def add_resource(self, uid: str, resource: DeployableResource):
64
+ """Add a resource to the manager."""
65
+ self._resources[uid] = resource
66
+ self._save_resources()
67
+
68
+ # function to check if resource still exists remotely, else remove it
69
+ def remove_resource(self, uid: str):
70
+ """Remove a resource from the manager."""
71
+ if uid not in self._resources:
72
+ log.warning(f"Resource {uid} not found for removal")
73
+ return
74
+
75
+ del self._resources[uid]
76
+ log.debug(f"Removed resource {uid}")
77
+
78
+ self._save_resources()
79
+
80
+ async def get_or_deploy_resource(
81
+ self, config: DeployableResource
82
+ ) -> DeployableResource:
83
+ """Get existing or create new resource based on config.
84
+
85
+ Coroutine-safe (asyncio.Lock-based) implementation that prevents concurrent deployments
86
+ of the same resource configuration.
87
+ """
88
+ uid = config.resource_id
89
+
90
+ # Ensure global lock is initialized (should be done in __init__)
91
+ assert ResourceManager._global_lock is not None, "Global lock not initialized"
92
+
93
+ # Get or create a per-resource lock
94
+ async with ResourceManager._global_lock:
95
+ if uid not in ResourceManager._deployment_locks:
96
+ ResourceManager._deployment_locks[uid] = asyncio.Lock()
97
+ resource_lock = ResourceManager._deployment_locks[uid]
98
+
99
+ # Acquire per-resource lock for this specific configuration
100
+ async with resource_lock:
101
+ # Double-check pattern: check again inside the lock
102
+ if existing := self._resources.get(uid):
103
+ if not existing.is_deployed():
104
+ log.warning(f"{existing} is no longer valid, redeploying.")
105
+ self.remove_resource(uid)
106
+ # Don't call recursively - deploy directly within the lock
107
+ deployed_resource = await config.deploy()
108
+ log.info(f"URL: {deployed_resource.url}")
109
+ self.add_resource(uid, deployed_resource)
110
+ return deployed_resource
111
+
112
+ log.debug(f"{existing} exists, reusing.")
113
+ log.info(f"URL: {existing.url}")
114
+ return existing
115
+
116
+ # No existing resource, deploy new one
117
+ log.debug(f"Deploying new resource: {uid}")
118
+ deployed_resource = await config.deploy()
119
+ log.info(f"URL: {deployed_resource.url}")
120
+ self.add_resource(uid, deployed_resource)
121
+ return deployed_resource
@@ -17,7 +17,6 @@ from ..utils.backoff import get_backoff_delay
17
17
  from .base import DeployableResource
18
18
  from .cloud import runpod
19
19
  from .constants import CONSOLE_URL
20
- from .cpu import CpuInstanceType
21
20
  from .environment import EnvironmentVars
22
21
  from .gpu import GpuGroup
23
22
  from .network_volume import NetworkVolume, DataCenter
@@ -86,7 +85,6 @@ class ServerlessResource(DeployableResource):
86
85
  executionTimeoutMs: Optional[int] = None
87
86
  gpuCount: Optional[int] = 1
88
87
  idleTimeout: Optional[int] = 5
89
- instanceIds: Optional[List[CpuInstanceType]] = None
90
88
  locations: Optional[str] = None
91
89
  name: str
92
90
  networkVolumeId: Optional[str] = None
@@ -134,15 +132,6 @@ class ServerlessResource(DeployableResource):
134
132
  """Convert ServerlessScalerType enum to string."""
135
133
  return value.value if value is not None else None
136
134
 
137
- @field_serializer("instanceIds")
138
- def serialize_instance_ids(
139
- self, value: Optional[List[CpuInstanceType]]
140
- ) -> Optional[List[str]]:
141
- """Convert CpuInstanceType enums to strings."""
142
- if value is None:
143
- return None
144
- return [item.value if hasattr(item, "value") else str(item) for item in value]
145
-
146
135
  @field_validator("gpus")
147
136
  @classmethod
148
137
  def validate_gpus(cls, value: List[GpuGroup]) -> List[GpuGroup]:
@@ -172,10 +161,9 @@ class ServerlessResource(DeployableResource):
172
161
  # Volume already exists, use its ID
173
162
  self.networkVolumeId = self.networkVolume.id
174
163
 
175
- if self.instanceIds:
176
- return self._sync_input_fields_cpu()
177
- else:
178
- return self._sync_input_fields_gpu()
164
+ self._sync_input_fields_gpu()
165
+
166
+ return self
179
167
 
180
168
  def _sync_input_fields_gpu(self):
181
169
  # GPU-specific fields
@@ -199,14 +187,6 @@ class ServerlessResource(DeployableResource):
199
187
 
200
188
  return self
201
189
 
202
- def _sync_input_fields_cpu(self):
203
- # Override GPU-specific fields for CPU
204
- self.gpuCount = 0
205
- self.allowedCudaVersions = ""
206
- self.gpuIds = ""
207
-
208
- return self
209
-
210
190
  async def _ensure_network_volume_deployed(self) -> None:
211
191
  """
212
192
  Ensures network volume is deployed and ready if one is specified.
@@ -274,7 +254,7 @@ class ServerlessResource(DeployableResource):
274
254
  )
275
255
 
276
256
  try:
277
- # log.debug(f"[{log_group}] Payload: {payload}")
257
+ # log.debug(f"[{self}] Payload: {payload}")
278
258
 
279
259
  log.info(f"{self} | API /run_sync")
280
260
  response = await asyncio.to_thread(_fetch_job)
@@ -355,6 +335,26 @@ class ServerlessEndpoint(ServerlessResource):
355
335
  Inherits from ServerlessResource.
356
336
  """
357
337
 
338
+ def _create_new_template(self) -> PodTemplate:
339
+ """Create a new PodTemplate with standard configuration."""
340
+ return PodTemplate(
341
+ name=self.resource_id,
342
+ imageName=self.imageName,
343
+ env=KeyValuePair.from_dict(self.env or get_env_vars()),
344
+ )
345
+
346
+ def _configure_existing_template(self) -> None:
347
+ """Configure an existing template with necessary overrides."""
348
+ if self.template is None:
349
+ return
350
+
351
+ self.template.name = f"{self.resource_id}__{self.template.resource_id}"
352
+
353
+ if self.imageName:
354
+ self.template.imageName = self.imageName
355
+ if self.env:
356
+ self.template.env = KeyValuePair.from_dict(self.env)
357
+
358
358
  @model_validator(mode="after")
359
359
  def set_serverless_template(self):
360
360
  if not any([self.imageName, self.template, self.templateId]):
@@ -363,32 +363,13 @@ class ServerlessEndpoint(ServerlessResource):
363
363
  )
364
364
 
365
365
  if not self.templateId and not self.template:
366
- self.template = PodTemplate(
367
- name=self.resource_id,
368
- imageName=self.imageName,
369
- env=KeyValuePair.from_dict(self.env or get_env_vars()),
370
- )
371
-
366
+ self.template = self._create_new_template()
372
367
  elif self.template:
373
- self.template.name = f"{self.resource_id}__{self.template.resource_id}"
374
- if self.imageName:
375
- self.template.imageName = self.imageName
376
- if self.env:
377
- self.template.env = KeyValuePair.from_dict(self.env)
368
+ self._configure_existing_template()
378
369
 
379
370
  return self
380
371
 
381
372
 
382
- class CpuServerlessEndpoint(ServerlessEndpoint):
383
- """
384
- Convenience class for CPU serverless endpoint.
385
- Represents a CPU-only serverless endpoint distinct from a live serverless.
386
- Inherits from ServerlessEndpoint.
387
- """
388
-
389
- instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
390
-
391
-
392
373
  class JobOutput(BaseModel):
393
374
  id: str
394
375
  workerId: str
@@ -398,7 +379,7 @@ class JobOutput(BaseModel):
398
379
  output: Optional[Any] = None
399
380
  error: Optional[str] = ""
400
381
 
401
- def model_post_init(self, __context):
382
+ def model_post_init(self, _: Any) -> None:
402
383
  log_group = f"Worker:{self.workerId}"
403
384
  log.info(f"{log_group} | Delay Time: {self.delayTime} ms")
404
385
  log.info(f"{log_group} | Execution Time: {self.executionTime} ms")