tetra-rp 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/__init__.py +2 -0
- tetra_rp/client.py +25 -3
- tetra_rp/core/api/runpod.py +24 -0
- tetra_rp/core/resources/__init__.py +2 -1
- tetra_rp/core/resources/network_volume.py +65 -24
- tetra_rp/core/resources/serverless.py +24 -74
- tetra_rp/core/resources/template.py +1 -1
- tetra_rp/execute_class.py +6 -0
- tetra_rp/protos/remote_execution.py +36 -12
- tetra_rp/stubs/live_serverless.py +12 -1
- tetra_rp/stubs/registry.py +14 -2
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/METADATA +2 -1
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/RECORD +15 -15
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/WHEEL +0 -0
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/top_level.txt +0 -0
tetra_rp/__init__.py
CHANGED
@@ -14,6 +14,7 @@ from .core.resources import ( # noqa: E402
     CpuServerlessEndpoint,
     CpuInstanceType,
     CudaVersion,
+    DataCenter,
     GpuGroup,
     LiveServerless,
     PodTemplate,
@@ -29,6 +30,7 @@ __all__ = [
     "CpuServerlessEndpoint",
     "CpuInstanceType",
     "CudaVersion",
+    "DataCenter",
    "GpuGroup",
     "LiveServerless",
     "PodTemplate",
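Net effect of this file: `DataCenter` joins the package's public surface. A minimal sketch of the new import path (assuming the wheel is installed; the printed string depends on the enum definition, which this diff does not show):

```python
# DataCenter can now be imported directly from the package root.
from tetra_rp import DataCenter

# EU_RO_1 is the default used throughout this release; its string value
# (e.g. "EU-RO-1") comes from the enum body in network_volume.py.
print(DataCenter.EU_RO_1.value)
```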
tetra_rp/client.py
CHANGED
@@ -14,6 +14,8 @@ def remote(
     resource_config: ServerlessResource,
     dependencies: Optional[List[str]] = None,
     system_dependencies: Optional[List[str]] = None,
+    accelerate_downloads: bool = True,
+    hf_models_to_cache: Optional[List[str]] = None,
     **extra,
 ):
     """
@@ -22,10 +24,17 @@ def remote(
     This decorator allows a function to be executed in a remote serverless environment, with support for
     dynamic resource provisioning and installation of required dependencies.
 
+    Args:
         resource_config (ServerlessResource): Configuration object specifying the serverless resource
             to be provisioned or used.
         dependencies (List[str], optional): A list of pip package names to be installed in the remote
             environment before executing the function. Defaults to None.
+        system_dependencies (List[str], optional): A list of system packages to be installed in the remote
+            environment before executing the function. Defaults to None.
+        accelerate_downloads (bool, optional): Enable download acceleration for dependencies and models.
+            Defaults to True.
+        hf_models_to_cache (List[str], optional): List of HuggingFace model IDs to pre-cache using
+            download acceleration. Defaults to None.
         extra (dict, optional): Additional parameters for the execution of the resource. Defaults to an empty dict.
 
     Returns:
@@ -37,7 +46,8 @@ def remote(
         @remote(
             resource_config=my_resource_config,
             dependencies=["numpy", "pandas"],
-
+            accelerate_downloads=True,
+            hf_models_to_cache=["gpt2", "bert-base-uncased"]
         )
         async def my_function(data):
             # Function logic here
@@ -49,7 +59,13 @@ def remote(
     if inspect.isclass(func_or_class):
         # Handle class decoration
         return create_remote_class(
-            func_or_class,
+            func_or_class,
+            resource_config,
+            dependencies,
+            system_dependencies,
+            accelerate_downloads,
+            hf_models_to_cache,
+            extra,
         )
     else:
         # Handle function decoration (unchanged)
@@ -62,7 +78,13 @@ def remote(
 
         stub = stub_resource(remote_resource, **extra)
         return await stub(
-            func_or_class,
+            func_or_class,
+            dependencies,
+            system_dependencies,
+            accelerate_downloads,
+            hf_models_to_cache,
+            *args,
+            **kwargs,
         )
 
     return wrapper
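Taken together, `remote()` now threads the two new knobs through both the function and class paths. A hedged usage sketch based on the docstring above (the `LiveServerless` config and endpoint name are illustrative, not taken from this diff):

```python
import asyncio

from tetra_rp import LiveServerless, remote

# Hypothetical resource config; any ServerlessResource subclass should work here.
gpu_config = LiveServerless(name="example-endpoint")

@remote(
    resource_config=gpu_config,
    dependencies=["transformers"],
    accelerate_downloads=True,    # new in this release; True is also the default
    hf_models_to_cache=["gpt2"],  # pre-warm the HF cache before the first call
)
async def generate(prompt: str) -> str:
    from transformers import pipeline
    return pipeline("text-generation", model="gpt2")(prompt)[0]["generated_text"]

# The decorated function is awaited like any coroutine:
# asyncio.run(generate("Hello"))  # requires a configured Runpod account
```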
tetra_rp/core/api/runpod.py
CHANGED
@@ -281,6 +281,30 @@ class RunpodRestClient:
 
         return result
 
+    async def list_network_volumes(self) -> Dict[str, Any]:
+        """
+        List all network volumes in Runpod.
+
+        Returns:
+            List of network volume objects or dict containing networkVolumes key.
+            The API may return either format depending on version.
+        """
+        log.debug("Listing network volumes")
+
+        result = await self._execute_rest(
+            "GET", f"{RUNPOD_REST_API_URL}/networkvolumes"
+        )
+
+        # Handle both list and dict responses
+        if isinstance(result, list):
+            volume_count = len(result)
+        else:
+            volume_count = len(result.get("networkVolumes", []))
+
+        log.debug(f"Listed {volume_count} network volumes")
+
+        return result
+
     async def close(self):
         """Close the HTTP session."""
         if self.session and not self.session.closed:
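Note the dual return shape: callers must accept either a bare list or a `{"networkVolumes": [...]}` dict. A short consumer sketch (assuming `RunpodRestClient` is importable from `tetra_rp.core.api.runpod` and an API key is configured in the environment, neither of which this diff shows):

```python
import asyncio

from tetra_rp.core.api.runpod import RunpodRestClient

async def print_volumes() -> None:
    # The diff uses RunpodRestClient as an async context manager, so we do too.
    async with RunpodRestClient() as client:
        response = await client.list_network_volumes()
        # Normalize the two possible response shapes, as the docstring warns.
        volumes = response if isinstance(response, list) else response.get("networkVolumes", [])
        for volume in volumes:
            print(volume.get("id"), volume.get("name"), volume.get("dataCenterId"))

# asyncio.run(print_volumes())
```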
tetra_rp/core/resources/__init__.py
CHANGED

@@ -12,7 +12,7 @@ from .serverless import (
     CudaVersion,
 )
 from .template import PodTemplate
-from .network_volume import NetworkVolume
+from .network_volume import NetworkVolume, DataCenter
 
 
 __all__ = [
@@ -21,6 +21,7 @@ __all__ = [
     "CpuInstanceType",
     "CpuServerlessEndpoint",
     "CudaVersion",
+    "DataCenter",
     "DeployableResource",
     "GpuGroup",
     "GpuType",
tetra_rp/core/resources/network_volume.py
CHANGED

@@ -1,3 +1,4 @@
+import hashlib
 import logging
 from enum import Enum
 from typing import Optional
@@ -25,10 +26,11 @@ class DataCenter(str, Enum):
 
 class NetworkVolume(DeployableResource):
     """
-    NetworkVolume resource for creating and managing Runpod
+    NetworkVolume resource for creating and managing Runpod network volumes.
 
     This class handles the creation, deployment, and management of network volumes
-    that can be attached to serverless resources.
+    that can be attached to serverless resources. Supports idempotent deployment
+    where multiple volumes with the same name will reuse existing volumes.
 
     """
 
@@ -36,12 +38,21 @@ class NetworkVolume(DeployableResource):
     dataCenterId: DataCenter = Field(default=DataCenter.EU_RO_1, frozen=True)
 
     id: Optional[str] = Field(default=None)
-    name:
-    size: Optional[int] = Field(default=
+    name: str
+    size: Optional[int] = Field(default=100, gt=0)  # Size in GB
 
     def __str__(self) -> str:
         return f"{self.__class__.__name__}:{self.id}"
 
+    @property
+    def resource_id(self) -> str:
+        """Unique resource ID based on name and datacenter for idempotent behavior."""
+        # Use name + datacenter to ensure idempotence
+        resource_type = self.__class__.__name__
+        config_key = f"{self.name}:{self.dataCenterId.value}"
+        hash_obj = hashlib.md5(f"{resource_type}:{config_key}".encode())
+        return f"{resource_type}_{hash_obj.hexdigest()}"
+
     @field_serializer("dataCenterId")
     def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
         """Convert DataCenter enum to string."""
@@ -61,24 +72,57 @@ class NetworkVolume(DeployableResource):
             raise ValueError("Network volume ID is not set")
         return f"{CONSOLE_BASE_URL}/user/storage"
 
-
+    def is_deployed(self) -> bool:
         """
-
-        Returns the volume ID.
+        Checks if the network volume resource is deployed and available.
         """
-
-
-
-
+        return self.id is not None
+
+    def _normalize_volumes_response(self, volumes_response) -> list:
+        """Normalize API response to list format."""
+        if isinstance(volumes_response, list):
+            return volumes_response
+        return volumes_response.get("networkVolumes", [])
+
+    def _find_matching_volume(self, existing_volumes: list) -> Optional[dict]:
+        """Find existing volume matching name and datacenter."""
+        for volume_data in existing_volumes:
+            if (
+                volume_data.get("name") == self.name
+                and volume_data.get("dataCenterId") == self.dataCenterId.value
+            ):
+                return volume_data
+        return None
+
+    async def _find_existing_volume(self, client) -> Optional["NetworkVolume"]:
+        """Check for existing volume with same name and datacenter."""
+        if not self.name:
+            return None
+
+        log.debug(f"Checking for existing network volume with name: {self.name}")
+        volumes_response = await client.list_network_volumes()
+        existing_volumes = self._normalize_volumes_response(volumes_response)
+
+        if matching_volume := self._find_matching_volume(existing_volumes):
+            log.info(
+                f"Found existing network volume: {matching_volume.get('id')} with name '{self.name}'"
+            )
+            # Update our instance with the existing volume's ID
+            self.id = matching_volume.get("id")
+            return self
+
+        return None
+
+    async def _create_new_volume(self, client) -> "NetworkVolume":
+        """Create a new network volume."""
+        log.debug(f"Creating new network volume: {self.name or 'unnamed'}")
+        payload = self.model_dump(exclude_none=True)
+        result = await client.create_network_volume(payload)
 
         if volume := self.__class__(**result):
             return volume
 
-
-        """
-        Checks if the network volume resource is deployed and available.
-        """
-        return self.id is not None
+        raise ValueError("Deployment failed, no volume was created.")
 
     async def deploy(self) -> "DeployableResource":
         """
@@ -91,16 +135,13 @@ class NetworkVolume(DeployableResource):
                 log.debug(f"{self} exists")
                 return self
 
-            # Create the network volume
             async with RunpodRestClient() as client:
-                #
-
-
-
-                if volume := self.__class__(**result):
-                    return volume
+                # Check for existing volume first
+                if existing_volume := await self._find_existing_volume(client):
+                    return existing_volume
 
-
+                # No existing volume found, create a new one
+                return await self._create_new_volume(client)
 
         except Exception as e:
             log.error(f"{self} failed to deploy: {e}")
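The `resource_id` property is what makes `deploy()` idempotent from a resource manager's point of view: the ID is a pure function of resource type, name, and datacenter, so equal configurations collide on purpose. A standalone re-derivation of the hashing shown above (not an import from the package; datacenter strings are illustrative):

```python
import hashlib

def volume_resource_id(name: str, datacenter: str) -> str:
    # Mirrors NetworkVolume.resource_id: MD5 over "Type:name:datacenter".
    digest = hashlib.md5(f"NetworkVolume:{name}:{datacenter}".encode()).hexdigest()
    return f"NetworkVolume_{digest}"

a = volume_resource_id("models", "EU-RO-1")
b = volume_resource_id("models", "EU-RO-1")
assert a == b  # same name + datacenter -> same ID -> the volume is reused
assert a != volume_resource_id("models", "US-TX-3")  # hypothetical other DC differs
```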
tetra_rp/core/resources/serverless.py
CHANGED

@@ -20,7 +20,7 @@ from .constants import CONSOLE_URL
 from .cpu import CpuInstanceType
 from .environment import EnvironmentVars
 from .gpu import GpuGroup
-from .network_volume import NetworkVolume
+from .network_volume import NetworkVolume, DataCenter
 from .template import KeyValuePair, PodTemplate
 
 
@@ -65,6 +65,7 @@ class ServerlessResource(DeployableResource):
     _input_only = {
         "id",
         "cudaVersions",
+        "datacenter",
         "env",
         "gpus",
         "flashboot",
@@ -78,8 +79,8 @@ class ServerlessResource(DeployableResource):
     flashboot: Optional[bool] = True
     gpus: Optional[List[GpuGroup]] = [GpuGroup.ANY]  # for gpuIds
     imageName: Optional[str] = ""  # for template.imageName
-
     networkVolume: Optional[NetworkVolume] = None
+    datacenter: DataCenter = Field(default=DataCenter.EU_RO_1)
 
     # === Input Fields ===
     executionTimeoutMs: Optional[int] = None
@@ -134,8 +135,12 @@ class ServerlessResource(DeployableResource):
         return value.value if value is not None else None
 
     @field_serializer("instanceIds")
-    def serialize_instance_ids(
+    def serialize_instance_ids(
+        self, value: Optional[List[CpuInstanceType]]
+    ) -> Optional[List[str]]:
         """Convert CpuInstanceType enums to strings."""
+        if value is None:
+            return None
         return [item.value if hasattr(item, "value") else str(item) for item in value]
 
     @field_validator("gpus")
@@ -152,6 +157,17 @@ class ServerlessResource(DeployableResource):
         if self.flashboot:
             self.name += "-fb"
 
+        # Sync datacenter to locations field for API
+        if not self.locations:
+            self.locations = self.datacenter.value
+
+        # Validate datacenter consistency between endpoint and network volume
+        if self.networkVolume and self.networkVolume.dataCenterId != self.datacenter:
+            raise ValueError(
+                f"Network volume datacenter ({self.networkVolume.dataCenterId.value}) "
+                f"must match endpoint datacenter ({self.datacenter.value})"
+            )
+
         if self.networkVolume and self.networkVolume.is_created:
             # Volume already exists, use its ID
             self.networkVolumeId = self.networkVolume.id
@@ -193,17 +209,14 @@ class ServerlessResource(DeployableResource):
 
     async def _ensure_network_volume_deployed(self) -> None:
         """
-        Ensures network volume is deployed and ready.
+        Ensures network volume is deployed and ready if one is specified.
         Updates networkVolumeId with the deployed volume ID.
         """
         if self.networkVolumeId:
             return
 
-        if
-
-        self.networkVolume = NetworkVolume(name=f"{self.name}-volume")
-
-        if deployedNetworkVolume := await self.networkVolume.deploy():
+        if self.networkVolume:
+            deployedNetworkVolume = await self.networkVolume.deploy()
             self.networkVolumeId = deployedNetworkVolume.id
 
     def is_deployed(self) -> bool:
@@ -247,62 +260,6 @@ class ServerlessResource(DeployableResource):
             log.error(f"{self} failed to deploy: {e}")
             raise
 
-    async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
-        """
-        Asynchronously checks if the serverless resource is ready to handle
-        requests by polling its health endpoint.
-
-        Args:
-            give_up_threshold (int, optional): The maximum number of polling
-                attempts before giving up and raising an error. Defaults to 10.
-
-        Returns:
-            bool: True if the serverless resource is ready for requests.
-
-        Raises:
-            ValueError: If the serverless resource is not deployed.
-            RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
-                after exceeding the give_up_threshold.
-        """
-        if not self.is_deployed():
-            raise ValueError("Serverless is not deployed")
-
-        log.debug(f"{self} | API /health")
-
-        current_pace = 0
-        attempt = 0
-
-        # Poll for health status
-        while True:
-            await asyncio.sleep(current_pace)
-
-            health = await asyncio.to_thread(self.endpoint.health)
-            health = ServerlessHealth(**health)
-
-            if health.is_ready:
-                return True
-            else:
-                # nothing changed, increase the gap
-                attempt += 1
-                indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
-                if indicator:
-                    log.info(f"{self} | {indicator}")
-
-                status = health.workers.status
-                if status in [
-                    Status.THROTTLED,
-                    Status.UNHEALTHY,
-                    Status.UNKNOWN,
-                ]:
-                    log.debug(f"{self} | Health {status.value}")
-
-                    if attempt >= give_up_threshold:
-                        # Give up
-                        raise RuntimeError(f"Health {status.value}")
-
-                # Adjust polling pace appropriately
-                current_pace = get_backoff_delay(attempt)
-
     async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
         """
         Executes a serverless endpoint request with the payload.
@@ -319,9 +276,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{log_group}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             log.info(f"{self} | API /run_sync")
             response = await asyncio.to_thread(_fetch_job)
             return JobOutput(**response)
@@ -346,9 +300,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{self}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             # Create a job using the endpoint
             log.info(f"{self} | API /run")
             job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
@@ -366,9 +317,8 @@ class ServerlessResource(DeployableResource):
         while True:
             await asyncio.sleep(current_pace)
 
-
-
-            job_status = await asyncio.to_thread(job.status)
+            # Check job status
+            job_status = await asyncio.to_thread(job.status)
 
             if last_status == job_status:
                 # nothing changed, increase the gap
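The new validator means a datacenter mismatch now fails at model construction instead of surfacing later as an API error. A hedged sketch of that behavior (imports follow the module paths in this diff; the second `DataCenter` member is hypothetical, since the enum body is not shown):

```python
from tetra_rp.core.resources import DataCenter, LiveServerless, NetworkVolume

volume = NetworkVolume(name="models", dataCenterId=DataCenter.EU_RO_1)

try:
    LiveServerless(
        name="example",
        networkVolume=volume,
        datacenter=DataCenter.US_KS_2,  # hypothetical member, anything != EU_RO_1
    )
except ValueError as e:
    # "Network volume datacenter (...) must match endpoint datacenter (...)"
    print(e)
```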
tetra_rp/core/resources/template.py
CHANGED

@@ -22,7 +22,7 @@ class KeyValuePair(BaseModel):
 class PodTemplate(BaseResource):
     advancedStart: Optional[bool] = False
     config: Optional[Dict[str, Any]] = {}
-    containerDiskInGb: Optional[int] =
+    containerDiskInGb: Optional[int] = 64
     containerRegistryAuthId: Optional[str] = ""
     dockerArgs: Optional[str] = ""
     env: Optional[List[KeyValuePair]] = []
tetra_rp/execute_class.py
CHANGED
@@ -202,6 +202,8 @@ def create_remote_class(
     resource_config: ServerlessResource,
     dependencies: Optional[List[str]],
     system_dependencies: Optional[List[str]],
+    accelerate_downloads: bool,
+    hf_models_to_cache: Optional[List[str]],
     extra: dict,
 ):
     """
@@ -219,6 +221,8 @@ def create_remote_class(
             self._resource_config = resource_config
             self._dependencies = dependencies or []
             self._system_dependencies = system_dependencies or []
+            self._accelerate_downloads = accelerate_downloads
+            self._hf_models_to_cache = hf_models_to_cache
             self._extra = extra
             self._constructor_args = args
             self._constructor_kwargs = kwargs
@@ -302,6 +306,8 @@ def create_remote_class(
                 constructor_kwargs=constructor_kwargs,
                 dependencies=self._dependencies,
                 system_dependencies=self._system_dependencies,
+                accelerate_downloads=self._accelerate_downloads,
+                hf_models_to_cache=self._hf_models_to_cache,
                 instance_id=self._instance_id,
                 create_new_instance=not hasattr(
                     self, "_stub"
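On the class path, the two new values are captured at decoration time and forwarded with every remote method call. A hedged sketch of class decoration (the config and class are illustrative; `remote` is the decorator from client.py above):

```python
from tetra_rp import LiveServerless, remote

embed_config = LiveServerless(name="embedder-endpoint")  # hypothetical config

@remote(
    resource_config=embed_config,
    dependencies=["sentence-transformers"],
    accelerate_downloads=True,
    hf_models_to_cache=["sentence-transformers/all-MiniLM-L6-v2"],
)
class Embedder:
    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        self.model_name = model_name

    def embed(self, text: str):
        # Runs remotely; the model should already be in the HF cache thanks to
        # hf_models_to_cache, so the first call skips the download.
        from sentence_transformers import SentenceTransformer
        return SentenceTransformer(self.model_name).encode([text])
```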
tetra_rp/protos/remote_execution.py
CHANGED

@@ -1,11 +1,22 @@
-
+"""Remote execution protocol definitions using Pydantic models.
+
+This module defines the request/response protocol for remote function and class execution.
+The models align with the protobuf schema for communication with remote workers.
+"""
+
 from abc import ABC, abstractmethod
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, Field, model_validator
 
 
 class FunctionRequest(BaseModel):
+    """Request model for remote function or class execution.
+
+    Supports both function-based execution and class instantiation with method calls.
+    All serialized data (args, kwargs, etc.) are base64-encoded cloudpickle strings.
+    """
+
     # MADE OPTIONAL - can be None for class-only execution
     function_name: Optional[str] = Field(
         default=None,
@@ -15,19 +26,19 @@ class FunctionRequest(BaseModel):
         default=None,
         description="Source code of the function to execute",
     )
-    args: List = Field(
+    args: List[str] = Field(
         default_factory=list,
         description="List of base64-encoded cloudpickle-serialized arguments",
     )
-    kwargs: Dict = Field(
+    kwargs: Dict[str, str] = Field(
         default_factory=dict,
         description="Dictionary of base64-encoded cloudpickle-serialized keyword arguments",
     )
-    dependencies: Optional[List] = Field(
+    dependencies: Optional[List[str]] = Field(
         default=None,
         description="Optional list of pip packages to install before executing the function",
     )
-    system_dependencies: Optional[List] = Field(
+    system_dependencies: Optional[List[str]] = Field(
         default=None,
         description="Optional list of system dependencies to install before executing the function",
     )
@@ -44,11 +55,11 @@ class FunctionRequest(BaseModel):
         default=None,
         description="Source code of the class to instantiate (for class execution)",
     )
-    constructor_args:
+    constructor_args: List[str] = Field(
         default_factory=list,
         description="List of base64-encoded cloudpickle-serialized constructor arguments",
     )
-    constructor_kwargs:
+    constructor_kwargs: Dict[str, str] = Field(
         default_factory=dict,
         description="Dictionary of base64-encoded cloudpickle-serialized constructor keyword arguments",
     )
@@ -65,6 +76,16 @@ class FunctionRequest(BaseModel):
         description="Whether to create a new instance or reuse existing one",
     )
 
+    # Download acceleration fields
+    accelerate_downloads: bool = Field(
+        default=True,
+        description="Enable download acceleration for dependencies and models",
+    )
+    hf_models_to_cache: Optional[List[str]] = Field(
+        default=None,
+        description="List of HuggingFace model IDs to pre-cache using acceleration",
+    )
+
     @model_validator(mode="after")
     def validate_execution_requirements(self) -> "FunctionRequest":
         """Validate that required fields are provided based on execution_type"""
@@ -92,7 +113,12 @@ class FunctionRequest(BaseModel):
 
 
 class FunctionResponse(BaseModel):
-
+    """Response model for remote function or class execution results.
+
+    Contains execution results, error information, and metadata about class instances
+    when applicable. The result field contains base64-encoded cloudpickle data.
+    """
+
     success: bool = Field(
         description="Indicates if the function execution was successful",
     )
@@ -108,12 +134,10 @@ class FunctionResponse(BaseModel):
         default=None,
         description="Captured standard output from the function execution",
     )
-
-    # NEW FIELDS FOR CLASS SUPPORT
     instance_id: Optional[str] = Field(
         default=None, description="ID of the class instance that was used/created"
     )
-    instance_info: Optional[Dict] = Field(
+    instance_info: Optional[Dict[str, Any]] = Field(
         default=None,
         description="Metadata about the class instance (creation time, call count, etc.)",
    )
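With the tightened annotations, a well-formed request carries base64-encoded cloudpickle strings rather than raw objects. A minimal construction sketch (module path as in this diff; assumes the model's default execution type is function execution, which the validator shown here does not contradict):

```python
import base64

import cloudpickle
from tetra_rp.protos.remote_execution import FunctionRequest

def encode(value) -> str:
    # Serialize a value the way the protocol fields describe:
    # cloudpickle bytes, base64-encoded, decoded to str.
    return base64.b64encode(cloudpickle.dumps(value)).decode("utf-8")

request = FunctionRequest(
    function_name="add",
    function_code="def add(a, b):\n    return a + b\n",
    args=[encode(2), encode(3)],
    accelerate_downloads=True,  # new field; True is also the default
)
print(request.model_dump(exclude_none=True))
```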
tetra_rp/stubs/live_serverless.py
CHANGED

@@ -60,13 +60,24 @@ class LiveServerlessStub(RemoteExecutorStub):
     def __init__(self, server: LiveServerless):
         self.server = server
 
-    def prepare_request(
+    def prepare_request(
+        self,
+        func,
+        dependencies,
+        system_dependencies,
+        accelerate_downloads,
+        hf_models_to_cache,
+        *args,
+        **kwargs,
+    ):
         source, src_hash = get_function_source(func)
 
         request = {
             "function_name": func.__name__,
             "dependencies": dependencies,
             "system_dependencies": system_dependencies,
+            "accelerate_downloads": accelerate_downloads,
+            "hf_models_to_cache": hf_models_to_cache,
         }
 
         # check if the function is already cached
tetra_rp/stubs/registry.py
CHANGED
@@ -26,13 +26,25 @@ def _(resource, **extra):
 
     # Function execution
     async def stubbed_resource(
-        func,
+        func,
+        dependencies,
+        system_dependencies,
+        accelerate_downloads,
+        hf_models_to_cache,
+        *args,
+        **kwargs,
     ) -> dict:
         if args == (None,):
             args = []
 
         request = stub.prepare_request(
-            func,
+            func,
+            dependencies,
+            system_dependencies,
+            accelerate_downloads,
+            hf_models_to_cache,
+            *args,
+            **kwargs,
         )
         response = await stub.ExecuteFunction(request)
         return stub.handle_response(response)
{tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tetra_rp
-Version: 0.9.0
+Version: 0.11.0
 Summary: A Python library for distributed inference and serving of machine learning models
 Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
 License: MIT
@@ -13,6 +13,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: runpod
 Requires-Dist: python-dotenv>=1.0.0
+Requires-Dist: pydantic>=2.0.0
 
 # Tetra: Serverless computing for AI workloads
 
{tetra_rp-0.9.0.dist-info → tetra_rp-0.11.0.dist-info}/RECORD
CHANGED

@@ -1,11 +1,11 @@
-tetra_rp/__init__.py,sha256
-tetra_rp/client.py,sha256=
-tetra_rp/execute_class.py,sha256=
+tetra_rp/__init__.py,sha256=1ZWWK0rHCpkvxN4hYSFv1xJ6pwJxDJHqsKTBdr0Lxa4,721
+tetra_rp/client.py,sha256=urSVh0j9didd9U8lboPv3TtFYURp2XO6ReOICr9Xrls,3414
+tetra_rp/execute_class.py,sha256=jYNFalqqjKvvCz1zzodRvOkrLQd2FYnLYa4EElEYp8w,12243
 tetra_rp/logger.py,sha256=gk5-PWp3k_GQ5DxndsRkBCX0jarp_3lgZ1oiTFuThQg,1125
 tetra_rp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tetra_rp/core/api/__init__.py,sha256=oldrEKMwxYoBPLvPfVlaFS3wfUtTTxCN6-HzlpTh6vE,124
-tetra_rp/core/api/runpod.py,sha256=
-tetra_rp/core/resources/__init__.py,sha256=
+tetra_rp/core/api/runpod.py,sha256=3TTx1fkXMLZ2R5JCrQYPEn8dhdUsBt8i5OEwAfaKQ_k,10451
+tetra_rp/core/resources/__init__.py,sha256=b9Odwyc9BzINinDajKWX-WV-fR6mPrg6bF4evMTZpWU,844
 tetra_rp/core/resources/base.py,sha256=UJeDiFN45aO1n5SBcxn56ohLhj-AWHoj0KO7mF4yJ_o,1440
 tetra_rp/core/resources/cloud.py,sha256=XJOWPfzYlDVJGHxgffcfpEaOKrWhGdi7AzTlaGuYj0o,70
 tetra_rp/core/resources/constants.py,sha256=F1gPqFaXcCmfrbUSO9PQtUBv984TxFc3pySgVy-kXk8,158
@@ -13,10 +13,10 @@ tetra_rp/core/resources/cpu.py,sha256=YIE-tKolSU3JJzpPB7ey-PbRdqKWsJZ_Ad4h2OYaai
 tetra_rp/core/resources/environment.py,sha256=FC9kJCa8YLSar75AKUKqJYnNLrUdjZj8ZTOrspBrS00,1267
 tetra_rp/core/resources/gpu.py,sha256=2jIIMr8PNnlIAP8ZTKO8Imx-rdxXp2rbdSHJeVfjawk,1858
 tetra_rp/core/resources/live_serverless.py,sha256=A3JRdCYwHR2KN_OlmTLcv-m_ObxNhBhc5CnUzXOpOtc,1177
-tetra_rp/core/resources/network_volume.py,sha256=
+tetra_rp/core/resources/network_volume.py,sha256=h_1xhrbBm9jJWROOGl5qy9u4_kCKSyV4idzt0567-J8,5193
 tetra_rp/core/resources/resource_manager.py,sha256=kUVZDblfUzaG78S8FwOzu4rN6QSegUgQNK3fJ_X7l0w,2834
-tetra_rp/core/resources/serverless.py,sha256=
-tetra_rp/core/resources/template.py,sha256=
+tetra_rp/core/resources/serverless.py,sha256=OxJb5ojx6GidA6nFvVav52PkzRFDC1CetdMYSyfZG2s,14102
+tetra_rp/core/resources/template.py,sha256=qQ8Wd7Rzr1_YeAbW1V7_k7AVHzgWR_RPjcaRfKsetAk,3141
 tetra_rp/core/resources/utils.py,sha256=mgXfgz_NuHN_IC7TzMNdH9II-LMjxcDCG7syDTcPiGs,1721
 tetra_rp/core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tetra_rp/core/utils/backoff.py,sha256=1pfa0smFNpib8nztcIgBbtrVvQeECKh-aNOfL2TztgU,1324
@@ -25,12 +25,12 @@ tetra_rp/core/utils/json.py,sha256=q0r7aEdfh8kKVeHGeh9fBDfuhHYNopSreislAMB6HhM,1
 tetra_rp/core/utils/lru_cache.py,sha256=drwKg-DfLbeBRGTzuxKqNKMQq0EuZV15LMTZIOyZuVk,2618
 tetra_rp/core/utils/singleton.py,sha256=JRli0HhBfq4P9mBUOg1TZUUwMvIenRqWdymX3qFMm2k,210
 tetra_rp/protos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tetra_rp/protos/remote_execution.py,sha256=
+tetra_rp/protos/remote_execution.py,sha256=flKJG0U4ked84cXyF4Gfs_7fBgLsEVOzBv8ZWB9UlP0,5648
 tetra_rp/stubs/__init__.py,sha256=ozKsHs8q0T7o2qhQEquub9hqomh1Htys53mMraaRu2E,72
-tetra_rp/stubs/live_serverless.py,sha256=
-tetra_rp/stubs/registry.py,sha256=
+tetra_rp/stubs/live_serverless.py,sha256=IjocUFkbtvbfXIXWjvxnc0Qk-rMCbwpnuERv6TpmAq0,4424
+tetra_rp/stubs/registry.py,sha256=HcEoedZS-okQ2P9E4LoqK3cE2U9ozvHvf4R6ppHNGog,3045
 tetra_rp/stubs/serverless.py,sha256=BM_a5Ml5VADBYu2WRNmo9qnicP8NnXDGl5ywifulbD0,947
-tetra_rp-0.
-tetra_rp-0.
-tetra_rp-0.
-tetra_rp-0.
+tetra_rp-0.11.0.dist-info/METADATA,sha256=J2uAyIH81jei8H_QSkBaff-uivccdtsugDtxSqT_L80,28077
+tetra_rp-0.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tetra_rp-0.11.0.dist-info/top_level.txt,sha256=bBay7JTDwJXsTYvVjrwno9hnF-j0q272lk65f2AcPjU,9
+tetra_rp-0.11.0.dist-info/RECORD,,
File without changes
|
|
File without changes
|