tetra-rp 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/core/api/runpod.py +24 -0
- tetra_rp/core/resources/network_volume.py +68 -23
- tetra_rp/core/resources/serverless.py +7 -66
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/METADATA +1 -1
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/RECORD +7 -7
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/WHEEL +0 -0
- {tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/top_level.txt +0 -0
tetra_rp/core/api/runpod.py
CHANGED
@@ -281,6 +281,30 @@ class RunpodRestClient:
 
         return result
 
+    async def list_network_volumes(self) -> Dict[str, Any]:
+        """
+        List all network volumes in Runpod.
+
+        Returns:
+            List of network volume objects or dict containing networkVolumes key.
+            The API may return either format depending on version.
+        """
+        log.debug("Listing network volumes")
+
+        result = await self._execute_rest(
+            "GET", f"{RUNPOD_REST_API_URL}/networkvolumes"
+        )
+
+        # Handle both list and dict responses
+        if isinstance(result, list):
+            volume_count = len(result)
+        else:
+            volume_count = len(result.get("networkVolumes", []))
+
+        log.debug(f"Listed {volume_count} network volumes")
+
+        return result
+
     async def close(self):
         """Close the HTTP session."""
         if self.session and not self.session.closed:
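The new list_network_volumes() deliberately tolerates both response shapes (a bare list, or a dict keyed by "networkVolumes"). A minimal caller-side sketch of normalizing that response, assuming RunpodRestClient is used as an async context manager (as the network_volume.py changes below do) and that the import path follows the wheel layout above; the function name print_volume_names is hypothetical:

import asyncio

from tetra_rp.core.api.runpod import RunpodRestClient

async def print_volume_names():
    # Use the client as an async context manager, mirroring network_volume.py
    async with RunpodRestClient() as client:
        result = await client.list_network_volumes()
        # Normalize the two possible response shapes into a plain list
        volumes = result if isinstance(result, list) else result.get("networkVolumes", [])
        for volume in volumes:
            print(volume.get("id"), volume.get("name"), volume.get("dataCenterId"))

# asyncio.run(print_volume_names())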
tetra_rp/core/resources/network_volume.py
CHANGED

@@ -1,3 +1,4 @@
+import hashlib
 import logging
 from enum import Enum
 from typing import Optional
@@ -25,10 +26,11 @@ class DataCenter(str, Enum):
 
 class NetworkVolume(DeployableResource):
     """
-    NetworkVolume resource for creating and managing Runpod
+    NetworkVolume resource for creating and managing Runpod network volumes.
 
     This class handles the creation, deployment, and management of network volumes
-    that can be attached to serverless resources.
+    that can be attached to serverless resources. Supports idempotent deployment
+    where multiple volumes with the same name will reuse existing volumes.
 
     """
 
@@ -37,11 +39,24 @@ class NetworkVolume(DeployableResource):
 
     id: Optional[str] = Field(default=None)
     name: Optional[str] = None
-    size: Optional[int] = Field(default=
+    size: Optional[int] = Field(default=50, gt=0)  # Size in GB
 
     def __str__(self) -> str:
         return f"{self.__class__.__name__}:{self.id}"
 
+    @property
+    def resource_id(self) -> str:
+        """Unique resource ID based on name and datacenter for idempotent behavior."""
+        if self.name:
+            # Use name + datacenter for volumes with names to ensure idempotence
+            resource_type = self.__class__.__name__
+            config_key = f"{self.name}:{self.dataCenterId.value}"
+            hash_obj = hashlib.md5(f"{resource_type}:{config_key}".encode())
+            return f"{resource_type}_{hash_obj.hexdigest()}"
+        else:
+            # Fall back to default behavior for unnamed volumes
+            return super().resource_id
+
     @field_serializer("dataCenterId")
     def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
         """Convert DataCenter enum to string."""
@@ -61,24 +76,57 @@ class NetworkVolume(DeployableResource):
             raise ValueError("Network volume ID is not set")
         return f"{CONSOLE_BASE_URL}/user/storage"
 
-
+    def is_deployed(self) -> bool:
         """
-
-        Returns the volume ID.
+        Checks if the network volume resource is deployed and available.
         """
-
-
-
-
+        return self.id is not None
+
+    def _normalize_volumes_response(self, volumes_response) -> list:
+        """Normalize API response to list format."""
+        if isinstance(volumes_response, list):
+            return volumes_response
+        return volumes_response.get("networkVolumes", [])
+
+    def _find_matching_volume(self, existing_volumes: list) -> Optional[dict]:
+        """Find existing volume matching name and datacenter."""
+        for volume_data in existing_volumes:
+            if (
+                volume_data.get("name") == self.name
+                and volume_data.get("dataCenterId") == self.dataCenterId.value
+            ):
+                return volume_data
+        return None
+
+    async def _find_existing_volume(self, client) -> Optional["NetworkVolume"]:
+        """Check for existing volume with same name and datacenter."""
+        if not self.name:
+            return None
+
+        log.debug(f"Checking for existing network volume with name: {self.name}")
+        volumes_response = await client.list_network_volumes()
+        existing_volumes = self._normalize_volumes_response(volumes_response)
+
+        if matching_volume := self._find_matching_volume(existing_volumes):
+            log.info(
+                f"Found existing network volume: {matching_volume.get('id')} with name '{self.name}'"
+            )
+            # Update our instance with the existing volume's ID
+            self.id = matching_volume.get("id")
+            return self
+
+        return None
+
+    async def _create_new_volume(self, client) -> "NetworkVolume":
+        """Create a new network volume."""
+        log.debug(f"Creating new network volume: {self.name or 'unnamed'}")
+        payload = self.model_dump(exclude_none=True)
+        result = await client.create_network_volume(payload)
 
         if volume := self.__class__(**result):
             return volume
 
-
-        """
-        Checks if the network volume resource is deployed and available.
-        """
-        return self.id is not None
+        raise ValueError("Deployment failed, no volume was created.")
 
     async def deploy(self) -> "DeployableResource":
         """
@@ -91,16 +139,13 @@ class NetworkVolume(DeployableResource):
                 log.debug(f"{self} exists")
                 return self
 
-            # Create the network volume
             async with RunpodRestClient() as client:
-                #
-
-
-
-                if volume := self.__class__(**result):
-                    return volume
+                # Check for existing volume first
+                if existing_volume := await self._find_existing_volume(client):
+                    return existing_volume
 
-
+                # No existing volume found, create a new one
+                return await self._create_new_volume(client)
 
         except Exception as e:
             log.error(f"{self} failed to deploy: {e}")
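Taken together, these changes make deploy() idempotent for named volumes: resource_id is now an md5 of the resource type, name, and datacenter, and deploy() looks for an existing volume before creating a new one. A hedged usage sketch of that behavior; the volume name "shared-models" and size are illustrative, relying on the default datacenter is an assumption, and the import path simply follows the wheel layout (the class may also be re-exported from tetra_rp.core.resources):

import asyncio

from tetra_rp.core.resources.network_volume import NetworkVolume

async def main():
    # Two objects with the same name (and default datacenter) hash to the same resource_id
    first = NetworkVolume(name="shared-models", size=100)
    second = NetworkVolume(name="shared-models", size=100)
    assert first.resource_id == second.resource_id

    deployed_a = await first.deploy()   # expected to create the volume on first use
    deployed_b = await second.deploy()  # expected to find and reuse the volume by name + datacenter
    assert deployed_a.id == deployed_b.id

# asyncio.run(main())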
tetra_rp/core/resources/serverless.py
CHANGED

@@ -134,8 +134,12 @@ class ServerlessResource(DeployableResource):
         return value.value if value is not None else None
 
     @field_serializer("instanceIds")
-    def serialize_instance_ids(
+    def serialize_instance_ids(
+        self, value: Optional[List[CpuInstanceType]]
+    ) -> Optional[List[str]]:
         """Convert CpuInstanceType enums to strings."""
+        if value is None:
+            return None
         return [item.value if hasattr(item, "value") else str(item) for item in value]
 
     @field_validator("gpus")
@@ -247,62 +251,6 @@ class ServerlessResource(DeployableResource):
             log.error(f"{self} failed to deploy: {e}")
             raise
 
-    async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
-        """
-        Asynchronously checks if the serverless resource is ready to handle
-        requests by polling its health endpoint.
-
-        Args:
-            give_up_threshold (int, optional): The maximum number of polling
-                attempts before giving up and raising an error. Defaults to 10.
-
-        Returns:
-            bool: True if the serverless resource is ready for requests.
-
-        Raises:
-            ValueError: If the serverless resource is not deployed.
-            RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
-                after exceeding the give_up_threshold.
-        """
-        if not self.is_deployed():
-            raise ValueError("Serverless is not deployed")
-
-        log.debug(f"{self} | API /health")
-
-        current_pace = 0
-        attempt = 0
-
-        # Poll for health status
-        while True:
-            await asyncio.sleep(current_pace)
-
-            health = await asyncio.to_thread(self.endpoint.health)
-            health = ServerlessHealth(**health)
-
-            if health.is_ready:
-                return True
-            else:
-                # nothing changed, increase the gap
-                attempt += 1
-                indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
-                if indicator:
-                    log.info(f"{self} | {indicator}")
-
-                status = health.workers.status
-                if status in [
-                    Status.THROTTLED,
-                    Status.UNHEALTHY,
-                    Status.UNKNOWN,
-                ]:
-                    log.debug(f"{self} | Health {status.value}")
-
-                    if attempt >= give_up_threshold:
-                        # Give up
-                        raise RuntimeError(f"Health {status.value}")
-
-                # Adjust polling pace appropriately
-                current_pace = get_backoff_delay(attempt)
-
     async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
         """
         Executes a serverless endpoint request with the payload.
@@ -319,9 +267,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{log_group}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             log.info(f"{self} | API /run_sync")
             response = await asyncio.to_thread(_fetch_job)
             return JobOutput(**response)
@@ -346,9 +291,6 @@ class ServerlessResource(DeployableResource):
         try:
             # log.debug(f"[{self}] Payload: {payload}")
 
-            # Poll until requests can be sent
-            await self.is_ready_for_requests()
-
             # Create a job using the endpoint
             log.info(f"{self} | API /run")
             job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
@@ -366,9 +308,8 @@ class ServerlessResource(DeployableResource):
         while True:
             await asyncio.sleep(current_pace)
 
-
-
-            job_status = await asyncio.to_thread(job.status)
+            # Check job status
+            job_status = await asyncio.to_thread(job.status)
 
             if last_status == job_status:
                 # nothing changed, increase the gap
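Note that is_ready_for_requests() is removed entirely and run_sync()/run() no longer wait on the /health endpoint before submitting work. Callers that relied on that implicit readiness wait may want a small check of their own. A standalone sketch in that spirit, not the removed implementation: wait_until_ready, the assumed shape of endpoint.health() (a dict with worker counts and status), and the backoff formula are all assumptions to adapt:

import asyncio

async def wait_until_ready(endpoint, give_up_threshold: int = 10) -> bool:
    """Poll endpoint.health() with capped exponential backoff until workers look ready."""
    attempt = 0
    delay = 0.0
    while True:
        await asyncio.sleep(delay)
        health = await asyncio.to_thread(endpoint.health)  # assumed to return a dict
        workers = health.get("workers", {})
        if workers.get("ready", 0) > 0:
            return True
        attempt += 1
        status = workers.get("status", "UNKNOWN")
        if status in ("THROTTLED", "UNHEALTHY", "UNKNOWN") and attempt >= give_up_threshold:
            raise RuntimeError(f"Health {status}")
        delay = min(2 ** attempt, 30)  # simple capped exponential backoff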
{tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tetra_rp
-Version: 0.9.0
+Version: 0.10.0
 Summary: A Python library for distributed inference and serving of machine learning models
 Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
 License: MIT
{tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/RECORD
CHANGED

@@ -4,7 +4,7 @@ tetra_rp/execute_class.py,sha256=HoH-qWDA7X6yGvQMwmHn5-MKxbLWHEDEHsuat5dzl2U,119
 tetra_rp/logger.py,sha256=gk5-PWp3k_GQ5DxndsRkBCX0jarp_3lgZ1oiTFuThQg,1125
 tetra_rp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tetra_rp/core/api/__init__.py,sha256=oldrEKMwxYoBPLvPfVlaFS3wfUtTTxCN6-HzlpTh6vE,124
-tetra_rp/core/api/runpod.py,sha256=
+tetra_rp/core/api/runpod.py,sha256=3TTx1fkXMLZ2R5JCrQYPEn8dhdUsBt8i5OEwAfaKQ_k,10451
 tetra_rp/core/resources/__init__.py,sha256=UhIwo1Y6-tw5qsULamR296sQiztuz-oWrSTreqfmFSw,814
 tetra_rp/core/resources/base.py,sha256=UJeDiFN45aO1n5SBcxn56ohLhj-AWHoj0KO7mF4yJ_o,1440
 tetra_rp/core/resources/cloud.py,sha256=XJOWPfzYlDVJGHxgffcfpEaOKrWhGdi7AzTlaGuYj0o,70
@@ -13,9 +13,9 @@ tetra_rp/core/resources/cpu.py,sha256=YIE-tKolSU3JJzpPB7ey-PbRdqKWsJZ_Ad4h2OYaai
 tetra_rp/core/resources/environment.py,sha256=FC9kJCa8YLSar75AKUKqJYnNLrUdjZj8ZTOrspBrS00,1267
 tetra_rp/core/resources/gpu.py,sha256=2jIIMr8PNnlIAP8ZTKO8Imx-rdxXp2rbdSHJeVfjawk,1858
 tetra_rp/core/resources/live_serverless.py,sha256=A3JRdCYwHR2KN_OlmTLcv-m_ObxNhBhc5CnUzXOpOtc,1177
-tetra_rp/core/resources/network_volume.py,sha256=
+tetra_rp/core/resources/network_volume.py,sha256=h11dRlAkkxrqyNvUP9Eb8BHAUSFQyRP4lNgBdKChezw,5391
 tetra_rp/core/resources/resource_manager.py,sha256=kUVZDblfUzaG78S8FwOzu4rN6QSegUgQNK3fJ_X7l0w,2834
-tetra_rp/core/resources/serverless.py,sha256=
+tetra_rp/core/resources/serverless.py,sha256=48mENAPQrR8fMjWFpb7mpGFOMqjXZnRWGULGH7NPa5E,13629
 tetra_rp/core/resources/template.py,sha256=UkflJXZFWIbQkLuUt4oRLAjn-yIpw9_mT2X1cAH69CU,3141
 tetra_rp/core/resources/utils.py,sha256=mgXfgz_NuHN_IC7TzMNdH9II-LMjxcDCG7syDTcPiGs,1721
 tetra_rp/core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,7 +30,7 @@ tetra_rp/stubs/__init__.py,sha256=ozKsHs8q0T7o2qhQEquub9hqomh1Htys53mMraaRu2E,72
 tetra_rp/stubs/live_serverless.py,sha256=o1NH5XEwUD-27NXJsEGO0IwnuDp8iXwUiw5nZtaZZOI,4199
 tetra_rp/stubs/registry.py,sha256=dmbyC7uBp04_sXsG2wJCloFfFRzYjYQ-naEBKhTRo-U,2839
 tetra_rp/stubs/serverless.py,sha256=BM_a5Ml5VADBYu2WRNmo9qnicP8NnXDGl5ywifulbD0,947
-tetra_rp-0.
-tetra_rp-0.
-tetra_rp-0.
-tetra_rp-0.
+tetra_rp-0.10.0.dist-info/METADATA,sha256=Ck626kHGCXM6r5CHIm9P7gcg1q3IGWhB7Wiw7x0yIJs,28046
+tetra_rp-0.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tetra_rp-0.10.0.dist-info/top_level.txt,sha256=bBay7JTDwJXsTYvVjrwno9hnF-j0q272lk65f2AcPjU,9
+tetra_rp-0.10.0.dist-info/RECORD,,

{tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/WHEEL
File without changes

{tetra_rp-0.9.0.dist-info → tetra_rp-0.10.0.dist-info}/top_level.txt
File without changes