tetra-rp 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/PKG-INFO +1 -1
  2. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/pyproject.toml +1 -1
  3. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/api/runpod.py +24 -0
  4. tetra_rp-0.10.0/src/tetra_rp/core/resources/network_volume.py +152 -0
  5. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/serverless.py +7 -66
  6. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp.egg-info/PKG-INFO +1 -1
  7. tetra_rp-0.9.0/src/tetra_rp/core/resources/network_volume.py +0 -107
  8. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/README.md +0 -0
  9. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/setup.cfg +0 -0
  10. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/__init__.py +0 -0
  11. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/client.py +0 -0
  12. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/__init__.py +0 -0
  13. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/api/__init__.py +0 -0
  14. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/__init__.py +0 -0
  15. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/base.py +0 -0
  16. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/cloud.py +0 -0
  17. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/constants.py +0 -0
  18. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/cpu.py +0 -0
  19. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/environment.py +0 -0
  20. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/gpu.py +0 -0
  21. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/live_serverless.py +0 -0
  22. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/resource_manager.py +0 -0
  23. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/template.py +0 -0
  24. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/resources/utils.py +0 -0
  25. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/__init__.py +0 -0
  26. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/backoff.py +0 -0
  27. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/constants.py +0 -0
  28. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/json.py +0 -0
  29. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/lru_cache.py +0 -0
  30. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/core/utils/singleton.py +0 -0
  31. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/execute_class.py +0 -0
  32. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/logger.py +0 -0
  33. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/protos/__init__.py +0 -0
  34. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/protos/remote_execution.py +0 -0
  35. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/stubs/__init__.py +0 -0
  36. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/stubs/live_serverless.py +0 -0
  37. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/stubs/registry.py +0 -0
  38. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp/stubs/serverless.py +0 -0
  39. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp.egg-info/SOURCES.txt +0 -0
  40. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp.egg-info/dependency_links.txt +0 -0
  41. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp.egg-info/requires.txt +0 -0
  42. {tetra_rp-0.9.0 → tetra_rp-0.10.0}/src/tetra_rp.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tetra_rp
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: A Python library for distributed inference and serving of machine learning models
5
5
  Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tetra_rp"
3
- version = "0.9.0"
3
+ version = "0.10.0"
4
4
  description = "A Python library for distributed inference and serving of machine learning models"
5
5
  authors = [
6
6
  { name = "Marut Pandya", email = "pandyamarut@gmail.com" },
@@ -281,6 +281,30 @@ class RunpodRestClient:
281
281
 
282
282
  return result
283
283
 
284
+ async def list_network_volumes(self) -> Dict[str, Any]:
285
+ """
286
+ List all network volumes in Runpod.
287
+
288
+ Returns:
289
+ List of network volume objects or dict containing networkVolumes key.
290
+ The API may return either format depending on version.
291
+ """
292
+ log.debug("Listing network volumes")
293
+
294
+ result = await self._execute_rest(
295
+ "GET", f"{RUNPOD_REST_API_URL}/networkvolumes"
296
+ )
297
+
298
+ # Handle both list and dict responses
299
+ if isinstance(result, list):
300
+ volume_count = len(result)
301
+ else:
302
+ volume_count = len(result.get("networkVolumes", []))
303
+
304
+ log.debug(f"Listed {volume_count} network volumes")
305
+
306
+ return result
307
+
284
308
  async def close(self):
285
309
  """Close the HTTP session."""
286
310
  if self.session and not self.session.closed:
@@ -0,0 +1,152 @@
1
+ import hashlib
2
+ import logging
3
+ from enum import Enum
4
+ from typing import Optional
5
+
6
+ from pydantic import (
7
+ Field,
8
+ field_serializer,
9
+ )
10
+
11
+ from ..api.runpod import RunpodRestClient
12
+ from .base import DeployableResource
13
+ from .constants import CONSOLE_BASE_URL
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+
18
+ class DataCenter(str, Enum):
19
+ """
20
+ Enum representing available data centers for network volumes.
21
+ #TODO: Add more data centers as needed. Lock this to the available data center.
22
+ """
23
+
24
+ EU_RO_1 = "EU-RO-1"
25
+
26
+
27
+ class NetworkVolume(DeployableResource):
28
+ """
29
+ NetworkVolume resource for creating and managing Runpod network volumes.
30
+
31
+ This class handles the creation, deployment, and management of network volumes
32
+ that can be attached to serverless resources. Supports idempotent deployment
33
+ where multiple volumes with the same name will reuse existing volumes.
34
+
35
+ """
36
+
37
+ # Internal fixed value
38
+ dataCenterId: DataCenter = Field(default=DataCenter.EU_RO_1, frozen=True)
39
+
40
+ id: Optional[str] = Field(default=None)
41
+ name: Optional[str] = None
42
+ size: Optional[int] = Field(default=50, gt=0) # Size in GB
43
+
44
+ def __str__(self) -> str:
45
+ return f"{self.__class__.__name__}:{self.id}"
46
+
47
+ @property
48
+ def resource_id(self) -> str:
49
+ """Unique resource ID based on name and datacenter for idempotent behavior."""
50
+ if self.name:
51
+ # Use name + datacenter for volumes with names to ensure idempotence
52
+ resource_type = self.__class__.__name__
53
+ config_key = f"{self.name}:{self.dataCenterId.value}"
54
+ hash_obj = hashlib.md5(f"{resource_type}:{config_key}".encode())
55
+ return f"{resource_type}_{hash_obj.hexdigest()}"
56
+ else:
57
+ # Fall back to default behavior for unnamed volumes
58
+ return super().resource_id
59
+
60
+ @field_serializer("dataCenterId")
61
+ def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
62
+ """Convert DataCenter enum to string."""
63
+ return value.value if value is not None else None
64
+
65
+ @property
66
+ def is_created(self) -> bool:
67
+ "Returns True if the network volume already exists."
68
+ return self.id is not None
69
+
70
+ @property
71
+ def url(self) -> str:
72
+ """
73
+ Returns the URL for the network volume resource.
74
+ """
75
+ if not self.id:
76
+ raise ValueError("Network volume ID is not set")
77
+ return f"{CONSOLE_BASE_URL}/user/storage"
78
+
79
+ def is_deployed(self) -> bool:
80
+ """
81
+ Checks if the network volume resource is deployed and available.
82
+ """
83
+ return self.id is not None
84
+
85
+ def _normalize_volumes_response(self, volumes_response) -> list:
86
+ """Normalize API response to list format."""
87
+ if isinstance(volumes_response, list):
88
+ return volumes_response
89
+ return volumes_response.get("networkVolumes", [])
90
+
91
+ def _find_matching_volume(self, existing_volumes: list) -> Optional[dict]:
92
+ """Find existing volume matching name and datacenter."""
93
+ for volume_data in existing_volumes:
94
+ if (
95
+ volume_data.get("name") == self.name
96
+ and volume_data.get("dataCenterId") == self.dataCenterId.value
97
+ ):
98
+ return volume_data
99
+ return None
100
+
101
+ async def _find_existing_volume(self, client) -> Optional["NetworkVolume"]:
102
+ """Check for existing volume with same name and datacenter."""
103
+ if not self.name:
104
+ return None
105
+
106
+ log.debug(f"Checking for existing network volume with name: {self.name}")
107
+ volumes_response = await client.list_network_volumes()
108
+ existing_volumes = self._normalize_volumes_response(volumes_response)
109
+
110
+ if matching_volume := self._find_matching_volume(existing_volumes):
111
+ log.info(
112
+ f"Found existing network volume: {matching_volume.get('id')} with name '{self.name}'"
113
+ )
114
+ # Update our instance with the existing volume's ID
115
+ self.id = matching_volume.get("id")
116
+ return self
117
+
118
+ return None
119
+
120
+ async def _create_new_volume(self, client) -> "NetworkVolume":
121
+ """Create a new network volume."""
122
+ log.debug(f"Creating new network volume: {self.name or 'unnamed'}")
123
+ payload = self.model_dump(exclude_none=True)
124
+ result = await client.create_network_volume(payload)
125
+
126
+ if volume := self.__class__(**result):
127
+ return volume
128
+
129
+ raise ValueError("Deployment failed, no volume was created.")
130
+
131
+ async def deploy(self) -> "DeployableResource":
132
+ """
133
+ Deploys the network volume resource using the provided configuration.
134
+ Returns a DeployableResource object.
135
+ """
136
+ try:
137
+ # If the resource is already deployed, return it
138
+ if self.is_deployed():
139
+ log.debug(f"{self} exists")
140
+ return self
141
+
142
+ async with RunpodRestClient() as client:
143
+ # Check for existing volume first
144
+ if existing_volume := await self._find_existing_volume(client):
145
+ return existing_volume
146
+
147
+ # No existing volume found, create a new one
148
+ return await self._create_new_volume(client)
149
+
150
+ except Exception as e:
151
+ log.error(f"{self} failed to deploy: {e}")
152
+ raise
@@ -134,8 +134,12 @@ class ServerlessResource(DeployableResource):
134
134
  return value.value if value is not None else None
135
135
 
136
136
  @field_serializer("instanceIds")
137
- def serialize_instance_ids(self, value: List[CpuInstanceType]) -> List[str]:
137
+ def serialize_instance_ids(
138
+ self, value: Optional[List[CpuInstanceType]]
139
+ ) -> Optional[List[str]]:
138
140
  """Convert CpuInstanceType enums to strings."""
141
+ if value is None:
142
+ return None
139
143
  return [item.value if hasattr(item, "value") else str(item) for item in value]
140
144
 
141
145
  @field_validator("gpus")
@@ -247,62 +251,6 @@ class ServerlessResource(DeployableResource):
247
251
  log.error(f"{self} failed to deploy: {e}")
248
252
  raise
249
253
 
250
- async def is_ready_for_requests(self, give_up_threshold=10) -> bool:
251
- """
252
- Asynchronously checks if the serverless resource is ready to handle
253
- requests by polling its health endpoint.
254
-
255
- Args:
256
- give_up_threshold (int, optional): The maximum number of polling
257
- attempts before giving up and raising an error. Defaults to 10.
258
-
259
- Returns:
260
- bool: True if the serverless resource is ready for requests.
261
-
262
- Raises:
263
- ValueError: If the serverless resource is not deployed.
264
- RuntimeError: If the health status is THROTTLED, UNHEALTHY, or UNKNOWN
265
- after exceeding the give_up_threshold.
266
- """
267
- if not self.is_deployed():
268
- raise ValueError("Serverless is not deployed")
269
-
270
- log.debug(f"{self} | API /health")
271
-
272
- current_pace = 0
273
- attempt = 0
274
-
275
- # Poll for health status
276
- while True:
277
- await asyncio.sleep(current_pace)
278
-
279
- health = await asyncio.to_thread(self.endpoint.health)
280
- health = ServerlessHealth(**health)
281
-
282
- if health.is_ready:
283
- return True
284
- else:
285
- # nothing changed, increase the gap
286
- attempt += 1
287
- indicator = "." * (attempt // 2) if attempt % 2 == 0 else ""
288
- if indicator:
289
- log.info(f"{self} | {indicator}")
290
-
291
- status = health.workers.status
292
- if status in [
293
- Status.THROTTLED,
294
- Status.UNHEALTHY,
295
- Status.UNKNOWN,
296
- ]:
297
- log.debug(f"{self} | Health {status.value}")
298
-
299
- if attempt >= give_up_threshold:
300
- # Give up
301
- raise RuntimeError(f"Health {status.value}")
302
-
303
- # Adjust polling pace appropriately
304
- current_pace = get_backoff_delay(attempt)
305
-
306
254
  async def run_sync(self, payload: Dict[str, Any]) -> "JobOutput":
307
255
  """
308
256
  Executes a serverless endpoint request with the payload.
@@ -319,9 +267,6 @@ class ServerlessResource(DeployableResource):
319
267
  try:
320
268
  # log.debug(f"[{log_group}] Payload: {payload}")
321
269
 
322
- # Poll until requests can be sent
323
- await self.is_ready_for_requests()
324
-
325
270
  log.info(f"{self} | API /run_sync")
326
271
  response = await asyncio.to_thread(_fetch_job)
327
272
  return JobOutput(**response)
@@ -346,9 +291,6 @@ class ServerlessResource(DeployableResource):
346
291
  try:
347
292
  # log.debug(f"[{self}] Payload: {payload}")
348
293
 
349
- # Poll until requests can be sent
350
- await self.is_ready_for_requests()
351
-
352
294
  # Create a job using the endpoint
353
295
  log.info(f"{self} | API /run")
354
296
  job = await asyncio.to_thread(self.endpoint.run, request_input=payload)
@@ -366,9 +308,8 @@ class ServerlessResource(DeployableResource):
366
308
  while True:
367
309
  await asyncio.sleep(current_pace)
368
310
 
369
- if await self.is_ready_for_requests():
370
- # Check job status
371
- job_status = await asyncio.to_thread(job.status)
311
+ # Check job status
312
+ job_status = await asyncio.to_thread(job.status)
372
313
 
373
314
  if last_status == job_status:
374
315
  # nothing changed, increase the gap
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tetra_rp
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: A Python library for distributed inference and serving of machine learning models
5
5
  Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
6
6
  License: MIT
@@ -1,107 +0,0 @@
1
- import logging
2
- from enum import Enum
3
- from typing import Optional
4
-
5
- from pydantic import (
6
- Field,
7
- field_serializer,
8
- )
9
-
10
- from ..api.runpod import RunpodRestClient
11
- from .base import DeployableResource
12
- from .constants import CONSOLE_BASE_URL
13
-
14
- log = logging.getLogger(__name__)
15
-
16
-
17
- class DataCenter(str, Enum):
18
- """
19
- Enum representing available data centers for network volumes.
20
- #TODO: Add more data centers as needed. Lock this to the available data center.
21
- """
22
-
23
- EU_RO_1 = "EU-RO-1"
24
-
25
-
26
- class NetworkVolume(DeployableResource):
27
- """
28
- NetworkVolume resource for creating and managing Runpod netowrk volumes.
29
-
30
- This class handles the creation, deployment, and management of network volumes
31
- that can be attached to serverless resources.
32
-
33
- """
34
-
35
- # Internal fixed value
36
- dataCenterId: DataCenter = Field(default=DataCenter.EU_RO_1, frozen=True)
37
-
38
- id: Optional[str] = Field(default=None)
39
- name: Optional[str] = None
40
- size: Optional[int] = Field(default=10, gt=0) # Size in GB
41
-
42
- def __str__(self) -> str:
43
- return f"{self.__class__.__name__}:{self.id}"
44
-
45
- @field_serializer("dataCenterId")
46
- def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
47
- """Convert DataCenter enum to string."""
48
- return value.value if value is not None else None
49
-
50
- @property
51
- def is_created(self) -> bool:
52
- "Returns True if the network volume already exists."
53
- return self.id is not None
54
-
55
- @property
56
- def url(self) -> str:
57
- """
58
- Returns the URL for the network volume resource.
59
- """
60
- if not self.id:
61
- raise ValueError("Network volume ID is not set")
62
- return f"{CONSOLE_BASE_URL}/user/storage"
63
-
64
- async def create_network_volume(self) -> str:
65
- """
66
- Creates a network volume using the provided configuration.
67
- Returns the volume ID.
68
- """
69
- async with RunpodRestClient() as client:
70
- # Create the network volume
71
- payload = self.model_dump(exclude_none=True)
72
- result = await client.create_network_volume(payload)
73
-
74
- if volume := self.__class__(**result):
75
- return volume
76
-
77
- def is_deployed(self) -> bool:
78
- """
79
- Checks if the network volume resource is deployed and available.
80
- """
81
- return self.id is not None
82
-
83
- async def deploy(self) -> "DeployableResource":
84
- """
85
- Deploys the network volume resource using the provided configuration.
86
- Returns a DeployableResource object.
87
- """
88
- try:
89
- # If the resource is already deployed, return it
90
- if self.is_deployed():
91
- log.debug(f"{self} exists")
92
- return self
93
-
94
- # Create the network volume
95
- async with RunpodRestClient() as client:
96
- # Create the network volume
97
- payload = self.model_dump(exclude_none=True)
98
- result = await client.create_network_volume(payload)
99
-
100
- if volume := self.__class__(**result):
101
- return volume
102
-
103
- raise ValueError("Deployment failed, no volume was created.")
104
-
105
- except Exception as e:
106
- log.error(f"{self} failed to deploy: {e}")
107
- raise
File without changes
File without changes