tetra-rp 0.10.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tetra-rp has been flagged by the registry.
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/PKG-INFO +2 -1
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/pyproject.toml +2 -1
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/__init__.py +4 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/client.py +25 -3
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/__init__.py +5 -3
- tetra_rp-0.12.0/src/tetra_rp/core/resources/cpu.py +137 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/gpu.py +29 -14
- tetra_rp-0.12.0/src/tetra_rp/core/resources/live_serverless.py +62 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/network_volume.py +7 -11
- tetra_rp-0.12.0/src/tetra_rp/core/resources/resource_manager.py +121 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/serverless.py +44 -54
- tetra_rp-0.12.0/src/tetra_rp/core/resources/serverless_cpu.py +154 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/template.py +1 -1
- tetra_rp-0.12.0/src/tetra_rp/core/utils/file_lock.py +260 -0
- tetra_rp-0.12.0/src/tetra_rp/core/utils/singleton.py +21 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/execute_class.py +6 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/protos/remote_execution.py +36 -12
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/live_serverless.py +22 -7
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/registry.py +41 -14
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/PKG-INFO +2 -1
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/SOURCES.txt +2 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/requires.txt +1 -0
- tetra_rp-0.10.0/src/tetra_rp/core/resources/cpu.py +0 -34
- tetra_rp-0.10.0/src/tetra_rp/core/resources/live_serverless.py +0 -36
- tetra_rp-0.10.0/src/tetra_rp/core/resources/resource_manager.py +0 -80
- tetra_rp-0.10.0/src/tetra_rp/core/utils/singleton.py +0 -7
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/README.md +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/setup.cfg +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/__init__.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/api/__init__.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/api/runpod.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/base.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/cloud.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/constants.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/environment.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/utils.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/__init__.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/backoff.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/constants.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/json.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/utils/lru_cache.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/logger.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/protos/__init__.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/__init__.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/stubs/serverless.py +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/dependency_links.txt +0 -0
- {tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp.egg-info/top_level.txt +0 -0
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tetra_rp
-Version: 0.10.0
+Version: 0.12.0
 Summary: A Python library for distributed inference and serving of machine learning models
 Author-email: Marut Pandya <pandyamarut@gmail.com>, Patrick Rachford <prachford@icloud.com>, Dean Quinanola <dean.quinanola@runpod.io>
 License: MIT
@@ -13,6 +13,7 @@ Description-Content-Type: text/markdown
 Requires-Dist: cloudpickle>=3.1.1
 Requires-Dist: runpod
 Requires-Dist: python-dotenv>=1.0.0
+Requires-Dist: pydantic>=2.0.0
 
 # Tetra: Serverless computing for AI workloads
 
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "tetra_rp"
-version = "0.10.0"
+version = "0.12.0"
 description = "A Python library for distributed inference and serving of machine learning models"
 authors = [
     { name = "Marut Pandya", email = "pandyamarut@gmail.com" },
@@ -21,6 +21,7 @@ dependencies = [
     "cloudpickle>=3.1.1",
     "runpod",
     "python-dotenv>=1.0.0",
+    "pydantic>=2.0.0",
 ]
 
 [dependency-groups]
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/__init__.py

@@ -13,7 +13,9 @@ from .client import remote  # noqa: E402
 from .core.resources import (  # noqa: E402
     CpuServerlessEndpoint,
     CpuInstanceType,
+    CpuLiveServerless,
     CudaVersion,
+    DataCenter,
     GpuGroup,
     LiveServerless,
     PodTemplate,
@@ -28,7 +30,9 @@ __all__ = [
     "remote",
     "CpuServerlessEndpoint",
     "CpuInstanceType",
+    "CpuLiveServerless",
     "CudaVersion",
+    "DataCenter",
     "GpuGroup",
     "LiveServerless",
     "PodTemplate",
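The net effect for users is two new top-level names, `CpuLiveServerless` and `DataCenter`. A minimal import check, assuming a 0.12.0 install (names taken from the `__all__` diff above):

```python
# Smoke test for the 0.12.0 public API surface.
from tetra_rp import (
    CpuLiveServerless,  # new in 0.12.0: CPU-only live serverless endpoint
    DataCenter,         # new in 0.12.0: re-exported from network_volume
    LiveServerless,
    remote,
)
```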
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/client.py

@@ -14,6 +14,8 @@ def remote(
     resource_config: ServerlessResource,
     dependencies: Optional[List[str]] = None,
     system_dependencies: Optional[List[str]] = None,
+    accelerate_downloads: bool = True,
+    hf_models_to_cache: Optional[List[str]] = None,
     **extra,
 ):
     """
@@ -22,10 +24,17 @@ def remote(
     This decorator allows a function to be executed in a remote serverless environment, with support for
     dynamic resource provisioning and installation of required dependencies.
 
+    Args:
         resource_config (ServerlessResource): Configuration object specifying the serverless resource
             to be provisioned or used.
         dependencies (List[str], optional): A list of pip package names to be installed in the remote
             environment before executing the function. Defaults to None.
+        system_dependencies (List[str], optional): A list of system packages to be installed in the remote
+            environment before executing the function. Defaults to None.
+        accelerate_downloads (bool, optional): Enable download acceleration for dependencies and models.
+            Defaults to True.
+        hf_models_to_cache (List[str], optional): List of HuggingFace model IDs to pre-cache using
+            download acceleration. Defaults to None.
         extra (dict, optional): Additional parameters for the execution of the resource. Defaults to an empty dict.
 
     Returns:
@@ -37,7 +46,8 @@ def remote(
         @remote(
             resource_config=my_resource_config,
             dependencies=["numpy", "pandas"],
-
+            accelerate_downloads=True,
+            hf_models_to_cache=["gpt2", "bert-base-uncased"]
         )
         async def my_function(data):
             # Function logic here
@@ -49,7 +59,13 @@ def remote(
     if inspect.isclass(func_or_class):
         # Handle class decoration
        return create_remote_class(
-            func_or_class,
+            func_or_class,
+            resource_config,
+            dependencies,
+            system_dependencies,
+            accelerate_downloads,
+            hf_models_to_cache,
+            extra,
        )
    else:
        # Handle function decoration (unchanged)
@@ -62,7 +78,13 @@ def remote(
 
         stub = stub_resource(remote_resource, **extra)
         return await stub(
-            func_or_class,
+            func_or_class,
+            dependencies,
+            system_dependencies,
+            accelerate_downloads,
+            hf_models_to_cache,
+            *args,
+            **kwargs,
         )
 
     return wrapper
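A usage sketch combining the new parameters. The `LiveServerless` constructor field used here (`name`) is an assumption for illustration only; it is not shown in this diff:

```python
from tetra_rp import remote, LiveServerless

# 'name' is an assumed constructor field, for illustration only.
gpu_config = LiveServerless(name="hf-inference")

@remote(
    resource_config=gpu_config,
    dependencies=["transformers", "torch"],
    system_dependencies=["ffmpeg"],   # new in 0.12.0: system packages
    accelerate_downloads=True,        # new in 0.12.0: on by default
    hf_models_to_cache=["gpt2"],      # new in 0.12.0: pre-cache HF models remotely
)
async def generate(prompt: str) -> str:
    # Runs in the remote serverless environment with the caches warmed.
    from transformers import pipeline
    return pipeline("text-generation", model="gpt2")(prompt)[0]["generated_text"]
```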
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/__init__.py

@@ -3,24 +3,26 @@ from .cloud import runpod
 from .cpu import CpuInstanceType
 from .gpu import GpuGroup, GpuType, GpuTypeDetail
 from .resource_manager import ResourceManager
-from .live_serverless import LiveServerless
+from .live_serverless import LiveServerless, CpuLiveServerless
 from .serverless import (
-    CpuServerlessEndpoint,
     ServerlessResource,
     ServerlessEndpoint,
     JobOutput,
     CudaVersion,
 )
+from .serverless_cpu import CpuServerlessEndpoint
 from .template import PodTemplate
-from .network_volume import NetworkVolume
+from .network_volume import NetworkVolume, DataCenter
 
 
 __all__ = [
     "runpod",
     "BaseResource",
     "CpuInstanceType",
+    "CpuLiveServerless",
     "CpuServerlessEndpoint",
     "CudaVersion",
+    "DataCenter",
     "DeployableResource",
     "GpuGroup",
     "GpuType",
tetra_rp-0.12.0/src/tetra_rp/core/resources/cpu.py

@@ -0,0 +1,137 @@
+from enum import Enum
+from typing import List, Optional
+
+
+class CpuInstanceType(str, Enum):
+    """Valid CPU instance types.
+
+    Format: {generation}{type}-{vcpu}-{memory_gb}
+    Based on Runpod backend validation logic:
+    - memoryInGb = vcpuCount * flavor.ramMultiplier
+
+    RAM Multipliers (DEV environment):
+    - cpu3g: 4.0 (1 vCPU = 4GB, 2 vCPU = 8GB, etc.)
+    - cpu3c: 2.0 (1 vCPU = 2GB, 2 vCPU = 4GB, etc.)
+    - cpu5c: 2.0 (1 vCPU = 2GB, 2 vCPU = 4GB, etc.)
+    - cpu5g: Not available
+    """
+
+    # 3rd Generation General Purpose (RAM multiplier: 4.0)
+
+    CPU3G_1_4 = "cpu3g-1-4"
+    """1 vCPU, 4GB RAM, max 10GB container disk"""
+
+    CPU3G_2_8 = "cpu3g-2-8"
+    """2 vCPU, 8GB RAM, max 20GB container disk"""
+
+    CPU3G_4_16 = "cpu3g-4-16"
+    """4 vCPU, 16GB RAM, max 40GB container disk"""
+
+    CPU3G_8_32 = "cpu3g-8-32"
+    """8 vCPU, 32GB RAM, max 80GB container disk"""
+
+    # 3rd Generation Compute-Optimized (RAM multiplier: 2.0)
+
+    CPU3C_1_2 = "cpu3c-1-2"
+    """1 vCPU, 2GB RAM, max 10GB container disk"""
+
+    CPU3C_2_4 = "cpu3c-2-4"
+    """2 vCPU, 4GB RAM, max 20GB container disk"""
+
+    CPU3C_4_8 = "cpu3c-4-8"
+    """4 vCPU, 8GB RAM, max 40GB container disk"""
+
+    CPU3C_8_16 = "cpu3c-8-16"
+    """8 vCPU, 16GB RAM, max 80GB container disk"""
+
+    # 5th Generation Compute-Optimized (RAM multiplier: 2.0)
+
+    CPU5C_1_2 = "cpu5c-1-2"
+    """1 vCPU, 2GB RAM, max 15GB container disk"""
+
+    CPU5C_2_4 = "cpu5c-2-4"
+    """2 vCPU, 4GB RAM, max 30GB container disk"""
+
+    CPU5C_4_8 = "cpu5c-4-8"
+    """4 vCPU, 8GB RAM, max 60GB container disk"""
+
+    CPU5C_8_16 = "cpu5c-8-16"
+    """8 vCPU, 16GB RAM, max 120GB container disk"""
+
+
+def calculate_max_disk_size(instance_type: CpuInstanceType) -> int:
+    """
+    Calculate the maximum container disk size for a CPU instance type.
+
+    Formula:
+    - CPU3G/CPU3C: vCPU count × 10GB
+    - CPU5C: vCPU count × 15GB
+
+    Args:
+        instance_type: CPU instance type enum
+
+    Returns:
+        Maximum container disk size in GB
+
+    Example:
+        >>> calculate_max_disk_size(CpuInstanceType.CPU3G_1_4)
+        10
+        >>> calculate_max_disk_size(CpuInstanceType.CPU5C_2_4)
+        30
+    """
+    # Parse the instance type string to extract vCPU count
+    # Format: "cpu{generation}{type}-{vcpu}-{memory}"
+    instance_str = instance_type.value
+    parts = instance_str.split("-")
+
+    if len(parts) != 3:
+        raise ValueError(f"Invalid instance type format: {instance_str}")
+
+    vcpu_count = int(parts[1])
+
+    # Determine disk multiplier based on generation
+    if instance_str.startswith("cpu5c"):
+        disk_multiplier = 15  # CPU5C: 15GB per vCPU
+    elif instance_str.startswith(("cpu3g", "cpu3c")):
+        disk_multiplier = 10  # CPU3G/CPU3C: 10GB per vCPU
+    else:
+        raise ValueError(f"Unknown CPU generation/type: {instance_str}")
+
+    return vcpu_count * disk_multiplier
+
+
+# CPU Instance Type Disk Limits (calculated programmatically)
+CPU_INSTANCE_DISK_LIMITS = {
+    instance_type: calculate_max_disk_size(instance_type)
+    for instance_type in CpuInstanceType
+}
+
+
+def get_max_disk_size_for_instances(
+    instance_types: Optional[List[CpuInstanceType]],
+) -> Optional[int]:
+    """
+    Calculate the maximum container disk size for a list of CPU instance types.
+
+    Returns the minimum disk limit across all instance types to ensure compatibility
+    with all specified instances.
+
+    Args:
+        instance_types: List of CPU instance types, or None
+
+    Returns:
+        Maximum allowed disk size in GB, or None if no CPU instances specified
+
+    Example:
+        >>> get_max_disk_size_for_instances([CpuInstanceType.CPU3G_1_4])
+        10
+        >>> get_max_disk_size_for_instances([CpuInstanceType.CPU3G_1_4, CpuInstanceType.CPU3G_2_8])
+        10
+    """
+    if not instance_types:
+        return None
+
+    disk_limits = [
+        CPU_INSTANCE_DISK_LIMITS[instance_type] for instance_type in instance_types
+    ]
+    return min(disk_limits)
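The disk-limit helpers are pure functions, so their behavior follows directly from the formulas above. A quick check, using the module path from this release:

```python
from tetra_rp.core.resources.cpu import (
    CpuInstanceType,
    calculate_max_disk_size,
    get_max_disk_size_for_instances,
)

assert calculate_max_disk_size(CpuInstanceType.CPU5C_4_8) == 60  # 4 vCPU x 15GB
assert get_max_disk_size_for_instances(
    [CpuInstanceType.CPU3G_1_4, CpuInstanceType.CPU3C_8_16]
) == 10  # min(10, 80): the strictest limit keeps the config valid for every instance
assert get_max_disk_size_for_instances(None) is None
```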
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/gpu.py

@@ -32,20 +32,35 @@ class GpuTypeDetail(GpuType):
 
 # TODO: this should be fetched from an API
 class GpuGroup(Enum):
-    ANY = "any"
-
-
-
-
-
-
-
-
-
-
-
-
-
+    ANY = "any"
+    """Any GPU"""
+
+    ADA_24 = "ADA_24"
+    """NVIDIA GeForce RTX 4090"""
+
+    ADA_32_PRO = "ADA_32_PRO"
+    """NVIDIA GeForce RTX 5090"""
+
+    ADA_48_PRO = "ADA_48_PRO"
+    """NVIDIA RTX 6000 Ada Generation, NVIDIA L40, NVIDIA L40S"""
+
+    ADA_80_PRO = "ADA_80_PRO"
+    """NVIDIA H100 PCIe, NVIDIA H100 80GB HBM3, NVIDIA H100 NVL"""
+
+    AMPERE_16 = "AMPERE_16"
+    """NVIDIA RTX A4000, NVIDIA RTX A4500, NVIDIA RTX 4000 Ada Generation, NVIDIA RTX 2000 Ada Generation"""
+
+    AMPERE_24 = "AMPERE_24"
+    """NVIDIA RTX A5000, NVIDIA L4, NVIDIA GeForce RTX 3090"""
+
+    AMPERE_48 = "AMPERE_48"
+    """NVIDIA A40, NVIDIA RTX A6000"""
+
+    AMPERE_80 = "AMPERE_80"
+    """NVIDIA A100 80GB PCIe, NVIDIA A100-SXM4-80GB"""
+
+    HOPPER_141 = "HOPPER_141"
+    """NVIDIA H200"""
 
     @classmethod
     def all(cls) -> List["GpuGroup"]:
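Each group now documents the concrete cards it covers inline, so configs can target a GPU class rather than a specific model. A small sketch:

```python
from tetra_rp import GpuGroup

GpuGroup.AMPERE_24.value  # "AMPERE_24": RTX A5000, L4, or RTX 3090
GpuGroup.all()            # classmethod shown in the diff; returns the list of groups
```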
tetra_rp-0.12.0/src/tetra_rp/core/resources/live_serverless.py

@@ -0,0 +1,62 @@
+# Ship serverless code as you write it. No builds, no deploys — just run.
+import os
+from pydantic import model_validator
+from .serverless import ServerlessEndpoint
+from .serverless_cpu import CpuServerlessEndpoint
+
+TETRA_IMAGE_TAG = os.environ.get("TETRA_IMAGE_TAG", "latest")
+TETRA_GPU_IMAGE = os.environ.get(
+    "TETRA_GPU_IMAGE", f"runpod/tetra-rp:{TETRA_IMAGE_TAG}"
+)
+TETRA_CPU_IMAGE = os.environ.get(
+    "TETRA_CPU_IMAGE", f"runpod/tetra-rp-cpu:{TETRA_IMAGE_TAG}"
+)
+
+
+class LiveServerlessMixin:
+    """Common mixin for live serverless endpoints that locks the image."""
+
+    @property
+    def _live_image(self) -> str:
+        """Override in subclasses to specify the locked image."""
+        raise NotImplementedError("Subclasses must define _live_image")
+
+    @property
+    def imageName(self):
+        # Lock imageName to specific image
+        return self._live_image
+
+    @imageName.setter
+    def imageName(self, value):
+        # Prevent manual setting of imageName
+        pass
+
+
+class LiveServerless(LiveServerlessMixin, ServerlessEndpoint):
+    """GPU-only live serverless endpoint."""
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_GPU_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_serverless_template(cls, data: dict):
+        """Set default GPU image for Live Serverless."""
+        data["imageName"] = TETRA_GPU_IMAGE
+        return data
+
+
+class CpuLiveServerless(LiveServerlessMixin, CpuServerlessEndpoint):
+    """CPU-only live serverless endpoint with automatic disk sizing."""
+
+    @property
+    def _live_image(self) -> str:
+        return TETRA_CPU_IMAGE
+
+    @model_validator(mode="before")
+    @classmethod
+    def set_live_serverless_template(cls, data: dict):
+        """Set default CPU image for Live Serverless."""
+        data["imageName"] = TETRA_CPU_IMAGE
+        return data
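The mixin makes `imageName` effectively read-only: the property always returns the tetra image for the endpoint's flavor, and the setter is a deliberate no-op, so even the `model_validator` assignment resolves to the locked value. A sketch; constructor fields other than `imageName` are not shown in this diff, so `name` is an assumption:

```python
from tetra_rp import LiveServerless

endpoint = LiveServerless(name="demo")  # 'name' is an assumed field, for illustration
print(endpoint.imageName)               # e.g. "runpod/tetra-rp:latest" (locked)
endpoint.imageName = "my/custom:image"  # silently ignored by the no-op setter
print(endpoint.imageName)               # unchanged
```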
{tetra_rp-0.10.0 → tetra_rp-0.12.0}/src/tetra_rp/core/resources/network_volume.py

@@ -38,8 +38,8 @@ class NetworkVolume(DeployableResource):
     dataCenterId: DataCenter = Field(default=DataCenter.EU_RO_1, frozen=True)
 
     id: Optional[str] = Field(default=None)
-    name:
-    size: Optional[int] = Field(default=
+    name: str
+    size: Optional[int] = Field(default=100, gt=0)  # Size in GB
 
     def __str__(self) -> str:
         return f"{self.__class__.__name__}:{self.id}"
@@ -47,15 +47,11 @@ class NetworkVolume(DeployableResource):
     @property
     def resource_id(self) -> str:
         """Unique resource ID based on name and datacenter for idempotent behavior."""
-
-
-
-
-
-            return f"{resource_type}_{hash_obj.hexdigest()}"
-        else:
-            # Fall back to default behavior for unnamed volumes
-            return super().resource_id
+        # Use name + datacenter to ensure idempotence
+        resource_type = self.__class__.__name__
+        config_key = f"{self.name}:{self.dataCenterId.value}"
+        hash_obj = hashlib.md5(f"{resource_type}:{config_key}".encode())
+        return f"{resource_type}_{hash_obj.hexdigest()}"
 
     @field_serializer("dataCenterId")
     def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
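Because `resource_id` now hashes only the name and datacenter, two `NetworkVolume` configs with the same name deduplicate to one deployment regardless of other fields such as `size`. A sketch, assuming `name` is the only required constructor field, as the diff suggests:

```python
from tetra_rp.core.resources.network_volume import NetworkVolume

a = NetworkVolume(name="models")             # size defaults to 100 GB
b = NetworkVolume(name="models", size=250)

# Same name + datacenter yields the same MD5-derived ID, so the
# ResourceManager treats both configs as one volume.
assert a.resource_id == b.resource_id
assert a.resource_id != NetworkVolume(name="datasets").resource_id
```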
tetra_rp-0.12.0/src/tetra_rp/core/resources/resource_manager.py

@@ -0,0 +1,121 @@
+import asyncio
+import cloudpickle
+import logging
+from typing import Dict, Optional
+from pathlib import Path
+
+from ..utils.singleton import SingletonMixin
+from ..utils.file_lock import file_lock, FileLockError
+
+from .base import DeployableResource
+
+
+log = logging.getLogger(__name__)
+
+# File to persist state of resources
+RESOURCE_STATE_FILE = Path(".tetra_resources.pkl")
+
+
+class ResourceManager(SingletonMixin):
+    """Manages dynamic provisioning and tracking of remote resources."""
+
+    # Class variables shared across all instances (singleton)
+    _resources: Dict[str, DeployableResource] = {}
+    _deployment_locks: Dict[str, asyncio.Lock] = {}
+    _global_lock: Optional[asyncio.Lock] = None  # Will be initialized lazily
+    _lock_initialized = False
+
+    def __init__(self):
+        # Ensure async locks are initialized properly for the singleton instance
+        if not ResourceManager._lock_initialized:
+            ResourceManager._global_lock = asyncio.Lock()
+            ResourceManager._lock_initialized = True
+
+        if not self._resources:
+            self._load_resources()
+
+    def _load_resources(self) -> Dict[str, DeployableResource]:
+        """Load persisted resource information using cross-platform file locking."""
+        if RESOURCE_STATE_FILE.exists():
+            try:
+                with open(RESOURCE_STATE_FILE, "rb") as f:
+                    # Acquire shared lock for reading (cross-platform)
+                    with file_lock(f, exclusive=False):
+                        self._resources = cloudpickle.load(f)
+                log.debug(f"Loaded saved resources from {RESOURCE_STATE_FILE}")
+            except (FileLockError, Exception) as e:
+                log.error(f"Failed to load resources from {RESOURCE_STATE_FILE}: {e}")
+        return self._resources
+
+    def _save_resources(self) -> None:
+        """Persist state of resources to disk using cross-platform file locking."""
+        try:
+            with open(RESOURCE_STATE_FILE, "wb") as f:
+                # Acquire exclusive lock for writing (cross-platform)
+                with file_lock(f, exclusive=True):
+                    cloudpickle.dump(self._resources, f)
+                    f.flush()  # Ensure data is written to disk
+            log.debug(f"Saved resources in {RESOURCE_STATE_FILE}")
+        except (FileLockError, Exception) as e:
+            log.error(f"Failed to save resources to {RESOURCE_STATE_FILE}: {e}")
+            raise
+
+    def add_resource(self, uid: str, resource: DeployableResource):
+        """Add a resource to the manager."""
+        self._resources[uid] = resource
+        self._save_resources()
+
+    # function to check if resource still exists remotely, else remove it
+    def remove_resource(self, uid: str):
+        """Remove a resource from the manager."""
+        if uid not in self._resources:
+            log.warning(f"Resource {uid} not found for removal")
+            return
+
+        del self._resources[uid]
+        log.debug(f"Removed resource {uid}")
+
+        self._save_resources()
+
+    async def get_or_deploy_resource(
+        self, config: DeployableResource
+    ) -> DeployableResource:
+        """Get existing or create new resource based on config.
+
+        Thread-safe implementation that prevents concurrent deployments
+        of the same resource configuration.
+        """
+        uid = config.resource_id
+
+        # Ensure global lock is initialized (should be done in __init__)
+        assert ResourceManager._global_lock is not None, "Global lock not initialized"
+
+        # Get or create a per-resource lock
+        async with ResourceManager._global_lock:
+            if uid not in ResourceManager._deployment_locks:
+                ResourceManager._deployment_locks[uid] = asyncio.Lock()
+            resource_lock = ResourceManager._deployment_locks[uid]
+
+        # Acquire per-resource lock for this specific configuration
+        async with resource_lock:
+            # Double-check pattern: check again inside the lock
+            if existing := self._resources.get(uid):
+                if not existing.is_deployed():
+                    log.warning(f"{existing} is no longer valid, redeploying.")
+                    self.remove_resource(uid)
+                    # Don't recursive call - deploy directly within the lock
+                    deployed_resource = await config.deploy()
+                    log.info(f"URL: {deployed_resource.url}")
+                    self.add_resource(uid, deployed_resource)
+                    return deployed_resource
+
+                log.debug(f"{existing} exists, reusing.")
+                log.info(f"URL: {existing.url}")
+                return existing
+
+            # No existing resource, deploy new one
+            log.debug(f"Deploying new resource: {uid}")
+            deployed_resource = await config.deploy()
+            log.info(f"URL: {deployed_resource.url}")
+            self.add_resource(uid, deployed_resource)
+            return deployed_resource
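The deployment path is a double-checked locking pattern: a short critical section under `_global_lock` creates the per-resource lock, then the actual work runs under that per-resource lock with a re-check of the cache. Concurrent callers with the same config therefore await one deployment instead of racing. A usage sketch, where `config` stands in for any `DeployableResource` such as a `LiveServerless`:

```python
import asyncio
from tetra_rp.core.resources import ResourceManager

async def main(config):
    manager = ResourceManager()  # SingletonMixin: every call returns the same instance

    # Ten concurrent requests for the same config deploy exactly once;
    # the per-resource asyncio.Lock serializes them and later callers reuse it.
    endpoints = await asyncio.gather(
        *(manager.get_or_deploy_resource(config) for _ in range(10))
    )
    assert len({e.resource_id for e in endpoints}) == 1
```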