tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/__init__.py +109 -19
- tetra_rp/cli/commands/__init__.py +1 -0
- tetra_rp/cli/commands/apps.py +143 -0
- tetra_rp/cli/commands/build.py +1082 -0
- tetra_rp/cli/commands/build_utils/__init__.py +1 -0
- tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
- tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
- tetra_rp/cli/commands/build_utils/manifest.py +430 -0
- tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
- tetra_rp/cli/commands/build_utils/scanner.py +596 -0
- tetra_rp/cli/commands/deploy.py +580 -0
- tetra_rp/cli/commands/init.py +123 -0
- tetra_rp/cli/commands/resource.py +108 -0
- tetra_rp/cli/commands/run.py +296 -0
- tetra_rp/cli/commands/test_mothership.py +458 -0
- tetra_rp/cli/commands/undeploy.py +533 -0
- tetra_rp/cli/main.py +97 -0
- tetra_rp/cli/utils/__init__.py +1 -0
- tetra_rp/cli/utils/app.py +15 -0
- tetra_rp/cli/utils/conda.py +127 -0
- tetra_rp/cli/utils/deployment.py +530 -0
- tetra_rp/cli/utils/ignore.py +143 -0
- tetra_rp/cli/utils/skeleton.py +184 -0
- tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
- tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
- tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
- tetra_rp/cli/utils/skeleton_template/README.md +263 -0
- tetra_rp/cli/utils/skeleton_template/main.py +44 -0
- tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
- tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
- tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
- tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
- tetra_rp/client.py +136 -33
- tetra_rp/config.py +29 -0
- tetra_rp/core/api/runpod.py +591 -39
- tetra_rp/core/deployment.py +232 -0
- tetra_rp/core/discovery.py +425 -0
- tetra_rp/core/exceptions.py +50 -0
- tetra_rp/core/resources/__init__.py +27 -9
- tetra_rp/core/resources/app.py +738 -0
- tetra_rp/core/resources/base.py +139 -4
- tetra_rp/core/resources/constants.py +21 -0
- tetra_rp/core/resources/cpu.py +115 -13
- tetra_rp/core/resources/gpu.py +182 -16
- tetra_rp/core/resources/live_serverless.py +153 -16
- tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
- tetra_rp/core/resources/network_volume.py +126 -31
- tetra_rp/core/resources/resource_manager.py +436 -35
- tetra_rp/core/resources/serverless.py +537 -120
- tetra_rp/core/resources/serverless_cpu.py +201 -0
- tetra_rp/core/resources/template.py +1 -59
- tetra_rp/core/utils/constants.py +10 -0
- tetra_rp/core/utils/file_lock.py +260 -0
- tetra_rp/core/utils/http.py +67 -0
- tetra_rp/core/utils/lru_cache.py +75 -0
- tetra_rp/core/utils/singleton.py +36 -1
- tetra_rp/core/validation.py +44 -0
- tetra_rp/execute_class.py +301 -0
- tetra_rp/protos/remote_execution.py +98 -9
- tetra_rp/runtime/__init__.py +1 -0
- tetra_rp/runtime/circuit_breaker.py +274 -0
- tetra_rp/runtime/config.py +12 -0
- tetra_rp/runtime/exceptions.py +49 -0
- tetra_rp/runtime/generic_handler.py +206 -0
- tetra_rp/runtime/lb_handler.py +189 -0
- tetra_rp/runtime/load_balancer.py +160 -0
- tetra_rp/runtime/manifest_fetcher.py +192 -0
- tetra_rp/runtime/metrics.py +325 -0
- tetra_rp/runtime/models.py +73 -0
- tetra_rp/runtime/mothership_provisioner.py +512 -0
- tetra_rp/runtime/production_wrapper.py +266 -0
- tetra_rp/runtime/reliability_config.py +149 -0
- tetra_rp/runtime/retry_manager.py +118 -0
- tetra_rp/runtime/serialization.py +124 -0
- tetra_rp/runtime/service_registry.py +346 -0
- tetra_rp/runtime/state_manager_client.py +248 -0
- tetra_rp/stubs/live_serverless.py +35 -17
- tetra_rp/stubs/load_balancer_sls.py +357 -0
- tetra_rp/stubs/registry.py +145 -19
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
- tetra_rp-0.24.0.dist-info/RECORD +99 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
- tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
- tetra_rp/core/pool/cluster_manager.py +0 -177
- tetra_rp/core/pool/dataclass.py +0 -18
- tetra_rp/core/pool/ex.py +0 -38
- tetra_rp/core/pool/job.py +0 -22
- tetra_rp/core/pool/worker.py +0 -19
- tetra_rp/core/resources/utils.py +0 -50
- tetra_rp/core/utils/json.py +0 -33
- tetra_rp-0.6.0.dist-info/RECORD +0 -39
- /tetra_rp/{core/pool → cli}/__init__.py +0 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
`tetra_rp/core/resources/serverless_cpu.py` (new file, +201):

```diff
@@ -0,0 +1,201 @@
+"""
+CPU-specific serverless endpoint classes.
+
+This module contains all CPU-related serverless functionality, separate from GPU serverless.
+"""
+
+import hashlib
+import json
+from typing import List, Optional
+
+from pydantic import field_serializer, model_validator
+
+from .cpu import (
+    CpuInstanceType,
+    CPU_INSTANCE_DISK_LIMITS,
+    get_max_disk_size_for_instances,
+)
+from .serverless import ServerlessEndpoint, get_env_vars
+from .template import KeyValuePair, PodTemplate
+
+
+class CpuEndpointMixin:
+    """Mixin class that provides CPU-specific functionality for serverless endpoints."""
+
+    instanceIds: Optional[List[CpuInstanceType]]
+
+    def _is_cpu_endpoint(self) -> bool:
+        """Check if this is a CPU endpoint (has instanceIds)."""
+        return (
+            hasattr(self, "instanceIds")
+            and self.instanceIds is not None
+            and len(self.instanceIds) > 0
+        )
+
+    def _get_cpu_container_disk_size(self) -> Optional[int]:
+        """Get the appropriate container disk size for CPU instances."""
+        if not self._is_cpu_endpoint():
+            return None
+        return get_max_disk_size_for_instances(self.instanceIds)
+
+    def _apply_cpu_disk_sizing(self, template: PodTemplate) -> None:
+        """Apply CPU disk sizing to a template if it's using the default size."""
+        if not self._is_cpu_endpoint():
+            return
+
+        # Only auto-size if template is using the default value
+        default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+        if template.containerDiskInGb == default_disk_size:
+            cpu_disk_size = self._get_cpu_container_disk_size()
+            if cpu_disk_size is not None:
+                template.containerDiskInGb = cpu_disk_size
+
+    def validate_cpu_container_disk_size(self) -> None:
+        """
+        Validate that container disk size doesn't exceed limits for CPU instances.
+
+        Raises:
+            ValueError: If container disk size exceeds the limit for any CPU instance
+        """
+        if (
+            not self._is_cpu_endpoint()
+            or not hasattr(self, "template")
+            or not self.template
+            or not self.template.containerDiskInGb
+        ):
+            return
+
+        max_allowed_disk_size = self._get_cpu_container_disk_size()
+        if max_allowed_disk_size is None:
+            return
+
+        if self.template.containerDiskInGb > max_allowed_disk_size:
+            instance_limits = []
+            for instance_type in self.instanceIds:
+                limit = CPU_INSTANCE_DISK_LIMITS[instance_type]
+                instance_limits.append(f"{instance_type.value}: max {limit}GB")
+
+            raise ValueError(
+                f"Container disk size {self.template.containerDiskInGb}GB exceeds the maximum "
+                f"allowed for CPU instances. Instance limits: {', '.join(instance_limits)}. "
+                f"Maximum allowed: {max_allowed_disk_size}GB"
+            )
+
+    def _sync_cpu_fields(self):
+        """Sync CPU-specific fields, overriding GPU defaults."""
+        # Override GPU-specific fields for CPU
+        if hasattr(self, "gpuCount"):
+            self.gpuCount = 0
+        if hasattr(self, "allowedCudaVersions"):
+            self.allowedCudaVersions = ""
+        if hasattr(self, "gpuIds"):
+            self.gpuIds = ""
+
+    @field_serializer("instanceIds")
+    def serialize_instance_ids(
+        self, value: Optional[List[CpuInstanceType]]
+    ) -> Optional[List[str]]:
+        """Convert CpuInstanceType enums to strings."""
+        if value is None:
+            return None
+        return [item.value if hasattr(item, "value") else str(item) for item in value]
+
+
+class CpuServerlessEndpoint(CpuEndpointMixin, ServerlessEndpoint):
+    """
+    CPU-only serverless endpoint with automatic disk sizing and validation.
+    Represents a CPU-only serverless endpoint distinct from a live serverless.
+    """
+
+    # CPU endpoints don't use GPU-specific fields, so exclude them from API payload
+    # This prevents the RunPod GraphQL API from rejecting CPU endpoints with GPU-specific fields
+    # Note: instanceIds is NOT in _input_only, so it will be sent to the API
+    _input_only = {
+        "id",
+        "cudaVersions",  # GPU-specific, exclude from API payload
+        "datacenter",
+        "env",
+        "gpus",  # Inherited from parent, but always None for CPU endpoints
+        "gpuIds",  # GPU-specific API field, exclude from payload
+        "gpuCount",  # GPU-specific API field, exclude from payload
+        "allowedCudaVersions",  # GPU-specific API field, exclude from payload
+        "flashboot",
+        "flashEnvironmentId",
+        "imageName",
+        "networkVolume",
+    }
+
+    # Override GPU field from parent to None for CPU endpoints
+    gpus: Optional[List] = None
+    instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
+
+    @property
+    def config_hash(self) -> str:
+        """Get hash of current configuration excluding GPU-specific fields.
+
+        CPU endpoints need GPU fields in _input_only to exclude them from API payload,
+        but these fields should not be included in config_hash to avoid false drift
+        detection. This override computes the hash using only CPU-relevant fields.
+        """
+        # CPU-relevant fields for config hash, excluding 'env' to prevent false drift
+        # (env is dynamically computed from .env file at initialization time)
+        cpu_fields = {
+            "datacenter",
+            "flashboot",
+            "flashEnvironmentId",
+            "imageName",
+            "gpus",
+            "networkVolume",
+        }
+        config_dict = self.model_dump(
+            exclude_none=True, include=cpu_fields, mode="json"
+        )
+        config_str = json.dumps(config_dict, sort_keys=True)
+        hash_obj = hashlib.md5(f"{self.__class__.__name__}:{config_str}".encode())
+        return hash_obj.hexdigest()
+
+    def _create_new_template(self) -> PodTemplate:
+        """Create a new PodTemplate with CPU-appropriate disk sizing."""
+        template = PodTemplate(
+            name=self.resource_id,
+            imageName=self.imageName,
+            env=KeyValuePair.from_dict(self.env or get_env_vars()),
+        )
+        # Apply CPU-specific disk sizing
+        self._apply_cpu_disk_sizing(template)
+        return template
+
+    def _configure_existing_template(self) -> None:
+        """Configure an existing template with necessary overrides and CPU sizing."""
+        if self.template is None:
+            return
+
+        self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+        if self.imageName:
+            self.template.imageName = self.imageName
+        if self.env:
+            self.template.env = KeyValuePair.from_dict(self.env)
+
+        # Apply CPU-specific disk sizing
+        self._apply_cpu_disk_sizing(self.template)
+
+    @model_validator(mode="after")
+    def set_serverless_template(self):
+        # Sync CPU-specific fields first
+        self._sync_cpu_fields()
+
+        if not any([self.imageName, self.template, self.templateId]):
+            raise ValueError(
+                "Either imageName, template, or templateId must be provided"
+            )
+
+        if not self.templateId and not self.template:
+            self.template = self._create_new_template()
+        elif self.template:
+            self._configure_existing_template()
+
+        # Validate container disk size for CPU instances
+        self.validate_cpu_container_disk_size()
+
+        return self
```
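A minimal usage sketch of the new class follows. The module path, field names, and `CpuInstanceType.CPU3G_2_8` default all come from the diff above; whether `ServerlessEndpoint` requires further constructor fields is not visible here, so treat this as illustrative rather than a verified recipe.

```python
# Sketch only: assumes no additional required fields on ServerlessEndpoint.
from tetra_rp.core.resources.serverless_cpu import CpuServerlessEndpoint
from tetra_rp.core.resources.cpu import CpuInstanceType

endpoint = CpuServerlessEndpoint(
    imageName="python:3.11-slim",             # satisfies the model validator
    instanceIds=[CpuInstanceType.CPU3G_2_8],  # same as the class default
)

# The model validator has already built a PodTemplate with CPU-appropriate
# disk sizing, and _sync_cpu_fields() zeroed out the GPU fields.
print(endpoint.template.containerDiskInGb)
print(endpoint.gpuCount)     # 0
print(endpoint.config_hash)  # md5 over CPU-relevant fields only
```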
`tetra_rp/core/resources/template.py` (+1 -59):

```diff
@@ -1,4 +1,3 @@
-import requests
 from typing import Dict, List, Optional, Any
 from pydantic import BaseModel, model_validator
 from .base import BaseResource
@@ -22,7 +21,7 @@ class KeyValuePair(BaseModel):
 class PodTemplate(BaseResource):
     advancedStart: Optional[bool] = False
     config: Optional[Dict[str, Any]] = {}
-    containerDiskInGb: Optional[int] =
+    containerDiskInGb: Optional[int] = 64
     containerRegistryAuthId: Optional[str] = ""
     dockerArgs: Optional[str] = ""
     env: Optional[List[KeyValuePair]] = []
@@ -35,60 +34,3 @@ class PodTemplate(BaseResource):
     def sync_input_fields(self):
         self.name = f"{self.name}__{self.resource_id}"
         return self
-
-
-def update_system_dependencies(
-    template_id, token, system_dependencies, base_entry_cmd=None
-):
-    """
-    Updates Runpod template with system dependencies installed via apt-get,
-    and appends the app start command.
-
-    Args:
-        template_id (str): Runpod template ID.
-        token (str): Runpod API token.
-        system_dependencies (List[str]): List of apt packages to install.
-        base_entry_cmd (List[str]): The default command to run the app, e.g. ["uv", "run", "handler.py"]
-    Returns:
-        dict: API response JSON or error info.
-    """
-
-    # Compose apt-get install command if any packages specified
-    apt_cmd = ""
-    if system_dependencies:
-        joined_pkgs = " ".join(system_dependencies)
-        apt_cmd = f"apt-get update && apt-get install -y {joined_pkgs} && "
-
-    # Default start command if not provided
-    app_cmd = base_entry_cmd or ["uv", "run", "handler.py"]
-    app_cmd_str = " ".join(app_cmd)
-
-    # Full command to run in entrypoint shell
-    full_cmd = f"{apt_cmd}exec {app_cmd_str}"
-
-    payload = {
-        # other required fields like disk, env, image, etc, should be fetched or passed in real usage
-        "dockerEntrypoint": ["/bin/bash", "-c", full_cmd],
-        "dockerStartCmd": [],
-        # placeholder values, replace as needed or fetch from current template state
-        "containerDiskInGb": 50,
-        "containerRegistryAuthId": "",
-        "env": {},
-        "imageName": "your-image-name",
-        "isPublic": False,
-        "name": "your-template-name",
-        "ports": ["8888/http", "22/tcp"],
-        "readme": "",
-        "volumeInGb": 20,
-        "volumeMountPath": "/workspace",
-    }
-
-    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-    url = f"https://rest.runpod.io/v1/templates/{template_id}/update"
-    response = requests.post(url, json=payload, headers=headers)
-
-    try:
-        return response.json()
-    except Exception:
-        return {"error": "Invalid JSON response", "text": response.text}
```
`tetra_rp/core/utils/constants.py` (new file, +10):

```diff
@@ -0,0 +1,10 @@
+"""
+Constants for utility modules and caching configurations.
+
+This module contains configurable constants used across the tetra-rp codebase
+to ensure consistency and easy maintenance.
+"""
+
+# Cache key generation constants
+HASH_TRUNCATE_LENGTH = 16  # Length to truncate hash values for cache keys
+UUID_FALLBACK_LENGTH = 8  # Length to truncate UUID values for fallback keys
```
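The diff does not show where these constants are consumed, so the following is a hypothetical illustration of the truncation pattern they describe; `make_cache_key` and its input are invented for the sketch.

```python
# Hypothetical sketch: the real call sites are not part of this diff.
import hashlib
import uuid
from typing import Optional

from tetra_rp.core.utils.constants import HASH_TRUNCATE_LENGTH, UUID_FALLBACK_LENGTH

def make_cache_key(payload: Optional[str]) -> str:
    if payload:
        # Deterministic key: first HASH_TRUNCATE_LENGTH hex chars of a SHA-256 digest
        return hashlib.sha256(payload.encode()).hexdigest()[:HASH_TRUNCATE_LENGTH]
    # Fallback key when there is nothing to hash: short random UUID prefix
    return uuid.uuid4().hex[:UUID_FALLBACK_LENGTH]

print(make_cache_key("endpoint-config"))  # 16 hex chars
print(make_cache_key(None))               # 8 hex chars
```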
`tetra_rp/core/utils/file_lock.py` (new file, +260):

```diff
@@ -0,0 +1,260 @@
+"""
+Cross-platform file locking utilities.
+
+Provides unified file locking interface that works across Windows, macOS, and Linux.
+Uses platform-appropriate locking mechanisms:
+- Windows: msvcrt.locking()
+- Unix/Linux/macOS: fcntl.flock()
+- Fallback: Basic file existence checking (limited protection)
+"""
+
+import contextlib
+import logging
+import platform
+import time
+from pathlib import Path
+from typing import BinaryIO, Optional
+
+log = logging.getLogger(__name__)
+
+# Platform detection
+_IS_WINDOWS = platform.system() == "Windows"
+_IS_UNIX = platform.system() in ("Linux", "Darwin")
+
+# Initialize availability flags
+_WINDOWS_LOCKING_AVAILABLE = False
+_UNIX_LOCKING_AVAILABLE = False
+
+# Import platform-specific modules
+if _IS_WINDOWS:
+    try:
+        import msvcrt
+
+        _WINDOWS_LOCKING_AVAILABLE = True
+    except ImportError:
+        msvcrt = None
+        log.warning("msvcrt not available on Windows platform")
+
+if _IS_UNIX:
+    try:
+        import fcntl
+
+        _UNIX_LOCKING_AVAILABLE = True
+    except ImportError:
+        fcntl = None
+        log.warning("fcntl not available on Unix platform")
+
+
+class FileLockError(Exception):
+    """Exception raised when file locking operations fail."""
+
+    pass
+
+
+class FileLockTimeout(FileLockError):
+    """Exception raised when file locking times out."""
+
+    pass
+
+
+@contextlib.contextmanager
+def file_lock(
+    file_handle: BinaryIO,
+    exclusive: bool = True,
+    timeout: Optional[float] = 10.0,
+    retry_interval: float = 0.1,
+):
+    """
+    Cross-platform file locking context manager.
+
+    Args:
+        file_handle: Open file handle to lock
+        exclusive: True for exclusive lock, False for shared lock
+        timeout: Maximum seconds to wait for lock (None = no timeout)
+        retry_interval: Seconds to wait between lock attempts
+
+    Raises:
+        FileLockTimeout: If lock cannot be acquired within timeout
+        FileLockError: If locking operation fails
+
+    Usage:
+        with open("file.dat", "rb") as f:
+            with file_lock(f, exclusive=False):  # Shared read lock
+                data = f.read()
+
+        with open("file.dat", "wb") as f:
+            with file_lock(f, exclusive=True):  # Exclusive write lock
+                f.write(data)
+    """
+    lock_acquired = False
+    start_time = time.time()
+
+    try:
+        # Platform-specific locking
+        while not lock_acquired:
+            try:
+                if _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE:
+                    _acquire_windows_lock(file_handle, exclusive)
+                elif _IS_UNIX and _UNIX_LOCKING_AVAILABLE:
+                    _acquire_unix_lock(file_handle, exclusive)
+                else:
+                    # Fallback - limited protection via file existence
+                    _acquire_fallback_lock(file_handle, exclusive, timeout)
+
+                lock_acquired = True
+                log.debug(f"File lock acquired (exclusive={exclusive})")
+
+            except (OSError, IOError, FileLockError) as e:
+                # Check timeout
+                if timeout is not None and (time.time() - start_time) >= timeout:
+                    raise FileLockTimeout(
+                        f"Could not acquire file lock within {timeout} seconds: {e}"
+                    ) from e
+
+                # Retry after interval
+                time.sleep(retry_interval)
+
+        # Lock acquired successfully
+        yield
+
+    finally:
+        # Release lock
+        if lock_acquired:
+            try:
+                if _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE:
+                    _release_windows_lock(file_handle)
+                elif _IS_UNIX and _UNIX_LOCKING_AVAILABLE:
+                    _release_unix_lock(file_handle)
+                else:
+                    _release_fallback_lock(file_handle)
+
+                log.debug("File lock released")
+
+            except Exception as e:
+                log.error(f"Error releasing file lock: {e}")
+                # Don't raise - we're in cleanup
+
+
+def _acquire_windows_lock(file_handle: BinaryIO, exclusive: bool) -> None:
+    """Acquire Windows file lock using msvcrt.locking()."""
+    if not _WINDOWS_LOCKING_AVAILABLE:
+        raise FileLockError("Windows file locking not available (msvcrt missing)")
+
+    # Windows locking modes
+    if exclusive:
+        lock_mode = msvcrt.LK_NBLCK  # Non-blocking exclusive lock
+    else:
+        # Windows doesn't have shared locks in msvcrt
+        # Fall back to exclusive for compatibility
+        lock_mode = msvcrt.LK_NBLCK
+        log.debug("Windows: Using exclusive lock instead of shared (msvcrt limitation)")
+
+    try:
+        # Lock the entire file (position 0, length 1)
+        file_handle.seek(0)
+        msvcrt.locking(file_handle.fileno(), lock_mode, 1)
+    except OSError as e:
+        raise FileLockError(f"Failed to acquire Windows file lock: {e}") from e
+
+
+def _release_windows_lock(file_handle: BinaryIO) -> None:
+    """Release Windows file lock."""
+    if not _WINDOWS_LOCKING_AVAILABLE:
+        return
+
+    try:
+        file_handle.seek(0)
+        msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
+    except OSError as e:
+        raise FileLockError(f"Failed to release Windows file lock: {e}") from e
+
+
+def _acquire_unix_lock(file_handle: BinaryIO, exclusive: bool) -> None:
+    """Acquire Unix file lock using fcntl.flock()."""
+    if not _UNIX_LOCKING_AVAILABLE:
+        raise FileLockError("Unix file locking not available (fcntl missing)")
+
+    # Unix locking modes
+    if exclusive:
+        lock_mode = fcntl.LOCK_EX | fcntl.LOCK_NB  # Non-blocking exclusive
+    else:
+        lock_mode = fcntl.LOCK_SH | fcntl.LOCK_NB  # Non-blocking shared
+
+    try:
+        fcntl.flock(file_handle.fileno(), lock_mode)
+    except (OSError, IOError) as e:
+        raise FileLockError(f"Failed to acquire Unix file lock: {e}") from e
+
+
+def _release_unix_lock(file_handle: BinaryIO) -> None:
+    """Release Unix file lock."""
+    if not _UNIX_LOCKING_AVAILABLE:
+        return
+
+    try:
+        fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)
+    except (OSError, IOError) as e:
+        raise FileLockError(f"Failed to release Unix file lock: {e}") from e
+
+
+def _acquire_fallback_lock(
+    file_handle: BinaryIO, exclusive: bool, timeout: Optional[float]
+) -> None:
+    """
+    Fallback locking using lock files.
+
+    This provides minimal protection but doesn't prevent all race conditions.
+    It's better than no locking but not as robust as OS-level file locks.
+    """
+    log.warning(
+        "Using fallback file locking - limited protection against race conditions"
+    )
+
+    # Create lock file based on the original file
+    file_path = (
+        Path(file_handle.name) if hasattr(file_handle, "name") else Path("unknown")
+    )
+    lock_file = file_path.with_suffix(file_path.suffix + ".lock")
+
+    start_time = time.time()
+
+    while True:
+        try:
+            # Try to create lock file atomically
+            lock_file.touch(mode=0o600, exist_ok=False)
+            log.debug(f"Fallback lock file created: {lock_file}")
+            return
+
+        except FileExistsError:
+            # Lock file exists, check timeout
+            if timeout is not None and (time.time() - start_time) >= timeout:
+                raise FileLockError(f"Fallback lock timeout: {lock_file} exists")
+
+            # Wait and retry
+            time.sleep(0.1)
+
+
+def _release_fallback_lock(file_handle: BinaryIO) -> None:
+    """Release fallback lock by removing lock file."""
+    try:
+        file_path = (
+            Path(file_handle.name) if hasattr(file_handle, "name") else Path("unknown")
+        )
+        lock_file = file_path.with_suffix(file_path.suffix + ".lock")
+
+        if lock_file.exists():
+            lock_file.unlink()
+            log.debug(f"Fallback lock file removed: {lock_file}")
+
+    except Exception as e:
+        log.error(f"Failed to remove fallback lock file: {e}")
+
+
+def get_platform_info() -> dict:
+    """Get information about current platform and available locking mechanisms."""
+    return {
+        "platform": platform.system(),
+        "windows_locking": _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE,
+        "unix_locking": _IS_UNIX and _UNIX_LOCKING_AVAILABLE,
+        "fallback_only": not (_WINDOWS_LOCKING_AVAILABLE or _UNIX_LOCKING_AVAILABLE),
+    }
```
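Since `FileLockTimeout` is the documented failure mode of `file_lock()`, a caller would typically guard the critical section as sketched below; the file path and timeout values are illustrative.

```python
# Illustrative caller for file_lock(); "state.bin" is a made-up path.
from tetra_rp.core.utils.file_lock import FileLockTimeout, file_lock, get_platform_info

print(get_platform_info())  # which locking mechanism this host will use

try:
    with open("state.bin", "ab+") as f:
        # Wait up to 2 seconds for an exclusive lock, polling every 100 ms
        with file_lock(f, exclusive=True, timeout=2.0):
            f.write(b"payload")
except FileLockTimeout:
    # Another process held the lock for the whole window; back off or surface the error
    print("lock busy, try again later")
```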
`tetra_rp/core/utils/http.py` (new file, +67):

```diff
@@ -0,0 +1,67 @@
+"""HTTP utilities for RunPod API communication."""
+
+import os
+from typing import Optional
+
+import httpx
+import requests
+
+
+def get_authenticated_httpx_client(
+    timeout: Optional[float] = None,
+) -> httpx.AsyncClient:
+    """Create httpx AsyncClient with RunPod authentication.
+
+    Automatically includes Authorization header if RUNPOD_API_KEY is set.
+    This provides a centralized place to manage authentication headers for
+    all RunPod HTTP requests, avoiding repetitive manual header addition.
+
+    Args:
+        timeout: Request timeout in seconds. Defaults to 30.0.
+
+    Returns:
+        Configured httpx.AsyncClient with Authorization header
+
+    Example:
+        async with get_authenticated_httpx_client() as client:
+            response = await client.post(url, json=data)
+
+        # With custom timeout
+        async with get_authenticated_httpx_client(timeout=60.0) as client:
+            response = await client.get(url)
+    """
+    headers = {}
+    api_key = os.environ.get("RUNPOD_API_KEY")
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    timeout_config = timeout if timeout is not None else 30.0
+    return httpx.AsyncClient(timeout=timeout_config, headers=headers)
+
+
+def get_authenticated_requests_session() -> requests.Session:
+    """Create requests Session with RunPod authentication.
+
+    Automatically includes Authorization header if RUNPOD_API_KEY is set.
+    Provides a centralized place to manage authentication headers for
+    synchronous RunPod HTTP requests.
+
+    Returns:
+        Configured requests.Session with Authorization header
+
+    Example:
+        session = get_authenticated_requests_session()
+        response = session.post(url, json=data, timeout=30.0)
+        # Remember to close: session.close()
+
+        # Or use as context manager
+        import contextlib
+        with contextlib.closing(get_authenticated_requests_session()) as session:
+            response = session.post(url, json=data)
+    """
+    session = requests.Session()
+    api_key = os.environ.get("RUNPOD_API_KEY")
+    if api_key:
+        session.headers["Authorization"] = f"Bearer {api_key}"
+
+    return session
```
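The embedded docstring examples leave `url` and `data` undefined; a self-contained variant might look like the following, where the request URL is a placeholder rather than a documented route.

```python
# Self-contained async sketch; the URL below is a placeholder, not a verified endpoint.
import asyncio

from tetra_rp.core.utils.http import get_authenticated_httpx_client

async def ping() -> int:
    # The Authorization header is attached automatically when RUNPOD_API_KEY is set.
    async with get_authenticated_httpx_client(timeout=60.0) as client:
        response = await client.get("https://rest.runpod.io/v1/endpoints")  # hypothetical URL
        return response.status_code

print(asyncio.run(ping()))
```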
`tetra_rp/core/utils/lru_cache.py` (new file, +75):

```diff
@@ -0,0 +1,75 @@
+"""
+LRU Cache implementation using OrderedDict for memory-efficient caching with automatic eviction.
+
+This module provides a Least Recently Used (LRU) cache implementation that automatically
+manages memory by evicting the least recently used items when the cache exceeds its
+maximum size limit. It maintains O(1) access time and provides a dict-like interface.
+Thread-safe for concurrent access.
+"""
+
+import threading
+from collections import OrderedDict
+from typing import Any, Dict, Optional
+
+
+class LRUCache:
+    """
+    A Least Recently Used (LRU) cache implementation using OrderedDict.
+
+    Automatically evicts the least recently used items when the cache exceeds
+    the maximum size limit. Provides dict-like interface with O(1) operations.
+    Thread-safe for concurrent access using RLock.
+
+    Args:
+        max_size: Maximum number of items to store in cache (default: 1000)
+    """
+
+    def __init__(self, max_size: int = 1000):
+        self.max_size = max_size
+        self.cache = OrderedDict()
+        self._lock = threading.RLock()
+
+    def get(self, key: str) -> Optional[Dict[str, Any]]:
+        """Get item from cache, moving it to end (most recent) if found."""
+        with self._lock:
+            if key in self.cache:
+                self.cache.move_to_end(key)
+                return self.cache[key]
+            return None
+
+    def set(self, key: str, value: Dict[str, Any]) -> None:
+        """Set item in cache, evicting oldest if at capacity."""
+        with self._lock:
+            if key in self.cache:
+                self.cache.move_to_end(key)
+            else:
+                if len(self.cache) >= self.max_size:
+                    self.cache.popitem(last=False)  # Remove oldest
+            self.cache[key] = value
+
+    def clear(self) -> None:
+        """Clear all items from cache."""
+        with self._lock:
+            self.cache.clear()
+
+    def __contains__(self, key: str) -> bool:
+        """Check if key exists in cache."""
+        with self._lock:
+            return key in self.cache
+
+    def __len__(self) -> int:
+        """Return number of items in cache."""
+        with self._lock:
+            return len(self.cache)
+
+    def __getitem__(self, key: str) -> Dict[str, Any]:
+        """Get item using bracket notation, moving to end if found."""
+        with self._lock:
+            if key in self.cache:
+                self.cache.move_to_end(key)
+                return self.cache[key]
+            raise KeyError(key)
+
+    def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
+        """Set item using bracket notation."""
+        self.set(key, value)
```
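A quick demonstration of the eviction order the class implements; `max_size=2` is chosen only to make eviction visible.

```python
# Eviction demo for LRUCache; tiny capacity for illustration.
from tetra_rp.core.utils.lru_cache import LRUCache

cache = LRUCache(max_size=2)
cache["a"] = {"v": 1}
cache["b"] = {"v": 2}

cache.get("a")         # touch "a" so it becomes most recently used
cache["c"] = {"v": 3}  # at capacity: evicts "b", the least recently used

print("a" in cache, "b" in cache, "c" in cache)  # True False True
print(len(cache))                                # 2
```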