tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
tetra_rp/core/resources/serverless_cpu.py (new file)
@@ -0,0 +1,201 @@
+ """
+ CPU-specific serverless endpoint classes.
+
+ This module contains all CPU-related serverless functionality, separate from GPU serverless.
+ """
+
+ import hashlib
+ import json
+ from typing import List, Optional
+
+ from pydantic import field_serializer, model_validator
+
+ from .cpu import (
+     CpuInstanceType,
+     CPU_INSTANCE_DISK_LIMITS,
+     get_max_disk_size_for_instances,
+ )
+ from .serverless import ServerlessEndpoint, get_env_vars
+ from .template import KeyValuePair, PodTemplate
+
+
+ class CpuEndpointMixin:
+     """Mixin class that provides CPU-specific functionality for serverless endpoints."""
+
+     instanceIds: Optional[List[CpuInstanceType]]
+
+     def _is_cpu_endpoint(self) -> bool:
+         """Check if this is a CPU endpoint (has instanceIds)."""
+         return (
+             hasattr(self, "instanceIds")
+             and self.instanceIds is not None
+             and len(self.instanceIds) > 0
+         )
+
+     def _get_cpu_container_disk_size(self) -> Optional[int]:
+         """Get the appropriate container disk size for CPU instances."""
+         if not self._is_cpu_endpoint():
+             return None
+         return get_max_disk_size_for_instances(self.instanceIds)
+
+     def _apply_cpu_disk_sizing(self, template: PodTemplate) -> None:
+         """Apply CPU disk sizing to a template if it's using the default size."""
+         if not self._is_cpu_endpoint():
+             return
+
+         # Only auto-size if template is using the default value
+         default_disk_size = PodTemplate.model_fields["containerDiskInGb"].default
+         if template.containerDiskInGb == default_disk_size:
+             cpu_disk_size = self._get_cpu_container_disk_size()
+             if cpu_disk_size is not None:
+                 template.containerDiskInGb = cpu_disk_size
+
+     def validate_cpu_container_disk_size(self) -> None:
+         """
+         Validate that container disk size doesn't exceed limits for CPU instances.
+
+         Raises:
+             ValueError: If container disk size exceeds the limit for any CPU instance
+         """
+         if (
+             not self._is_cpu_endpoint()
+             or not hasattr(self, "template")
+             or not self.template
+             or not self.template.containerDiskInGb
+         ):
+             return
+
+         max_allowed_disk_size = self._get_cpu_container_disk_size()
+         if max_allowed_disk_size is None:
+             return
+
+         if self.template.containerDiskInGb > max_allowed_disk_size:
+             instance_limits = []
+             for instance_type in self.instanceIds:
+                 limit = CPU_INSTANCE_DISK_LIMITS[instance_type]
+                 instance_limits.append(f"{instance_type.value}: max {limit}GB")
+
+             raise ValueError(
+                 f"Container disk size {self.template.containerDiskInGb}GB exceeds the maximum "
+                 f"allowed for CPU instances. Instance limits: {', '.join(instance_limits)}. "
+                 f"Maximum allowed: {max_allowed_disk_size}GB"
+             )
+
+     def _sync_cpu_fields(self):
+         """Sync CPU-specific fields, overriding GPU defaults."""
+         # Override GPU-specific fields for CPU
+         if hasattr(self, "gpuCount"):
+             self.gpuCount = 0
+         if hasattr(self, "allowedCudaVersions"):
+             self.allowedCudaVersions = ""
+         if hasattr(self, "gpuIds"):
+             self.gpuIds = ""
+
+     @field_serializer("instanceIds")
+     def serialize_instance_ids(
+         self, value: Optional[List[CpuInstanceType]]
+     ) -> Optional[List[str]]:
+         """Convert CpuInstanceType enums to strings."""
+         if value is None:
+             return None
+         return [item.value if hasattr(item, "value") else str(item) for item in value]
+
+
+ class CpuServerlessEndpoint(CpuEndpointMixin, ServerlessEndpoint):
+     """
+     CPU-only serverless endpoint with automatic disk sizing and validation.
+     Represents a CPU-only serverless endpoint distinct from a live serverless.
+     """
+
+     # CPU endpoints don't use GPU-specific fields, so exclude them from API payload
+     # This prevents the RunPod GraphQL API from rejecting CPU endpoints with GPU-specific fields
+     # Note: instanceIds is NOT in _input_only, so it will be sent to the API
+     _input_only = {
+         "id",
+         "cudaVersions",  # GPU-specific, exclude from API payload
+         "datacenter",
+         "env",
+         "gpus",  # Inherited from parent, but always None for CPU endpoints
+         "gpuIds",  # GPU-specific API field, exclude from payload
+         "gpuCount",  # GPU-specific API field, exclude from payload
+         "allowedCudaVersions",  # GPU-specific API field, exclude from payload
+         "flashboot",
+         "flashEnvironmentId",
+         "imageName",
+         "networkVolume",
+     }
+
+     # Override GPU field from parent to None for CPU endpoints
+     gpus: Optional[List] = None
+     instanceIds: Optional[List[CpuInstanceType]] = [CpuInstanceType.CPU3G_2_8]
+
+     @property
+     def config_hash(self) -> str:
+         """Get hash of current configuration excluding GPU-specific fields.
+
+         CPU endpoints need GPU fields in _input_only to exclude them from API payload,
+         but these fields should not be included in config_hash to avoid false drift
+         detection. This override computes the hash using only CPU-relevant fields.
+         """
+         # CPU-relevant fields for config hash, excluding 'env' to prevent false drift
+         # (env is dynamically computed from .env file at initialization time)
+         cpu_fields = {
+             "datacenter",
+             "flashboot",
+             "flashEnvironmentId",
+             "imageName",
+             "gpus",
+             "networkVolume",
+         }
+         config_dict = self.model_dump(
+             exclude_none=True, include=cpu_fields, mode="json"
+         )
+         config_str = json.dumps(config_dict, sort_keys=True)
+         hash_obj = hashlib.md5(f"{self.__class__.__name__}:{config_str}".encode())
+         return hash_obj.hexdigest()
+
+     def _create_new_template(self) -> PodTemplate:
+         """Create a new PodTemplate with CPU-appropriate disk sizing."""
+         template = PodTemplate(
+             name=self.resource_id,
+             imageName=self.imageName,
+             env=KeyValuePair.from_dict(self.env or get_env_vars()),
+         )
+         # Apply CPU-specific disk sizing
+         self._apply_cpu_disk_sizing(template)
+         return template
+
+     def _configure_existing_template(self) -> None:
+         """Configure an existing template with necessary overrides and CPU sizing."""
+         if self.template is None:
+             return
+
+         self.template.name = f"{self.resource_id}__{self.template.resource_id}"
+
+         if self.imageName:
+             self.template.imageName = self.imageName
+         if self.env:
+             self.template.env = KeyValuePair.from_dict(self.env)
+
+         # Apply CPU-specific disk sizing
+         self._apply_cpu_disk_sizing(self.template)
+
+     @model_validator(mode="after")
+     def set_serverless_template(self):
+         # Sync CPU-specific fields first
+         self._sync_cpu_fields()
+
+         if not any([self.imageName, self.template, self.templateId]):
+             raise ValueError(
+                 "Either imageName, template, or templateId must be provided"
+             )
+
+         if not self.templateId and not self.template:
+             self.template = self._create_new_template()
+         elif self.template:
+             self._configure_existing_template()
+
+         # Validate container disk size for CPU instances
+         self.validate_cpu_container_disk_size()
+
+         return self
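For orientation, a minimal, hypothetical usage sketch of the new class: the import paths come from the file list above, but the `name` field and image value are placeholders assumed from the parent `ServerlessEndpoint`, not confirmed by this diff.

```python
# Hypothetical sketch based on the hunk above; `name` and the image tag are
# assumed placeholders.
from tetra_rp.core.resources.serverless_cpu import CpuServerlessEndpoint
from tetra_rp.core.resources.cpu import CpuInstanceType

endpoint = CpuServerlessEndpoint(
    name="example-cpu",                       # assumed inherited field
    imageName="example/image:latest",         # placeholder image
    instanceIds=[CpuInstanceType.CPU3G_2_8],  # same as the class default
)
# set_serverless_template() runs as a pydantic model validator: it zeroes the
# GPU fields, builds a PodTemplate from imageName, applies CPU disk sizing,
# and validates containerDiskInGb against CPU_INSTANCE_DISK_LIMITS.
```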
tetra_rp/core/resources/template.py
@@ -1,4 +1,3 @@
- import requests
  from typing import Dict, List, Optional, Any
  from pydantic import BaseModel, model_validator
  from .base import BaseResource
@@ -22,7 +21,7 @@ class KeyValuePair(BaseModel):
  class PodTemplate(BaseResource):
      advancedStart: Optional[bool] = False
      config: Optional[Dict[str, Any]] = {}
-     containerDiskInGb: Optional[int] = 10
+     containerDiskInGb: Optional[int] = 64
      containerRegistryAuthId: Optional[str] = ""
      dockerArgs: Optional[str] = ""
      env: Optional[List[KeyValuePair]] = []
@@ -35,60 +34,3 @@ class PodTemplate(BaseResource):
      def sync_input_fields(self):
          self.name = f"{self.name}__{self.resource_id}"
          return self
-
-
- def update_system_dependencies(
-     template_id, token, system_dependencies, base_entry_cmd=None
- ):
-     """
-     Updates Runpod template with system dependencies installed via apt-get,
-     and appends the app start command.
-
-     Args:
-         template_id (str): Runpod template ID.
-         token (str): Runpod API token.
-         system_dependencies (List[str]): List of apt packages to install.
-         base_entry_cmd (List[str]): The default command to run the app, e.g. ["uv", "run", "handler.py"]
-     Returns:
-         dict: API response JSON or error info.
-     """
-
-     # Compose apt-get install command if any packages specified
-     apt_cmd = ""
-     if system_dependencies:
-         joined_pkgs = " ".join(system_dependencies)
-         apt_cmd = f"apt-get update && apt-get install -y {joined_pkgs} && "
-
-     # Default start command if not provided
-     app_cmd = base_entry_cmd or ["uv", "run", "handler.py"]
-     app_cmd_str = " ".join(app_cmd)
-
-     # Full command to run in entrypoint shell
-     full_cmd = f"{apt_cmd}exec {app_cmd_str}"
-
-     payload = {
-         # other required fields like disk, env, image, etc, should be fetched or passed in real usage
-         "dockerEntrypoint": ["/bin/bash", "-c", full_cmd],
-         "dockerStartCmd": [],
-         # placeholder values, replace as needed or fetch from current template state
-         "containerDiskInGb": 50,
-         "containerRegistryAuthId": "",
-         "env": {},
-         "imageName": "your-image-name",
-         "isPublic": False,
-         "name": "your-template-name",
-         "ports": ["8888/http", "22/tcp"],
-         "readme": "",
-         "volumeInGb": 20,
-         "volumeMountPath": "/workspace",
-     }
-
-     headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-
-     url = f"https://rest.runpod.io/v1/templates/{template_id}/update"
-     response = requests.post(url, json=payload, headers=headers)
-
-     try:
-         return response.json()
-     except Exception:
-         return {"error": "Invalid JSON response", "text": response.text}
tetra_rp/core/utils/constants.py (new file)
@@ -0,0 +1,10 @@
+ """
+ Constants for utility modules and caching configurations.
+
+ This module contains configurable constants used across the tetra-rp codebase
+ to ensure consistency and easy maintenance.
+ """
+
+ # Cache key generation constants
+ HASH_TRUNCATE_LENGTH = 16  # Length to truncate hash values for cache keys
+ UUID_FALLBACK_LENGTH = 8  # Length to truncate UUID values for fallback keys
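A hedged sketch of how these constants could feed cache-key generation; `make_cache_key` is illustrative and not part of the package:

```python
import hashlib
import uuid
from typing import Optional

from tetra_rp.core.utils.constants import (
    HASH_TRUNCATE_LENGTH,
    UUID_FALLBACK_LENGTH,
)


def make_cache_key(payload: Optional[str]) -> str:
    """Illustrative helper: truncated digest, or a short UUID as fallback."""
    if payload is not None:
        digest = hashlib.sha256(payload.encode()).hexdigest()
        return digest[:HASH_TRUNCATE_LENGTH]  # 16-character cache key
    return uuid.uuid4().hex[:UUID_FALLBACK_LENGTH]  # 8-character fallback key
```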
tetra_rp/core/utils/file_lock.py (new file)
@@ -0,0 +1,260 @@
+ """
+ Cross-platform file locking utilities.
+
+ Provides unified file locking interface that works across Windows, macOS, and Linux.
+ Uses platform-appropriate locking mechanisms:
+ - Windows: msvcrt.locking()
+ - Unix/Linux/macOS: fcntl.flock()
+ - Fallback: Basic file existence checking (limited protection)
+ """
+
+ import contextlib
+ import logging
+ import platform
+ import time
+ from pathlib import Path
+ from typing import BinaryIO, Optional
+
+ log = logging.getLogger(__name__)
+
+ # Platform detection
+ _IS_WINDOWS = platform.system() == "Windows"
+ _IS_UNIX = platform.system() in ("Linux", "Darwin")
+
+ # Initialize availability flags
+ _WINDOWS_LOCKING_AVAILABLE = False
+ _UNIX_LOCKING_AVAILABLE = False
+
+ # Import platform-specific modules
+ if _IS_WINDOWS:
+     try:
+         import msvcrt
+
+         _WINDOWS_LOCKING_AVAILABLE = True
+     except ImportError:
+         msvcrt = None
+         log.warning("msvcrt not available on Windows platform")
+
+ if _IS_UNIX:
+     try:
+         import fcntl
+
+         _UNIX_LOCKING_AVAILABLE = True
+     except ImportError:
+         fcntl = None
+         log.warning("fcntl not available on Unix platform")
+
+
+ class FileLockError(Exception):
+     """Exception raised when file locking operations fail."""
+
+     pass
+
+
+ class FileLockTimeout(FileLockError):
+     """Exception raised when file locking times out."""
+
+     pass
+
+
+ @contextlib.contextmanager
+ def file_lock(
+     file_handle: BinaryIO,
+     exclusive: bool = True,
+     timeout: Optional[float] = 10.0,
+     retry_interval: float = 0.1,
+ ):
+     """
+     Cross-platform file locking context manager.
+
+     Args:
+         file_handle: Open file handle to lock
+         exclusive: True for exclusive lock, False for shared lock
+         timeout: Maximum seconds to wait for lock (None = no timeout)
+         retry_interval: Seconds to wait between lock attempts
+
+     Raises:
+         FileLockTimeout: If lock cannot be acquired within timeout
+         FileLockError: If locking operation fails
+
+     Usage:
+         with open("file.dat", "rb") as f:
+             with file_lock(f, exclusive=False):  # Shared read lock
+                 data = f.read()
+
+         with open("file.dat", "wb") as f:
+             with file_lock(f, exclusive=True):  # Exclusive write lock
+                 f.write(data)
+     """
+     lock_acquired = False
+     start_time = time.time()
+
+     try:
+         # Platform-specific locking
+         while not lock_acquired:
+             try:
+                 if _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE:
+                     _acquire_windows_lock(file_handle, exclusive)
+                 elif _IS_UNIX and _UNIX_LOCKING_AVAILABLE:
+                     _acquire_unix_lock(file_handle, exclusive)
+                 else:
+                     # Fallback - limited protection via file existence
+                     _acquire_fallback_lock(file_handle, exclusive, timeout)
+
+                 lock_acquired = True
+                 log.debug(f"File lock acquired (exclusive={exclusive})")
+
+             except (OSError, IOError, FileLockError) as e:
+                 # Check timeout
+                 if timeout is not None and (time.time() - start_time) >= timeout:
+                     raise FileLockTimeout(
+                         f"Could not acquire file lock within {timeout} seconds: {e}"
+                     ) from e
+
+                 # Retry after interval
+                 time.sleep(retry_interval)
+
+         # Lock acquired successfully
+         yield
+
+     finally:
+         # Release lock
+         if lock_acquired:
+             try:
+                 if _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE:
+                     _release_windows_lock(file_handle)
+                 elif _IS_UNIX and _UNIX_LOCKING_AVAILABLE:
+                     _release_unix_lock(file_handle)
+                 else:
+                     _release_fallback_lock(file_handle)
+
+                 log.debug("File lock released")
+
+             except Exception as e:
+                 log.error(f"Error releasing file lock: {e}")
+                 # Don't raise - we're in cleanup
+
+
+ def _acquire_windows_lock(file_handle: BinaryIO, exclusive: bool) -> None:
+     """Acquire Windows file lock using msvcrt.locking()."""
+     if not _WINDOWS_LOCKING_AVAILABLE:
+         raise FileLockError("Windows file locking not available (msvcrt missing)")
+
+     # Windows locking modes
+     if exclusive:
+         lock_mode = msvcrt.LK_NBLCK  # Non-blocking exclusive lock
+     else:
+         # Windows doesn't have shared locks in msvcrt
+         # Fall back to exclusive for compatibility
+         lock_mode = msvcrt.LK_NBLCK
+         log.debug("Windows: Using exclusive lock instead of shared (msvcrt limitation)")
+
+     try:
+         # Lock the entire file (position 0, length 1)
+         file_handle.seek(0)
+         msvcrt.locking(file_handle.fileno(), lock_mode, 1)
+     except OSError as e:
+         raise FileLockError(f"Failed to acquire Windows file lock: {e}") from e
+
+
+ def _release_windows_lock(file_handle: BinaryIO) -> None:
+     """Release Windows file lock."""
+     if not _WINDOWS_LOCKING_AVAILABLE:
+         return
+
+     try:
+         file_handle.seek(0)
+         msvcrt.locking(file_handle.fileno(), msvcrt.LK_UNLCK, 1)
+     except OSError as e:
+         raise FileLockError(f"Failed to release Windows file lock: {e}") from e
+
+
+ def _acquire_unix_lock(file_handle: BinaryIO, exclusive: bool) -> None:
+     """Acquire Unix file lock using fcntl.flock()."""
+     if not _UNIX_LOCKING_AVAILABLE:
+         raise FileLockError("Unix file locking not available (fcntl missing)")
+
+     # Unix locking modes
+     if exclusive:
+         lock_mode = fcntl.LOCK_EX | fcntl.LOCK_NB  # Non-blocking exclusive
+     else:
+         lock_mode = fcntl.LOCK_SH | fcntl.LOCK_NB  # Non-blocking shared
+
+     try:
+         fcntl.flock(file_handle.fileno(), lock_mode)
+     except (OSError, IOError) as e:
+         raise FileLockError(f"Failed to acquire Unix file lock: {e}") from e
+
+
+ def _release_unix_lock(file_handle: BinaryIO) -> None:
+     """Release Unix file lock."""
+     if not _UNIX_LOCKING_AVAILABLE:
+         return
+
+     try:
+         fcntl.flock(file_handle.fileno(), fcntl.LOCK_UN)
+     except (OSError, IOError) as e:
+         raise FileLockError(f"Failed to release Unix file lock: {e}") from e
+
+
+ def _acquire_fallback_lock(
+     file_handle: BinaryIO, exclusive: bool, timeout: Optional[float]
+ ) -> None:
+     """
+     Fallback locking using lock files.
+
+     This provides minimal protection but doesn't prevent all race conditions.
+     It's better than no locking but not as robust as OS-level file locks.
+     """
+     log.warning(
+         "Using fallback file locking - limited protection against race conditions"
+     )
+
+     # Create lock file based on the original file
+     file_path = (
+         Path(file_handle.name) if hasattr(file_handle, "name") else Path("unknown")
+     )
+     lock_file = file_path.with_suffix(file_path.suffix + ".lock")
+
+     start_time = time.time()
+
+     while True:
+         try:
+             # Try to create lock file atomically
+             lock_file.touch(mode=0o600, exist_ok=False)
+             log.debug(f"Fallback lock file created: {lock_file}")
+             return
+
+         except FileExistsError:
+             # Lock file exists, check timeout
+             if timeout is not None and (time.time() - start_time) >= timeout:
+                 raise FileLockError(f"Fallback lock timeout: {lock_file} exists")
+
+             # Wait and retry
+             time.sleep(0.1)
+
+
+ def _release_fallback_lock(file_handle: BinaryIO) -> None:
+     """Release fallback lock by removing lock file."""
+     try:
+         file_path = (
+             Path(file_handle.name) if hasattr(file_handle, "name") else Path("unknown")
+         )
+         lock_file = file_path.with_suffix(file_path.suffix + ".lock")
+
+         if lock_file.exists():
+             lock_file.unlink()
+             log.debug(f"Fallback lock file removed: {lock_file}")
+
+     except Exception as e:
+         log.error(f"Failed to remove fallback lock file: {e}")
+
+
+ def get_platform_info() -> dict:
+     """Get information about current platform and available locking mechanisms."""
+     return {
+         "platform": platform.system(),
+         "windows_locking": _IS_WINDOWS and _WINDOWS_LOCKING_AVAILABLE,
+         "unix_locking": _IS_UNIX and _UNIX_LOCKING_AVAILABLE,
+         "fallback_only": not (_WINDOWS_LOCKING_AVAILABLE or _UNIX_LOCKING_AVAILABLE),
+     }
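The module's own docstring shows shared and exclusive usage; a compact sketch combining it with `get_platform_info()` (the file name is a placeholder):

```python
from tetra_rp.core.utils.file_lock import FileLockTimeout, file_lock, get_platform_info

print(get_platform_info())  # shows which locking backend is in effect

try:
    with open("state.bin", "wb") as f:
        # Exclusive lock; retries every 0.1 s, gives up after 5 s
        with file_lock(f, exclusive=True, timeout=5.0):
            f.write(b"serialized state")
except FileLockTimeout:
    print("another process holds the lock")
```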
tetra_rp/core/utils/http.py (new file)
@@ -0,0 +1,67 @@
+ """HTTP utilities for RunPod API communication."""
+
+ import os
+ from typing import Optional
+
+ import httpx
+ import requests
+
+
+ def get_authenticated_httpx_client(
+     timeout: Optional[float] = None,
+ ) -> httpx.AsyncClient:
+     """Create httpx AsyncClient with RunPod authentication.
+
+     Automatically includes Authorization header if RUNPOD_API_KEY is set.
+     This provides a centralized place to manage authentication headers for
+     all RunPod HTTP requests, avoiding repetitive manual header addition.
+
+     Args:
+         timeout: Request timeout in seconds. Defaults to 30.0.
+
+     Returns:
+         Configured httpx.AsyncClient with Authorization header
+
+     Example:
+         async with get_authenticated_httpx_client() as client:
+             response = await client.post(url, json=data)
+
+         # With custom timeout
+         async with get_authenticated_httpx_client(timeout=60.0) as client:
+             response = await client.get(url)
+     """
+     headers = {}
+     api_key = os.environ.get("RUNPOD_API_KEY")
+     if api_key:
+         headers["Authorization"] = f"Bearer {api_key}"
+
+     timeout_config = timeout if timeout is not None else 30.0
+     return httpx.AsyncClient(timeout=timeout_config, headers=headers)
+
+
+ def get_authenticated_requests_session() -> requests.Session:
+     """Create requests Session with RunPod authentication.
+
+     Automatically includes Authorization header if RUNPOD_API_KEY is set.
+     Provides a centralized place to manage authentication headers for
+     synchronous RunPod HTTP requests.
+
+     Returns:
+         Configured requests.Session with Authorization header
+
+     Example:
+         session = get_authenticated_requests_session()
+         response = session.post(url, json=data, timeout=30.0)
+         # Remember to close: session.close()
+
+         # Or use as context manager
+         import contextlib
+         with contextlib.closing(get_authenticated_requests_session()) as session:
+             response = session.post(url, json=data)
+     """
+     session = requests.Session()
+     api_key = os.environ.get("RUNPOD_API_KEY")
+     if api_key:
+         session.headers["Authorization"] = f"Bearer {api_key}"
+
+     return session
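A short async sketch of the httpx helper in context; the URL is a placeholder, and the Bearer header is only attached when `RUNPOD_API_KEY` is set in the environment:

```python
import asyncio

from tetra_rp.core.utils.http import get_authenticated_httpx_client


async def fetch_status(url: str) -> int:
    # Authorization header is added automatically from RUNPOD_API_KEY
    async with get_authenticated_httpx_client(timeout=60.0) as client:
        response = await client.get(url)
        return response.status_code


print(asyncio.run(fetch_status("https://example.com/health")))  # placeholder URL
```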
tetra_rp/core/utils/lru_cache.py (new file)
@@ -0,0 +1,75 @@
+ """
+ LRU Cache implementation using OrderedDict for memory-efficient caching with automatic eviction.
+
+ This module provides a Least Recently Used (LRU) cache implementation that automatically
+ manages memory by evicting the least recently used items when the cache exceeds its
+ maximum size limit. It maintains O(1) access time and provides a dict-like interface.
+ Thread-safe for concurrent access.
+ """
+
+ import threading
+ from collections import OrderedDict
+ from typing import Any, Dict, Optional
+
+
+ class LRUCache:
+     """
+     A Least Recently Used (LRU) cache implementation using OrderedDict.
+
+     Automatically evicts the least recently used items when the cache exceeds
+     the maximum size limit. Provides dict-like interface with O(1) operations.
+     Thread-safe for concurrent access using RLock.
+
+     Args:
+         max_size: Maximum number of items to store in cache (default: 1000)
+     """
+
+     def __init__(self, max_size: int = 1000):
+         self.max_size = max_size
+         self.cache = OrderedDict()
+         self._lock = threading.RLock()
+
+     def get(self, key: str) -> Optional[Dict[str, Any]]:
+         """Get item from cache, moving it to end (most recent) if found."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+                 return self.cache[key]
+             return None
+
+     def set(self, key: str, value: Dict[str, Any]) -> None:
+         """Set item in cache, evicting oldest if at capacity."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+             else:
+                 if len(self.cache) >= self.max_size:
+                     self.cache.popitem(last=False)  # Remove oldest
+             self.cache[key] = value
+
+     def clear(self) -> None:
+         """Clear all items from cache."""
+         with self._lock:
+             self.cache.clear()
+
+     def __contains__(self, key: str) -> bool:
+         """Check if key exists in cache."""
+         with self._lock:
+             return key in self.cache
+
+     def __len__(self) -> int:
+         """Return number of items in cache."""
+         with self._lock:
+             return len(self.cache)
+
+     def __getitem__(self, key: str) -> Dict[str, Any]:
+         """Get item using bracket notation, moving to end if found."""
+         with self._lock:
+             if key in self.cache:
+                 self.cache.move_to_end(key)
+                 return self.cache[key]
+             raise KeyError(key)
+
+     def __setitem__(self, key: str, value: Dict[str, Any]) -> None:
+         """Set item using bracket notation."""
+         self.set(key, value)
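A quick eviction walkthrough of the cache above; keys and values are illustrative:

```python
from tetra_rp.core.utils.lru_cache import LRUCache

cache = LRUCache(max_size=2)
cache["a"] = {"hits": 1}
cache["b"] = {"hits": 2}
_ = cache["a"]            # touching "a" makes "b" the least recently used
cache["c"] = {"hits": 3}  # at capacity: evicts "b"

assert "b" not in cache and len(cache) == 2
assert cache.get("b") is None  # get() returns None rather than raising
```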