tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tetra_rp/__init__.py +109 -19
- tetra_rp/cli/commands/__init__.py +1 -0
- tetra_rp/cli/commands/apps.py +143 -0
- tetra_rp/cli/commands/build.py +1082 -0
- tetra_rp/cli/commands/build_utils/__init__.py +1 -0
- tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
- tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
- tetra_rp/cli/commands/build_utils/manifest.py +430 -0
- tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
- tetra_rp/cli/commands/build_utils/scanner.py +596 -0
- tetra_rp/cli/commands/deploy.py +580 -0
- tetra_rp/cli/commands/init.py +123 -0
- tetra_rp/cli/commands/resource.py +108 -0
- tetra_rp/cli/commands/run.py +296 -0
- tetra_rp/cli/commands/test_mothership.py +458 -0
- tetra_rp/cli/commands/undeploy.py +533 -0
- tetra_rp/cli/main.py +97 -0
- tetra_rp/cli/utils/__init__.py +1 -0
- tetra_rp/cli/utils/app.py +15 -0
- tetra_rp/cli/utils/conda.py +127 -0
- tetra_rp/cli/utils/deployment.py +530 -0
- tetra_rp/cli/utils/ignore.py +143 -0
- tetra_rp/cli/utils/skeleton.py +184 -0
- tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
- tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
- tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
- tetra_rp/cli/utils/skeleton_template/README.md +263 -0
- tetra_rp/cli/utils/skeleton_template/main.py +44 -0
- tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
- tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
- tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
- tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
- tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
- tetra_rp/client.py +136 -33
- tetra_rp/config.py +29 -0
- tetra_rp/core/api/runpod.py +591 -39
- tetra_rp/core/deployment.py +232 -0
- tetra_rp/core/discovery.py +425 -0
- tetra_rp/core/exceptions.py +50 -0
- tetra_rp/core/resources/__init__.py +27 -9
- tetra_rp/core/resources/app.py +738 -0
- tetra_rp/core/resources/base.py +139 -4
- tetra_rp/core/resources/constants.py +21 -0
- tetra_rp/core/resources/cpu.py +115 -13
- tetra_rp/core/resources/gpu.py +182 -16
- tetra_rp/core/resources/live_serverless.py +153 -16
- tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
- tetra_rp/core/resources/network_volume.py +126 -31
- tetra_rp/core/resources/resource_manager.py +436 -35
- tetra_rp/core/resources/serverless.py +537 -120
- tetra_rp/core/resources/serverless_cpu.py +201 -0
- tetra_rp/core/resources/template.py +1 -59
- tetra_rp/core/utils/constants.py +10 -0
- tetra_rp/core/utils/file_lock.py +260 -0
- tetra_rp/core/utils/http.py +67 -0
- tetra_rp/core/utils/lru_cache.py +75 -0
- tetra_rp/core/utils/singleton.py +36 -1
- tetra_rp/core/validation.py +44 -0
- tetra_rp/execute_class.py +301 -0
- tetra_rp/protos/remote_execution.py +98 -9
- tetra_rp/runtime/__init__.py +1 -0
- tetra_rp/runtime/circuit_breaker.py +274 -0
- tetra_rp/runtime/config.py +12 -0
- tetra_rp/runtime/exceptions.py +49 -0
- tetra_rp/runtime/generic_handler.py +206 -0
- tetra_rp/runtime/lb_handler.py +189 -0
- tetra_rp/runtime/load_balancer.py +160 -0
- tetra_rp/runtime/manifest_fetcher.py +192 -0
- tetra_rp/runtime/metrics.py +325 -0
- tetra_rp/runtime/models.py +73 -0
- tetra_rp/runtime/mothership_provisioner.py +512 -0
- tetra_rp/runtime/production_wrapper.py +266 -0
- tetra_rp/runtime/reliability_config.py +149 -0
- tetra_rp/runtime/retry_manager.py +118 -0
- tetra_rp/runtime/serialization.py +124 -0
- tetra_rp/runtime/service_registry.py +346 -0
- tetra_rp/runtime/state_manager_client.py +248 -0
- tetra_rp/stubs/live_serverless.py +35 -17
- tetra_rp/stubs/load_balancer_sls.py +357 -0
- tetra_rp/stubs/registry.py +145 -19
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
- tetra_rp-0.24.0.dist-info/RECORD +99 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
- tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
- tetra_rp/core/pool/cluster_manager.py +0 -177
- tetra_rp/core/pool/dataclass.py +0 -18
- tetra_rp/core/pool/ex.py +0 -38
- tetra_rp/core/pool/job.py +0 -22
- tetra_rp/core/pool/worker.py +0 -19
- tetra_rp/core/resources/utils.py +0 -50
- tetra_rp/core/utils/json.py +0 -33
- tetra_rp-0.6.0.dist-info/RECORD +0 -39
- /tetra_rp/{core/pool → cli}/__init__.py +0 -0
- {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import requests
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from typing import Dict, Optional, Union, Tuple, TYPE_CHECKING, Any, List
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
from ..api.runpod import RunpodGraphQLClient
|
|
9
|
+
from ..resources.resource_manager import ResourceManager
|
|
10
|
+
from ..resources.serverless import ServerlessEndpoint, NetworkVolume
|
|
11
|
+
from ..resources.constants import (
|
|
12
|
+
TARBALL_CONTENT_TYPE,
|
|
13
|
+
MAX_TARBALL_SIZE_MB,
|
|
14
|
+
VALID_TARBALL_EXTENSIONS,
|
|
15
|
+
GZIP_MAGIC_BYTES,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from . import ServerlessResource
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class FlashAppError(Exception):
    """Root of the exception hierarchy for Flash app operations.

    The more specific ``Flash*NotFoundError`` classes in this module derive
    from it, so catching this type handles all of them.
    """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class FlashAppNotFoundError(FlashAppError):
    """Signals that a lookup for a Flash app returned nothing usable."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class FlashEnvironmentNotFoundError(FlashAppError):
    """Signals that a Flash environment could not be located."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class FlashBuildNotFoundError(FlashAppError):
    """Signals that a Flash build could not be located."""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _validate_exclusive_params(
|
|
49
|
+
param1: Any, param2: Any, name1: str, name2: str
|
|
50
|
+
) -> None:
|
|
51
|
+
"""Validate that exactly one of two parameters is provided (XOR).
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
param1: First parameter value
|
|
55
|
+
param2: Second parameter value
|
|
56
|
+
name1: Name of first parameter (for error message)
|
|
57
|
+
name2: Name of second parameter (for error message)
|
|
58
|
+
|
|
59
|
+
Raises:
|
|
60
|
+
ValueError: If both or neither parameters are provided
|
|
61
|
+
"""
|
|
62
|
+
if (not param1 and not param2) or (param1 and param2):
|
|
63
|
+
raise ValueError(f"Provide exactly one of {name1} or {name2}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _validate_tarball_file(tar_path: Path) -> None:
    """Run pre-upload sanity checks on a build tarball.

    The checks run in this order and stop at the first failure:

    1. the path exists,
    2. the path is a regular file,
    3. the file name ends with one of ``VALID_TARBALL_EXTENSIONS``,
    4. the first two bytes equal ``GZIP_MAGIC_BYTES``,
    5. the size does not exceed ``MAX_TARBALL_SIZE_MB`` (1 MB = 1024 * 1024 bytes).

    Args:
        tar_path: Location of the tarball on disk.

    Raises:
        FileNotFoundError: If nothing exists at ``tar_path``.
        ValueError: If the path is not a file, or the extension, magic bytes
            or size check fails.
    """
    if not tar_path.exists():
        raise FileNotFoundError(f"Tarball file not found: {tar_path}")

    if not tar_path.is_file():
        raise ValueError(f"Path is not a file: {tar_path}")

    # Match on the file name only so that dots in parent directories are
    # irrelevant; str.endswith accepts a tuple of candidate suffixes.
    if not tar_path.name.endswith(tuple(VALID_TARBALL_EXTENSIONS)):
        raise ValueError(
            f"Invalid file extension. Expected one of {VALID_TARBALL_EXTENSIONS}, "
            f"got: {tar_path.suffix}"
        )

    # Only the two leading signature bytes are needed.
    with tar_path.open("rb") as handle:
        header = handle.read(2)
    signature_matches = (
        len(header) >= 2 and (header[0], header[1]) == GZIP_MAGIC_BYTES
    )
    if not signature_matches:
        raise ValueError(
            f"File is not a valid gzip file. Expected magic bytes "
            f"{GZIP_MAGIC_BYTES}, got: {tuple(header) if header else 'empty file'}"
        )

    size_mb = tar_path.stat().st_size / (1024 * 1024)
    if size_mb > MAX_TARBALL_SIZE_MB:
        raise ValueError(
            f"Tarball exceeds maximum size. "
            f"File size: {size_mb:.2f}MB, Max: {MAX_TARBALL_SIZE_MB}MB"
        )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class FlashApp:
|
|
117
|
+
"""Flash app resource for managing applications, environments, and builds.
|
|
118
|
+
|
|
119
|
+
FlashApp provides the interface for Flash application lifecycle management including:
|
|
120
|
+
- Creating and managing flash apps
|
|
121
|
+
- Managing environments within apps
|
|
122
|
+
- Uploading and deploying builds
|
|
123
|
+
- Registering endpoints and network volumes to environments
|
|
124
|
+
|
|
125
|
+
Lifecycle Management:
|
|
126
|
+
- Constructor (__init__): Creates instance without I/O by default
|
|
127
|
+
- Factory methods (from_name, create, get_or_create): Recommended for async contexts
|
|
128
|
+
- Hydration: Lazy-loads app ID from server via _hydrate()
|
|
129
|
+
- All API methods call _hydrate() automatically before execution
|
|
130
|
+
|
|
131
|
+
Thread Safety:
|
|
132
|
+
- Hydration is protected by asyncio.Lock to prevent concurrent API calls
|
|
133
|
+
- Safe to call _hydrate() multiple times from different coroutines
|
|
134
|
+
- All async methods are safe for concurrent use after hydration
|
|
135
|
+
|
|
136
|
+
Usage Patterns:
|
|
137
|
+
# Factory method (recommended in async context)
|
|
138
|
+
app = await FlashApp.from_name("my-app")
|
|
139
|
+
|
|
140
|
+
# Constructor with eager hydration (blocks, creates event loop)
|
|
141
|
+
app = FlashApp("my-app", eager_hydrate=True)
|
|
142
|
+
|
|
143
|
+
# Constructor without hydration (deferred until first API call)
|
|
144
|
+
app = FlashApp("my-app")
|
|
145
|
+
await app._hydrate() # Explicit hydration
|
|
146
|
+
|
|
147
|
+
GraphQL Query Philosophy:
|
|
148
|
+
- List operations fetch only top-level attributes
|
|
149
|
+
- Child resources queried separately by ID or name
|
|
150
|
+
- Direct queries fetch one level deeper (app + envs/builds, not env resources)
|
|
151
|
+
"""
|
|
152
|
+
|
|
153
|
+
def __init__(self, name: str, id: Optional[str] = "", eager_hydrate: bool = False):
|
|
154
|
+
self.name: str = name
|
|
155
|
+
self.id: Optional[str] = id
|
|
156
|
+
self.resources: Dict[str, "ServerlessResource"] = {}
|
|
157
|
+
self._hydrated = False
|
|
158
|
+
self._hydrate_lock = asyncio.Lock()
|
|
159
|
+
if eager_hydrate:
|
|
160
|
+
asyncio.run(self._hydrate())
|
|
161
|
+
|
|
162
|
+
def remote(self, *args, **kwargs):
    """Wrap ``tetra_rp.client.remote`` and remember the resource it targets.

    The resource configuration is taken from the ``resource_config`` keyword
    if given, otherwise from the first positional argument. If that object
    exposes a ``resource_id`` attribute it is stored in ``self.resources``
    under that ID so that ``deploy_resources()`` can deploy it later.

    Previously only the positional form was registered; a resource passed as
    ``resource_config=...`` was silently left out of ``self.resources``.

    Args:
        *args: Forwarded unchanged to ``tetra_rp.client.remote``.
        **kwargs: Forwarded unchanged to ``tetra_rp.client.remote``.

    Returns:
        Whatever ``tetra_rp.client.remote`` returns for the same arguments.
    """
    # Imported lazily, as in the original -- presumably to avoid a circular
    # import between the client and resources packages (TODO confirm).
    from tetra_rp.client import remote as remote_decorator

    candidate = kwargs.get("resource_config")
    if candidate is None and args:
        candidate = args[0]

    if candidate is not None and hasattr(candidate, "resource_id"):
        self.resources[candidate.resource_id] = candidate

    return remote_decorator(*args, **kwargs)
|
|
173
|
+
|
|
174
|
+
async def _hydrate(self) -> None:
    """Resolve this app against the server, creating it when it is missing.

    The app is looked up by ``self.name``:

    * found, no local ID: the server ID is adopted;
    * found, local ID set: the two must match, otherwise ``ValueError``;
    * lookup fails with a message containing "app not found"
      (case-insensitive): a new app is created and its ID stored.

    Any other lookup error propagates unchanged. Once hydration succeeds,
    later calls return immediately. ``self._hydrate_lock`` makes sure only
    one coroutine performs the lookup at a time.

    Raises:
        ValueError: If ``self.id`` is set but differs from the server's ID.
    """
    async with self._hydrate_lock:
        if self._hydrated:
            log.debug("App is already hydrated while calling hydrate. Returning")
            return

        log.debug("Hydrating app")
        async with RunpodGraphQLClient() as client:
            try:
                existing = await client.get_flash_app_by_name(self.name)
                server_id = existing["id"]

                if not self.id:
                    self.id = server_id
                elif self.id != server_id:
                    # A caller-supplied ID must agree with the server's record.
                    raise ValueError(
                        "provided id for app class does not match existing app resource."
                    )
            except Exception as exc:
                # Only a "not found" failure triggers creation; everything
                # else (including the ValueError above) is re-raised.
                if "app not found" not in str(exc).lower():
                    raise
                created = await client.create_flash_app({"name": self.name})
                self.id = created["id"]

            self._hydrated = True
|
|
217
|
+
|
|
218
|
+
async def _get_id_by_name(self) -> str:
    """Look up this app's ID on the server using ``self.name``.

    Returns:
        The ``"id"`` value of the app record returned by the client.

    Raises:
        FlashAppNotFoundError: If the returned record has no (or an empty) ``"id"``.
    """
    async with RunpodGraphQLClient() as client:
        app_record = await client.get_flash_app_by_name(self.name)
        app_id = app_record.get("id")
        if app_id:
            return app_id
        raise FlashAppNotFoundError(f"Flash app '{self.name}' not found")
|
|
232
|
+
|
|
233
|
+
async def create_environment(self, environment_name: str) -> Dict[str, Any]:
    """Create a new environment under this app.

    The app is hydrated first so that ``self.id`` is populated.

    Args:
        environment_name: Name to give the new environment.

    Returns:
        The client's response for the created environment.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        return await client.create_flash_environment(
            {"flashAppId": self.id, "name": environment_name}
        )
|
|
251
|
+
|
|
252
|
+
async def _get_tarball_upload_url(self, tarball_size: int) -> Dict[str, str]:
    """Ask the server to prepare an upload slot for a build tarball.

    The app is hydrated first so that ``self.id`` is populated.

    Args:
        tarball_size: Size of the tarball in bytes.

    Returns:
        The client's response; ``upload_build`` reads ``"uploadUrl"`` and
        ``"objectKey"`` from it.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        upload_info = await client.prepare_artifact_upload(
            {"flashAppId": self.id, "tarballSize": tarball_size}
        )
        return upload_info
|
|
269
|
+
|
|
270
|
+
async def _get_active_artifact(self, environment_id: str) -> Dict[str, Any]:
    """Fetch the active artifact record of an environment.

    Args:
        environment_id: ID of the environment to query.

    Returns:
        The ``"activeArtifact"`` entry of the client's response;
        ``download_tarball`` reads ``"downloadUrl"`` from it.

    Raises:
        ValueError: If the response has no (or an empty) ``"activeArtifact"``.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        response = await client.get_flash_artifact_url(environment_id)
        if response.get("activeArtifact"):
            return response["activeArtifact"]
        raise ValueError(
            f"No active artifact found for environment ID: {environment_id}"
        )
|
|
291
|
+
|
|
292
|
+
async def deploy_build_to_environment(
    self,
    build_id: str,
    environment_id: Optional[str] = "",
    environment_name: Optional[str] = "",
) -> Dict[str, Any]:
    """Deploy an uploaded build to one environment of this app.

    The target environment is identified by ID or by name, never both. When
    only the name is given, the environment is looked up first to get its ID.

    Args:
        build_id: ID of the build to deploy.
        environment_id: ID of the target environment (mutually exclusive
            with ``environment_name``).
        environment_name: Name of the target environment (mutually exclusive
            with ``environment_id``).

    Returns:
        The client's response for the deployment.

    Raises:
        ValueError: If both or neither of ``environment_id`` and
            ``environment_name`` are provided.
    """
    # Validate arguments before any network traffic.
    _validate_exclusive_params(
        environment_id, environment_name, "environment_id", "environment_name"
    )

    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        target_environment_id = environment_id
        if not target_environment_id:
            environment = await client.get_flash_environment_by_name(
                {"flashAppId": self.id, "name": environment_name}
            )
            target_environment_id = environment["id"]

        return await client.deploy_build_to_environment(
            {"flashEnvironmentId": target_environment_id, "flashBuildId": build_id}
        )
|
|
327
|
+
|
|
328
|
+
async def download_tarball(self, environment_id: str, dest_file: str) -> None:
    """Download the active build tarball of an environment to a local file.

    Changes from the previous implementation:

    * The response is streamed in 1 MiB chunks. ``iter_content()`` with no
      argument yields one byte at a time.
    * ``dest_file`` is opened only after the HTTP status has been checked,
      so a failed request no longer creates or truncates the destination.

    Args:
        environment_id: ID of the environment whose active artifact to fetch.
        dest_file: Path the tarball is written to (overwritten if present).

    Raises:
        ValueError: If the environment has no active artifact.
        requests.HTTPError: If the download request returns an error status.
    """
    await self._hydrate()
    artifact = await self._get_active_artifact(environment_id)
    url = artifact["downloadUrl"]

    chunk_size = 1024 * 1024  # 1 MiB per write

    # NOTE(review): ``requests`` is synchronous, so this blocks the event
    # loop for the duration of the download, exactly as before.
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open(dest_file, "wb") as stream:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                if chunk:
                    stream.write(chunk)
|
|
349
|
+
|
|
350
|
+
async def _finalize_upload_build(
    self, object_key: str, manifest: Dict[str, Any]
) -> Dict[str, Any]:
    """Tell the server that a tarball upload has completed.

    Call this after the tarball has been PUT to the pre-signed URL obtained
    from ``_get_tarball_upload_url``.

    Args:
        object_key: Object key returned alongside the upload URL.
        manifest: Manifest dictionary to attach to the build.

    Returns:
        The client's response for the finalized upload.
    """
    await self._hydrate()
    payload = {"flashAppId": self.id, "objectKey": object_key, "manifest": manifest}
    async with RunpodGraphQLClient() as client:
        return await client.finalize_artifact_upload(payload)
|
|
374
|
+
|
|
375
|
+
async def _register_endpoint_to_environment(
    self, environment_id: str, endpoint_id: str
) -> Dict[str, Any]:
    """Attach a serverless endpoint to an environment.

    Args:
        environment_id: ID of the environment receiving the endpoint.
        endpoint_id: ID of the endpoint to attach.

    Returns:
        The client's response for the registration.
    """
    await self._hydrate()
    payload = {"flashEnvironmentId": environment_id, "endpointId": endpoint_id}
    async with RunpodGraphQLClient() as client:
        return await client.register_endpoint_to_environment(payload)
|
|
396
|
+
|
|
397
|
+
async def _register_network_volume_to_environment(
    self, environment_id: str, network_volume_id: str
) -> Dict[str, Any]:
    """Attach a network volume to an environment.

    Args:
        environment_id: ID of the environment receiving the volume.
        network_volume_id: ID of the network volume to attach.

    Returns:
        The client's response for the registration.
    """
    await self._hydrate()
    payload = {
        "flashEnvironmentId": environment_id,
        "networkVolumeId": network_volume_id,
    }
    async with RunpodGraphQLClient() as client:
        return await client.register_network_volume_to_environment(payload)
|
|
421
|
+
|
|
422
|
+
async def upload_build(self, tar_path: Union[str, Path]) -> Dict[str, Any]:
    """Upload a build tarball and register it with the server.

    Steps:

    1. Validate the tarball locally (see ``_validate_tarball_file``).
    2. Read the manifest from ``.flash/flash_manifest.json`` under the
       current working directory. It is not extracted from the tarball.
    3. Hydrate the app and request an upload URL for the tarball size.
    4. PUT the tarball to that URL.
    5. Finalize the upload, attaching the manifest.

    Changes from the previous implementation: the manifest is read
    explicitly as UTF-8 instead of the platform default encoding, and any
    path-like ``tar_path`` is accepted, not only ``str`` and ``Path``.

    Args:
        tar_path: Path to the tarball file.

    Returns:
        The response of the finalize step (build information).

    Raises:
        FileNotFoundError: If the tarball or the manifest does not exist.
        ValueError: If the tarball fails validation or the manifest is not
            valid JSON.
        requests.HTTPError: If the upload request returns an error status.

    TODO: Add integration tests for tarball upload flow including:
    - Network failures and retry behavior
    - Large file uploads (edge cases near size limit)
    - Corrupted tarball handling
    - Pre-signed URL expiration scenarios
    """
    # Validate before hydrating so bad input fails without network traffic.
    tar_path = Path(tar_path)
    _validate_tarball_file(tar_path)

    manifest_path = Path.cwd() / ".flash" / "flash_manifest.json"
    try:
        with manifest_path.open(encoding="utf-8") as f:
            manifest = json.load(f)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Manifest not found at {manifest_path}. Run 'flash build' first."
        ) from e
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid manifest JSON at {manifest_path}: {e}") from e

    await self._hydrate()
    tarball_size = tar_path.stat().st_size

    upload_info = await self._get_tarball_upload_url(tarball_size)
    url = upload_info["uploadUrl"]
    object_key = upload_info["objectKey"]

    headers = {"Content-Type": TARBALL_CONTENT_TYPE}

    # Passing the open file object lets requests stream the body instead of
    # loading the whole tarball into memory.
    # NOTE(review): this call is synchronous and blocks the event loop.
    with tar_path.open("rb") as fh:
        resp = requests.put(url, data=fh, headers=headers)

    resp.raise_for_status()
    return await self._finalize_upload_build(object_key, manifest)
|
|
480
|
+
|
|
481
|
+
async def _set_environment_state(self, environment_id: str, status: str) -> None:
    """Update the recorded status of an environment.

    Args:
        environment_id: ID of the environment to update.
        status: New status value; ``deploy_resources`` uses ``"DEPLOYING"``
            and ``"HEALTHY"``.
    """
    await self._hydrate()
    state_update = {"flashEnvironmentId": environment_id, "status": status}
    async with RunpodGraphQLClient() as client:
        await client.set_environment_state(state_update)
|
|
496
|
+
|
|
497
|
+
async def _get_environment_by_name(self, environment_name: str) -> Dict[str, Any]:
    """Fetch one of this app's environments by name.

    Other methods of this class (``deploy_build_to_environment`` and
    ``get_environment_by_name``) treat the return value of
    ``client.get_flash_environment_by_name`` as the environment record
    itself, while this helper used to index ``["flashEnvironmentByName"]``
    on it. Both cannot be right for the same client call, so this helper
    now accepts either shape: it unwraps the ``"flashEnvironmentByName"``
    key when present and otherwise returns the response unchanged.

    NOTE(review): confirm the client's actual return shape and drop the
    branch that is not needed.

    Args:
        environment_name: Name of the environment to retrieve.

    Returns:
        The environment record.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        result = await client.get_flash_environment_by_name(
            {"flashAppId": self.id, "name": environment_name}
        )
        if isinstance(result, dict) and "flashEnvironmentByName" in result:
            return result["flashEnvironmentByName"]
        return result
|
|
516
|
+
|
|
517
|
+
async def deploy_resources(self, environment_name: str) -> None:
    """Deploy every registered resource and attach it to an environment.

    The environment is marked ``"DEPLOYING"``, each entry of
    ``self.resources`` is deployed (or fetched if it already exists) through
    ``ResourceManager``, endpoints and network volumes that come back with
    an ID are registered to the environment, and the environment is finally
    marked ``"HEALTHY"``.

    If any step raises, the exception propagates and the environment status
    is left as ``"DEPLOYING"``.

    Args:
        environment_name: Name of the environment to deploy resources to.
    """
    await self._hydrate()
    resource_manager = ResourceManager()
    environment = await self._get_environment_by_name(environment_name)

    # NOTE(jhcipar) it's pretty fragile to have client managed state like this
    # we should enforce this on the server side eventually and either debounce or not allow subsequent deploys
    await self._set_environment_state(environment["id"], "DEPLOYING")

    for resource in self.resources.values():
        deployed = await resource_manager.get_or_deploy_resource(resource)

        # Two independent checks rather than if/elif, mirroring the original
        # control flow.
        if isinstance(deployed, ServerlessEndpoint) and deployed.id:
            await self._register_endpoint_to_environment(
                environment["id"], deployed.id
            )
        if isinstance(deployed, NetworkVolume) and deployed.id:
            await self._register_network_volume_to_environment(
                environment["id"], deployed.id
            )

    # NOTE(jhcipar) we should healthcheck endpoints after provisioning them, for right now we just
    # assume this is healthy
    await self._set_environment_state(environment["id"], "HEALTHY")
|
|
555
|
+
|
|
556
|
+
@classmethod
async def from_name(cls, app_name: str) -> "FlashApp":
    """Build a FlashApp for an app that already exists on the server.

    The app is looked up by name and its ID is stored on the new instance.
    Lookup errors from the client propagate unchanged.

    Args:
        app_name: Name of the existing app.

    Returns:
        A FlashApp carrying the server-side ID.
    """
    async with RunpodGraphQLClient() as client:
        app_record = await client.get_flash_app_by_name(app_name)
        return cls(app_name, id=app_record["id"], eager_hydrate=False)
|
|
561
|
+
|
|
562
|
+
@classmethod
async def create(cls, app_name: str) -> "FlashApp":
    """Create a new app on the server and return a FlashApp for it.

    Args:
        app_name: Name of the app to create.

    Returns:
        A FlashApp carrying the ID of the newly created app.
    """
    async with RunpodGraphQLClient() as client:
        created = await client.create_flash_app({"name": app_name})
        return cls(app_name, id=created["id"], eager_hydrate=False)
|
|
567
|
+
|
|
568
|
+
@classmethod
async def get_or_create(cls, app_name: str) -> "FlashApp":
    """Return a FlashApp for ``app_name``, creating the app if it is missing.

    A lookup failure whose message contains "app not found"
    (case-insensitive) triggers creation; any other error propagates.

    Args:
        app_name: Name of the app to fetch or create.

    Returns:
        A FlashApp carrying the existing or newly created app's ID.
    """
    async with RunpodGraphQLClient() as client:
        try:
            record = await client.get_flash_app_by_name(app_name)
        except Exception as exc:
            if "app not found" not in str(exc).lower():
                raise
            record = await client.create_flash_app({"name": app_name})
        return cls(app_name, id=record["id"], eager_hydrate=False)
|
|
579
|
+
|
|
580
|
+
@classmethod
async def create_environment_and_app(
    cls, app_name: str, environment_name: str
) -> Tuple["FlashApp", Dict]:
    """Ensure an app exists, then create an environment inside it.

    Args:
        app_name: Name of the app to fetch or create.
        environment_name: Name of the environment to create in that app.

    Returns:
        A ``(app, environment)`` tuple: the FlashApp and the response of
        ``create_environment``.
    """
    flash_app = await cls.get_or_create(app_name)
    environment = await flash_app.create_environment(environment_name)
    return flash_app, environment
|
|
587
|
+
|
|
588
|
+
@classmethod
async def list(cls):
    """Return the client's listing of Flash apps."""
    async with RunpodGraphQLClient() as client:
        apps = await client.list_flash_apps()
        return apps
|
|
592
|
+
|
|
593
|
+
@classmethod
async def delete(
    cls, app_name: Optional[str] = None, app_id: Optional[str] = None
) -> bool:
    """Delete a Flash app identified by name or by ID (exactly one).

    When only the name is given, the app is looked up first to get its ID.

    Args:
        app_name: Name of the app to delete (mutually exclusive with ``app_id``).
        app_id: ID of the app to delete (mutually exclusive with ``app_name``).

    Returns:
        The ``"success"`` field of the client's response, or ``False`` when
        that field is absent.

    Raises:
        ValueError: If both or neither identifier is provided, or no ID
            could be resolved.
    """
    _validate_exclusive_params(app_name, app_id, "app_name", "app_id")

    if not app_id:
        # Kept as an explicit guard; it also narrows Optional[str] for
        # type checkers.
        if app_name is None:
            raise ValueError("app_name cannot be None when app_id is not provided")
        resolved_app = await cls.from_name(app_name)
        app_id = resolved_app.id

    if app_id is None:
        raise ValueError("Failed to resolve app_id")

    async with RunpodGraphQLClient() as client:
        outcome = await client.delete_flash_app(app_id)
        return outcome.get("success", False)
|
|
611
|
+
|
|
612
|
+
async def delete_environment(self, environment_name: str) -> bool:
    """Delete one of this app's environments, identified by name.

    The environment is resolved through ``get_environment_by_name`` and
    then deleted by ID.

    Args:
        environment_name: Name of the environment to delete.

    Returns:
        The ``"success"`` field of the client's response, or ``False`` when
        that field is absent.

    Raises:
        FlashEnvironmentNotFoundError: If the environment cannot be found.
    """
    await self._hydrate()
    target = await self.get_environment_by_name(environment_name)
    target_id = target["id"]

    async with RunpodGraphQLClient() as client:
        outcome = await client.delete_flash_environment(target_id)
        return outcome.get("success", False)
|
|
632
|
+
|
|
633
|
+
async def get_build(self, build_id: str) -> Dict[str, Any]:
    """Fetch a single build record by its ID.

    Args:
        build_id: ID of the build to retrieve.

    Returns:
        The client's response for that build.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        build_record = await client.get_flash_build(build_id)
        return build_record
|
|
648
|
+
|
|
649
|
+
async def list_builds(self) -> List[Dict[str, Any]]:
    """Return the builds the client lists for this app's ID.

    The app is hydrated first so that ``self.id`` is populated.

    Returns:
        The client's list of build records for this app.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        builds = await client.list_flash_builds_by_app_id(self.id)
        return builds
|
|
661
|
+
|
|
662
|
+
async def get_environment_by_name(self, environment_name: str) -> Dict[str, Any]:
    """Fetch one of this app's environments by name.

    A ``None`` response, or a client error whose message contains
    "not found" (case-insensitive), is reported as
    ``FlashEnvironmentNotFoundError``. Any other client error propagates
    unchanged.

    The ``None`` check now sits outside the ``try`` block. Previously the
    not-found error raised inside it was caught by the method's own broad
    handler and re-wrapped in a second, identical error.

    Args:
        environment_name: Name of the environment to retrieve.

    Returns:
        The environment record returned by the client.

    Raises:
        FlashEnvironmentNotFoundError: If the environment cannot be found.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        try:
            result = await client.get_flash_environment_by_name(
                {"flashAppId": self.id, "name": environment_name}
            )
        except Exception as exc:
            # The client raises generic exceptions; recognise "not found"
            # by message and convert it to the specific error type.
            if "not found" in str(exc).lower():
                raise FlashEnvironmentNotFoundError(
                    f"Environment '{environment_name}' not found in app '{self.name}'"
                ) from exc
            raise

        if result is None:
            raise FlashEnvironmentNotFoundError(
                f"Environment '{environment_name}' not found in app '{self.name}'"
            )
        return result
|
|
693
|
+
|
|
694
|
+
async def list_environments(self) -> List[Dict[str, Any]]:
    """Return the environments the client lists for this app's ID.

    The app is hydrated first so that ``self.id`` is populated.

    Returns:
        The client's list of environment records for this app.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        environments = await client.list_flash_environments_by_app_id(self.id)
        return environments
|
|
706
|
+
|
|
707
|
+
async def get_build_manifest(self, build_id: str) -> Dict[str, Any]:
    """Retrieve the manifest stored with a build.

    Args:
        build_id: ID of the build.

    Returns:
        The build's manifest dictionary, or an empty dict when the build
        record has no ``"manifest"`` key or its value is null/empty.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        build = await client.get_flash_build(build_id)
        # ``.get("manifest", {})`` only covers a missing key; a key that is
        # present with a null value would return None despite the
        # documented "empty dict" contract.
        return build.get("manifest") or {}
|
|
723
|
+
|
|
724
|
+
async def update_build_manifest(
    self, build_id: str, manifest: Dict[str, Any]
) -> None:
    """Send a manifest for a build to the server.

    Args:
        build_id: ID of the build whose manifest is updated.
        manifest: Manifest dictionary passed to the client unchanged.
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as gql_client:
        await gql_client.update_build_manifest(build_id, manifest)
|