tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,738 @@
1
+ from pathlib import Path
2
+ import requests
3
+ import asyncio
4
+ import json
5
+ from typing import Dict, Optional, Union, Tuple, TYPE_CHECKING, Any, List
6
+ import logging
7
+
8
+ from ..api.runpod import RunpodGraphQLClient
9
+ from ..resources.resource_manager import ResourceManager
10
+ from ..resources.serverless import ServerlessEndpoint, NetworkVolume
11
+ from ..resources.constants import (
12
+ TARBALL_CONTENT_TYPE,
13
+ MAX_TARBALL_SIZE_MB,
14
+ VALID_TARBALL_EXTENSIONS,
15
+ GZIP_MAGIC_BYTES,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from . import ServerlessResource
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+
24
class FlashAppError(Exception):
    """Root of the exception hierarchy for Flash app operations."""
28
+
29
+
30
class FlashAppNotFoundError(FlashAppError):
    """Signals that the requested Flash app does not exist."""
34
+
35
+
36
class FlashEnvironmentNotFoundError(FlashAppError):
    """Signals that the requested Flash environment does not exist."""
40
+
41
+
42
class FlashBuildNotFoundError(FlashAppError):
    """Signals that the requested Flash build does not exist."""
46
+
47
+
48
def _validate_exclusive_params(
    param1: Any, param2: Any, name1: str, name2: str
) -> None:
    """Require that exactly one of two parameters is truthy (XOR).

    Args:
        param1: Value of the first parameter
        param2: Value of the second parameter
        name1: Display name of the first parameter (used in the error message)
        name2: Display name of the second parameter (used in the error message)

    Raises:
        ValueError: If both parameters are truthy or both are falsy
    """
    first_given = bool(param1)
    second_given = bool(param2)
    # Equal truthiness means "both" or "neither" -- both violate the XOR rule.
    if first_given == second_given:
        raise ValueError(f"Provide exactly one of {name1} or {name2}")
64
+
65
+
66
def _validate_tarball_file(tar_path: Path) -> None:
    """Run pre-upload sanity checks on a build tarball.

    Checks, in order:
    - the path exists and is a regular file
    - the file name ends with one of VALID_TARBALL_EXTENSIONS
    - the first two bytes match GZIP_MAGIC_BYTES
    - the size does not exceed MAX_TARBALL_SIZE_MB

    Args:
        tar_path: Location of the tarball on disk

    Raises:
        FileNotFoundError: If nothing exists at tar_path
        ValueError: If the path is not a file, or the extension, magic bytes
            or size check fails
    """
    if not tar_path.exists():
        raise FileNotFoundError(f"Tarball file not found: {tar_path}")

    if not tar_path.is_file():
        raise ValueError(f"Path is not a file: {tar_path}")

    # Match against the file name only so directory names cannot satisfy the check.
    filename = tar_path.name
    extension_ok = False
    for ext in VALID_TARBALL_EXTENSIONS:
        if filename.endswith(ext):
            extension_ok = True
            break
    if not extension_ok:
        raise ValueError(
            f"Invalid file extension. Expected one of {VALID_TARBALL_EXTENSIONS}, "
            f"got: {tar_path.suffix}"
        )

    # Only the two-byte signature is read; the archive body is not inspected.
    with tar_path.open("rb") as handle:
        header = handle.read(2)
    if len(header) < 2 or (header[0], header[1]) != GZIP_MAGIC_BYTES:
        raise ValueError(
            f"File is not a valid gzip file. Expected magic bytes "
            f"{GZIP_MAGIC_BYTES}, got: {tuple(header) if header else 'empty file'}"
        )

    size_mb = tar_path.stat().st_size / (1024 * 1024)
    if size_mb > MAX_TARBALL_SIZE_MB:
        raise ValueError(
            f"Tarball exceeds maximum size. "
            f"File size: {size_mb:.2f}MB, Max: {MAX_TARBALL_SIZE_MB}MB"
        )
114
+
115
+
116
+ class FlashApp:
117
+ """Flash app resource for managing applications, environments, and builds.
118
+
119
+ FlashApp provides the interface for Flash application lifecycle management including:
120
+ - Creating and managing flash apps
121
+ - Managing environments within apps
122
+ - Uploading and deploying builds
123
+ - Registering endpoints and network volumes to environments
124
+
125
+ Lifecycle Management:
126
+ - Constructor (__init__): Creates instance without I/O by default
127
+ - Factory methods (from_name, create, get_or_create): Recommended for async contexts
128
+ - Hydration: Lazy-loads app ID from server via _hydrate()
129
+ - All API methods call _hydrate() automatically before execution
130
+
131
+ Thread Safety:
132
+ - Hydration is protected by asyncio.Lock to prevent concurrent API calls
133
+ - Safe to call _hydrate() multiple times from different coroutines
134
+ - All async methods are safe for concurrent use after hydration
135
+
136
+ Usage Patterns:
137
+ # Factory method (recommended in async context)
138
+ app = await FlashApp.from_name("my-app")
139
+
140
+ # Constructor with eager hydration (blocks, creates event loop)
141
+ app = FlashApp("my-app", eager_hydrate=True)
142
+
143
+ # Constructor without hydration (deferred until first API call)
144
+ app = FlashApp("my-app")
145
+ await app._hydrate() # Explicit hydration
146
+
147
+ GraphQL Query Philosophy:
148
+ - List operations fetch only top-level attributes
149
+ - Child resources queried separately by ID or name
150
+ - Direct queries fetch one level deeper (app + envs/builds, not env resources)
151
+ """
152
+
153
def __init__(self, name: str, id: Optional[str] = "", eager_hydrate: bool = False):
    """Create a FlashApp handle; no I/O happens unless eager_hydrate is set.

    Args:
        name: Name of the flash app
        id: Known app ID, if any (empty string means "not known yet")
        eager_hydrate: When True, run _hydrate() to completion via
            asyncio.run() before returning
    """
    # Hydration bookkeeping: the lock serialises concurrent _hydrate() calls.
    self._hydrate_lock = asyncio.Lock()
    self._hydrated = False

    self.name: str = name
    self.id: Optional[str] = id
    # Resources registered through remote(), keyed by their resource_id.
    self.resources: Dict[str, "ServerlessResource"] = {}

    if not eager_hydrate:
        return
    asyncio.run(self._hydrate())
161
+
162
def remote(self, *args, **kwargs):
    """Wrap tetra_rp.client.remote and remember the resource config on this app.

    The resource config is taken from the ``resource_config`` keyword when
    given, otherwise from the first positional argument. If that object has a
    ``resource_id`` attribute it is stored in ``self.resources`` under that ID
    so that deploy_resources() can find it later.

    Args:
        *args: Positional arguments forwarded unchanged to the remote decorator
        **kwargs: Keyword arguments forwarded unchanged to the remote decorator

    Returns:
        Whatever tetra_rp.client.remote returns for the same arguments
    """
    # Imported lazily, presumably to avoid a circular import -- TODO confirm.
    from tetra_rp.client import remote as remote_decorator

    # Previously a config passed by keyword was never registered; accept both
    # calling conventions so keyword callers are deployed as well.
    candidate = kwargs.get("resource_config")
    if candidate is None and args:
        candidate = args[0]

    if candidate is not None and hasattr(candidate, "resource_id"):
        self.resources[candidate.resource_id] = candidate

    return remote_decorator(*args, **kwargs)
173
+
174
async def _hydrate(self) -> None:
    """Resolve this app's ID from the server, creating the app if it is missing.

    The whole operation runs under self._hydrate_lock, so concurrent coroutines
    perform at most one lookup; later calls return immediately once
    self._hydrated is set.

    Behaviour:
    - If the lookup by name succeeds and no ID was supplied, the server ID is
      adopted.
    - If an ID was supplied and differs from the server's, ValueError is raised.
    - If the lookup fails with a message containing "app not found", a new app
      is created with self.name and its ID is adopted.
    - Any other lookup failure propagates unchanged.

    Returns:
        None (updates self.id and self._hydrated in place)
    """
    async with self._hydrate_lock:
        if self._hydrated:
            log.debug("App is already hydrated while calling hydrate. Returning")
            return

        log.debug("Hydrating app")
        async with RunpodGraphQLClient() as client:
            try:
                existing = await client.get_flash_app_by_name(self.name)
                server_id = existing["id"]

                # A caller-supplied ID must agree with what the server reports.
                if self.id and self.id != server_id:
                    raise ValueError(
                        "provided id for app class does not match existing app resource."
                    )
                if not self.id:
                    self.id = server_id
                self._hydrated = True
                return

            except Exception as exc:
                # Only a "not found" failure is recoverable (by creating the app).
                if "app not found" not in str(exc).lower():
                    raise
                created = await client.create_flash_app({"name": self.name})
                self.id = created["id"]

        self._hydrated = True
217
+
218
async def _get_id_by_name(self) -> str:
    """Look up this app's ID on the server using self.name.

    Returns:
        The app ID string

    Raises:
        FlashAppNotFoundError: If the lookup result carries no (truthy) "id"
    """
    async with RunpodGraphQLClient() as client:
        lookup = await client.get_flash_app_by_name(self.name)
        if lookup.get("id"):
            return lookup["id"]
        raise FlashAppNotFoundError(f"Flash app '{self.name}' not found")
232
+
233
async def create_environment(self, environment_name: str) -> Dict[str, Any]:
    """Create a new environment inside this app.

    The app is hydrated first (looked up, or created if missing), so
    self.id is populated before the request is sent.

    Args:
        environment_name: Name for the new environment

    Returns:
        The client's response for the created environment
    """
    await self._hydrate()
    payload = {"flashAppId": self.id, "name": environment_name}
    async with RunpodGraphQLClient() as client:
        return await client.create_flash_environment(payload)
251
+
252
async def _get_tarball_upload_url(self, tarball_size: int) -> Dict[str, str]:
    """Request upload details for a build tarball of the given size.

    Hydrates the app first so self.id is available for the request.

    Args:
        tarball_size: Size of the tarball in bytes

    Returns:
        The client's response; upload_build() reads its 'uploadUrl' and
        'objectKey' entries
    """
    await self._hydrate()
    request = {"flashAppId": self.id, "tarballSize": tarball_size}
    async with RunpodGraphQLClient() as client:
        upload_info = await client.prepare_artifact_upload(request)
    return upload_info
269
+
270
async def _get_active_artifact(self, environment_id: str) -> Dict[str, Any]:
    """Fetch the active artifact record of an environment.

    Args:
        environment_id: ID of the environment

    Returns:
        The "activeArtifact" entry of the client's response
        (download_tarball() reads its "downloadUrl")

    Raises:
        ValueError: If the response has no (truthy) "activeArtifact"
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        response = await client.get_flash_artifact_url(environment_id)
        if response.get("activeArtifact"):
            return response["activeArtifact"]
        raise ValueError(
            f"No active artifact found for environment ID: {environment_id}"
        )
291
+
292
async def deploy_build_to_environment(
    self,
    build_id: str,
    environment_id: Optional[str] = "",
    environment_name: Optional[str] = "",
) -> Dict[str, Any]:
    """Deploy a build to an environment identified by ID or by name.

    Args:
        build_id: ID of the build to deploy
        environment_id: ID of the target environment (exclusive with environment_name)
        environment_name: Name of the target environment (exclusive with environment_id)

    Returns:
        The client's response for the deployment request

    Raises:
        ValueError: If both or neither of environment_id / environment_name are given
    """
    _validate_exclusive_params(
        environment_id, environment_name, "environment_id", "environment_name"
    )

    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        target_id = environment_id
        if not target_id:
            # Only the name was supplied: resolve it to an ID within this app.
            lookup = {"flashAppId": self.id, "name": environment_name}
            environment = await client.get_flash_environment_by_name(lookup)
            target_id = environment["id"]
        return await client.deploy_build_to_environment(
            {"flashEnvironmentId": target_id, "flashBuildId": build_id}
        )
327
+
328
async def download_tarball(self, environment_id: str, dest_file: str) -> None:
    """Download the active build tarball of an environment to a local file.

    Args:
        environment_id: ID of the environment to download from
        dest_file: Path where the tarball should be saved

    Raises:
        ValueError: If the environment has no active artifact
        requests.HTTPError: If the download request returns an error status

    Note:
        The download uses the blocking ``requests`` library, so the event loop
        is blocked for the duration of the transfer.
    """
    # 1 MiB chunks: iter_content() defaults to chunk_size=1, which would write
    # the archive one byte at a time.
    chunk_size = 1024 * 1024

    await self._hydrate()
    artifact = await self._get_active_artifact(environment_id)
    url = artifact["downloadUrl"]

    with requests.get(url, stream=True) as resp:
        # Check the status before touching dest_file so a failed request does
        # not leave an empty or truncated file behind.
        resp.raise_for_status()
        with open(dest_file, "wb") as stream:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                if chunk:
                    stream.write(chunk)
349
+
350
async def _finalize_upload_build(
    self, object_key: str, manifest: Dict[str, Any]
) -> Dict[str, Any]:
    """Tell the server that a tarball upload has completed.

    Called by upload_build() after the tarball has been PUT to the upload URL.

    Args:
        object_key: The object key obtained from _get_tarball_upload_url
        manifest: The manifest dictionary to attach to the build

    Returns:
        The client's response for the finalized upload
    """
    await self._hydrate()
    request = {
        "flashAppId": self.id,
        "objectKey": object_key,
        "manifest": manifest,
    }
    async with RunpodGraphQLClient() as client:
        return await client.finalize_artifact_upload(request)
374
+
375
async def _register_endpoint_to_environment(
    self, environment_id: str, endpoint_id: str
) -> Dict[str, Any]:
    """Attach a serverless endpoint to an environment.

    Args:
        environment_id: ID of the environment
        endpoint_id: ID of the endpoint to register

    Returns:
        The client's response for the registration request
    """
    await self._hydrate()
    registration = {
        "flashEnvironmentId": environment_id,
        "endpointId": endpoint_id,
    }
    async with RunpodGraphQLClient() as client:
        return await client.register_endpoint_to_environment(registration)
396
+
397
async def _register_network_volume_to_environment(
    self, environment_id: str, network_volume_id: str
) -> Dict[str, Any]:
    """Attach a network volume to an environment.

    Args:
        environment_id: ID of the environment
        network_volume_id: ID of the network volume to register

    Returns:
        The client's response for the registration request
    """
    await self._hydrate()
    registration = {
        "flashEnvironmentId": environment_id,
        "networkVolumeId": network_volume_id,
    }
    async with RunpodGraphQLClient() as client:
        return await client.register_network_volume_to_environment(registration)
421
+
422
async def upload_build(self, tar_path: Union[str, Path]) -> Dict[str, Any]:
    """Validate and upload a build tarball, then finalize the upload.

    Steps:
    1. Validate the tarball with _validate_tarball_file.
    2. Load the manifest from ``.flash/flash_manifest.json`` under the current
       working directory (it is not extracted from the tarball).
    3. Obtain an upload URL, PUT the tarball to it, and finalize the upload.

    Args:
        tar_path: Path to the tarball file (string or Path object)

    Returns:
        The client's response from finalizing the upload

    Raises:
        FileNotFoundError: If the tarball or the manifest file does not exist
        ValueError: If the tarball fails validation or the manifest is not valid JSON
        requests.HTTPError: If the upload request returns an error status
    """
    # Validation and manifest loading happen before any network call.
    archive = Path(tar_path) if isinstance(tar_path, str) else tar_path
    _validate_tarball_file(archive)

    manifest_path = Path.cwd() / ".flash" / "flash_manifest.json"
    try:
        with open(manifest_path) as manifest_file:
            manifest = json.load(manifest_file)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Manifest not found at {manifest_path}. Run 'flash build' first."
        ) from e
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid manifest JSON at {manifest_path}: {e}") from e

    await self._hydrate()

    upload_info = await self._get_tarball_upload_url(archive.stat().st_size)
    upload_url = upload_info["uploadUrl"]
    object_key = upload_info["objectKey"]

    # The file handle is streamed as the request body.
    with archive.open("rb") as fh:
        put_response = requests.put(
            upload_url, data=fh, headers={"Content-Type": TARBALL_CONTENT_TYPE}
        )
    put_response.raise_for_status()

    return await self._finalize_upload_build(object_key, manifest)
480
+
481
async def _set_environment_state(self, environment_id: str, status: str) -> None:
    """Update the state of an environment on the server.

    Args:
        environment_id: ID of the environment
        status: State to set (deploy_resources() uses "DEPLOYING" and "HEALTHY")
    """
    await self._hydrate()
    update = {"flashEnvironmentId": environment_id, "status": status}
    async with RunpodGraphQLClient() as client:
        await client.set_environment_state(update)
496
+
497
async def _get_environment_by_name(self, environment_name: str) -> Dict[str, Any]:
    """Fetch an environment of this app by name.

    Args:
        environment_name: Name of the environment to retrieve

    Returns:
        Dictionary containing environment data

    Raises:
        FlashEnvironmentNotFoundError: If the client returns no environment
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        result = await client.get_flash_environment_by_name(
            {"flashAppId": self.id, "name": environment_name}
        )

    if result is None:
        raise FlashEnvironmentNotFoundError(
            f"Environment '{environment_name}' not found in app '{self.name}'"
        )

    # get_environment_by_name() and deploy_build_to_environment() use the same
    # client call's return value directly as the environment dict, whereas this
    # method used to index "flashEnvironmentByName" unconditionally. Accept
    # both shapes so a KeyError cannot occur if the client already unwraps.
    if isinstance(result, dict) and "flashEnvironmentByName" in result:
        return result["flashEnvironmentByName"]
    return result
516
+
517
async def deploy_resources(self, environment_name: str) -> None:
    """Deploy every resource registered on this app and attach it to an environment.

    The environment is marked "DEPLOYING" before the loop and "HEALTHY" after
    it. Each entry of self.resources is passed to
    ResourceManager.get_or_deploy_resource; results that are ServerlessEndpoint
    or NetworkVolume instances with a truthy ``id`` are registered to the
    environment.

    Args:
        environment_name: Name of the environment to deploy resources to
    """
    await self._hydrate()
    manager = ResourceManager()
    environment = await self._get_environment_by_name(environment_name)
    env_id = environment["id"]

    # NOTE(jhcipar) it's pretty fragile to have client managed state like this
    # we should enforce this on the server side eventually and either debounce or not allow subsequent deploys
    await self._set_environment_state(env_id, "DEPLOYING")

    for resource in self.resources.values():
        deployed = await manager.get_or_deploy_resource(resource)
        if isinstance(deployed, ServerlessEndpoint) and deployed.id:
            await self._register_endpoint_to_environment(env_id, deployed.id)
        if isinstance(deployed, NetworkVolume) and deployed.id:
            await self._register_network_volume_to_environment(env_id, deployed.id)

    # NOTE(jhcipar) we should healthcheck endpoints after provisioning them, for right now we just
    # assume this is healthy
    await self._set_environment_state(env_id, "HEALTHY")
555
+
556
@classmethod
async def from_name(cls, app_name: str) -> "FlashApp":
    """Build a FlashApp for an existing app by looking up its ID by name.

    Args:
        app_name: Name of the app to look up

    Returns:
        A FlashApp carrying the ID from the lookup (hydration stays deferred)
    """
    async with RunpodGraphQLClient() as client:
        found = await client.get_flash_app_by_name(app_name)
    app_id = found["id"]
    return cls(app_name, id=app_id, eager_hydrate=False)
561
+
562
@classmethod
async def create(cls, app_name: str) -> "FlashApp":
    """Create a new app on the server and return a FlashApp for it.

    Args:
        app_name: Name for the new app

    Returns:
        A FlashApp carrying the ID of the created app (hydration stays deferred)
    """
    async with RunpodGraphQLClient() as client:
        created = await client.create_flash_app({"name": app_name})
    app_id = created["id"]
    return cls(app_name, id=app_id, eager_hydrate=False)
567
+
568
@classmethod
async def get_or_create(cls, app_name: str) -> "FlashApp":
    """Return a FlashApp for app_name, creating the app if the lookup fails.

    A lookup failure counts as "missing" only when its message contains
    "app not found" (case-insensitive); any other failure propagates.

    Args:
        app_name: Name of the app to fetch or create

    Returns:
        A FlashApp carrying the fetched or newly created ID
    """
    async with RunpodGraphQLClient() as client:
        try:
            existing = await client.get_flash_app_by_name(app_name)
            return cls(app_name, id=existing["id"], eager_hydrate=False)
        except Exception as exc:
            message = str(exc).lower()
            if "app not found" not in message:
                raise
        created = await client.create_flash_app({"name": app_name})
        return cls(app_name, id=created["id"], eager_hydrate=False)
579
+
580
@classmethod
async def create_environment_and_app(
    cls, app_name: str, environment_name: str
) -> Tuple["FlashApp", Dict]:
    """Get or create an app, then create an environment inside it.

    Args:
        app_name: Name of the app to fetch or create
        environment_name: Name for the new environment

    Returns:
        Tuple of (app, environment data returned by create_environment)
    """
    flash_app = await cls.get_or_create(app_name)
    environment = await flash_app.create_environment(environment_name)
    return flash_app, environment
587
+
588
@classmethod
async def list(cls):
    """Return the client's listing of flash apps."""
    async with RunpodGraphQLClient() as client:
        apps = await client.list_flash_apps()
    return apps
592
+
593
@classmethod
async def delete(
    cls, app_name: Optional[str] = None, app_id: Optional[str] = None
) -> bool:
    """Delete a flash app identified by name or by ID.

    Args:
        app_name: Name of the app (exclusive with app_id)
        app_id: ID of the app (exclusive with app_name)

    Returns:
        The "success" entry of the client's response, or False if it is absent

    Raises:
        ValueError: If both or neither of app_name / app_id are given, or
            if no app ID could be resolved
    """
    _validate_exclusive_params(app_name, app_id, "app_name", "app_id")

    target_id = app_id
    if not target_id:
        # Resolve the ID from the name; the None check also narrows the type.
        if app_name is None:
            raise ValueError("app_name cannot be None when app_id is not provided")
        target_id = (await cls.from_name(app_name)).id

    if target_id is None:
        raise ValueError("Failed to resolve app_id")

    async with RunpodGraphQLClient() as client:
        outcome = await client.delete_flash_app(target_id)
    return outcome.get("success", False)
611
+
612
async def delete_environment(self, environment_name: str) -> bool:
    """Delete one of this app's environments by name.

    Args:
        environment_name: Name of the environment to delete

    Returns:
        The "success" entry of the client's response, or False if it is absent

    Raises:
        FlashEnvironmentNotFoundError: If get_environment_by_name cannot find
            the environment
    """
    await self._hydrate()
    target = await self.get_environment_by_name(environment_name)
    target_id = target["id"]

    async with RunpodGraphQLClient() as client:
        outcome = await client.delete_flash_environment(target_id)
    return outcome.get("success", False)
632
+
633
async def get_build(self, build_id: str) -> Dict[str, Any]:
    """Fetch a build by its ID.

    Args:
        build_id: ID of the build to retrieve

    Returns:
        The client's response for the build
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        build = await client.get_flash_build(build_id)
    return build
648
+
649
async def list_builds(self) -> List[Dict[str, Any]]:
    """List the builds that belong to this app.

    Returns:
        The client's listing of builds for self.id
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        builds = await client.list_flash_builds_by_app_id(self.id)
    return builds
661
+
662
async def get_environment_by_name(self, environment_name: str) -> Dict[str, Any]:
    """Fetch one of this app's environments by name.

    Args:
        environment_name: Name of the environment to retrieve

    Returns:
        Dictionary containing environment data

    Raises:
        FlashEnvironmentNotFoundError: If the client returns None, or raises
            an error whose message contains "not found" (case-insensitive)
    """
    await self._hydrate()
    not_found_message = (
        f"Environment '{environment_name}' not found in app '{self.name}'"
    )
    async with RunpodGraphQLClient() as client:
        try:
            result = await client.get_flash_environment_by_name(
                {"flashAppId": self.id, "name": environment_name}
            )
        except Exception as exc:
            # Translate generic "not found" failures into the specific exception.
            if "not found" in str(exc).lower():
                raise FlashEnvironmentNotFoundError(not_found_message) from exc
            raise

        # Checked outside the try block so this error is not caught and
        # re-wrapped by the handler above.
        if result is None:
            raise FlashEnvironmentNotFoundError(not_found_message)
        return result
693
+
694
async def list_environments(self) -> List[Dict[str, Any]]:
    """List the environments that belong to this app.

    Returns:
        The client's listing of environments for self.id
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        environments = await client.list_flash_environments_by_app_id(self.id)
    return environments
706
+
707
async def get_build_manifest(self, build_id: str) -> Dict[str, Any]:
    """Retrieve the manifest of a specific build.

    Args:
        build_id: ID of the build

    Returns:
        Manifest dictionary; an empty dict if the build has no "manifest"
        entry or the entry is null/empty
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as client:
        build = await client.get_flash_build(build_id)
    # ``.get("manifest", {})`` would still return None when the key is present
    # with a null value; ``or {}`` keeps the documented "always a dict" contract.
    return build.get("manifest") or {}
723
+
724
async def update_build_manifest(
    self, build_id: str, manifest: Dict[str, Any]
) -> None:
    """Send a manifest for a specific build to the server.

    Args:
        build_id: ID of the build
        manifest: Complete manifest dictionary passed to the client's
            update_build_manifest call
    """
    await self._hydrate()
    async with RunpodGraphQLClient() as api:
        await api.update_build_manifest(build_id, manifest)