tetra-rp 0.6.0__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. tetra_rp/__init__.py +109 -19
  2. tetra_rp/cli/commands/__init__.py +1 -0
  3. tetra_rp/cli/commands/apps.py +143 -0
  4. tetra_rp/cli/commands/build.py +1082 -0
  5. tetra_rp/cli/commands/build_utils/__init__.py +1 -0
  6. tetra_rp/cli/commands/build_utils/handler_generator.py +176 -0
  7. tetra_rp/cli/commands/build_utils/lb_handler_generator.py +309 -0
  8. tetra_rp/cli/commands/build_utils/manifest.py +430 -0
  9. tetra_rp/cli/commands/build_utils/mothership_handler_generator.py +75 -0
  10. tetra_rp/cli/commands/build_utils/scanner.py +596 -0
  11. tetra_rp/cli/commands/deploy.py +580 -0
  12. tetra_rp/cli/commands/init.py +123 -0
  13. tetra_rp/cli/commands/resource.py +108 -0
  14. tetra_rp/cli/commands/run.py +296 -0
  15. tetra_rp/cli/commands/test_mothership.py +458 -0
  16. tetra_rp/cli/commands/undeploy.py +533 -0
  17. tetra_rp/cli/main.py +97 -0
  18. tetra_rp/cli/utils/__init__.py +1 -0
  19. tetra_rp/cli/utils/app.py +15 -0
  20. tetra_rp/cli/utils/conda.py +127 -0
  21. tetra_rp/cli/utils/deployment.py +530 -0
  22. tetra_rp/cli/utils/ignore.py +143 -0
  23. tetra_rp/cli/utils/skeleton.py +184 -0
  24. tetra_rp/cli/utils/skeleton_template/.env.example +4 -0
  25. tetra_rp/cli/utils/skeleton_template/.flashignore +40 -0
  26. tetra_rp/cli/utils/skeleton_template/.gitignore +44 -0
  27. tetra_rp/cli/utils/skeleton_template/README.md +263 -0
  28. tetra_rp/cli/utils/skeleton_template/main.py +44 -0
  29. tetra_rp/cli/utils/skeleton_template/mothership.py +55 -0
  30. tetra_rp/cli/utils/skeleton_template/pyproject.toml +58 -0
  31. tetra_rp/cli/utils/skeleton_template/requirements.txt +1 -0
  32. tetra_rp/cli/utils/skeleton_template/workers/__init__.py +0 -0
  33. tetra_rp/cli/utils/skeleton_template/workers/cpu/__init__.py +19 -0
  34. tetra_rp/cli/utils/skeleton_template/workers/cpu/endpoint.py +36 -0
  35. tetra_rp/cli/utils/skeleton_template/workers/gpu/__init__.py +19 -0
  36. tetra_rp/cli/utils/skeleton_template/workers/gpu/endpoint.py +61 -0
  37. tetra_rp/client.py +136 -33
  38. tetra_rp/config.py +29 -0
  39. tetra_rp/core/api/runpod.py +591 -39
  40. tetra_rp/core/deployment.py +232 -0
  41. tetra_rp/core/discovery.py +425 -0
  42. tetra_rp/core/exceptions.py +50 -0
  43. tetra_rp/core/resources/__init__.py +27 -9
  44. tetra_rp/core/resources/app.py +738 -0
  45. tetra_rp/core/resources/base.py +139 -4
  46. tetra_rp/core/resources/constants.py +21 -0
  47. tetra_rp/core/resources/cpu.py +115 -13
  48. tetra_rp/core/resources/gpu.py +182 -16
  49. tetra_rp/core/resources/live_serverless.py +153 -16
  50. tetra_rp/core/resources/load_balancer_sls_resource.py +440 -0
  51. tetra_rp/core/resources/network_volume.py +126 -31
  52. tetra_rp/core/resources/resource_manager.py +436 -35
  53. tetra_rp/core/resources/serverless.py +537 -120
  54. tetra_rp/core/resources/serverless_cpu.py +201 -0
  55. tetra_rp/core/resources/template.py +1 -59
  56. tetra_rp/core/utils/constants.py +10 -0
  57. tetra_rp/core/utils/file_lock.py +260 -0
  58. tetra_rp/core/utils/http.py +67 -0
  59. tetra_rp/core/utils/lru_cache.py +75 -0
  60. tetra_rp/core/utils/singleton.py +36 -1
  61. tetra_rp/core/validation.py +44 -0
  62. tetra_rp/execute_class.py +301 -0
  63. tetra_rp/protos/remote_execution.py +98 -9
  64. tetra_rp/runtime/__init__.py +1 -0
  65. tetra_rp/runtime/circuit_breaker.py +274 -0
  66. tetra_rp/runtime/config.py +12 -0
  67. tetra_rp/runtime/exceptions.py +49 -0
  68. tetra_rp/runtime/generic_handler.py +206 -0
  69. tetra_rp/runtime/lb_handler.py +189 -0
  70. tetra_rp/runtime/load_balancer.py +160 -0
  71. tetra_rp/runtime/manifest_fetcher.py +192 -0
  72. tetra_rp/runtime/metrics.py +325 -0
  73. tetra_rp/runtime/models.py +73 -0
  74. tetra_rp/runtime/mothership_provisioner.py +512 -0
  75. tetra_rp/runtime/production_wrapper.py +266 -0
  76. tetra_rp/runtime/reliability_config.py +149 -0
  77. tetra_rp/runtime/retry_manager.py +118 -0
  78. tetra_rp/runtime/serialization.py +124 -0
  79. tetra_rp/runtime/service_registry.py +346 -0
  80. tetra_rp/runtime/state_manager_client.py +248 -0
  81. tetra_rp/stubs/live_serverless.py +35 -17
  82. tetra_rp/stubs/load_balancer_sls.py +357 -0
  83. tetra_rp/stubs/registry.py +145 -19
  84. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/METADATA +398 -60
  85. tetra_rp-0.24.0.dist-info/RECORD +99 -0
  86. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/WHEEL +1 -1
  87. tetra_rp-0.24.0.dist-info/entry_points.txt +2 -0
  88. tetra_rp/core/pool/cluster_manager.py +0 -177
  89. tetra_rp/core/pool/dataclass.py +0 -18
  90. tetra_rp/core/pool/ex.py +0 -38
  91. tetra_rp/core/pool/job.py +0 -22
  92. tetra_rp/core/pool/worker.py +0 -19
  93. tetra_rp/core/resources/utils.py +0 -50
  94. tetra_rp/core/utils/json.py +0 -33
  95. tetra_rp-0.6.0.dist-info/RECORD +0 -39
  96. /tetra_rp/{core/pool → cli}/__init__.py +0 -0
  97. {tetra_rp-0.6.0.dist-info → tetra_rp-0.24.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,512 @@
1
+ """Mothership auto-provisioning logic with manifest reconciliation."""
2
+
3
+ import hashlib
4
+ import json
5
+ import logging
6
+ import os
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from tetra_rp.core.resources.base import DeployableResource
12
+ from tetra_rp.core.resources.constants import ENDPOINT_DOMAIN
13
+ from tetra_rp.core.resources.resource_manager import ResourceManager
14
+
15
+ from .state_manager_client import StateManagerClient
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class ManifestDiff:
    """Resource names grouped by the action manifest reconciliation implies."""

    # Names to deploy.
    new: List[str]
    # Names to update.
    changed: List[str]
    # Names to delete.
    removed: List[str]
    # Names to leave untouched.
    unchanged: List[str]
28
+
29
+
30
def get_mothership_url() -> str:
    """Build the mothership URL from the RUNPOD_ENDPOINT_ID env var.

    Returns:
        URL of the form https://{endpoint_id}.{ENDPOINT_DOMAIN}

    Raises:
        RuntimeError: If RUNPOD_ENDPOINT_ID is unset or empty
    """
    endpoint_id = os.environ.get("RUNPOD_ENDPOINT_ID")
    if endpoint_id:
        return f"https://{endpoint_id}.{ENDPOINT_DOMAIN}"
    raise RuntimeError("RUNPOD_ENDPOINT_ID environment variable not set")
43
+
44
+
45
def is_mothership() -> bool:
    """Report whether this endpoint is flagged as the mothership.

    Returns:
        True when the FLASH_IS_MOTHERSHIP env var equals 'true' (compared
        after lower-casing); False when it is unset or has any other value
    """
    flag = os.environ.get("FLASH_IS_MOTHERSHIP", "")
    return flag.lower() == "true"
52
+
53
+
54
def load_manifest(manifest_path: Optional[Path] = None) -> Dict[str, Any]:
    """Load flash_manifest.json from the first candidate location that parses.

    Candidates are tried in this order:
      1. ``manifest_path`` (if given)
      2. the path in the ``FLASH_MANIFEST_PATH`` environment variable (if set)
      3. ``flash_manifest.json`` two levels above this module's directory
      4. ``flash_manifest.json`` in the current working directory

    A candidate that exists but cannot be read or parsed is logged as a
    warning and skipped, so a later candidate can still succeed.

    Args:
        manifest_path: Explicit path to manifest (a Path, or a path string).
            The env var and auto-detected locations are still tried if it
            is missing or cannot be loaded.

    Returns:
        Manifest dictionary

    Raises:
        FileNotFoundError: If no candidate could be loaded. The message lists
            every searched path and, when applicable, the candidates that
            existed but failed to load.
    """
    candidates = []

    # Explicit path
    if manifest_path:
        candidates.append(Path(manifest_path))

    # Environment variable
    env_path = os.getenv("FLASH_MANIFEST_PATH")
    if env_path:
        candidates.append(Path(env_path))

    # Auto-detection: two levels above this module's directory, then cwd
    candidates.append(Path(__file__).parent.parent.parent / "flash_manifest.json")
    candidates.append(Path.cwd() / "flash_manifest.json")

    load_failures = []
    for path in candidates:
        if not path.exists():
            continue
        try:
            # JSON is UTF-8 by specification; do not rely on the locale's
            # default encoding, which differs across platforms.
            with open(path, encoding="utf-8") as f:
                manifest_dict = json.load(f)
        except Exception as e:
            # Best effort: remember why this candidate failed, try the next.
            logger.warning(f"Failed to load manifest from {path}: {e}")
            load_failures.append(f"{path}: {e}")
            continue
        logger.debug(f"Manifest loaded from {path}")
        return manifest_dict

    message = f"flash_manifest.json not found. Searched paths: {candidates}"
    if load_failures:
        # Distinguish "nothing there" from "there, but unreadable/invalid".
        message += f". Found but failed to load: {load_failures}"
    raise FileNotFoundError(message)
101
+
102
+
103
def compute_resource_hash(resource_data: Dict[str, Any]) -> str:
    """Fingerprint a resource's manifest config for drift detection.

    Args:
        resource_data: Resource configuration from manifest

    Returns:
        Hex-encoded SHA-256 digest of the config serialized as key-sorted JSON
    """
    # Sorting keys makes the serialization independent of dict key order.
    canonical = json.dumps(resource_data, sort_keys=True).encode()
    digest = hashlib.sha256()
    digest.update(canonical)
    return digest.hexdigest()
115
+
116
+
117
def filter_resources_by_manifest(
    all_resources: Dict[str, DeployableResource],
    manifest: Dict[str, Any],
) -> Dict[str, DeployableResource]:
    """Keep only the cached resources that the manifest still declares.

    A cached entry is dropped when any of the following holds:
      - it has no (truthy) ``name`` attribute
      - its name is not a key of the manifest's "resources" mapping
      - the manifest entry has a "resource_type" that differs from the
        cached object's class name

    Each drop is logged, followed by one summary line when anything was
    dropped.

    Args:
        all_resources: All resources from ResourceManager cache
        manifest: Current deployment manifest

    Returns:
        New dict with the surviving entries, keyed as in ``all_resources``
    """
    declared = manifest.get("resources", {})
    kept = {}
    dropped = 0

    for cache_key, cached in all_resources.items():
        name = getattr(cached, "name", None)

        if not name:
            logger.warning(f"Skipping cached resource without name: {cache_key}")
            dropped += 1
        elif name not in declared:
            logger.info(
                f"Removing stale cached resource '{name}' "
                f"(not in current manifest)"
            )
            dropped += 1
        else:
            # A manifest entry without a resource_type matches any class.
            wanted_type = declared[name].get("resource_type")
            cached_type = cached.__class__.__name__
            if wanted_type and wanted_type != cached_type:
                logger.warning(
                    f"Removing stale cached resource '{name}' "
                    f"(type mismatch: cached={cached_type}, manifest={wanted_type})"
                )
                dropped += 1
            else:
                kept[cache_key] = cached

    if dropped > 0:
        logger.info(
            f"Cache validation: Removed {dropped} stale "
            f"resource(s) not matching manifest"
        )

    return kept
180
+
181
+
182
def reconcile_manifests(
    local_manifest: Dict[str, Any],
    persisted_manifest: Optional[Dict[str, Any]],
) -> ManifestDiff:
    """Classify resources by comparing the local and persisted manifests.

    A resource is "new" when only the local manifest has it, "removed" when
    only the persisted manifest has it, and otherwise "changed" or
    "unchanged" depending on whether the hash of the local config equals
    the persisted entry's "config_hash".

    Args:
        local_manifest: Current manifest from flash_manifest.json
        persisted_manifest: Last known manifest from State Manager; a falsy
            value (e.g. None on first boot) is treated as having no resources

    Returns:
        ManifestDiff with categorized resource names
    """
    local_resources = local_manifest.get("resources", {})
    if persisted_manifest:
        persisted_resources = persisted_manifest.get("resources", {})
    else:
        persisted_resources = {}

    added, modified, same = [], [], []
    for name, local_data in local_resources.items():
        if name not in persisted_resources:
            added.append(name)
            continue

        # A persisted entry lacking "config_hash" yields None here, which
        # never equals a real digest, so the resource counts as changed.
        current_hash = compute_resource_hash(local_data)
        stored_hash = persisted_resources[name].get("config_hash")
        if current_hash == stored_hash:
            same.append(name)
        else:
            modified.append(name)

    gone = [name for name in persisted_resources if name not in local_resources]

    return ManifestDiff(new=added, changed=modified, removed=gone, unchanged=same)
221
+
222
+
223
def create_resource_from_manifest(
    resource_name: str,
    resource_data: Dict[str, Any],
    mothership_url: str = "",
    flash_environment_id: Optional[str] = None,
) -> DeployableResource:
    """Build a DeployableResource config object from one manifest entry.

    The resource class is chosen by the entry's "resource_type"
    (default "ServerlessResource"). The resource is constructed with a name,
    an env dict, and whichever deployment settings the entry provides.

    Args:
        resource_name: Name of the resource
        resource_data: Resource configuration from manifest
        mothership_url: Accepted for future use; not read by this function
        flash_environment_id: Optional flash environment ID to attach

    Returns:
        Configured DeployableResource ready for deployment

    Raises:
        ValueError: If resource type not supported
    """
    from tetra_rp.core.resources.live_serverless import (
        CpuLiveLoadBalancer,
        CpuLiveServerless,
        LiveLoadBalancer,
        LiveServerless,
    )
    from tetra_rp.core.resources.load_balancer_sls_resource import (
        LoadBalancerSlsResource,
    )
    from tetra_rp.core.resources.serverless import ServerlessResource

    # Single source of truth for both validation and construction.
    supported = (
        ("ServerlessResource", ServerlessResource),
        ("LiveServerless", LiveServerless),
        ("CpuLiveServerless", CpuLiveServerless),
        ("LoadBalancerSlsResource", LoadBalancerSlsResource),
        ("LiveLoadBalancer", LiveLoadBalancer),
        ("CpuLiveLoadBalancer", CpuLiveLoadBalancer),
    )

    resource_type = resource_data.get("resource_type", "ServerlessResource")
    resource_cls = next(
        (cls for type_name, cls in supported if type_name == resource_type),
        None,
    )
    if resource_cls is None:
        raise ValueError(
            f"Unsupported resource type for auto-provisioning: {resource_type}"
        )

    env = {"FLASH_RESOURCE_NAME": resource_name}

    # FLASH_MOTHERSHIP_ID is only added when RUNPOD_ENDPOINT_ID is set (i.e.
    # when running inside the mothership). During CLI provisioning it is not
    # set, and the key is omitted rather than set to None, because a None
    # value fails Pydantic validation while a missing key does not.
    mothership_id = os.getenv("RUNPOD_ENDPOINT_ID")
    if mothership_id:
        env["FLASH_MOTHERSHIP_ID"] = mothership_id

    # Mothership-specific environment variables
    if resource_data.get("is_mothership"):
        env["FLASH_IS_MOTHERSHIP"] = "true"
        for manifest_key, env_key in (
            ("main_file", "FLASH_MAIN_FILE"),
            ("app_variable", "FLASH_APP_VARIABLE"),
        ):
            if manifest_key in resource_data:
                env[env_key] = resource_data[manifest_key]

    # test-mothership deployments (flagged via FLASH_IS_TEST_MOTHERSHIP) get
    # a "tmp-" name prefix unless the name already carries it.
    is_test_mothership = os.getenv("FLASH_IS_TEST_MOTHERSHIP", "").lower() == "true"
    prefixed_name = resource_name
    if is_test_mothership and not resource_name.startswith("tmp-"):
        prefixed_name = f"tmp-{resource_name}"
        logger.info(f"Test mode: Using temporary name '{prefixed_name}'")

    deployment_kwargs = {"name": prefixed_name, "env": env}

    if flash_environment_id:
        deployment_kwargs["flashEnvironmentId"] = flash_environment_id

    # imageName takes precedence over templateId when both are present.
    if "imageName" in resource_data:
        deployment_kwargs["imageName"] = resource_data["imageName"]
    elif "templateId" in resource_data:
        deployment_kwargs["templateId"] = resource_data["templateId"]

    # Optional GPU/worker settings are forwarded only when present.
    for key in ("gpuIds", "workersMin", "workersMax"):
        if key in resource_data:
            deployment_kwargs[key] = resource_data[key]

    # No template is passed to the constructor; resources are left to create
    # their own.
    return resource_cls(**deployment_kwargs)
348
+
349
+
350
async def _provision_resource(
    manager: ResourceManager,
    state_client: StateManagerClient,
    mothership_id: str,
    mothership_url: str,
    resource_name: str,
    resource_data: Dict[str, Any],
    is_new: bool,
) -> bool:
    """Deploy (is_new=True) or update one resource and record the outcome.

    On success the State Manager entry receives the config hash, endpoint
    URL and a "deployed"/"updated" status. On failure the error is logged
    and a best-effort "failed" status is written instead.

    Returns:
        True on success, False if deployment or the state update failed
    """
    if is_new:
        verb, status = "deploy", "deployed"
        success_message = f"Deployed new resource: {resource_name}"
    else:
        verb, status = "update", "updated"
        success_message = f"Updated resource: {resource_name}"

    try:
        config = create_resource_from_manifest(
            resource_name, resource_data, mothership_url
        )
        deployed = await manager.get_or_deploy_resource(config)

        await state_client.update_resource_state(
            mothership_id,
            resource_name,
            {
                "config_hash": compute_resource_hash(resource_data),
                "endpoint_url": deployed.endpoint_url
                if hasattr(deployed, "endpoint_url")
                else deployed.url,
                "status": status,
            },
        )
        logger.info(success_message)
        return True

    except Exception as e:
        logger.error(f"Failed to {verb} {resource_name}: {e}")
        try:
            await state_client.update_resource_state(
                mothership_id,
                resource_name,
                {"status": "failed", "error": str(e)},
            )
        except Exception as sm_error:
            logger.error(
                f"Failed to update State Manager for {resource_name}: {sm_error}"
            )
        return False


async def _remove_resource(
    manager: ResourceManager,
    state_client: StateManagerClient,
    mothership_id: str,
    resource_name: str,
) -> bool:
    """Undeploy one resource that is no longer in the local manifest.

    Returns:
        False if the undeploy call reported failure or raised; True
        otherwise (including when the resource is unknown to the
        ResourceManager, which is logged as a warning)
    """
    try:
        matches = manager.find_resources_by_name(resource_name)
        if not matches:
            logger.warning(
                f"Removed resource {resource_name} not found in ResourceManager"
            )
            return True

        # Only the first match is undeployed.
        resource_id, _ = matches[0]
        result = await manager.undeploy_resource(resource_id, resource_name)
        if not result["success"]:
            logger.error(f"Failed to delete {resource_name}: {result['message']}")
            return False

        try:
            await state_client.remove_resource_state(mothership_id, resource_name)
        except Exception as sm_error:
            # The endpoint is already gone; a stale state entry is logged
            # but does not turn the removal into a failure.
            logger.error(
                f"Failed to remove {resource_name} from State Manager: {sm_error}"
            )
        logger.info(f"Deleted removed resource: {resource_name}")
        return True

    except Exception as e:
        logger.error(f"Failed to delete {resource_name}: {e}")
        return False


async def reconcile_children(
    manifest_path: Path,
    mothership_url: str,
    state_client: StateManagerClient,
) -> None:
    """Reconcile all child resources based on manifest differences.

    Loads the local manifest, fetches the persisted manifest from the State
    Manager, diffs the two, then deploys new resources, updates changed
    ones and undeploys removed ones. Each resource is handled independently:
    one failure is logged and does not stop the others. The closing summary
    reports whether any step failed.

    This coroutine does not raise ``Exception`` subclasses; any unexpected
    error is logged with a traceback. It returns early (after logging an
    error) when RUNPOD_ENDPOINT_ID is not set.

    Args:
        manifest_path: Path to flash_manifest.json
        mothership_url: Mothership endpoint URL, forwarded to
            create_resource_from_manifest
        state_client: State Manager API client
    """
    try:
        # Load local manifest
        local_manifest = load_manifest(manifest_path)

        # The persisted manifest is keyed by this endpoint's ID
        mothership_id = os.getenv("RUNPOD_ENDPOINT_ID")
        if not mothership_id:
            logger.error("RUNPOD_ENDPOINT_ID not set, cannot load persisted manifest")
            return

        persisted_manifest = await state_client.get_persisted_manifest(mothership_id)

        local_resources = local_manifest.get("resources", {})
        logger.info(
            f"Starting reconciliation: {len(local_resources)} manifest resources"
        )

        diff = reconcile_manifests(local_manifest, persisted_manifest)

        logger.info(
            f"Reconciliation plan: {len(diff.new)} to deploy, "
            f"{len(diff.changed)} to update, "
            f"{len(diff.removed)} to remove, "
            f"{len(diff.unchanged)} unchanged"
        )

        manager = ResourceManager()

        # Diagnostic only: report how many cached resources still match the
        # manifest. The filtered result is logged, not used for deployment.
        all_cached = manager.list_all_resources()
        if all_cached:
            valid_cached = filter_resources_by_manifest(all_cached, local_manifest)
            logger.info(
                f"Cache validation: {len(all_cached)} cached, "
                f"{len(valid_cached)} valid, "
                f"{len(local_resources)} in manifest"
            )

        failed = []

        # Deploy NEW resources
        for resource_name in diff.new:
            ok = await _provision_resource(
                manager,
                state_client,
                mothership_id,
                mothership_url,
                resource_name,
                local_resources[resource_name],
                is_new=True,
            )
            if not ok:
                failed.append(resource_name)

        # Update CHANGED resources
        for resource_name in diff.changed:
            ok = await _provision_resource(
                manager,
                state_client,
                mothership_id,
                mothership_url,
                resource_name,
                local_resources[resource_name],
                is_new=False,
            )
            if not ok:
                failed.append(resource_name)

        # Delete REMOVED resources
        for resource_name in diff.removed:
            ok = await _remove_resource(
                manager, state_client, mothership_id, resource_name
            )
            if not ok:
                failed.append(resource_name)

        logger.info("=" * 60)
        if failed:
            # Do not claim success when some resources could not be reconciled.
            logger.warning(
                f"Provisioning finished with {len(failed)} failure(s): {failed}"
            )
        else:
            logger.info("Provisioning complete - All child endpoints deployed")
        logger.info(f"Total endpoints: {len(local_resources)}")
        logger.info("Test phase: Manifest updated with child endpoint URLs")
        logger.info("=" * 60)

    except Exception as e:
        logger.error(f"Provisioning failed: {e}", exc_info=True)