supervaizer 0.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervaizer/__init__.py +97 -0
- supervaizer/__version__.py +10 -0
- supervaizer/account.py +308 -0
- supervaizer/account_service.py +93 -0
- supervaizer/admin/routes.py +1293 -0
- supervaizer/admin/static/js/job-start-form.js +373 -0
- supervaizer/admin/templates/agent_detail.html +145 -0
- supervaizer/admin/templates/agents.html +249 -0
- supervaizer/admin/templates/agents_grid.html +82 -0
- supervaizer/admin/templates/base.html +233 -0
- supervaizer/admin/templates/case_detail.html +230 -0
- supervaizer/admin/templates/cases_list.html +182 -0
- supervaizer/admin/templates/cases_table.html +134 -0
- supervaizer/admin/templates/console.html +389 -0
- supervaizer/admin/templates/dashboard.html +153 -0
- supervaizer/admin/templates/job_detail.html +192 -0
- supervaizer/admin/templates/job_start_test.html +109 -0
- supervaizer/admin/templates/jobs_list.html +180 -0
- supervaizer/admin/templates/jobs_table.html +122 -0
- supervaizer/admin/templates/navigation.html +163 -0
- supervaizer/admin/templates/recent_activity.html +81 -0
- supervaizer/admin/templates/server.html +105 -0
- supervaizer/admin/templates/server_status_cards.html +121 -0
- supervaizer/admin/templates/supervaize_instructions.html +212 -0
- supervaizer/agent.py +956 -0
- supervaizer/case.py +432 -0
- supervaizer/cli.py +395 -0
- supervaizer/common.py +324 -0
- supervaizer/deploy/__init__.py +16 -0
- supervaizer/deploy/cli.py +305 -0
- supervaizer/deploy/commands/__init__.py +9 -0
- supervaizer/deploy/commands/clean.py +294 -0
- supervaizer/deploy/commands/down.py +119 -0
- supervaizer/deploy/commands/local.py +460 -0
- supervaizer/deploy/commands/plan.py +167 -0
- supervaizer/deploy/commands/status.py +169 -0
- supervaizer/deploy/commands/up.py +281 -0
- supervaizer/deploy/docker.py +377 -0
- supervaizer/deploy/driver_factory.py +42 -0
- supervaizer/deploy/drivers/__init__.py +39 -0
- supervaizer/deploy/drivers/aws_app_runner.py +607 -0
- supervaizer/deploy/drivers/base.py +196 -0
- supervaizer/deploy/drivers/cloud_run.py +570 -0
- supervaizer/deploy/drivers/do_app_platform.py +504 -0
- supervaizer/deploy/health.py +404 -0
- supervaizer/deploy/state.py +210 -0
- supervaizer/deploy/templates/Dockerfile.template +44 -0
- supervaizer/deploy/templates/debug_env.py +69 -0
- supervaizer/deploy/templates/docker-compose.yml.template +37 -0
- supervaizer/deploy/templates/dockerignore.template +66 -0
- supervaizer/deploy/templates/entrypoint.sh +20 -0
- supervaizer/deploy/utils.py +52 -0
- supervaizer/event.py +181 -0
- supervaizer/examples/controller_template.py +196 -0
- supervaizer/instructions.py +145 -0
- supervaizer/job.py +392 -0
- supervaizer/job_service.py +156 -0
- supervaizer/lifecycle.py +417 -0
- supervaizer/parameter.py +233 -0
- supervaizer/protocol/__init__.py +11 -0
- supervaizer/protocol/a2a/__init__.py +21 -0
- supervaizer/protocol/a2a/model.py +227 -0
- supervaizer/protocol/a2a/routes.py +99 -0
- supervaizer/py.typed +1 -0
- supervaizer/routes.py +917 -0
- supervaizer/server.py +553 -0
- supervaizer/server_utils.py +54 -0
- supervaizer/storage.py +462 -0
- supervaizer/telemetry.py +81 -0
- supervaizer/utils/__init__.py +16 -0
- supervaizer/utils/version_check.py +56 -0
- supervaizer-0.10.5.dist-info/METADATA +317 -0
- supervaizer-0.10.5.dist-info/RECORD +76 -0
- supervaizer-0.10.5.dist-info/WHEEL +4 -0
- supervaizer-0.10.5.dist-info/entry_points.txt +2 -0
- supervaizer-0.10.5.dist-info/licenses/LICENSE.md +346 -0
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
# Copyright (c) 2024-2025 Alain Prasquier - Supervaize.com. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
|
|
4
|
+
# If a copy of the MPL was not distributed with this file, you can obtain one at
|
|
5
|
+
# https://mozilla.org/MPL/2.0/.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
AWS App Runner Driver
|
|
9
|
+
|
|
10
|
+
This module implements deployment to AWS App Runner.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import subprocess
|
|
14
|
+
import time
|
|
15
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
|
|
19
|
+
from supervaizer.common import log
|
|
20
|
+
from supervaizer.deploy.drivers.base import (
|
|
21
|
+
ActionType,
|
|
22
|
+
BaseDriver,
|
|
23
|
+
DeploymentPlan,
|
|
24
|
+
DeploymentResult,
|
|
25
|
+
ResourceAction,
|
|
26
|
+
ResourceType,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
console = Console()
|
|
30
|
+
|
|
31
|
+
# Conditional imports for AWS libraries
|
|
32
|
+
if TYPE_CHECKING:
    # Static type checkers always resolve the real AWS SDK symbols.
    import boto3
    from botocore.exceptions import ClientError, NoCredentialsError

    AWS_AVAILABLE = True
else:
    try:
        import boto3
        from botocore.exceptions import ClientError, NoCredentialsError

        AWS_AVAILABLE = True
    except ImportError:
        # The AWS SDK is not installed; AWSAppRunnerDriver.__init__ checks
        # this flag and raises ImportError before any client is created.
        AWS_AVAILABLE = False

        # Create dummy classes for type hints when not available
        # (they keep the names used in `except` clauses and annotations defined).
        class ClientError(Exception):
            pass

        class NoCredentialsError(Exception):
            pass

        class boto3:
            # Placeholder for the real module; client() accepts anything and
            # returns None.
            @staticmethod
            def client(*args: Any, **kwargs: Any) -> Any:
                pass
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AWSAppRunnerDriver(BaseDriver):
|
|
60
|
+
"""Driver for deploying to AWS App Runner."""
|
|
61
|
+
|
|
62
|
+
def __init__(self, region: str, project_id: Optional[str] = None):
    """Set up the driver and the AWS clients it uses.

    Args:
        region: AWS region passed to every client created here.
        project_id: Optional identifier forwarded unchanged to ``BaseDriver``.

    Raises:
        ImportError: If the AWS SDK could not be imported.
        RuntimeError: If boto3 reports that no credentials are available.
    """
    if not AWS_AVAILABLE:
        raise ImportError(
            "AWS libraries not available. Install with: pip install boto3"
        )

    super().__init__(region, project_id)

    def _regional_client(service: str) -> Any:
        # Every client is bound to the same region.
        return boto3.client(service, region_name=region)

    # Initialize AWS clients
    try:
        self.apprunner_client = _regional_client("apprunner")
        self.ecr_client = _regional_client("ecr")
        self.secrets_client = _regional_client("secretsmanager")
        self.iam_client = _regional_client("iam")
    except NoCredentialsError:
        raise RuntimeError("AWS credentials not found")
|
|
79
|
+
|
|
80
|
+
def plan_deployment(
    self,
    service_name: str,
    environment: str,
    image_tag: str,
    port: int = 8000,
    env_vars: Optional[Dict[str, str]] = None,
    secrets: Optional[Dict[str, str]] = None,
) -> DeploymentPlan:
    """Plan deployment changes without applying them.

    Only ``describe_*`` calls are issued; nothing is created or modified.

    Args:
        service_name: Base name of the service.
        environment: Environment name, combined with ``service_name`` to form
            the service key and the ECR repository name.
        image_tag: Tag of the image that would be deployed.
        port: Port recorded as the target port in the plan.
        env_vars: Target environment variables (``None`` is planned as ``{}``).
        secrets: Target secrets keyed by secret name (``None`` is planned as ``{}``).

    Returns:
        A ``DeploymentPlan`` listing one action per inspected resource together
        with the current image/URL/status when the service already exists.

    Raises:
        ClientError: Re-raised when a lookup fails with an error code other
            than the "not found" code handled for that resource.
    """
    full_service_name = self.get_service_key(service_name, environment)

    actions: List[ResourceAction] = []
    current_image = None
    current_url = None
    current_status = None

    # Check if service exists
    # NOTE(review): real App Runner service ARNs appear to end with a
    # generated service id; confirm this constructed ARN resolves.
    try:
        response = self.apprunner_client.describe_service(
            ServiceArn=f"arn:aws:apprunner:{self.region}:{self._get_account_id()}:service/{full_service_name}"
        )
        service = response["Service"]
        # The deployed image is reported under SourceConfiguration; ServiceUrl
        # is the public hostname and must not be reported as the image.
        current_image = (
            service.get("SourceConfiguration", {})
            .get("ImageRepository", {})
            .get("ImageIdentifier")
        )
        current_url = service["ServiceUrl"]
        current_status = service["Status"]

        # The current image is not compared with the target; an existing
        # service is always planned as an UPDATE.
        actions.append(
            ResourceAction(
                resource_type=ResourceType.SERVICE,
                action_type=ActionType.UPDATE,
                resource_name=full_service_name,
                description=f"Update App Runner service with image {image_tag}",
            )
        )
    except ClientError as e:
        if e.response["Error"]["Code"] == "ResourceNotFoundException":
            # Service doesn't exist, need to create
            actions.append(
                ResourceAction(
                    resource_type=ResourceType.SERVICE,
                    action_type=ActionType.CREATE,
                    resource_name=full_service_name,
                    description=f"Create new App Runner service with image {image_tag}",
                )
            )
        else:
            raise

    # Check ECR repository
    repo_name = f"{service_name}-{environment}"
    try:
        self.ecr_client.describe_repositories(repositoryNames=[repo_name])
        actions.append(
            ResourceAction(
                resource_type=ResourceType.REGISTRY,
                action_type=ActionType.NOOP,
                resource_name=repo_name,
                description="ECR repository exists",
            )
        )
    except ClientError as e:
        if e.response["Error"]["Code"] == "RepositoryNotFoundException":
            actions.append(
                ResourceAction(
                    resource_type=ResourceType.REGISTRY,
                    action_type=ActionType.CREATE,
                    resource_name=repo_name,
                    description=f"Create ECR repository {repo_name}",
                )
            )
        else:
            raise

    # Check secrets (only the names matter for planning; values are not read)
    if secrets:
        for secret_name in secrets:
            try:
                self.secrets_client.describe_secret(SecretId=secret_name)
                actions.append(
                    ResourceAction(
                        resource_type=ResourceType.SECRET,
                        action_type=ActionType.UPDATE,
                        resource_name=secret_name,
                        description=f"Update secret {secret_name}",
                    )
                )
            except ClientError as e:
                if e.response["Error"]["Code"] == "ResourceNotFoundException":
                    actions.append(
                        ResourceAction(
                            resource_type=ResourceType.SECRET,
                            action_type=ActionType.CREATE,
                            resource_name=secret_name,
                            description=f"Create secret {secret_name}",
                        )
                    )
                else:
                    raise

    return DeploymentPlan(
        platform="aws-app-runner",
        service_name=service_name,
        environment=environment,
        region=self.region,
        project_id=self.project_id,
        actions=actions,
        current_image=current_image,
        current_url=current_url,
        current_status=current_status,
        target_image=image_tag,
        target_port=port,
        target_env_vars=env_vars or {},
        target_secrets=secrets or {},
    )
|
|
197
|
+
|
|
198
|
+
def deploy_service(
    self,
    service_name: str,
    environment: str,
    image_tag: str,
    port: int = 8000,
    env_vars: Optional[Dict[str, str]] = None,
    secrets: Optional[Dict[str, str]] = None,
    timeout: int = 300,
) -> DeploymentResult:
    """Deploy the service, creating or updating every resource it needs.

    Steps, in order: ensure the ECR repository, upsert secrets, upsert the
    App Runner service, wait for it to be ready, push the public URL back
    into the service and run a health check.

    Args:
        service_name: Base name of the service.
        environment: Environment name.
        image_tag: Image tag to deploy.
        port: Port passed to the service configuration.
        env_vars: Environment variables (``None`` is treated as ``{}``).
        secrets: Secrets keyed by name (``None`` is treated as ``{}``).
        timeout: Seconds to wait for the service to become ready.

    Returns:
        A successful ``DeploymentResult`` with URL, ARN, health and elapsed
        time, or a failed one carrying the error message and exception type.
        Exceptions raised inside the deployment steps are logged, not raised.
    """
    started_at = time.time()
    service_key = self.get_service_key(service_name, environment)

    try:
        # Ensure ECR repository exists
        ecr_repo = f"{service_name}-{environment}"
        self._ensure_ecr_repository(ecr_repo)

        # Create/update secrets
        if secrets:
            self._create_or_update_secrets(secrets)

        # Create/update service
        arn = self._create_or_update_service(
            service_key,
            ecr_repo,
            image_tag,
            port,
            env_vars or {},
            secrets or {},
        )

        # Wait for service to be ready
        url = self._wait_for_service_ready(arn, timeout)

        # Without a URL there is nothing to publish or probe.
        health = "unhealthy"
        if url:
            # Set SUPERVAIZER_PUBLIC_URL, then verify health
            self._set_public_url(arn, url)
            if self.verify_health(url):
                health = "healthy"

        elapsed = time.time() - started_at

        return DeploymentResult(
            success=True,
            service_url=url,
            service_id=arn,
            status="running",
            health_status=health,
            deployment_time=elapsed,
        )

    except Exception as exc:
        log.error(f"Deployment failed: {exc}")
        return DeploymentResult(
            success=False,
            error_message=str(exc),
            error_details={"exception_type": type(exc).__name__},
        )
|
|
263
|
+
|
|
264
|
+
def destroy_service(
    self,
    service_name: str,
    environment: str,
    keep_secrets: bool = False,
) -> DeploymentResult:
    """Delete the App Runner service, its ECR repository and optionally its secrets.

    Args:
        service_name: Base name of the service.
        environment: Environment name.
        keep_secrets: When ``True`` the secrets are left in place.

    Returns:
        ``status="deleted"`` on success, ``status="not_found"`` (still a
        success) when the service does not exist, or a failed result with the
        error message for any other ``ClientError`` from the service deletion.
    """
    service_key = self.get_service_key(service_name, environment)
    region = self.region
    account_id = self._get_account_id()
    arn = f"arn:aws:apprunner:{region}:{account_id}:service/{service_key}"

    try:
        # Delete service
        self.apprunner_client.delete_service(ServiceArn=arn)
        log.info(f"Deleted App Runner service: {service_key}")

        # Delete ECR repository (a failure here is logged, not fatal)
        ecr_repo = f"{service_name}-{environment}"
        try:
            # force=True: delete even if images exist
            self.ecr_client.delete_repository(repositoryName=ecr_repo, force=True)
            log.info(f"Deleted ECR repository: {ecr_repo}")
        except ClientError as repo_err:
            repo_missing = (
                repo_err.response["Error"]["Code"] == "RepositoryNotFoundException"
            )
            if not repo_missing:
                log.warning(f"Failed to delete ECR repository: {repo_err}")

        # Delete secrets if requested
        if not keep_secrets:
            self._delete_secrets(service_key)

        return DeploymentResult(success=True, status="deleted")

    except ClientError as err:
        if err.response["Error"]["Code"] != "ResourceNotFoundException":
            log.error(f"Failed to destroy service: {err}")
            return DeploymentResult(success=False, error_message=str(err))

        # Nothing to delete counts as success.
        log.warning(f"Service {service_key} not found")
        return DeploymentResult(success=True, status="not_found")
|
|
313
|
+
|
|
314
|
+
def get_service_status(
    self,
    service_name: str,
    environment: str,
) -> DeploymentResult:
    """Report the service's current status, URL and health.

    Args:
        service_name: Base name of the service.
        environment: Environment name.

    Returns:
        A successful result with URL, ARN, App Runner status and a health
        value of ``"healthy"``, ``"unhealthy"`` or ``"unknown"`` (no URL);
        a failed result with ``status="not_found"`` when the service does not
        exist, or a failed result with the error text for other ``ClientError``s.
    """
    service_key = self.get_service_key(service_name, environment)
    region = self.region
    account_id = self._get_account_id()
    arn = f"arn:aws:apprunner:{region}:{account_id}:service/{service_key}"

    try:
        service = self.apprunner_client.describe_service(ServiceArn=arn)["Service"]

        # Check health (only possible once a URL has been assigned)
        url = service["ServiceUrl"]
        if not url:
            health = "unknown"
        elif self.verify_health(url):
            health = "healthy"
        else:
            health = "unhealthy"

        return DeploymentResult(
            success=True,
            service_url=url,
            service_id=arn,
            status=service["Status"],
            health_status=health,
        )

    except ClientError as err:
        if err.response["Error"]["Code"] != "ResourceNotFoundException":
            return DeploymentResult(success=False, error_message=str(err))

        return DeploymentResult(
            success=False,
            status="not_found",
            error_message="Service not found",
        )
|
|
356
|
+
|
|
357
|
+
def verify_health(self, service_url: str, timeout: int = 60) -> bool:
    """Check service health by delegating to ``verify_health_enhanced``.

    Args:
        service_url: URL of the deployed service.
        timeout: Forwarded to ``verify_health_enhanced`` as ``timeout``.

    Returns:
        Whatever ``verify_health_enhanced`` returns for this URL.
    """
    is_healthy = self.verify_health_enhanced(service_url, timeout=timeout)
    return is_healthy
|
|
360
|
+
|
|
361
|
+
def check_prerequisites(self) -> List[str]:
    """Collect human-readable descriptions of missing prerequisites.

    Checks, in order: the ``aws`` CLI can be executed, STS accepts the
    current credentials, and the account id can be resolved.

    Returns:
        A list of problem descriptions; empty when every check passed.
    """
    problems: List[str] = []

    # Check AWS CLI
    try:
        cli = subprocess.run(
            ["aws", "--version"], capture_output=True, text=True, check=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        problems.append("AWS CLI not found or not working")
    else:
        log.debug(f"AWS CLI version: {cli.stdout}")

    # Check AWS credentials
    try:
        boto3.client("sts").get_caller_identity()
    except NoCredentialsError:
        problems.append("AWS credentials not found")
    except Exception as exc:
        problems.append(f"AWS credentials check failed: {exc}")

    # This is a simplified check - in practice you'd want to test actual permissions
    try:
        self._get_account_id()
    except Exception as exc:
        problems.append(f"Failed to get AWS account ID: {exc}")

    return problems
|
|
390
|
+
|
|
391
|
+
def _get_account_id(self) -> str:
    """Return the AWS account id reported by STS ``get_caller_identity``."""
    identity = boto3.client("sts").get_caller_identity()
    return identity["Account"]
|
|
396
|
+
|
|
397
|
+
def _ensure_ecr_repository(self, repo_name: str) -> None:
    """Create the ECR repository if it does not exist yet.

    Args:
        repo_name: Name of the repository to look up or create.

    Raises:
        ClientError: Re-raised when the lookup fails for any reason other
            than the repository being absent.
    """
    try:
        self.ecr_client.describe_repositories(repositoryNames=[repo_name])
        log.info(f"ECR repository {repo_name} exists")
    except ClientError as err:
        if err.response["Error"]["Code"] != "RepositoryNotFoundException":
            raise

        # Create repository
        self.ecr_client.create_repository(
            repositoryName=repo_name,
            imageTagMutability="MUTABLE",
            imageScanningConfiguration={"scanOnPush": True},
        )
        log.info(f"Created ECR repository: {repo_name}")
|
|
413
|
+
|
|
414
|
+
def _create_or_update_secrets(self, secrets: Dict[str, str]) -> None:
    """Upsert each secret in Secrets Manager.

    An update is attempted first; the secret is created only when the update
    reports that it does not exist.

    Args:
        secrets: Mapping of secret name to secret string value.

    Raises:
        ClientError: Re-raised for any update failure other than "not found".
    """
    for name, value in secrets.items():
        try:
            # Try to update existing secret
            self.secrets_client.update_secret(SecretId=name, SecretString=value)
            log.info(f"Updated secret {name}")
        except ClientError as err:
            if err.response["Error"]["Code"] != "ResourceNotFoundException":
                raise

            # Create new secret
            self.secrets_client.create_secret(
                Name=name,
                SecretString=value,
                Description="Secret for Supervaizer deployment",
            )
            log.info(f"Created secret {name}")
|
|
434
|
+
|
|
435
|
+
def _create_or_update_service(
    self,
    service_name: str,
    repo_name: str,
    image_tag: str,
    port: int,
    env_vars: Dict[str, str],
    secrets: Dict[str, str],
) -> str:
    """Create or update the App Runner service and return its ARN.

    An update is attempted first; the service is created only when the update
    reports ``ResourceNotFoundException``.

    Args:
        service_name: Full App Runner service name.
        repo_name: ECR repository holding the image.
        image_tag: Tag of the image to run.
        port: Container port (sent to the API as a string).
        env_vars: Plain environment variables for the container.
        secrets: Secrets keyed by name; only the names are used here, each is
            exposed to the container under that name via a Secrets Manager ARN.

    Returns:
        The constructed ARN after an update, or the ARN returned by
        ``create_service`` after a create.

    Raises:
        ClientError: Re-raised for any update failure other than "not found",
            and for any failure of the create call.
    """
    account_id = self._get_account_id()
    # NOTE(review): real App Runner service ARNs appear to end with a
    # generated service id; confirm this constructed ARN resolves.
    service_arn = (
        f"arn:aws:apprunner:{self.region}:{account_id}:service/{service_name}"
    )

    # App Runner expects plain variables and secret references as two separate
    # string-to-string maps, not as one list of Name/Value dicts.
    image_configuration: Dict[str, Any] = {"Port": str(port)}
    if env_vars:
        image_configuration["RuntimeEnvironmentVariables"] = dict(env_vars)
    if secrets:
        # NOTE(review): Secrets Manager ARNs normally carry a random suffix;
        # confirm App Runner accepts this suffix-less form.
        image_configuration["RuntimeEnvironmentSecrets"] = {
            secret_name: f"arn:aws:secretsmanager:{self.region}:{account_id}:secret:{secret_name}"
            for secret_name in secrets
        }

    # Service configuration
    # NOTE(review): private ECR images usually also need an
    # AuthenticationConfiguration.AccessRoleArn; none is supplied here.
    source_configuration = {
        "ImageRepository": {
            "ImageIdentifier": f"{account_id}.dkr.ecr.{self.region}.amazonaws.com/{repo_name}:{image_tag}",
            "ImageConfiguration": image_configuration,
            "ImageRepositoryType": "ECR",
        },
        "AutoDeploymentsEnabled": False,
    }
    instance_configuration = {
        "Cpu": "0.25 vCPU",
        "Memory": "0.5 GB",
    }
    health_check_configuration = {
        "Protocol": "HTTP",
        "Path": "/.well-known/health",
        "Interval": 10,
        "Timeout": 5,
        "HealthyThreshold": 1,
        "UnhealthyThreshold": 5,
    }

    try:
        # Try to update existing service
        self.apprunner_client.update_service(
            ServiceArn=service_arn,
            SourceConfiguration=source_configuration,
            InstanceConfiguration=instance_configuration,
            HealthCheckConfiguration=health_check_configuration,
        )
        log.info(f"Updated App Runner service: {service_name}")
        return service_arn

    except ClientError as e:
        if e.response["Error"]["Code"] != "ResourceNotFoundException":
            raise

        # Create new service
        response = self.apprunner_client.create_service(
            ServiceName=service_name,
            SourceConfiguration=source_configuration,
            InstanceConfiguration=instance_configuration,
            HealthCheckConfiguration=health_check_configuration,
        )
        log.info(f"Created App Runner service: {service_name}")
        return response["Service"]["ServiceArn"]
|
|
512
|
+
|
|
513
|
+
def _wait_for_service_ready(
    self, service_arn: str, timeout: int, poll_interval: float = 10.0
) -> Optional[str]:
    """Poll the service until it is running and return its URL.

    Args:
        service_arn: ARN of the service to poll.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The service URL once the status is ``RUNNING`` and a URL is present.

    Raises:
        RuntimeError: As soon as the service reports a failed status.
        TimeoutError: If the service is not ready within ``timeout`` seconds.
    """
    failed_statuses = ("CREATE_FAILED", "UPDATE_FAILED", "DELETE_FAILED")
    start_time = time.time()

    while time.time() - start_time < timeout:
        service = None
        try:
            service = self.apprunner_client.describe_service(
                ServiceArn=service_arn
            )["Service"]
        except Exception as e:
            # A failed lookup is treated as "not ready yet" and retried.
            log.debug(f"Waiting for service to be ready: {e}")

        # The status is evaluated outside the try block so that the failure
        # below aborts the wait instead of being swallowed as a retry.
        if service is not None:
            status = service.get("Status")
            service_url = service.get("ServiceUrl")

            if status == "RUNNING" and service_url:
                log.info(f"Service ready at: {service_url}")
                return service_url

            if status in failed_statuses:
                raise RuntimeError(f"Service failed with status: {status}")

        time.sleep(poll_interval)

    raise TimeoutError(f"Service did not become ready within {timeout} seconds")
|
|
542
|
+
|
|
543
|
+
def _set_public_url(self, service_arn: str, public_url: str) -> None:
    """Set the SUPERVAIZER_PUBLIC_URL environment variable on the service.

    The current source configuration is read back, the variable is added or
    overwritten, and the service is updated with the merged configuration.
    This is best-effort: any failure is logged and not raised.

    Args:
        service_arn: ARN of the service to update.
        public_url: Value to store in ``SUPERVAIZER_PUBLIC_URL``.
    """
    try:
        # Get current service configuration
        response = self.apprunner_client.describe_service(ServiceArn=service_arn)
        source_config = response["Service"]["SourceConfiguration"]
        image_repository = source_config["ImageRepository"]
        image_config = image_repository.get("ImageConfiguration", {})

        # App Runner reports runtime variables as a name -> value map, so
        # assigning the key both replaces an old value and adds a new one.
        env_vars = dict(image_config.get("RuntimeEnvironmentVariables") or {})
        env_vars["SUPERVAIZER_PUBLIC_URL"] = public_url

        # Update service, keeping every other setting as it was read back
        self.apprunner_client.update_service(
            ServiceArn=service_arn,
            SourceConfiguration={
                **source_config,
                "ImageRepository": {
                    **image_repository,
                    "ImageConfiguration": {
                        **image_config,
                        "RuntimeEnvironmentVariables": env_vars,
                    },
                },
            },
        )

        log.info(f"Set SUPERVAIZER_PUBLIC_URL to {public_url}")

    except Exception as e:
        log.error(f"Failed to set SUPERVAIZER_PUBLIC_URL: {e}")
|
|
591
|
+
|
|
592
|
+
def _delete_secrets(self, service_name: str) -> None:
    """Delete the conventionally named secrets of a service.

    Two names are tried: ``<service_name>-api-key`` and
    ``<service_name>-rsa-key``. Missing secrets are ignored; other
    ``ClientError``s are logged as warnings.

    Args:
        service_name: Prefix used to build the secret names.
    """
    for suffix in ("api-key", "rsa-key"):
        secret_name = f"{service_name}-{suffix}"
        try:
            self.secrets_client.delete_secret(
                SecretId=secret_name, ForceDeleteWithoutRecovery=True
            )
            log.info(f"Deleted secret {secret_name}")
        except ClientError as err:
            already_gone = (
                err.response["Error"]["Code"] == "ResourceNotFoundException"
            )
            if not already_gone:
                log.warning(f"Failed to delete secret {secret_name}: {err}")
|