supervaizer 0.9.8__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervaizer/__init__.py +11 -2
- supervaizer/__version__.py +1 -1
- supervaizer/account.py +4 -0
- supervaizer/account_service.py +7 -1
- supervaizer/admin/routes.py +24 -8
- supervaizer/admin/templates/agents.html +74 -0
- supervaizer/admin/templates/agents_grid.html +5 -3
- supervaizer/admin/templates/navigation.html +11 -1
- supervaizer/admin/templates/supervaize_instructions.html +212 -0
- supervaizer/agent.py +28 -6
- supervaizer/case.py +46 -14
- supervaizer/cli.py +247 -7
- supervaizer/common.py +45 -4
- supervaizer/deploy/__init__.py +16 -0
- supervaizer/deploy/cli.py +296 -0
- supervaizer/deploy/commands/__init__.py +9 -0
- supervaizer/deploy/commands/clean.py +294 -0
- supervaizer/deploy/commands/down.py +119 -0
- supervaizer/deploy/commands/local.py +460 -0
- supervaizer/deploy/commands/plan.py +167 -0
- supervaizer/deploy/commands/status.py +169 -0
- supervaizer/deploy/commands/up.py +281 -0
- supervaizer/deploy/docker.py +378 -0
- supervaizer/deploy/driver_factory.py +42 -0
- supervaizer/deploy/drivers/__init__.py +39 -0
- supervaizer/deploy/drivers/aws_app_runner.py +607 -0
- supervaizer/deploy/drivers/base.py +196 -0
- supervaizer/deploy/drivers/cloud_run.py +570 -0
- supervaizer/deploy/drivers/do_app_platform.py +504 -0
- supervaizer/deploy/health.py +404 -0
- supervaizer/deploy/state.py +210 -0
- supervaizer/deploy/templates/Dockerfile.template +44 -0
- supervaizer/deploy/templates/debug_env.py +69 -0
- supervaizer/deploy/templates/docker-compose.yml.template +37 -0
- supervaizer/deploy/templates/dockerignore.template +66 -0
- supervaizer/deploy/templates/entrypoint.sh +20 -0
- supervaizer/deploy/utils.py +52 -0
- supervaizer/examples/controller_template.py +1 -1
- supervaizer/job.py +18 -5
- supervaizer/job_service.py +6 -5
- supervaizer/parameter.py +13 -1
- supervaizer/protocol/__init__.py +2 -2
- supervaizer/protocol/a2a/routes.py +1 -1
- supervaizer/routes.py +141 -17
- supervaizer/server.py +5 -11
- supervaizer/utils/__init__.py +16 -0
- supervaizer/utils/version_check.py +56 -0
- {supervaizer-0.9.8.dist-info → supervaizer-0.10.1.dist-info}/METADATA +105 -34
- supervaizer-0.10.1.dist-info/RECORD +76 -0
- {supervaizer-0.9.8.dist-info → supervaizer-0.10.1.dist-info}/WHEEL +1 -1
- supervaizer/protocol/acp/__init__.py +0 -21
- supervaizer/protocol/acp/model.py +0 -198
- supervaizer/protocol/acp/routes.py +0 -74
- supervaizer-0.9.8.dist-info/RECORD +0 -52
- {supervaizer-0.9.8.dist-info → supervaizer-0.10.1.dist-info}/entry_points.txt +0 -0
- {supervaizer-0.9.8.dist-info → supervaizer-0.10.1.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
# Copyright (c) 2024-2025 Alain Prasquier - Supervaize.com. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
|
|
4
|
+
# If a copy of the MPL was not distributed with this file, you can obtain one at
|
|
5
|
+
# https://mozilla.org/MPL/2.0/.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Health Check Utilities
|
|
9
|
+
|
|
10
|
+
This module provides enhanced health verification functionality with retry logic,
|
|
11
|
+
exponential backoff, and detailed health reporting for deployment verification.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import time
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from enum import Enum
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
from rich.console import Console
|
|
22
|
+
|
|
23
|
+
from supervaizer.common import log
|
|
24
|
+
|
|
25
|
+
console = Console()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class HealthStatus(Enum):
    """Closed set of outcomes a health check can report.

    Each member's value is the lowercase string form of its name.
    """

    # Reported when enough successful checks were observed.
    HEALTHY = "healthy"
    # Reported when every attempt was exhausted without success.
    UNHEALTHY = "unhealthy"
    # NOTE(review): TIMEOUT, ERROR and UNKNOWN are not produced by the code
    # visible in this module; presumably reserved for callers - confirm.
    TIMEOUT = "timeout"
    ERROR = "error"
    UNKNOWN = "unknown"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class HealthCheckResult:
    """Outcome of a health check operation.

    ``status`` and ``response_time`` are required; every other field is
    optional. ``timestamp`` is filled in automatically when left at its
    default of ``0.0``.
    """

    status: HealthStatus
    response_time: float
    status_code: Optional[int] = None
    error_message: Optional[str] = None
    endpoint: Optional[str] = None
    timestamp: float = 0.0

    def __post_init__(self) -> None:
        """Stamp the result with ``time.time()`` unless a timestamp was given."""
        if self.timestamp != 0.0:
            return
        # 0.0 doubles as the "not provided" marker for the timestamp.
        self.timestamp = time.time()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class HealthCheckConfig:
    """Settings that control how health checks are performed.

    When ``endpoints`` is left as ``None`` it is replaced by a list holding
    the single path ``/.well-known/health``; any explicit list (including an
    empty one) is kept as given.
    """

    # Timeout handed to the HTTP client.
    timeout: int = 60
    # Upper bound on the number of check attempts.
    max_retries: int = 5
    # Backoff: base_delay * backoff_multiplier**attempt, capped at max_delay.
    base_delay: float = 1.0
    max_delay: float = 30.0
    backoff_multiplier: float = 2.0
    success_threshold: int = 1  # Number of successful checks required
    # Paths appended to the service base URL.
    endpoints: Optional[List[str]] = None

    def __post_init__(self) -> None:
        """Install the default endpoint list when none was supplied."""
        if self.endpoints is not None:
            return
        # A new list per instance, so instances never share the default.
        self.endpoints = ["/.well-known/health"]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class HealthVerifier:
    """Verify service health over HTTP with retries and exponential backoff.

    An attempt succeeds when every configured endpoint answers with HTTP 200.
    The service is reported healthy once ``success_threshold`` attempts have
    succeeded; otherwise attempts continue (with a growing delay between
    them) until ``max_retries`` attempts have been made.
    """

    def __init__(self, config: Optional[HealthCheckConfig] = None):
        """Initialize the health verifier with configuration.

        Args:
            config: Settings used by calls that do not pass their own
                ``config``. A default ``HealthCheckConfig()`` is created
                when omitted.
        """
        self.config = config or HealthCheckConfig()

    @staticmethod
    def _auth_headers(api_key: Optional[str]) -> Dict[str, str]:
        """Return request headers, sending ``api_key`` as ``X-API-Key`` if set."""
        return {"X-API-Key": api_key} if api_key else {}

    @staticmethod
    def _retry_delay(config: HealthCheckConfig, attempt: int) -> float:
        """Return the backoff delay after zero-based ``attempt``, capped at ``max_delay``."""
        return min(
            config.base_delay * (config.backoff_multiplier**attempt),
            config.max_delay,
        )

    @staticmethod
    def _healthy_result(
        config: HealthCheckConfig, start_time: float
    ) -> HealthCheckResult:
        """Build the HEALTHY result, timing from ``start_time`` until now."""
        return HealthCheckResult(
            status=HealthStatus.HEALTHY,
            response_time=time.time() - start_time,
            status_code=200,
            endpoint=config.endpoints[0] if config.endpoints else None,
        )

    @staticmethod
    def _unhealthy_result(
        config: HealthCheckConfig, error: Optional[str]
    ) -> HealthCheckResult:
        """Build the UNHEALTHY result carrying the last recorded error."""
        return HealthCheckResult(
            status=HealthStatus.UNHEALTHY,
            response_time=0.0,
            error_message=error,
            endpoint=config.endpoints[0] if config.endpoints else None,
        )

    def verify_health(
        self,
        service_url: str,
        api_key: Optional[str] = None,
        config: Optional[HealthCheckConfig] = None,
    ) -> HealthCheckResult:
        """
        Verify service health with retry logic and exponential backoff.

        Blocks the calling thread: requests are synchronous and the wait
        between attempts uses ``time.sleep``.

        Args:
            service_url: Base URL of the service (a trailing ``/`` is stripped
                before each endpoint path is appended)
            api_key: Optional API key, sent in the ``X-API-Key`` header
            config: Optional configuration override; falls back to the
                verifier's own configuration

        Returns:
            HealthCheckResult with detailed status information. On success
            ``response_time`` covers only the final, successful attempt. On
            failure the status is UNHEALTHY, ``response_time`` is 0.0 and
            ``error_message`` holds the last error observed.
        """
        config = config or self.config
        if not config.endpoints:
            # Nothing to probe: retrying cannot change the outcome.
            return self._unhealthy_result(config, "No endpoints configured")

        headers = self._auth_headers(api_key)
        base_url = service_url.rstrip("/")
        last_error: Optional[str] = None
        successful_checks = 0

        # One client for the whole verification, mirroring the async variant.
        with httpx.Client(timeout=config.timeout) as client:
            for attempt in range(config.max_retries):
                start_time = time.time()

                try:
                    failure: Optional[str] = None
                    for endpoint in config.endpoints:
                        response = client.get(
                            f"{base_url}{endpoint}", headers=headers
                        )
                        if response.status_code != 200:
                            failure = (
                                f"Endpoint {endpoint} returned {response.status_code}"
                            )
                            # The first failing endpoint fails the attempt.
                            break

                    if failure is not None:
                        last_error = failure
                    else:
                        successful_checks += 1
                        if successful_checks >= config.success_threshold:
                            return self._healthy_result(config, start_time)

                except httpx.TimeoutException:
                    last_error = f"Request timeout after {config.timeout}s"
                except httpx.RequestError as e:
                    last_error = f"Request error: {str(e)}"
                except Exception as e:
                    # Deliberately broad: a probe must report, not crash.
                    last_error = f"Unexpected error: {str(e)}"

                # No sleep after the final attempt.
                if attempt < config.max_retries - 1:
                    delay = self._retry_delay(config, attempt)
                    log.debug(
                        f"Health check attempt {attempt + 1} failed, retrying in {delay:.1f}s"
                    )
                    time.sleep(delay)

        # All attempts failed
        return self._unhealthy_result(config, last_error)

    def verify_health_async(
        self,
        service_url: str,
        api_key: Optional[str] = None,
        config: Optional[HealthCheckConfig] = None,
    ) -> HealthCheckResult:
        """
        Run the asyncio-based health verification to completion.

        Despite its name this is a regular (blocking) method: it drives the
        internal coroutine with ``asyncio.run``. NOTE: ``asyncio.run`` cannot
        be called while another event loop is running in the same thread, so
        code that is already async should await ``_verify_health_async``
        instead.

        Args:
            service_url: Base URL of the service
            api_key: Optional API key for authenticated endpoints
            config: Optional configuration override

        Returns:
            HealthCheckResult with detailed status information
        """
        return asyncio.run(self._verify_health_async(service_url, api_key, config))

    async def _verify_health_async(
        self,
        service_url: str,
        api_key: Optional[str] = None,
        config: Optional[HealthCheckConfig] = None,
    ) -> HealthCheckResult:
        """Internal async health verification implementation.

        Same contract as ``verify_health``, using ``httpx.AsyncClient`` for
        requests and ``asyncio.sleep`` for the backoff delay.
        """
        config = config or self.config
        if not config.endpoints:
            # Nothing to probe: retrying cannot change the outcome.
            return self._unhealthy_result(config, "No endpoints configured")

        headers = self._auth_headers(api_key)
        base_url = service_url.rstrip("/")
        last_error: Optional[str] = None
        successful_checks = 0

        async with httpx.AsyncClient(timeout=config.timeout) as client:
            for attempt in range(config.max_retries):
                start_time = time.time()

                try:
                    failure: Optional[str] = None
                    for endpoint in config.endpoints:
                        response = await client.get(
                            f"{base_url}{endpoint}", headers=headers
                        )
                        if response.status_code != 200:
                            failure = (
                                f"Endpoint {endpoint} returned {response.status_code}"
                            )
                            # The first failing endpoint fails the attempt.
                            break

                    if failure is not None:
                        last_error = failure
                    else:
                        successful_checks += 1
                        if successful_checks >= config.success_threshold:
                            return self._healthy_result(config, start_time)

                except httpx.TimeoutException:
                    last_error = f"Request timeout after {config.timeout}s"
                except httpx.RequestError as e:
                    last_error = f"Request error: {str(e)}"
                except Exception as e:
                    # Deliberately broad: a probe must report, not crash.
                    last_error = f"Unexpected error: {str(e)}"

                # No sleep after the final attempt.
                if attempt < config.max_retries - 1:
                    delay = self._retry_delay(config, attempt)
                    log.debug(
                        f"Health check attempt {attempt + 1} failed, retrying in {delay:.1f}s"
                    )
                    await asyncio.sleep(delay)

        # All attempts failed
        return self._unhealthy_result(config, last_error)

    def verify_multiple_endpoints(
        self,
        service_url: str,
        endpoints: List[str],
        api_key: Optional[str] = None,
        config: Optional[HealthCheckConfig] = None,
    ) -> Dict[str, HealthCheckResult]:
        """
        Verify multiple endpoints and return individual results.

        Each endpoint is verified on its own (with its own retry budget)
        using a copy of the configuration restricted to that endpoint. The
        configuration passed in - or the verifier's own - is left untouched.

        Args:
            service_url: Base URL of the service
            endpoints: List of endpoints to check
            api_key: Optional API key for authenticated endpoints
            config: Optional configuration override

        Returns:
            Dictionary mapping endpoints to their health check results
        """
        config = config or self.config

        results: Dict[str, HealthCheckResult] = {}
        for endpoint in endpoints:
            # Per-endpoint copy: never write to the shared/caller-owned config.
            single_endpoint_config = HealthCheckConfig(
                timeout=config.timeout,
                max_retries=config.max_retries,
                base_delay=config.base_delay,
                max_delay=config.max_delay,
                backoff_multiplier=config.backoff_multiplier,
                success_threshold=config.success_threshold,
                endpoints=[endpoint],
            )

            results[endpoint] = self.verify_health(
                service_url, api_key, single_endpoint_config
            )

        return results

    def get_health_summary(
        self, results: Dict[str, HealthCheckResult]
    ) -> Dict[str, Any]:
        """
        Generate a summary of health check results.

        Args:
            results: Dictionary of health check results

        Returns:
            Summary dictionary with the keys ``overall_status``,
            ``total_endpoints``, ``healthy_endpoints``,
            ``unhealthy_endpoints``, ``success_rate``,
            ``average_response_time`` (averaged over healthy results only),
            ``timestamp`` and ``details`` (the ``results`` mapping itself).
            An empty ``results`` mapping yields an overall status of HEALTHY
            with a success rate of 0.0.
        """
        healthy_times = [
            r.response_time
            for r in results.values()
            if r.status == HealthStatus.HEALTHY
        ]
        total_checks = len(results)
        healthy_checks = len(healthy_times)
        unhealthy_checks = total_checks - healthy_checks

        avg_response_time = (
            sum(healthy_times) / healthy_checks if healthy_checks > 0 else 0.0
        )
        overall_status = (
            HealthStatus.HEALTHY if unhealthy_checks == 0 else HealthStatus.UNHEALTHY
        )

        return {
            "overall_status": overall_status,
            "total_endpoints": total_checks,
            "healthy_endpoints": healthy_checks,
            "unhealthy_endpoints": unhealthy_checks,
            "success_rate": healthy_checks / total_checks if total_checks > 0 else 0.0,
            "average_response_time": avg_response_time,
            "timestamp": time.time(),
            "details": results,
        }
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def verify_service_health(
    service_url: str,
    api_key: Optional[str] = None,
    timeout: int = 60,
    max_retries: int = 5,
) -> bool:
    """
    Boolean convenience wrapper around ``HealthVerifier.verify_health``.

    Kept as a simple function for backward compatibility.

    Args:
        service_url: Base URL of the service
        api_key: Optional API key for authenticated endpoints
        timeout: Request timeout in seconds
        max_retries: Maximum number of retry attempts

    Returns:
        True if service is healthy, False otherwise
    """
    # Only timeout and retry count are customised; the rest stays at defaults.
    verifier = HealthVerifier(
        HealthCheckConfig(timeout=timeout, max_retries=max_retries)
    )
    outcome = verifier.verify_health(service_url, api_key)
    return outcome.status == HealthStatus.HEALTHY
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def display_health_results(results: Dict[str, HealthCheckResult]) -> None:
    """
    Display health check results in a formatted table.

    One row is printed per entry with the columns Endpoint, Status,
    Response Time, Status Code and Error. Missing values are shown as
    ``N/A`` (or ``None`` for the error column).

    Args:
        results: Dictionary of health check results
    """
    from rich.markup import escape
    from rich.table import Table

    table = Table(title="Health Check Results")
    table.add_column("Endpoint", style="cyan")
    table.add_column("Status", style="magenta")
    table.add_column("Response Time", style="green")
    table.add_column("Status Code", style="blue")
    table.add_column("Error", style="red")

    for endpoint, result in results.items():
        status_style = "green" if result.status == HealthStatus.HEALTHY else "red"
        response_time = (
            f"{result.response_time:.3f}s" if result.response_time > 0 else "N/A"
        )
        status_code = str(result.status_code) if result.status_code else "N/A"
        error = result.error_message or "None"

        table.add_row(
            # Endpoint and error text come from outside this function; escape
            # them so square brackets are shown literally instead of being
            # parsed as Rich console markup.
            escape(endpoint),
            f"[{status_style}]{result.status.value}[/]",
            response_time,
            status_code,
            escape(error),
        )

    console.print(table)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# Copyright (c) 2024-2025 Alain Prasquier - Supervaize.com. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
|
|
4
|
+
# If a copy of the MPL was not distributed with this file, you can obtain one at
|
|
5
|
+
# https://mozilla.org/MPL/2.0/.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Deployment State Management
|
|
9
|
+
|
|
10
|
+
This module handles deployment state persistence and management.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, Optional
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
|
|
20
|
+
from supervaizer.common import log
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DeploymentState(BaseModel):
    """Persisted description of one deployment.

    ``service_name``, ``platform``, ``environment``, ``region`` and
    ``image_tag`` are required; all other fields have defaults.
    """

    # --- Versioning ---
    version: int = Field(default=2, description="State file format version")

    # --- Service identification ---
    service_name: str = Field(..., description="Name of the deployed service")
    platform: str = Field(
        ...,
        description="Target platform (cloud-run|aws-app-runner|do-app-platform)",
    )
    environment: str = Field(..., description="Environment (dev|staging|prod)")
    region: str = Field(..., description="Provider region")
    project_id: Optional[str] = Field(
        default=None,
        description="GCP project / AWS account / DO project",
    )

    # --- Deployment details ---
    image_tag: str = Field(..., description="Docker image tag")
    image_digest: Optional[str] = Field(
        default=None, description="Docker image digest"
    )
    service_url: Optional[str] = Field(
        default=None, description="Public service URL"
    )
    revision: Optional[str] = Field(
        default=None, description="Service revision/version"
    )

    # --- Timestamps (timezone-aware UTC, taken when the model is created) ---
    created_at: datetime = Field(
        description="Deployment creation time",
        default_factory=lambda: datetime.now(timezone.utc),
    )
    updated_at: datetime = Field(
        description="Last update time",
        default_factory=lambda: datetime.now(timezone.utc),
    )

    # --- Status ---
    status: str = Field(default="unknown", description="Deployment status")
    health_status: str = Field(default="unknown", description="Health check status")

    # --- Configuration ---
    port: int = Field(default=8000, description="Application port")
    api_key_generated: bool = Field(
        default=False, description="Whether API key was generated"
    )
    rsa_key_generated: bool = Field(
        default=False, description="Whether RSA key was generated"
    )

    # --- Provider-specific data ---
    provider_data: Dict[str, Any] = Field(
        description="Platform-specific data", default_factory=dict
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class StateManager:
    """Manages deployment state persistence.

    The state is stored as JSON in ``state.json`` inside the deployment
    directory, next to a ``logs`` subdirectory created on initialization.
    """

    def __init__(self, deployment_dir: Path) -> None:
        """Initialize state manager.

        Args:
            deployment_dir: Directory that holds ``state.json``; created
                (together with its ``logs`` subdirectory) if missing.
        """
        self.deployment_dir = deployment_dir
        self.state_file = deployment_dir / "state.json"
        self._ensure_deployment_dir()

    def _ensure_deployment_dir(self) -> None:
        """Ensure deployment directory exists."""
        # parents=True so a nested, not-yet-existing path does not fail.
        self.deployment_dir.mkdir(parents=True, exist_ok=True)

        # Create logs subdirectory
        logs_dir = self.deployment_dir / "logs"
        logs_dir.mkdir(exist_ok=True)

        log.info(f"Deployment directory: {self.deployment_dir}")

    def load_state(self) -> Optional[DeploymentState]:
        """Load deployment state from file.

        Returns:
            The validated state, or ``None`` when the file does not exist,
            cannot be decoded, does not hold a JSON object, fails model
            construction, or fails ``validate_state``.

        Raises:
            ValueError: Propagated from ``migrate_state`` when the state file
                declares an unsupported version.
        """
        if not self.state_file.exists():
            return None

        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            log.error(f"Failed to load or validate deployment state: {e}")
            return None

        if not isinstance(data, dict):
            log.error(
                "Failed to load or validate deployment state: "
                f"expected a JSON object, got {type(data).__name__}"
            )
            return None

        # Handle migration. Kept outside the catch-all below so that the
        # "unsupported version" ValueError reaches the caller without having
        # to recognise it by its message text.
        try:
            data = self.migrate_state(data)
        except TypeError as e:
            # e.g. a non-numeric "version" value that cannot be compared.
            log.error(f"Failed to load or validate deployment state: {e}")
            return None

        try:
            # Handle datetime deserialization
            for key in ("created_at", "updated_at"):
                if key in data:
                    data[key] = datetime.fromisoformat(data[key])

            state = DeploymentState(**data)
        except (ValueError, KeyError, TypeError) as e:
            log.error(f"Failed to load or validate deployment state: {e}")
            return None

        return state if self.validate_state(state) else None

    def save_state(self, state: DeploymentState) -> None:
        """Save deployment state to file.

        Sets ``state.updated_at`` to the current UTC time as a side effect.

        Args:
            state: State to persist.

        Raises:
            RuntimeError: If the state cannot be serialized or written.
        """
        try:
            # Update timestamp
            state.updated_at = datetime.now(timezone.utc)

            # Convert to dict and handle datetime serialization
            data = state.model_dump()
            data["created_at"] = state.created_at.isoformat()
            data["updated_at"] = state.updated_at.isoformat()

            # Serialize completely before opening the file: opening with "w"
            # truncates it, so a serialization error after that point would
            # destroy the previously saved state.
            payload = json.dumps(data, indent=2)

            with open(self.state_file, "w", encoding="utf-8") as f:
                f.write(payload)

            log.info(f"Saved deployment state to {self.state_file}")

        except (OSError, TypeError, ValueError) as e:
            # TypeError: json.dumps met a value it cannot serialize.
            log.error(f"Failed to save deployment state: {e}")
            raise RuntimeError(f"Failed to save deployment state: {e}") from e

    def update_state(self, **kwargs: Any) -> DeploymentState:
        """Update deployment state with new values.

        When no state can be loaded, a new ``DeploymentState`` is built from
        ``kwargs``. Otherwise each keyword that names a model field is
        assigned onto the loaded state; other keywords are ignored. The
        result is saved before it is returned.

        Args:
            **kwargs: Field values to set.

        Returns:
            The created or updated state.
        """
        current_state = self.load_state()

        if current_state is None:
            # Create new state if none exists
            current_state = DeploymentState(**kwargs)
        else:
            # Update existing state. Match against declared model fields
            # only: hasattr() would also accept method names.
            known_fields = type(current_state).model_fields
            for key, value in kwargs.items():
                if key in known_fields:
                    setattr(current_state, key, value)

        self.save_state(current_state)
        return current_state

    def delete_state(self) -> None:
        """Delete deployment state file (no-op when it does not exist)."""
        try:
            self.state_file.unlink()
        except FileNotFoundError:
            return
        log.info(f"Deleted deployment state file: {self.state_file}")

    def get_service_key(self, service_name: str, environment: str) -> str:
        """Generate a unique key for the service.

        Returns:
            ``"<service_name>-<environment>"``.
        """
        return f"{service_name}-{environment}"

    def validate_state(self, state: DeploymentState) -> bool:
        """Validate deployment state.

        Checks that the identifying fields are non-empty and that platform
        and environment are among the supported values. Each failure is
        logged.

        Returns:
            True when the state passes every check, False otherwise.
        """
        required_fields = ("service_name", "platform", "environment", "image_tag")

        for field in required_fields:
            if not getattr(state, field):
                log.error(f"Missing required field in state: {field}")
                return False

        # Validate platform
        valid_platforms = ("cloud-run", "aws-app-runner", "do-app-platform")
        if state.platform not in valid_platforms:
            log.error(f"Invalid platform: {state.platform}")
            return False

        # Validate environment
        valid_environments = ("dev", "staging", "prod")
        if state.environment not in valid_environments:
            log.error(f"Invalid environment: {state.environment}")
            return False

        return True

    def migrate_state(self, state_data: Dict[str, Any]) -> Dict[str, Any]:
        """Migrate state data from older versions.

        Data without a ``version`` key is treated as version 1.

        Args:
            state_data: Raw state mapping; it is not modified.

        Returns:
            A shallow copy of ``state_data`` upgraded to version 2.

        Raises:
            ValueError: If the declared version is newer than 2.
        """
        version = state_data.get("version", 1)
        if version > 2:
            raise ValueError(f"Unsupported state version: {version}")

        migrated_data = state_data.copy()

        if version < 2:
            log.info("Migrating state from v1 to v2")
            # Example migration: add new fields with defaults
            migrated_data.setdefault("api_key_generated", False)
            migrated_data.setdefault("rsa_key_generated", False)
            migrated_data.setdefault("provider_data", {})
            migrated_data["version"] = 2

        return migrated_data
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Supervaizer Deployment Dockerfile
FROM ghcr.io/astral-sh/uv:python{{PYTHON_VERSION}}-bookworm AS base
ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1

# curl is needed by the HEALTHCHECK instruction further down
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy only files that affect dependency resolution
COPY pyproject.toml ./
# Record tool versions in the build log
RUN uv --version && python --version
# Sync dependencies (uv sync will resolve and install dependencies)
RUN uv sync --no-dev --no-install-project
# COPY {{CONTROLLER_FILE}} ./

# Copy entrypoint script
COPY .deployment/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Now bring in the rest of the source (maximizes cache hits for deps)
COPY . .

# COPY .deployment/debug_env.py ./debug_env.py
# Set environment variables
{{ENV_VARS}}

# Create non-root user and hand it the app directory and entrypoint
RUN useradd --create-home --shell /bin/bash supervaizer && \
    chown -R supervaizer:supervaizer /app && \
    chown supervaizer:supervaizer /entrypoint.sh
USER supervaizer

# Expose port
EXPOSE {{APP_PORT}}

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:{{APP_PORT}}/.well-known/health || exit 1

# Set entrypoint
ENTRYPOINT ["/entrypoint.sh"]
|