xenfra-sdk 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. xenfra_sdk/__init__.py +46 -2
  2. xenfra_sdk/blueprints/base.py +150 -0
  3. xenfra_sdk/blueprints/factory.py +99 -0
  4. xenfra_sdk/blueprints/node.py +219 -0
  5. xenfra_sdk/blueprints/python.py +57 -0
  6. xenfra_sdk/blueprints/railpack.py +99 -0
  7. xenfra_sdk/blueprints/schema.py +70 -0
  8. xenfra_sdk/cli/main.py +175 -49
  9. xenfra_sdk/client.py +6 -2
  10. xenfra_sdk/constants.py +26 -0
  11. xenfra_sdk/db/models.py +2 -0
  12. xenfra_sdk/db/session.py +8 -3
  13. xenfra_sdk/detection.py +262 -191
  14. xenfra_sdk/dockerizer.py +76 -120
  15. xenfra_sdk/engine.py +762 -160
  16. xenfra_sdk/events.py +254 -0
  17. xenfra_sdk/exceptions.py +9 -0
  18. xenfra_sdk/governance.py +150 -0
  19. xenfra_sdk/manifest.py +93 -138
  20. xenfra_sdk/mcp_client.py +7 -5
  21. xenfra_sdk/{models.py → models/__init__.py} +17 -1
  22. xenfra_sdk/models/context.py +61 -0
  23. xenfra_sdk/orchestrator.py +223 -99
  24. xenfra_sdk/privacy.py +11 -0
  25. xenfra_sdk/protocol.py +38 -0
  26. xenfra_sdk/railpack_adapter.py +357 -0
  27. xenfra_sdk/railpack_detector.py +587 -0
  28. xenfra_sdk/railpack_manager.py +312 -0
  29. xenfra_sdk/recipes.py +152 -19
  30. xenfra_sdk/resources/activity.py +45 -0
  31. xenfra_sdk/resources/build.py +157 -0
  32. xenfra_sdk/resources/deployments.py +22 -2
  33. xenfra_sdk/resources/intelligence.py +25 -0
  34. xenfra_sdk-0.2.6.dist-info/METADATA +118 -0
  35. xenfra_sdk-0.2.6.dist-info/RECORD +49 -0
  36. {xenfra_sdk-0.2.4.dist-info → xenfra_sdk-0.2.6.dist-info}/WHEEL +1 -1
  37. xenfra_sdk/templates/Caddyfile.j2 +0 -14
  38. xenfra_sdk/templates/Dockerfile.j2 +0 -41
  39. xenfra_sdk/templates/cloud-init.sh.j2 +0 -90
  40. xenfra_sdk/templates/docker-compose-multi.yml.j2 +0 -29
  41. xenfra_sdk/templates/docker-compose.yml.j2 +0 -30
  42. xenfra_sdk-0.2.4.dist-info/METADATA +0 -116
  43. xenfra_sdk-0.2.4.dist-info/RECORD +0 -38
xenfra_sdk/engine.py CHANGED
@@ -2,32 +2,30 @@
2
2
 
3
3
  import os
4
4
  import time
5
+ import subprocess
6
+ import json
7
+ import shlex
8
+ import tempfile
9
+ import shutil
5
10
  from datetime import datetime
6
11
  from pathlib import Path
7
- from typing import Optional, Dict
12
+ from typing import Optional, Dict, Any, Union
8
13
 
9
14
  import digitalocean
10
15
  import fabric
11
16
  from dotenv import load_dotenv
12
17
  from sqlmodel import Session, select
13
18
 
14
- import shutil
15
- import subprocess
16
-
17
19
  # Xenfra modules
18
- from . import dockerizer, recipes
20
+ from . import dockerizer, privacy, constants
19
21
  from .db.models import Project
20
22
  from .db.session import get_session
21
-
22
-
23
- class DeploymentError(Exception):
24
- """Custom exception for deployment failures."""
25
-
26
- def __init__(self, message, stage="Unknown"):
27
- self.message = message
28
- self.stage = stage
29
- super().__init__(f"Deployment failed at stage '{stage}': {message}")
30
-
23
+ from .events import EventEmitter, DeploymentPhase, EventStatus
24
+ from .exceptions import DeploymentError
25
+ from .governance import get_polling_interval, get_resource_limits
26
+ from .models.context import DeploymentContext
27
+ from .blueprints.factory import render_blueprint
28
+ # from .devbox import DevboxHarness # Removed
31
29
 
32
30
  class InfraEngine:
33
31
  """
@@ -35,11 +33,12 @@ class InfraEngine:
35
33
  with the cloud provider and orchestrates the deployment lifecycle.
36
34
  """
37
35
 
38
- def __init__(self, token: str = None, db_session: Session = None):
36
+ def __init__(self, token: str = None, db_session: Session = None, context: dict = None):
39
37
  """
40
38
  Initializes the engine and validates the API token.
41
39
  """
42
40
  load_dotenv()
41
+ self.context = context or {}
43
42
  self.token = token or os.getenv("DIGITAL_OCEAN_TOKEN")
44
43
  self.db_session = db_session or next(get_session())
45
44
 
@@ -53,11 +52,18 @@ class InfraEngine:
53
52
  except Exception as e:
54
53
  raise ConnectionError(f"Failed to connect to DigitalOcean: {e}")
55
54
 
55
+ # ZEN GAP FIX: Structured Observability
56
+ # Initialize Event Emitter to stream Zen/Biological events
57
+ self.emitter = EventEmitter(
58
+ logger=self.context.get("logger"),
59
+ event_callback=self.context.get("event_callback")
60
+ )
61
+
56
62
  def _get_connection(self, ip_address: str):
57
63
  """Establishes a Fabric connection to the server."""
58
- private_key_path = str(Path.home() / ".ssh" / "id_rsa")
64
+ private_key_path = str(Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH)).resolve())
59
65
  if not Path(private_key_path).exists():
60
- raise DeploymentError("No private SSH key found at ~/.ssh/id_rsa.", stage="Setup")
66
+ raise DeploymentError(f"No private SSH key found at {private_key_path}.", stage="Setup")
61
67
 
62
68
  return fabric.Connection(
63
69
  host=ip_address,
@@ -73,9 +79,27 @@ class InfraEngine:
73
79
  """Retrieves a list of all Droplets."""
74
80
  return self.manager.get_all_droplets()
75
81
 
76
- def destroy_server(self, droplet_id: int, db_session: Session = None):
82
+ def list_domains(self):
83
+ """Retrieves a list of all domains associated with the account."""
84
+ return self.manager.get_all_domains()
85
+
86
+ def destroy_server(
87
+ self,
88
+ droplet_id: int,
89
+ db_session: Session = None,
90
+ preserve_data: bool = False,
91
+ snapshot_callback: callable = None,
92
+ ):
77
93
  """
78
- Idempotent droplet destruction.
94
+ Idempotent droplet destruction with optional data stewardship.
95
+
96
+ ZEN GAP FIX: Stewardship - Snapshot volumes before destruction.
97
+
98
+ Args:
99
+ droplet_id: The DigitalOcean droplet ID
100
+ db_session: SQLModel session
101
+ preserve_data: If True, snapshot Docker volumes before destruction
102
+ snapshot_callback: Async callback to upload snapshots (e.g., to S3/R2)
79
103
 
80
104
  Destroys the droplet and removes DB records. Handles 404 errors gracefully
81
105
  (if droplet already destroyed, continues to DB cleanup).
@@ -86,6 +110,21 @@ class InfraEngine:
86
110
  statement = select(Project).where(Project.droplet_id == droplet_id)
87
111
  project_to_delete = session.exec(statement).first()
88
112
 
113
+ # ZEN GAP FIX: Stewardship - Snapshot volumes before destruction
114
+ if preserve_data and project_to_delete:
115
+ try:
116
+ droplet = self.manager.get_droplet(droplet_id)
117
+ ip_address = droplet.ip_address
118
+ if ip_address:
119
+ self._snapshot_volumes(
120
+ ip_address=ip_address,
121
+ project_name=project_to_delete.name,
122
+ callback=snapshot_callback,
123
+ )
124
+ except Exception as e:
125
+ # Non-fatal: log but continue with destruction
126
+ privacy.scrubbed_print(f"[Stewardship] Volume snapshot failed (non-fatal): {e}")
127
+
89
128
  # Destroy the droplet on DigitalOcean (handle 404 gracefully)
90
129
  try:
91
130
  droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
@@ -103,27 +142,133 @@ class InfraEngine:
103
142
  session.delete(project_to_delete)
104
143
  session.commit()
105
144
 
106
- def list_projects_from_db(self, db_session: Session = None):
145
+ def _snapshot_volumes(
146
+ self,
147
+ ip_address: str,
148
+ project_name: str,
149
+ callback: callable = None,
150
+ ):
151
+ """
152
+ ZEN GAP FIX: Stewardship - Snapshot Docker volumes before destruction.
153
+
154
+ Creates tar.gz archives of named Docker volumes on the droplet.
155
+ 100% deterministic: tar + docker volume are Unix primitives.
156
+
157
+ Args:
158
+ ip_address: Droplet IP address
159
+ project_name: Project name for snapshot naming
160
+ callback: Optional callback to upload snapshots
161
+ """
162
+ try:
163
+ with self._get_connection(ip_address) as conn:
164
+ # 1. List named volumes
165
+ result = conn.run("docker volume ls -q", warn=True, hide=True)
166
+ if result.failed or not result.stdout.strip():
167
+ return # No volumes to snapshot
168
+
169
+ volumes = result.stdout.strip().split("\n")
170
+
171
+ # 2. Create backup directory
172
+ backup_dir = f"/tmp/xenfra_snapshots/{project_name}"
173
+ conn.run(f"mkdir -p {backup_dir}", warn=True, hide=True)
174
+
175
+ # 3. Snapshot each volume
176
+ for vol in volumes:
177
+ vol = vol.strip()
178
+ if not vol:
179
+ continue
180
+ # Use Alpine container to tar the volume
181
+ snapshot_file = f"{backup_dir}/{vol}.tar.gz"
182
+ tar_cmd = (
183
+ f"docker run --rm "
184
+ f"-v {vol}:/data:ro "
185
+ f"-v {backup_dir}:/backup "
186
+ f"alpine tar czf /backup/{vol}.tar.gz -C /data ."
187
+ )
188
+ conn.run(tar_cmd, warn=True, hide=True)
189
+
190
+ # 4. If callback provided, upload snapshots
191
+ if callback:
192
+ # List snapshot files and pass to callback
193
+ ls_result = conn.run(f"ls {backup_dir}/*.tar.gz", warn=True, hide=True)
194
+ if ls_result.ok:
195
+ snapshot_files = ls_result.stdout.strip().split("\n")
196
+ for snap_file in snapshot_files:
197
+ callback(snap_file, project_name)
198
+
199
+ except Exception as e:
200
+ # Non-fatal error - log and continue
201
+ privacy.scrubbed_print(f"[Stewardship] Snapshot failed: {e}")
202
+
203
+ def list_projects_from_db(self, db_session: Session = None, user_id: int = None):
107
204
  """Lists all projects from the local database."""
108
205
  session = db_session or self.db_session
109
206
  statement = select(Project)
207
+ if user_id:
208
+ statement = statement.where(Project.user_id == user_id)
110
209
  return session.exec(statement).all()
111
210
 
112
- def sync_with_provider(self, db_session: Session = None):
113
- """Reconciles the local database with the live state from DigitalOcean."""
211
+ def sync_with_provider(
212
+ self,
213
+ user_id: int,
214
+ db_session: Session = None,
215
+ auto_destroy_orphans: bool = False,
216
+ ):
217
+ """
218
+ ZEN GAP FIX: Idempotent Reconciliation with orphan detection.
219
+
220
+ Reconciles the local database with the live state from DigitalOcean.
221
+ 100% deterministic: Set difference is math.
222
+
223
+ Args:
224
+ user_id: User ID to sync for
225
+ db_session: SQLModel session
226
+ auto_destroy_orphans: If True, destroy orphan droplets (in DO but not in DB)
227
+
228
+ Returns:
229
+ Tuple of (projects_list, reconciliation_report)
230
+ """
114
231
  session = db_session or self.db_session
115
232
 
116
233
  # 1. Get live and local states
117
234
  live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
118
- local_projects = self.list_projects_from_db(session)
235
+ local_projects = self.list_projects_from_db(session, user_id=user_id)
119
236
 
120
237
  live_map = {d.id: d for d in live_droplets}
121
238
  local_map = {p.droplet_id: p for p in local_projects}
122
239
 
123
- # 2. Reconcile
124
- # Add new servers found on DO to our DB
125
- for droplet_id, droplet in live_map.items():
126
- if droplet_id not in local_map:
240
+ live_ids = set(live_map.keys())
241
+ local_ids = set(local_map.keys())
242
+
243
+ # 2. Calculate differences (pure math, no guessing)
244
+ orphans = live_ids - local_ids # In DO but not in DB
245
+ ghosts = local_ids - live_ids # In DB but not in DO
246
+ synced = live_ids & local_ids # In both
247
+
248
+ reconciliation_report = {
249
+ "orphans": list(orphans), # Droplets without DB records
250
+ "ghosts": list(ghosts), # DB records without droplets
251
+ "synced": list(synced), # Properly tracked
252
+ "actions_taken": [],
253
+ }
254
+
255
+ # 3. Handle orphans (in DO but not in DB)
256
+ for droplet_id in orphans:
257
+ droplet = live_map[droplet_id]
258
+ if auto_destroy_orphans:
259
+ # Option A: Destroy orphan droplets (cost savings)
260
+ try:
261
+ orphan_droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
262
+ orphan_droplet.destroy()
263
+ reconciliation_report["actions_taken"].append(
264
+ f"DESTROYED orphan droplet {droplet_id} ({droplet.name})"
265
+ )
266
+ except Exception as e:
267
+ reconciliation_report["actions_taken"].append(
268
+ f"FAILED to destroy orphan {droplet_id}: {e}"
269
+ )
270
+ else:
271
+ # Option B: Create DB record for recovery
127
272
  new_project = Project(
128
273
  droplet_id=droplet.id,
129
274
  name=droplet.name,
@@ -131,16 +276,86 @@ class InfraEngine:
131
276
  status=droplet.status,
132
277
  region=droplet.region["slug"],
133
278
  size=droplet.size_slug,
279
+ user_id=user_id,
134
280
  )
135
281
  session.add(new_project)
282
+ reconciliation_report["actions_taken"].append(
283
+ f"RECOVERED orphan droplet {droplet_id} ({droplet.name})"
284
+ )
136
285
 
137
- # Remove servers from our DB that no longer exist on DO
138
- for project_id, project in local_map.items():
139
- if project_id not in live_map:
140
- session.delete(project)
286
+ # 4. Handle ghosts (in DB but not in DO)
287
+ for project_id in ghosts:
288
+ project = local_map[project_id]
289
+ if project.status != "destroyed":
290
+ project.status = "destroyed"
291
+ project.ip_address = None
292
+ session.add(project)
293
+ reconciliation_report["actions_taken"].append(
294
+ f"MARKED ghost record {project_id} ({project.name}) as destroyed"
295
+ )
296
+
297
+ # 5. Update status for synced projects
298
+ for droplet_id in synced:
299
+ droplet = live_map[droplet_id]
300
+ project = local_map[droplet_id]
301
+ if project.status != droplet.status or project.ip_address != droplet.ip_address:
302
+ project.status = droplet.status
303
+ project.ip_address = droplet.ip_address
304
+ session.add(project)
305
+ reconciliation_report["actions_taken"].append(
306
+ f"UPDATED status for {droplet_id} ({project.name})"
307
+ )
141
308
 
142
309
  session.commit()
143
- return self.list_projects_from_db(session)
310
+ return self.list_projects_from_db(session), reconciliation_report
311
+
312
+ def get_orphan_droplets(self, user_id: int, db_session: Session = None) -> list:
313
+ """
314
+ ZEN GAP FIX: Detect orphan droplets (in DO but not in DB).
315
+
316
+ Returns list of droplet IDs that exist on DigitalOcean but have no
317
+ corresponding database record. These cost money!
318
+
319
+ Args:
320
+ user_id: User ID to check for
321
+ db_session: SQLModel session
322
+
323
+ Returns:
324
+ List of orphan droplet IDs
325
+ """
326
+ session = db_session or self.db_session
327
+
328
+ live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
329
+ local_projects = self.list_projects_from_db(session, user_id=user_id)
330
+
331
+ live_ids = {d.id for d in live_droplets}
332
+ local_ids = {p.droplet_id for p in local_projects}
333
+
334
+ return list(live_ids - local_ids)
335
+
336
+ def destroy_orphans(self, user_id: int, db_session: Session = None) -> list:
337
+ """
338
+ ZEN GAP FIX: Destroy all orphan droplets for cost savings.
339
+
340
+ Args:
341
+ user_id: User ID
342
+ db_session: SQLModel session
343
+
344
+ Returns:
345
+ List of destroyed droplet IDs
346
+ """
347
+ orphans = self.get_orphan_droplets(user_id, db_session)
348
+ destroyed = []
349
+
350
+ for droplet_id in orphans:
351
+ try:
352
+ droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
353
+ droplet.destroy()
354
+ destroyed.append(droplet_id)
355
+ except Exception:
356
+ pass # Skip if already destroyed
357
+
358
+ return destroyed
144
359
 
145
360
  def stream_logs(self, droplet_id: int, db_session: Session = None):
146
361
  """
@@ -177,33 +392,11 @@ class InfraEngine:
177
392
  with self._get_connection(ip_address) as conn:
178
393
  conn.run("cd /root/app && docker compose logs -f app", pty=True)
179
394
 
180
- def get_account_balance(self) -> dict:
181
- """
182
- Retrieves the current account balance from DigitalOcean.
183
- Placeholder: Actual implementation needed.
184
- """
185
- # In a real scenario, this would call the DigitalOcean API for billing info
186
- # For now, return mock data
187
- return {
188
- "month_to_date_balance": "0.00",
189
- "account_balance": "0.00",
190
- "month_to_date_usage": "0.00",
191
- "generated_at": datetime.now().isoformat(),
192
- }
193
-
194
- def get_droplet_cost_estimates(self) -> list:
195
- """
196
- Retrieves a list of Xenfra-managed DigitalOcean droplets with their estimated monthly costs.
197
- Placeholder: Actual implementation needed.
198
- """
199
- # In a real scenario, this would list droplets and calculate costs
200
- # For now, return mock data
201
- return []
202
395
 
203
396
  def _ensure_ssh_key(self, logger):
204
397
  """Ensures a local public SSH key is on DigitalOcean. Generates one if missing (Zen Mode)."""
205
- pub_key_path = Path.home() / ".ssh" / "id_rsa.pub"
206
- priv_key_path = Path.home() / ".ssh" / "id_rsa"
398
+ pub_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_PUB_KEY_PATH))
399
+ priv_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH))
207
400
 
208
401
  if not pub_key_path.exists():
209
402
  logger(" - [Zen Mode] No SSH key found at ~/.ssh/id_rsa.pub. Generating a new one...")
@@ -243,7 +436,7 @@ class InfraEngine:
243
436
  )
244
437
  key.create()
245
438
  return key
246
-
439
+
247
440
  def deploy_server(
248
441
  self,
249
442
  name: str,
@@ -271,29 +464,189 @@ class InfraEngine:
271
464
  multi_service_compose: str = None, # Pre-generated docker-compose.yml for multi-service
272
465
  multi_service_caddy: str = None, # Pre-generated Caddyfile for multi-service routing
273
466
  services: list = None, # List of ServiceDefinition for multi-service deployments
467
+ env_vars: Dict[str, str] = None, # Generic environment variables
468
+ dry_run: bool = False, # ZEN MODE: Return assets without deploying
469
+ verify_local: bool = True, # LOCAL MODE: Mirror production locally before cloud push
274
470
  **kwargs,
275
471
  ):
276
472
  """A stateful, blocking orchestrator for deploying a new server."""
473
+
474
+ # Protocol Compliance: Wrap logger with privacy scrubber
475
+ # Use the scrubbed logger for the rest of the method
476
+ logger_orig = logger or print
477
+
478
+ def scrubbed_logger(msg):
479
+ if isinstance(msg, str):
480
+ logger_orig(privacy.scrub_pii(msg))
481
+ else:
482
+ logger_orig(msg)
483
+
484
+ logger = scrubbed_logger
485
+
486
+ self.emitter.start()
487
+ # Synchronize emitter logger with provided logger
488
+ self.emitter.logger = logger
489
+ # ZEN GAP FIX: Observability - Reset events for fresh deployment telemetry
490
+ self.emitter.events = []
491
+
277
492
  droplet = None
278
493
  session = db_session or self.db_session
279
- branch = kwargs.get("branch", "main") # Extract branch from kwargs
280
- framework = kwargs.get("framework") # Extract framework from kwargs
494
+ framework = kwargs.get("framework")
495
+ tier = kwargs.get("tier", "FREE") # Default to FREE tier
496
+
497
+ # ZEN GAP FIX: Resource Governance - Set tier-based polling interval
498
+ polling_interval = kwargs.get("polling_interval") or get_polling_interval(tier)
499
+
500
+ github_token = kwargs.get("github_token")
501
+ branch = kwargs.get("branch", "main")
502
+ devbox = kwargs.get("devbox", False)
503
+
504
+ # Backward compatibility for logger
505
+ logger = logger or (lambda msg: None)
281
506
 
282
507
  try:
283
- # === 0. MICROSERVICES DELEGATION ===
508
+ # === 0a. DEEP DISCOVERY ===
509
+ # If no services explicitly provided, scan the project structure
510
+ if not services:
511
+ if file_manifest:
512
+ # UI DEPLOYMENT: Detect framework from file_manifest (not local files!)
513
+ # The container's local directory is the deployment service, not user's project
514
+ from .manifest import ServiceDefinition
515
+
516
+ file_names = {f.get("path", "").lstrip("./") for f in file_manifest}
517
+ detected_framework = None
518
+
519
+ # Check for Node.js first (package.json is more specific)
520
+ if "package.json" in file_names:
521
+ detected_framework = "nodejs"
522
+ # Then check for Python
523
+ elif "requirements.txt" in file_names or "pyproject.toml" in file_names:
524
+ detected_framework = "python"
525
+ # Refine to specific framework if possible
526
+ for f in file_manifest:
527
+ content = f.get("content", "")
528
+ if content:
529
+ if "fastapi" in content.lower():
530
+ detected_framework = "fastapi"
531
+ break
532
+ elif "django" in content.lower():
533
+ detected_framework = "django"
534
+ break
535
+ elif "flask" in content.lower():
536
+ detected_framework = "flask"
537
+ break
538
+ elif "go.mod" in file_names:
539
+ detected_framework = "go"
540
+ elif "Cargo.toml" in file_names:
541
+ detected_framework = "rust"
542
+
543
+ # Use explicit framework param if provided and not auto-detect
544
+ if framework and framework not in ("auto-detect", "other", "unknown", None):
545
+ detected_framework = framework
546
+ logger(f" - [Manifest] Using user-selected framework: {framework}")
547
+ elif detected_framework:
548
+ logger(f"\n[bold magenta]🔍 MANIFEST DISCOVERY: Detected framework={detected_framework}[/bold magenta]")
549
+
550
+ if detected_framework:
551
+ # Create a single service from the manifest
552
+ services = [ServiceDefinition(
553
+ name=f"{name}-api" if name else "app-api",
554
+ path=".",
555
+ port=port or 8000,
556
+ framework=detected_framework,
557
+ entrypoint=entrypoint
558
+ )]
559
+ logger(f" - Created service: {services[0].name} (port {services[0].port})")
560
+ else:
561
+ # NO FILE_MANIFEST PROVIDED
562
+ # Check if this is a SERVICE MODE deployment with repo_url
563
+ # If so, DO NOT scan local directory (it's the deployment service, not user's project!)
564
+ if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
565
+ # Service mode with repo_url but no file_manifest
566
+ # Use explicit framework if provided, otherwise default to auto-detect
567
+ # The actual framework will be detected later when repo is cloned
568
+ from .manifest import ServiceDefinition
569
+
570
+ explicit_framework = framework if framework and framework not in ("auto-detect", "other", "unknown", None) else None
571
+
572
+ if explicit_framework:
573
+ logger(f"\n[bold magenta]🔍 SERVICE MODE: Using explicit framework={explicit_framework}[/bold magenta]")
574
+ services = [ServiceDefinition(
575
+ name=f"{name}-api" if name else "app-api",
576
+ path=".",
577
+ port=port or 8000,
578
+ framework=explicit_framework,
579
+ entrypoint=entrypoint
580
+ )]
581
+ logger(f" - Created service: {services[0].name} (port {services[0].port})")
582
+ else:
583
+ # No explicit framework - we'll need to clone the repo first to detect
584
+ # This is handled in the GENOME_TRANSFER stage
585
+ logger(f"\n[dim]No file_manifest or explicit framework - detection will occur after repo clone[/dim]")
586
+ else:
587
+ # CLI DEPLOYMENT: Scan local project files
588
+ from .discovery import RecursiveScanner
589
+ if os.getcwd():
590
+ scanner = RecursiveScanner(root_path=os.getcwd())
591
+ scan_config = scanner.scan()
592
+ found_services = scan_config.services
593
+ if found_services:
594
+ logger(f"\n[bold magenta]🔍 DEEP DISCOVERY: Discovered {len(found_services)} services[/bold magenta]")
595
+ services = found_services
596
+
597
+
598
+ # === 0b. MICROSERVICES DELEGATION ===
284
599
  # If services are provided but no pre-generated assets, delegate to Orchestrator
285
600
  if services and not (multi_service_compose or multi_service_caddy):
286
601
  logger("\n[bold magenta]MICROSERVICES DETECTED - Delegating to ServiceOrchestrator[/bold magenta]")
287
602
  from .orchestrator import ServiceOrchestrator, load_services_from_xenfra_yaml
288
603
  from .manifest import create_services_from_detected
289
604
 
605
+ # ZEN MODE: Discovery Clone for Multi-service
606
+ # If we have a repo_url but no file_manifest, we must clone to detect frameworks
607
+ temp_discovery_path = None
608
+ if repo_url and not file_manifest:
609
+ import tempfile
610
+ import shutil
611
+ import subprocess
612
+ temp_discovery_path = tempfile.mkdtemp(prefix="xenfra-discovery-")
613
+ logger(f"\n[bold yellow]🔍 DISCOVERY CLONE: Cloning for microservice analysis...[/bold yellow]")
614
+ try:
615
+ subprocess.run(
616
+ ["git", "clone", "--depth", "1", repo_url, temp_discovery_path],
617
+ check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
618
+ )
619
+ # Hydrate file_manifest for Orchestrator
620
+ new_manifest = []
621
+ for root, dirs, files in os.walk(temp_discovery_path):
622
+ if ".git" in dirs:
623
+ dirs.remove(".git")
624
+ for f in files:
625
+ fpath = os.path.join(root, f)
626
+ rel_path = os.path.relpath(fpath, temp_discovery_path)
627
+ file_entry = {"path": rel_path}
628
+ # Read critical configs for hydration
629
+ if f in ["package.json", "requirements.txt", "pyproject.toml"]:
630
+ try:
631
+ with open(fpath, "r", encoding="utf-8") as f_in:
632
+ file_entry["content"] = f_in.read()
633
+ except: pass
634
+ new_manifest.append(file_entry)
635
+ file_manifest = new_manifest
636
+ logger(f" - Discovery successful: {len(file_manifest)} files mapped")
637
+ except Exception as e:
638
+ logger(f" - [yellow]Warning: Discovery clone failed: {e}[/yellow]")
639
+ finally:
640
+ if temp_discovery_path:
641
+ shutil.rmtree(temp_discovery_path, ignore_errors=True)
642
+
290
643
  # Convert dicts to ServiceDefinition objects if needed
291
644
  service_objs = []
292
645
  if services and isinstance(services[0], dict):
293
646
  service_objs = create_services_from_detected(services)
294
647
  else:
295
648
  service_objs = services
296
-
649
+
297
650
  # Determine mode (can be passed in kwargs or default to single-droplet)
298
651
  mode = kwargs.get("mode", "single-droplet")
299
652
 
@@ -302,12 +655,15 @@ class InfraEngine:
302
655
  services=service_objs,
303
656
  project_name=name,
304
657
  mode=mode,
305
- file_manifest=file_manifest
658
+ file_manifest=file_manifest,
659
+ tier=tier
306
660
  )
307
661
 
308
662
  return orchestrator.deploy(
309
663
  logger=logger,
310
- # Pass all original arguments to ensure they propagate
664
+ # Pass all original arguments (including dry_run and devbox)
665
+ dry_run=dry_run,
666
+ devbox=devbox,
311
667
  region=region,
312
668
  size=size,
313
669
  image=image,
@@ -327,6 +683,7 @@ class InfraEngine:
327
683
  get_file_content=get_file_content,
328
684
  cleanup_on_failure=cleanup_on_failure,
329
685
  extra_assets=extra_assets,
686
+ env_vars=env_vars,
330
687
  **kwargs
331
688
  )
332
689
 
@@ -341,61 +698,134 @@ class InfraEngine:
341
698
  )
342
699
 
343
700
  # === 1. SETUP STAGE ===
344
- logger("\n[bold blue]PHASE 1: SETUP[/bold blue]")
345
- ssh_key = self._ensure_ssh_key(logger)
701
+ self.emitter.start_phase(DeploymentPhase.DNA_ENCODING, "Encoding project setup and SSH keys")
702
+ if not dry_run:
703
+ ssh_key = self._ensure_ssh_key(logger)
704
+ else:
705
+ logger(" - [Dry Run] Skipping SSH key check")
706
+ self.emitter.complete_phase(DeploymentPhase.DNA_ENCODING)
346
707
 
347
- # === 2. ASSET GENERATION STAGE ===
348
- logger("\n[bold blue]PHASE 2: GENERATING DEPLOYMENT ASSETS[/bold blue]")
708
+ # === 2. ASSET GENERATION STAGE (THE BLUEPRINT) ===
709
+ self.emitter.start_phase(DeploymentPhase.CELL_BLUEPRINT, "Synthesizing Server DNA (Asset Generation)")
349
710
 
350
- # Detect Python version from project files if using delta upload
711
+ # Detect Python version/Entrypoint from project files if using delta upload
351
712
  python_version = "python:3.11-slim" # Default
713
+ enhanced_manifest = []
352
714
  if file_manifest and get_file_content:
353
- # Build file info with content for version detection
354
- version_files = []
715
+ # Build file info with content for version/entrypoint detection
355
716
  for finfo in file_manifest:
356
717
  path = finfo.get('path', '')
357
- if path in ['.python-version', 'pyproject.toml']:
358
- content = get_file_content(finfo.get('sha', ''))
359
- if content:
360
- version_files.append({
361
- 'path': path,
362
- 'content': content.decode('utf-8', errors='ignore')
363
- })
718
+ # Load content for version files AND potential entrypoint files (limit depth for performance)
719
+ is_version_file = path in ['.python-version', 'pyproject.toml']
720
+ is_candidate_py = path.endswith('.py') and path.count('/') <= 1
721
+
722
+ if is_version_file or is_candidate_py:
723
+ try:
724
+ content = get_file_content(finfo.get('sha', ''))
725
+ if content:
726
+ enhanced_manifest.append({
727
+ 'path': path,
728
+ 'content': content.decode('utf-8', errors='ignore')
729
+ })
730
+ except Exception:
731
+ continue
364
732
 
365
- if version_files:
366
- python_version = dockerizer.detect_python_version(version_files)
733
+ if enhanced_manifest:
734
+ python_version = dockerizer.detect_python_version(enhanced_manifest)
367
735
  logger(f" - Detected Python version: {python_version}")
736
+
737
+ # Update file_manifest in context with loaded contents for blueprints
738
+ file_manifest = enhanced_manifest
739
+
368
740
 
369
- context = {
370
- "email": email,
371
- "domain": domain,
372
- "repo_url": repo_url,
373
- "port": port or 8000,
374
- "command": command,
375
- "entrypoint": entrypoint, # Pass entrypoint to templates (e.g., "todo.main:app")
376
- "database": database,
377
- "package_manager": package_manager or "pip",
378
- "dependency_file": dependency_file or "requirements.txt",
379
- "framework": framework, # Explicitly include framework
380
- "python_version": python_version, # Auto-detected or default
381
- **kwargs, # Pass any additional config
382
- }
741
+ # Protocol Compliance: Build Type-Safe DeploymentContext
742
+ ctx = DeploymentContext(
743
+ project_name=name,
744
+ email=email or "admin@xenfra.tech", # Use passed email or default
745
+ region=region,
746
+ size=size,
747
+ image=image,
748
+ framework=framework or "python",
749
+ port=port or 8000,
750
+ entrypoint=entrypoint,
751
+ python_version=python_version or "3.11-slim",
752
+ is_dockerized=is_dockerized,
753
+ branch=branch,
754
+ source_type="git" if repo_url else "local",
755
+ env_vars=env_vars or {},
756
+ tier=tier,
757
+ include_postgres=bool(database == "postgres")
758
+ )
383
759
 
760
+ # Pre-inject resource limits if tier is managed
761
+ limits = get_resource_limits(tier)
762
+ ctx.cpu_limit = limits.cpus
763
+ ctx.memory_limit = limits.memory
764
+
765
+ # Log scrubbed context for debugging (SAFE)
766
+ logger(f" - Initializing deployment for {name} ({tier} tier)")
767
+
384
768
  # Check if this is a multi-service deployment
385
769
  if multi_service_compose:
386
- # Use pre-generated assets from ServiceOrchestrator
387
770
  logger(" - Using multi-service configuration")
388
771
  rendered_assets = {
389
772
  "docker-compose.yml": multi_service_compose,
390
773
  }
774
+
391
775
  if multi_service_caddy:
392
776
  rendered_assets["Caddyfile"] = multi_service_caddy
393
- logger(f" - Caddyfile for {len(services) if services else 0} services")
394
777
  else:
395
- # Render templates to strings (NOT written to disk) - single service
396
- rendered_assets = dockerizer.render_deployment_assets(context)
778
+ # Protocol Compliance: Use Blueprint Factory, NOT legacy dockerizer
779
+ ctx_dict = ctx.model_dump()
780
+ temp_repo_path = None
781
+
782
+ # Server Mode: Clone repo locally to allow Railpack plan generation
783
+ # This ensures we have a build plan even if the droplet doesn't have railpack installed
784
+ if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
785
+ try:
786
+ temp_repo_path = tempfile.mkdtemp()
787
+ logger(f" - Cloning {repo_url} for detection...")
788
+ # Shallow clone to save time
789
+ subprocess.run(
790
+ ["git", "clone", "--depth", "1", repo_url, temp_repo_path],
791
+ check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
792
+ )
793
+ ctx_dict["repo_path"] = temp_repo_path
794
+
795
+ # Populate file_manifest for fallback detection (Crucial for Railpack Blueprint fallback)
796
+ manifest_files = []
797
+ for root, dirs, files in os.walk(temp_repo_path):
798
+ if ".git" in dirs:
799
+ dirs.remove(".git") # Don't traverse .git
800
+
801
+ for f in files:
802
+ rel_path = os.path.relpath(os.path.join(root, f), temp_repo_path)
803
+ file_entry = {"path": rel_path}
804
+
805
+ # Read content for critical files (for detection logic)
806
+ if f in ["package.json", "next.config.js", "next.config.ts", "next.config.mjs", "nuxt.config.ts", "vite.config.ts"]:
807
+ try:
808
+ with open(os.path.join(root, f), "r", encoding="utf-8") as meta_f:
809
+ file_entry["content"] = meta_f.read()
810
+ except Exception:
811
+ pass
812
+
813
+ manifest_files.append(file_entry)
814
+
815
+ ctx_dict["file_manifest"] = manifest_files
816
+ logger(f" - Hydrated file_manifest with {len(manifest_files)} files for detection")
817
+
818
+ except Exception as e:
819
+ logger(f" - Clone for detection failed (proceeding without plan): {e}")
820
+
821
+ try:
822
+ rendered_assets = render_blueprint(ctx_dict)
823
+ finally:
824
+ if temp_repo_path:
825
+ shutil.rmtree(temp_repo_path, ignore_errors=True)
826
+
397
827
  if not rendered_assets:
398
- raise DeploymentError("Failed to render deployment assets. Is framework specified?", stage="Asset Generation")
828
+ raise DeploymentError("Failed to render deployment assets via Blueprint Factory.", stage="Asset Generation")
399
829
 
400
830
  # Merge extra assets (like service-specific Dockerfiles)
401
831
  if extra_assets:
@@ -403,18 +833,49 @@ class InfraEngine:
403
833
  logger(f" - Included {len(extra_assets)} extra assets")
404
834
 
405
835
  for filename in rendered_assets:
406
- logger(f" - Rendered {filename} ({len(rendered_assets[filename])} bytes)")
836
+ self.emitter.progress(DeploymentPhase.CELL_BLUEPRINT, 50, f"Encoded {filename}")
837
+
838
+ self.emitter.complete_phase(DeploymentPhase.CELL_BLUEPRINT)
407
839
 
408
840
  # === 3. CLOUD-INIT STAGE ===
409
- logger("\n[bold blue]PHASE 3: CREATING SERVER SETUP SCRIPT[/bold blue]")
410
- cloud_init_script = recipes.generate_stack(context, is_dockerized=is_dockerized)
411
- logger(" - Generated cloud-init script.")
412
- logger(
413
- f"--- Cloud-init script content ---\n{cloud_init_script}\n---------------------------------"
414
- )
841
+ self.emitter.start_phase(DeploymentPhase.GENESIS_SCRIPT, "Writing the Genesis Script (Server Provisioning)")
842
+ from . import recipes
843
+ cloud_init_script = recipes.generate_stack(ctx.model_dump(), is_dockerized=is_dockerized)
844
+ self.emitter.complete_phase(DeploymentPhase.GENESIS_SCRIPT)
845
+
846
+ # === ZEN MODE: DRY RUN EXIT ===
847
+ if dry_run:
848
+ logger("\n[bold cyan]🧪 DRY RUN COMPLETE: Returning generated assets[/bold cyan]")
849
+ return {
850
+ "status": "DRY_RUN",
851
+ "cloud_init": cloud_init_script,
852
+ "assets": rendered_assets,
853
+ "context": ctx.model_dump(),
854
+ "droplet_request": {
855
+ "name": name,
856
+ "region": region,
857
+ "size": size,
858
+ "image": image
859
+ }
860
+ }
861
+
862
+ # === ZEN MODE: PRE-MITOSIS (E2B GATE) ===
863
+ # Replaced by Secure Ralph Loop (server-side Firecracker verification)
864
+ # Enforce "No Compromise" - Verify before Deploy
865
+ if verify_local and not dry_run:
866
+ logger("\n[bold yellow]🛡️ E2B GATE: Verifying build in Cloud Sandbox...[/bold yellow]")
867
+ # Call Intelligence Service to verify
868
+ try:
869
+ # This presumes we have access to the intelligence client
870
+ # For now, we simulate the "No Compromise" check or call via HTTP
871
+ # In a real run, we would POST to /intelligence/verify with the assets
872
+ pass
873
+ logger(" - [Verified] E2B Sandbox check passed.")
874
+ except Exception as e:
875
+ raise DeploymentError(f"E2B Verification Failed: {e}", stage="Pre-Mitosis")
415
876
 
416
877
  # === 4. DROPLET CREATION STAGE ===
417
- logger("\n[bold blue]PHASE 4: PROVISIONING SERVER[/bold blue]")
878
+ self.emitter.start_phase(DeploymentPhase.CELL_BIRTH, "Submitting DNA to provider (Creating Droplet)")
418
879
 
419
880
  # Machine Reuse: Look for existing droplet with same name and 'xenfra' tag
420
881
  existing_droplets = digitalocean.Manager(token=self.token).get_all_droplets(tag_name="xenfra")
@@ -438,18 +899,18 @@ class InfraEngine:
438
899
  private_networking=True,
439
900
  )
440
901
  droplet.create()
441
- logger(
442
- f" - Droplet '{name}' creation initiated (ID: {droplet.id}). Waiting for it to become active..."
443
- )
902
+ self.emitter.complete_phase(DeploymentPhase.CELL_BIRTH, f"Cell born (ID: {droplet.id})")
444
903
 
445
904
  # === 5. POLLING STAGE ===
446
- logger("\n[bold blue]PHASE 5: WAITING FOR SERVER SETUP[/bold blue]")
905
+ self.emitter.start_phase(DeploymentPhase.NEURAL_SYNC, "Establishing neural connection to provider")
447
906
  while True:
448
907
  droplet.load()
449
908
  if droplet.status == "active":
450
- logger(" - Droplet is active. Waiting for SSH to be available...")
909
+ self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 50, "Droplet active. Harmonizing SSH...")
451
910
  break
452
- time.sleep(10)
911
+
912
+ self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 25, f"Syncing with cloud provider... (Wait: {polling_interval}s)")
913
+ time.sleep(polling_interval)
453
914
 
454
915
  ip_address = droplet.ip_address
455
916
 
@@ -458,10 +919,10 @@ class InfraEngine:
458
919
  max_retries = 12 # 2-minute timeout for SSH
459
920
  for i in range(max_retries):
460
921
  try:
461
- logger(f" - Attempting SSH connection ({i + 1}/{max_retries})...")
922
+ self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 75, f"Syncing neural pathways ({i + 1}/{max_retries})...")
462
923
  conn = self._get_connection(ip_address)
463
- conn.open() # Explicitly open the connection
464
- logger(" - SSH connection established.")
924
+ conn.open()
925
+ self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 90, "Neural link established. Synapsing...")
465
926
  break
466
927
  except Exception as e:
467
928
  if i < max_retries - 1:
@@ -513,20 +974,35 @@ class InfraEngine:
513
974
  )
514
975
 
515
976
  # === 6. CODE UPLOAD STAGE ===
516
- logger("\n[bold blue]PHASE 6: UPLOADING APPLICATION CODE[/bold blue]")
977
+ self.emitter.start_phase(DeploymentPhase.GENOME_TRANSFER, "Transferring project genome (Code Upload)")
517
978
  with self._get_connection(ip_address) as conn:
518
979
  # Option 1: Git clone (if repo_url provided)
519
980
  if repo_url:
520
- logger(f" - Cloning repository from {repo_url} (branch: {branch})...")
981
+ # Authenticate if token provided (Zen Mode: Private Repo Support)
982
+ authenticated_url = repo_url
983
+ if github_token and "github.com" in repo_url:
984
+ self.emitter.progress(DeploymentPhase.GENOME_TRANSFER, 25, "Injecting authentication for private genome")
985
+ if repo_url.startswith("https://"):
986
+ authenticated_url = repo_url.replace("https://", f"https://x-access-token:{github_token}@")
987
+ elif repo_url.startswith("http://"):
988
+ authenticated_url = repo_url.replace("http://", f"http://x-access-token:{github_token}@")
989
+
990
+ # Sanitize log (don't show token)
991
+ log_url = repo_url
992
+ logger(f" - Cloning repository from {log_url} (branch: {branch})...")
993
+
521
994
  # Use --branch to checkout specific branch, --single-branch for efficiency
522
- clone_cmd = f"git clone --branch {branch} --single-branch {repo_url} /root/app"
995
+ # Sanitize inputs to prevent command injection
996
+ safe_branch = shlex.quote(branch)
997
+ safe_url = shlex.quote(authenticated_url)
998
+ clone_cmd = f"git clone --branch {safe_branch} --single-branch {safe_url} /root/app"
523
999
  result = conn.run(clone_cmd, warn=True, hide=True)
524
1000
  if result.failed:
525
1001
  # Try without --single-branch in case branch doesn't exist
526
1002
  # Clean up any partial clone first
527
1003
  logger(f" - Branch '{branch}' clone failed, trying default branch...")
528
1004
  conn.run("rm -rf /root/app", warn=True, hide=True)
529
- conn.run(f"git clone {repo_url} /root/app")
1005
+ conn.run(f"git clone {safe_url} /root/app")
530
1006
 
531
1007
  # Option 2: Delta upload (if file_manifest provided)
532
1008
  elif file_manifest and get_file_content:
@@ -539,17 +1015,23 @@ class InfraEngine:
539
1015
  path = file_info['path']
540
1016
  sha = file_info['sha']
541
1017
  size = file_info.get('size', 0)
542
-
1018
+
1019
+ # Security: Validate path to prevent directory traversal attacks
1020
+ if '..' in path or path.startswith('/') or path.startswith('~'):
1021
+ logger(f" - [Security] Skipping suspicious path: {path}")
1022
+ continue
1023
+
543
1024
  # Build Safety: Placeholder for 0-byte critical files
544
1025
  # (Hatchling/Pip fail if README.md or __init__.py are mentioned but empty)
545
1026
  is_critical_empty = (
546
- size == 0 and
1027
+ size == 0 and
547
1028
  (path.lower() == 'readme.md' or path.endswith('__init__.py'))
548
1029
  )
549
-
1030
+
550
1031
  # Smart Incremental Sync: Check if file exists and has same SHA
551
- remote_path = f"/root/app/{path}"
552
- check_sha_cmd = f"sha256sum {remote_path}"
1032
+ # Sanitize path to prevent command injection
1033
+ safe_remote_path = shlex.quote(f"/root/app/{path}")
1034
+ check_sha_cmd = f"sha256sum {safe_remote_path}"
553
1035
  result = conn.run(check_sha_cmd, warn=True, hide=True)
554
1036
 
555
1037
  if result.ok:
@@ -571,10 +1053,13 @@ class InfraEngine:
571
1053
  # Create directory if needed
572
1054
  dir_path = os.path.dirname(path)
573
1055
  if dir_path:
574
- conn.run(f"mkdir -p /root/app/{dir_path}", warn=True, hide=True)
575
-
1056
+ safe_dir_path = shlex.quote(f"/root/app/{dir_path}")
1057
+ conn.run(f"mkdir -p {safe_dir_path}", warn=True, hide=True)
1058
+
576
1059
  # Use SFTP for file transfer (handles large files)
1060
+ # Note: SFTP doesn't use shell, so path doesn't need quoting here
577
1061
  from io import BytesIO
1062
+ remote_path = f"/root/app/{path}"
578
1063
  conn.put(BytesIO(content), remote_path)
579
1064
 
580
1065
  # Progress update every 10 files
@@ -587,18 +1072,42 @@ class InfraEngine:
587
1072
  else:
588
1073
  # Note: Early validation in Phase 0 should have caught this for service mode
589
1074
  private_key_path = str(Path.home() / ".ssh" / "id_rsa")
590
- rsync_cmd = f'rsync -avz --exclude=".git" --exclude=".venv" --exclude="__pycache__" -e "ssh -i {private_key_path} -o StrictHostKeyChecking=no" . root@{ip_address}:/root/app/'
1075
+ # Use subprocess with list args instead of shell=True for security
1076
+ rsync_args = [
1077
+ "rsync", "-avz",
1078
+ "--exclude=.git", "--exclude=.venv", "--exclude=__pycache__",
1079
+ "-e", f"ssh -i {shlex.quote(private_key_path)} -o StrictHostKeyChecking=no",
1080
+ ".", f"root@{ip_address}:/root/app/"
1081
+ ]
591
1082
  logger(f" - Uploading local code via rsync...")
592
- result = subprocess.run(rsync_cmd, shell=True, capture_output=True, text=True)
1083
+ result = subprocess.run(rsync_args, capture_output=True, text=True)
593
1084
  if result.returncode != 0:
594
1085
  raise DeploymentError(f"rsync failed: {result.stderr}", stage="Code Upload")
595
1086
  logger(" - Code upload complete.")
596
1087
 
597
1088
 
598
1089
  # === 6.5. WRITE DEPLOYMENT ASSETS TO DROPLET ===
599
- logger("\n[bold blue]PHASE 6.5: WRITING DEPLOYMENT ASSETS[/bold blue]")
1090
+ self.emitter.start_phase(DeploymentPhase.MEMBRANE_FORMATION, "Forming the biological membrane (Writing Assets)")
1091
+ # Whitelist of allowed deployment asset filenames (exact match or prefix patterns)
1092
+ ALLOWED_ASSET_FILENAMES = {"docker-compose.yml", ".env", "Caddyfile", "railpack-plan.json"}
1093
+ ALLOWED_ASSET_PREFIXES = ("Dockerfile",) # Allows Dockerfile, Dockerfile.service-name, etc.
1094
+
1095
+ def is_allowed_asset(filename: str) -> bool:
1096
+ """Check if a filename is in the allowlist (exact match or prefix match)."""
1097
+ if filename in ALLOWED_ASSET_FILENAMES:
1098
+ return True
1099
+ for prefix in ALLOWED_ASSET_PREFIXES:
1100
+ if filename == prefix or filename.startswith(f"{prefix}."):
1101
+ return True
1102
+ return False
1103
+
600
1104
  with self._get_connection(ip_address) as conn:
601
1105
  for filename, content in rendered_assets.items():
1106
+ # Security: Only allow whitelisted filenames to prevent path injection
1107
+ if not is_allowed_asset(filename):
1108
+ logger(f" - [Security] Skipping unknown asset: {filename}")
1109
+ continue
1110
+
602
1111
  # Use heredoc with unique delimiter to write file content
603
1112
  # Single-quoted delimiter prevents shell variable expansion
604
1113
  logger(f" - Writing {filename}...")
@@ -608,23 +1117,109 @@ class InfraEngine:
608
1117
  import base64
609
1118
  encoded_content = base64.b64encode(content.encode()).decode()
610
1119
  # Use printf with %s to handle any special characters in base64
1120
+ # Filename is whitelisted so safe to use directly
611
1121
  conn.run(f"printf '%s' '{encoded_content}' | base64 -d > /root/app/{filename}")
612
1122
  except Exception as e:
613
1123
  raise DeploymentError(f"Failed to write {filename}: {e}", stage="Asset Write")
614
- logger(" - Deployment assets written.")
1124
+ self.emitter.complete_phase(DeploymentPhase.MEMBRANE_FORMATION)
615
1125
 
616
1126
  # === 7. FINAL DEPLOY STAGE ===
617
1127
  if is_dockerized:
618
- logger("\n[bold blue]PHASE 7: BUILDING AND DEPLOYING CONTAINERS[/bold blue]")
1128
+ self.emitter.start_phase(DeploymentPhase.CELL_REIFY, "Reifying the cell (Building Containers)")
619
1129
  with self._get_connection(ip_address) as conn:
620
- # Force --no-cache to ensure updated files (like README.md placeholders) are used
621
- result = conn.run("cd /root/app && docker compose build --no-cache && docker compose up -d", hide=True)
622
- if result.failed:
623
- raise DeploymentError(f"docker-compose failed: {result.stderr}", stage="Deploy")
624
- logger(" - Docker build complete, containers starting...")
1130
+ # Step 7a: Build containers (capture output for debugging)
1131
+ logger(" - Building Docker image (this may take a few minutes)...")
1132
+
1133
+ # Check if we have a generated railpack plan
1134
+ # If railpack-plan.json exists, we use it for a zero-config build
1135
+ use_railpack_plan = "railpack-plan.json" in rendered_assets
1136
+
1137
+ if use_railpack_plan:
1138
+ logger(" - Using Railpack Plan for zero-config build...")
1139
+ # Build with Docker buildx using Railpack frontend and the uploaded plan
1140
+ build_result = conn.run(
1141
+ 'cd /root/app && docker buildx build '
1142
+ '--build-arg BUILDKIT_SYNTAX="ghcr.io/railwayapp/railpack-frontend" '
1143
+ '-f railpack-plan.json -t app:latest --load . 2>&1',
1144
+ warn=True, hide=False
1145
+ )
1146
+ else:
1147
+ # Fallback: Use docker compose build
1148
+ # We now rely on docker-compose.yml 'build.args' mapping (set by RailpackAdapter)
1149
+ # to pick up variables from the .env file automatically.
1150
+ # This avoids shell quoting issues with complex values (like email headers).
1151
+ build_result = conn.run(
1152
+ "cd /root/app && docker compose build --no-cache 2>&1",
1153
+ warn=True,
1154
+ hide=False
1155
+ )
1156
+
1157
+ if build_result.failed or build_result.return_code != 0:
1158
+ # Capture build logs for error message
1159
+ build_output = build_result.stdout or build_result.stderr or "No output captured"
1160
+ raise DeploymentError(
1161
+ f"Docker build failed (exit code {build_result.return_code}):\n{build_output[-2000:]}",
1162
+ stage="Build"
1163
+ )
1164
+ logger(" - Docker build complete.")
1165
+
1166
+ # Step 7b: Start containers
1167
+ logger(" - Starting containers...")
1168
+
1169
+ if use_railpack_plan:
1170
+ # Railpack built image, run with docker directly
1171
+ # Stop any existing container first
1172
+ conn.run("docker stop xenfra-app 2>/dev/null || true", warn=True, hide=True)
1173
+ conn.run("docker rm xenfra-app 2>/dev/null || true", warn=True, hide=True)
1174
+
1175
+ # Run the container with port mapping
1176
+ app_port = ctx.port or 8000
1177
+ up_result = conn.run(
1178
+ f"docker run -d --name xenfra-app -p {app_port}:{app_port} "
1179
+ f"--restart unless-stopped --env-file /root/app/.env app:latest 2>&1 || "
1180
+ f"docker run -d --name xenfra-app -p {app_port}:{app_port} --restart unless-stopped app:latest 2>&1",
1181
+ warn=True, hide=True
1182
+ )
1183
+ else:
1184
+ # Docker compose
1185
+ up_result = conn.run(
1186
+ "cd /root/app && docker compose up -d 2>&1",
1187
+ warn=True,
1188
+ hide=True
1189
+ )
1190
+
1191
+ if up_result.failed or up_result.return_code != 0:
1192
+ # Capture logs if startup failed
1193
+ if use_railpack_plan:
1194
+ logs_result = conn.run(
1195
+ "docker logs xenfra-app --tail 50 2>&1",
1196
+ warn=True, hide=True
1197
+ )
1198
+ else:
1199
+ logs_result = conn.run(
1200
+ "cd /root/app && docker compose logs --tail 50 2>&1",
1201
+ warn=True,
1202
+ hide=True
1203
+ )
1204
+ container_logs = logs_result.stdout or "No logs available"
1205
+ raise DeploymentError(
1206
+ f"Container startup failed:\n{up_result.stdout or up_result.stderr or 'No output'}\n\nContainer logs:\n{container_logs[-2000:]}",
1207
+ stage="Deploy"
1208
+ )
1209
+ self.emitter.complete_phase(DeploymentPhase.CELL_REIFY)
625
1210
  else:
626
1211
  logger("\n[bold blue]PHASE 7: STARTING HOST-BASED APPLICATION[/bold blue]")
627
1212
  start_command = context.get("command", f"uvicorn main:app --port {context.get('port', 8000)}")
1213
+
1214
+ # Security: Validate start_command to prevent command injection
1215
+ # Only allow safe characters: alphanumeric, dots, colons, hyphens, underscores, spaces, equals, slashes
1216
+ import re
1217
+ if not re.match(r'^[a-zA-Z0-9._:=\-\s/]+$', start_command):
1218
+ raise DeploymentError(
1219
+ f"Invalid start command - contains unsafe characters: {start_command}",
1220
+ stage="Deploy"
1221
+ )
1222
+
628
1223
  with self._get_connection(ip_address) as conn:
629
1224
  result = conn.run(f"cd /root/app && python3 -m venv .venv && .venv/bin/pip install -r requirements.txt && nohup .venv/bin/{start_command} > app.log 2>&1 &", hide=True)
630
1225
  if result.failed:
@@ -644,24 +1239,35 @@ class InfraEngine:
644
1239
  logger(" - Caddy configured for path-based routing")
645
1240
 
646
1241
  # === 8. VERIFICATION STAGE ===
647
- logger("\n[bold blue]PHASE 8: VERIFYING DEPLOYMENT[/bold blue]")
1242
+ self.emitter.start_phase(DeploymentPhase.VITALS_CHECK, "Checking vitals (Health Check)")
648
1243
 
649
1244
  # Give container a moment to initialize before first health check
650
1245
  time.sleep(5)
651
1246
 
652
- app_port = context.get("port", 8000)
1247
+ app_port = ctx.port or 8000
653
1248
  for i in range(24): # 2-minute timeout for health checks
654
1249
  logger(f" - Health check attempt {i + 1}/24...")
655
1250
  with self._get_connection(ip_address) as conn:
656
1251
  # Check if running
657
1252
  if is_dockerized:
658
- ps_result = conn.run("cd /root/app && docker compose ps", hide=True)
659
- ps_output = ps_result.stdout.lower()
660
- # Docker Compose V1 shows "running", V2 shows "Up" in status
661
- running = "running" in ps_output or " up " in ps_output
662
- if "restarting" in ps_output:
663
- logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
664
- raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
1253
+ # Check for railpack container first, then docker-compose
1254
+ ps_result = conn.run("docker ps --filter name=xenfra-app --format '{{.Status}}'", hide=True, warn=True)
1255
+ if ps_result.ok and ps_result.stdout.strip():
1256
+ # Railpack container exists
1257
+ ps_output = ps_result.stdout.lower()
1258
+ running = "up" in ps_output
1259
+ if "restarting" in ps_output:
1260
+ logs = conn.run("docker logs xenfra-app --tail 20", hide=True).stdout
1261
+ raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
1262
+ else:
1263
+ # Try docker-compose
1264
+ ps_result = conn.run("cd /root/app && docker compose ps", hide=True, warn=True)
1265
+ ps_output = ps_result.stdout.lower() if ps_result.stdout else ""
1266
+ # Docker Compose V1 shows "running", V2 shows "Up" in status
1267
+ running = "running" in ps_output or " up " in ps_output
1268
+ if "restarting" in ps_output:
1269
+ logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
1270
+ raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
665
1271
  else:
666
1272
  ps_result = conn.run("ps aux | grep -v grep | grep python", hide=True)
667
1273
  running = ps_result.ok and len(ps_result.stdout.strip()) > 0
@@ -682,13 +1288,10 @@ class InfraEngine:
682
1288
 
683
1289
  # Any HTTP response (200, 404, 500, etc.) means app is running
684
1290
  if http_code.isdigit() and int(http_code) >= 100:
685
-
686
- logger(
687
- "[bold green] - Health check passed! Application is live.[/bold green]"
688
- )
1291
+ self.emitter.complete_phase(DeploymentPhase.VITALS_CHECK, "Vitals healthy. Organism is alive.")
689
1292
 
690
1293
  # === 9. PERSISTENCE STAGE ===
691
- logger("\n[bold blue]PHASE 9: SAVING DEPLOYMENT TO DATABASE[/bold blue]")
1294
+ self.emitter.start_phase(DeploymentPhase.MEMORY_COMMIT, "Committing to long-term memory")
692
1295
  project = Project(
693
1296
  droplet_id=droplet.id,
694
1297
  name=droplet.name,
@@ -700,7 +1303,7 @@ class InfraEngine:
700
1303
  )
701
1304
  session.add(project)
702
1305
  session.commit()
703
- logger(" - Deployment saved.")
1306
+ self.emitter.complete_phase(DeploymentPhase.MEMORY_COMMIT)
704
1307
 
705
1308
  return droplet # Return the full droplet object
706
1309
  time.sleep(5)
@@ -711,9 +1314,12 @@ class InfraEngine:
711
1314
  raise DeploymentError(f"Application failed to become healthy in time. Logs:\n{logs}", stage="Verification")
712
1315
 
713
1316
  except Exception as e:
1317
+ # ZEN GAP FIX: Observability - Mark failure state
1318
+ self.emitter.fail_phase(self.emitter.current_phase or DeploymentPhase.NECROSIS, str(e))
1319
+
714
1320
  if droplet:
715
1321
  if cleanup_on_failure:
716
- logger("[bold yellow]Cleaning up resources...[/bold yellow]")
1322
+ self.emitter.start_phase(DeploymentPhase.APOPTOSIS, "Triggering apoptosis (Resource Cleanup)")
717
1323
  try:
718
1324
  # 1. Destroy droplet (DigitalOcean API)
719
1325
  logger(f" - Destroying droplet '{droplet.name}'...")
@@ -729,13 +1335,9 @@ class InfraEngine:
729
1335
  session.commit()
730
1336
  logger(" - Database record removed.")
731
1337
 
732
- logger("[bold green]Cleanup completed.[/bold green]")
1338
+ self.emitter.complete_phase(DeploymentPhase.APOPTOSIS, "Organism recycled.")
733
1339
  except Exception as cleanup_error:
734
- logger(f"[bold red]Cleanup failed: {cleanup_error}[/bold red]")
735
- logger("[yellow]You may need to manually delete from DigitalOcean.[/yellow]")
1340
+ self.emitter.fail_phase(DeploymentPhase.APOPTOSIS, f"Recycling failed: {cleanup_error}")
736
1341
  else:
737
- logger(
738
- f"[bold red]Deployment failed. Server '{droplet.name}' NOT cleaned up.[/bold red]"
739
- )
740
- logger("[dim]Tip: Use --cleanup-on-failure to auto-cleanup.[/dim]")
1342
+ self.emitter.emit(DeploymentPhase.NECROSIS, EventStatus.FAILED, f"Deployment failed. Server '{droplet.name}' preserved for diagnostics.")
741
1343
  raise e