xenfra-sdk 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. xenfra_sdk/__init__.py +46 -2
  2. xenfra_sdk/blueprints/base.py +150 -0
  3. xenfra_sdk/blueprints/factory.py +99 -0
  4. xenfra_sdk/blueprints/node.py +219 -0
  5. xenfra_sdk/blueprints/python.py +57 -0
  6. xenfra_sdk/blueprints/railpack.py +99 -0
  7. xenfra_sdk/blueprints/schema.py +70 -0
  8. xenfra_sdk/cli/main.py +175 -49
  9. xenfra_sdk/client.py +6 -2
  10. xenfra_sdk/constants.py +26 -0
  11. xenfra_sdk/db/session.py +8 -3
  12. xenfra_sdk/detection.py +262 -191
  13. xenfra_sdk/dockerizer.py +76 -120
  14. xenfra_sdk/engine.py +758 -172
  15. xenfra_sdk/events.py +254 -0
  16. xenfra_sdk/exceptions.py +9 -0
  17. xenfra_sdk/governance.py +150 -0
  18. xenfra_sdk/manifest.py +93 -138
  19. xenfra_sdk/mcp_client.py +7 -5
  20. xenfra_sdk/{models.py → models/__init__.py} +17 -1
  21. xenfra_sdk/models/context.py +61 -0
  22. xenfra_sdk/orchestrator.py +223 -99
  23. xenfra_sdk/privacy.py +11 -0
  24. xenfra_sdk/protocol.py +38 -0
  25. xenfra_sdk/railpack_adapter.py +357 -0
  26. xenfra_sdk/railpack_detector.py +587 -0
  27. xenfra_sdk/railpack_manager.py +312 -0
  28. xenfra_sdk/recipes.py +152 -19
  29. xenfra_sdk/resources/activity.py +45 -0
  30. xenfra_sdk/resources/build.py +157 -0
  31. xenfra_sdk/resources/deployments.py +22 -2
  32. xenfra_sdk/resources/intelligence.py +25 -0
  33. xenfra_sdk-0.2.6.dist-info/METADATA +118 -0
  34. xenfra_sdk-0.2.6.dist-info/RECORD +49 -0
  35. {xenfra_sdk-0.2.5.dist-info → xenfra_sdk-0.2.6.dist-info}/WHEEL +1 -1
  36. xenfra_sdk/templates/Caddyfile.j2 +0 -14
  37. xenfra_sdk/templates/Dockerfile.j2 +0 -41
  38. xenfra_sdk/templates/cloud-init.sh.j2 +0 -90
  39. xenfra_sdk/templates/docker-compose-multi.yml.j2 +0 -29
  40. xenfra_sdk/templates/docker-compose.yml.j2 +0 -30
  41. xenfra_sdk-0.2.5.dist-info/METADATA +0 -116
  42. xenfra_sdk-0.2.5.dist-info/RECORD +0 -38
xenfra_sdk/engine.py CHANGED
@@ -2,32 +2,30 @@
 
 import os
 import time
+import subprocess
+import json
+import shlex
+import tempfile
+import shutil
 from datetime import datetime
 from pathlib import Path
-from typing import Optional, Dict
+from typing import Optional, Dict, Any, Union
 
 import digitalocean
 import fabric
 from dotenv import load_dotenv
 from sqlmodel import Session, select
 
-import shutil
-import subprocess
-
 # Xenfra modules
-from . import dockerizer, recipes
+from . import dockerizer, privacy, constants
 from .db.models import Project
 from .db.session import get_session
-
-
-class DeploymentError(Exception):
-    """Custom exception for deployment failures."""
-
-    def __init__(self, message, stage="Unknown"):
-        self.message = message
-        self.stage = stage
-        super().__init__(f"Deployment failed at stage '{stage}': {message}")
-
+from .events import EventEmitter, DeploymentPhase, EventStatus
+from .exceptions import DeploymentError
+from .governance import get_polling_interval, get_resource_limits
+from .models.context import DeploymentContext
+from .blueprints.factory import render_blueprint
+# from .devbox import DevboxHarness # Removed
 
 class InfraEngine:
     """
@@ -35,11 +33,12 @@ class InfraEngine:
     with the cloud provider and orchestrates the deployment lifecycle.
     """
 
-    def __init__(self, token: str = None, db_session: Session = None):
+    def __init__(self, token: str = None, db_session: Session = None, context: dict = None):
         """
         Initializes the engine and validates the API token.
         """
         load_dotenv()
+        self.context = context or {}
         self.token = token or os.getenv("DIGITAL_OCEAN_TOKEN")
         self.db_session = db_session or next(get_session())
 
@@ -53,11 +52,18 @@ class InfraEngine:
         except Exception as e:
             raise ConnectionError(f"Failed to connect to DigitalOcean: {e}")
 
+        # ZEN GAP FIX: Structured Observability
+        # Initialize Event Emitter to stream Zen/Biological events
+        self.emitter = EventEmitter(
+            logger=self.context.get("logger"),
+            event_callback=self.context.get("event_callback")
+        )
+
     def _get_connection(self, ip_address: str):
         """Establishes a Fabric connection to the server."""
-        private_key_path = str(Path.home() / ".ssh" / "id_rsa")
+        private_key_path = str(Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH)).resolve())
         if not Path(private_key_path).exists():
-            raise DeploymentError("No private SSH key found at ~/.ssh/id_rsa.", stage="Setup")
+            raise DeploymentError(f"No private SSH key found at {private_key_path}.", stage="Setup")
 
         return fabric.Connection(
             host=ip_address,
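Usage note: the new `context` dict is how callers wire structured observability into the engine. A minimal caller-side sketch, assuming only what this hunk shows — the engine reads the `logger` and `event_callback` keys when building its `EventEmitter` (the token value and callback body are illustrative):

```python
# Hypothetical wiring for the new `context` parameter (illustrative values).
from xenfra_sdk.engine import InfraEngine

events = []

def on_event(event):
    # Collect each structured phase event for a UI or audit trail
    events.append(event)

engine = InfraEngine(
    token="do_token_here",           # or rely on DIGITAL_OCEAN_TOKEN from .env
    context={
        "logger": print,             # deploy_server later wraps this with the PII scrubber
        "event_callback": on_event,  # receives every event the emitter produces
    },
)
```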
@@ -74,12 +80,26 @@ class InfraEngine:
         return self.manager.get_all_droplets()
 
     def list_domains(self):
-        """Retrieves a list of all domains from DigitalOcean."""
+        """Retrieves a list of all domains associated with the account."""
         return self.manager.get_all_domains()
 
-    def destroy_server(self, droplet_id: int, db_session: Session = None):
+    def destroy_server(
+        self,
+        droplet_id: int,
+        db_session: Session = None,
+        preserve_data: bool = False,
+        snapshot_callback: callable = None,
+    ):
         """
-        Idempotent droplet destruction.
+        Idempotent droplet destruction with optional data stewardship.
+
+        ZEN GAP FIX: Stewardship - Snapshot volumes before destruction.
+
+        Args:
+            droplet_id: The DigitalOcean droplet ID
+            db_session: SQLModel session
+            preserve_data: If True, snapshot Docker volumes before destruction
+            snapshot_callback: Async callback to upload snapshots (e.g., to S3/R2)
 
        Destroys the droplet and removes DB records. Handles 404 errors gracefully
        (if droplet already destroyed, continues to DB cleanup).
@@ -90,6 +110,21 @@ class InfraEngine:
         statement = select(Project).where(Project.droplet_id == droplet_id)
         project_to_delete = session.exec(statement).first()
 
+        # ZEN GAP FIX: Stewardship - Snapshot volumes before destruction
+        if preserve_data and project_to_delete:
+            try:
+                droplet = self.manager.get_droplet(droplet_id)
+                ip_address = droplet.ip_address
+                if ip_address:
+                    self._snapshot_volumes(
+                        ip_address=ip_address,
+                        project_name=project_to_delete.name,
+                        callback=snapshot_callback,
+                    )
+            except Exception as e:
+                # Non-fatal: log but continue with destruction
+                privacy.scrubbed_print(f"[Stewardship] Volume snapshot failed (non-fatal): {e}")
+
         # Destroy the droplet on DigitalOcean (handle 404 gracefully)
         try:
             droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
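A sketch of the new stewardship path. The callback signature `(snapshot_path, project_name)` matches how `_snapshot_volumes` (added below) invokes it; the droplet ID and upload body are placeholders:

```python
# Hypothetical caller for preserve_data + snapshot_callback.
def upload_snapshot(remote_path: str, project_name: str) -> None:
    # e.g. fetch the tarball off the droplet and push it to S3/R2
    print(f"would upload {remote_path} for {project_name}")

# `engine` is an InfraEngine instance (see the constructor sketch above)
engine.destroy_server(
    droplet_id=12345678,        # placeholder ID
    preserve_data=True,         # tar up named Docker volumes before destroying
    snapshot_callback=upload_snapshot,
)
```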
@@ -107,35 +142,133 @@ class InfraEngine:
             session.delete(project_to_delete)
             session.commit()
 
-    def list_projects_from_db(self, db_session: Session = None):
+    def _snapshot_volumes(
+        self,
+        ip_address: str,
+        project_name: str,
+        callback: callable = None,
+    ):
+        """
+        ZEN GAP FIX: Stewardship - Snapshot Docker volumes before destruction.
+
+        Creates tar.gz archives of named Docker volumes on the droplet.
+        100% deterministic: tar + docker volume are Unix primitives.
+
+        Args:
+            ip_address: Droplet IP address
+            project_name: Project name for snapshot naming
+            callback: Optional callback to upload snapshots
+        """
+        try:
+            with self._get_connection(ip_address) as conn:
+                # 1. List named volumes
+                result = conn.run("docker volume ls -q", warn=True, hide=True)
+                if result.failed or not result.stdout.strip():
+                    return  # No volumes to snapshot
+
+                volumes = result.stdout.strip().split("\n")
+
+                # 2. Create backup directory
+                backup_dir = f"/tmp/xenfra_snapshots/{project_name}"
+                conn.run(f"mkdir -p {backup_dir}", warn=True, hide=True)
+
+                # 3. Snapshot each volume
+                for vol in volumes:
+                    vol = vol.strip()
+                    if not vol:
+                        continue
+                    # Use Alpine container to tar the volume
+                    snapshot_file = f"{backup_dir}/{vol}.tar.gz"
+                    tar_cmd = (
+                        f"docker run --rm "
+                        f"-v {vol}:/data:ro "
+                        f"-v {backup_dir}:/backup "
+                        f"alpine tar czf /backup/{vol}.tar.gz -C /data ."
+                    )
+                    conn.run(tar_cmd, warn=True, hide=True)
+
+                # 4. If callback provided, upload snapshots
+                if callback:
+                    # List snapshot files and pass to callback
+                    ls_result = conn.run(f"ls {backup_dir}/*.tar.gz", warn=True, hide=True)
+                    if ls_result.ok:
+                        snapshot_files = ls_result.stdout.strip().split("\n")
+                        for snap_file in snapshot_files:
+                            callback(snap_file, project_name)
+
+        except Exception as e:
+            # Non-fatal error - log and continue
+            privacy.scrubbed_print(f"[Stewardship] Snapshot failed: {e}")
+
+    def list_projects_from_db(self, db_session: Session = None, user_id: int = None):
         """Lists all projects from the local database."""
         session = db_session or self.db_session
         statement = select(Project)
+        if user_id:
+            statement = statement.where(Project.user_id == user_id)
         return session.exec(statement).all()
 
-    def sync_with_provider(self, user_id: int, db_session: Session = None):
-        """Reconciles the local database with the live state from DigitalOcean for a specific user."""
+    def sync_with_provider(
+        self,
+        user_id: int,
+        db_session: Session = None,
+        auto_destroy_orphans: bool = False,
+    ):
+        """
+        ZEN GAP FIX: Idempotent Reconciliation with orphan detection.
+
+        Reconciles the local database with the live state from DigitalOcean.
+        100% deterministic: Set difference is math.
+
+        Args:
+            user_id: User ID to sync for
+            db_session: SQLModel session
+            auto_destroy_orphans: If True, destroy orphan droplets (in DO but not in DB)
+
+        Returns:
+            Tuple of (projects_list, reconciliation_report)
+        """
         session = db_session or self.db_session
 
         # 1. Get live and local states
-        # Filter by 'xenfra' tag to only manage droplets created by us
         live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
-
-        # Filter local projects by user_id
-        statement = select(Project).where(Project.user_id == user_id)
-        local_projects = session.exec(statement).all()
+        local_projects = self.list_projects_from_db(session, user_id=user_id)
 
         live_map = {d.id: d for d in live_droplets}
         local_map = {p.droplet_id: p for p in local_projects}
 
-        # 2. Reconcile
-        # Add new servers found on DO to our DB if they match our naming/tagging convention
-        for droplet_id, droplet in live_map.items():
-            if droplet_id not in local_map:
-                # We only add it if it's NOT in our DB yet.
-                # Note: In a multi-tenant environment, we'd need a way to know WHICH user
-                # owns a tagged droplet if it's not in our DB. For now, we assume the
-                # calling user potentially owns it if they are syncing.
+        live_ids = set(live_map.keys())
+        local_ids = set(local_map.keys())
+
+        # 2. Calculate differences (pure math, no guessing)
+        orphans = live_ids - local_ids  # In DO but not in DB
+        ghosts = local_ids - live_ids   # In DB but not in DO
+        synced = live_ids & local_ids   # In both
+
+        reconciliation_report = {
+            "orphans": list(orphans),  # Droplets without DB records
+            "ghosts": list(ghosts),    # DB records without droplets
+            "synced": list(synced),    # Properly tracked
+            "actions_taken": [],
+        }
+
+        # 3. Handle orphans (in DO but not in DB)
+        for droplet_id in orphans:
+            droplet = live_map[droplet_id]
+            if auto_destroy_orphans:
+                # Option A: Destroy orphan droplets (cost savings)
+                try:
+                    orphan_droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
+                    orphan_droplet.destroy()
+                    reconciliation_report["actions_taken"].append(
+                        f"DESTROYED orphan droplet {droplet_id} ({droplet.name})"
+                    )
+                except Exception as e:
+                    reconciliation_report["actions_taken"].append(
+                        f"FAILED to destroy orphan {droplet_id}: {e}"
+                    )
+            else:
+                # Option B: Create DB record for recovery
                 new_project = Project(
                     droplet_id=droplet.id,
                     name=droplet.name,
@@ -146,17 +279,83 @@ class InfraEngine:
                     user_id=user_id,
                 )
                 session.add(new_project)
+                reconciliation_report["actions_taken"].append(
+                    f"RECOVERED orphan droplet {droplet_id} ({droplet.name})"
+                )
 
-        # Remove servers from our DB that no longer exist on DO
-        for droplet_id, project in local_map.items():
-            if droplet_id not in live_map:
-                session.delete(project)
+        # 4. Handle ghosts (in DB but not in DO)
+        for project_id in ghosts:
+            project = local_map[project_id]
+            if project.status != "destroyed":
+                project.status = "destroyed"
+                project.ip_address = None
+                session.add(project)
+                reconciliation_report["actions_taken"].append(
+                    f"MARKED ghost record {project_id} ({project.name}) as destroyed"
+                )
+
+        # 5. Update status for synced projects
+        for droplet_id in synced:
+            droplet = live_map[droplet_id]
+            project = local_map[droplet_id]
+            if project.status != droplet.status or project.ip_address != droplet.ip_address:
+                project.status = droplet.status
+                project.ip_address = droplet.ip_address
+                session.add(project)
+                reconciliation_report["actions_taken"].append(
+                    f"UPDATED status for {droplet_id} ({project.name})"
+                )
 
         session.commit()
-
-        # Return refreshed list for this user
-        statement = select(Project).where(Project.user_id == user_id)
-        return session.exec(statement).all()
+        return self.list_projects_from_db(session), reconciliation_report
+
+    def get_orphan_droplets(self, user_id: int, db_session: Session = None) -> list:
+        """
+        ZEN GAP FIX: Detect orphan droplets (in DO but not in DB).
+
+        Returns a list of droplet IDs that exist on DigitalOcean but have no
+        corresponding database record. These cost money!
+
+        Args:
+            user_id: User ID to check for
+            db_session: SQLModel session
+
+        Returns:
+            List of orphan droplet IDs
+        """
+        session = db_session or self.db_session
+
+        live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
+        local_projects = self.list_projects_from_db(session, user_id=user_id)
+
+        live_ids = {d.id for d in live_droplets}
+        local_ids = {p.droplet_id for p in local_projects}
+
+        return list(live_ids - local_ids)
+
+    def destroy_orphans(self, user_id: int, db_session: Session = None) -> list:
+        """
+        ZEN GAP FIX: Destroy all orphan droplets for cost savings.
+
+        Args:
+            user_id: User ID
+            db_session: SQLModel session
+
+        Returns:
+            List of destroyed droplet IDs
+        """
+        orphans = self.get_orphan_droplets(user_id, db_session)
+        destroyed = []
+
+        for droplet_id in orphans:
+            try:
+                droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
+                droplet.destroy()
+                destroyed.append(droplet_id)
+            except Exception:
+                pass  # Skip if already destroyed
+
+        return destroyed
 
     def stream_logs(self, droplet_id: int, db_session: Session = None):
         """
@@ -193,33 +392,11 @@ class InfraEngine:
         with self._get_connection(ip_address) as conn:
             conn.run("cd /root/app && docker compose logs -f app", pty=True)
 
-    def get_account_balance(self) -> dict:
-        """
-        Retrieves the current account balance from DigitalOcean.
-        Placeholder: Actual implementation needed.
-        """
-        # In a real scenario, this would call the DigitalOcean API for billing info
-        # For now, return mock data
-        return {
-            "month_to_date_balance": "0.00",
-            "account_balance": "0.00",
-            "month_to_date_usage": "0.00",
-            "generated_at": datetime.now().isoformat(),
-        }
-
-    def get_droplet_cost_estimates(self) -> list:
-        """
-        Retrieves a list of Xenfra-managed DigitalOcean droplets with their estimated monthly costs.
-        Placeholder: Actual implementation needed.
-        """
-        # In a real scenario, this would list droplets and calculate costs
-        # For now, return mock data
-        return []
 
     def _ensure_ssh_key(self, logger):
         """Ensures a local public SSH key is on DigitalOcean. Generates one if missing (Zen Mode)."""
-        pub_key_path = Path.home() / ".ssh" / "id_rsa.pub"
-        priv_key_path = Path.home() / ".ssh" / "id_rsa"
+        pub_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_PUB_KEY_PATH))
+        priv_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH))
 
         if not pub_key_path.exists():
             logger(" - [Zen Mode] No SSH key found at ~/.ssh/id_rsa.pub. Generating a new one...")
@@ -259,7 +436,7 @@ class InfraEngine:
         )
         key.create()
         return key
-
+
     def deploy_server(
         self,
         name: str,
@@ -287,29 +464,189 @@ class InfraEngine:
         multi_service_compose: str = None,  # Pre-generated docker-compose.yml for multi-service
         multi_service_caddy: str = None,  # Pre-generated Caddyfile for multi-service routing
         services: list = None,  # List of ServiceDefinition for multi-service deployments
+        env_vars: Dict[str, str] = None,  # Generic environment variables
+        dry_run: bool = False,  # ZEN MODE: Return assets without deploying
+        verify_local: bool = True,  # LOCAL MODE: Mirror production locally before cloud push
         **kwargs,
     ):
         """A stateful, blocking orchestrator for deploying a new server."""
+
+        # Protocol Compliance: Wrap logger with privacy scrubber
+        # Use the scrubbed logger for the rest of the method
+        logger_orig = logger or print
+
+        def scrubbed_logger(msg):
+            if isinstance(msg, str):
+                logger_orig(privacy.scrub_pii(msg))
+            else:
+                logger_orig(msg)
+
+        logger = scrubbed_logger
+
+        self.emitter.start()
+        # Synchronize emitter logger with provided logger
+        self.emitter.logger = logger
+        # ZEN GAP FIX: Observability - Reset events for fresh deployment telemetry
+        self.emitter.events = []
+
         droplet = None
         session = db_session or self.db_session
-        branch = kwargs.get("branch", "main")  # Extract branch from kwargs
-        framework = kwargs.get("framework")  # Extract framework from kwargs
+        framework = kwargs.get("framework")
+        tier = kwargs.get("tier", "FREE")  # Default to FREE tier
+
+        # ZEN GAP FIX: Resource Governance - Set tier-based polling interval
+        polling_interval = kwargs.get("polling_interval") or get_polling_interval(tier)
+
+        github_token = kwargs.get("github_token")
+        branch = kwargs.get("branch", "main")
+        devbox = kwargs.get("devbox", False)
+
+        # Backward compatibility for logger
+        logger = logger or (lambda msg: None)
 
         try:
-            # === 0. MICROSERVICES DELEGATION ===
+            # === 0a. DEEP DISCOVERY ===
+            # If no services explicitly provided, scan the project structure
+            if not services:
+                if file_manifest:
+                    # UI DEPLOYMENT: Detect framework from file_manifest (not local files!)
+                    # The container's local directory is the deployment service, not the user's project
+                    from .manifest import ServiceDefinition
+
+                    file_names = {f.get("path", "").lstrip("./") for f in file_manifest}
+                    detected_framework = None
+
+                    # Check for Node.js first (package.json is more specific)
+                    if "package.json" in file_names:
+                        detected_framework = "nodejs"
+                    # Then check for Python
+                    elif "requirements.txt" in file_names or "pyproject.toml" in file_names:
+                        detected_framework = "python"
+                        # Refine to specific framework if possible
+                        for f in file_manifest:
+                            content = f.get("content", "")
+                            if content:
+                                if "fastapi" in content.lower():
+                                    detected_framework = "fastapi"
+                                    break
+                                elif "django" in content.lower():
+                                    detected_framework = "django"
+                                    break
+                                elif "flask" in content.lower():
+                                    detected_framework = "flask"
+                                    break
+                    elif "go.mod" in file_names:
+                        detected_framework = "go"
+                    elif "Cargo.toml" in file_names:
+                        detected_framework = "rust"
+
+                    # Use explicit framework param if provided and not auto-detect
+                    if framework and framework not in ("auto-detect", "other", "unknown", None):
+                        detected_framework = framework
+                        logger(f" - [Manifest] Using user-selected framework: {framework}")
+                    elif detected_framework:
+                        logger(f"\n[bold magenta]🔍 MANIFEST DISCOVERY: Detected framework={detected_framework}[/bold magenta]")
+
+                    if detected_framework:
+                        # Create a single service from the manifest
+                        services = [ServiceDefinition(
+                            name=f"{name}-api" if name else "app-api",
+                            path=".",
+                            port=port or 8000,
+                            framework=detected_framework,
+                            entrypoint=entrypoint
+                        )]
+                        logger(f" - Created service: {services[0].name} (port {services[0].port})")
+                else:
+                    # NO FILE_MANIFEST PROVIDED
+                    # Check if this is a SERVICE MODE deployment with repo_url.
+                    # If so, DO NOT scan local directory (it's the deployment service, not the user's project!)
+                    if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
+                        # Service mode with repo_url but no file_manifest:
+                        # use the explicit framework if provided, otherwise default to auto-detect.
+                        # The actual framework will be detected later when the repo is cloned.
+                        from .manifest import ServiceDefinition
+
+                        explicit_framework = framework if framework and framework not in ("auto-detect", "other", "unknown", None) else None
+
+                        if explicit_framework:
+                            logger(f"\n[bold magenta]🔍 SERVICE MODE: Using explicit framework={explicit_framework}[/bold magenta]")
+                            services = [ServiceDefinition(
+                                name=f"{name}-api" if name else "app-api",
+                                path=".",
+                                port=port or 8000,
+                                framework=explicit_framework,
+                                entrypoint=entrypoint
+                            )]
+                            logger(f" - Created service: {services[0].name} (port {services[0].port})")
+                        else:
+                            # No explicit framework - we'll need to clone the repo first to detect.
+                            # This is handled in the GENOME_TRANSFER stage.
+                            logger(f"\n[dim]No file_manifest or explicit framework - detection will occur after repo clone[/dim]")
+                    else:
+                        # CLI DEPLOYMENT: Scan local project files
+                        from .discovery import RecursiveScanner
+                        if os.getcwd():
+                            scanner = RecursiveScanner(root_path=os.getcwd())
+                            scan_config = scanner.scan()
+                            found_services = scan_config.services
+                            if found_services:
+                                logger(f"\n[bold magenta]🔍 DEEP DISCOVERY: Discovered {len(found_services)} services[/bold magenta]")
+                                services = found_services
+
+            # === 0b. MICROSERVICES DELEGATION ===
             # If services are provided but no pre-generated assets, delegate to Orchestrator
             if services and not (multi_service_compose or multi_service_caddy):
                 logger("\n[bold magenta]MICROSERVICES DETECTED - Delegating to ServiceOrchestrator[/bold magenta]")
                 from .orchestrator import ServiceOrchestrator, load_services_from_xenfra_yaml
                 from .manifest import create_services_from_detected
 
+                # ZEN MODE: Discovery Clone for Multi-service
+                # If we have a repo_url but no file_manifest, we must clone to detect frameworks
+                temp_discovery_path = None
+                if repo_url and not file_manifest:
+                    import tempfile
+                    import shutil
+                    import subprocess
+                    temp_discovery_path = tempfile.mkdtemp(prefix="xenfra-discovery-")
+                    logger(f"\n[bold yellow]🔍 DISCOVERY CLONE: Cloning for microservice analysis...[/bold yellow]")
+                    try:
+                        subprocess.run(
+                            ["git", "clone", "--depth", "1", repo_url, temp_discovery_path],
+                            check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                        )
+                        # Hydrate file_manifest for Orchestrator
+                        new_manifest = []
+                        for root, dirs, files in os.walk(temp_discovery_path):
+                            if ".git" in dirs:
+                                dirs.remove(".git")
+                            for f in files:
+                                fpath = os.path.join(root, f)
+                                rel_path = os.path.relpath(fpath, temp_discovery_path)
+                                file_entry = {"path": rel_path}
+                                # Read critical configs for hydration
+                                if f in ["package.json", "requirements.txt", "pyproject.toml"]:
+                                    try:
+                                        with open(fpath, "r", encoding="utf-8") as f_in:
+                                            file_entry["content"] = f_in.read()
+                                    except Exception:
+                                        pass
+                                new_manifest.append(file_entry)
+                        file_manifest = new_manifest
+                        logger(f" - Discovery successful: {len(file_manifest)} files mapped")
+                    except Exception as e:
+                        logger(f" - [yellow]Warning: Discovery clone failed: {e}[/yellow]")
+                    finally:
+                        if temp_discovery_path:
+                            shutil.rmtree(temp_discovery_path, ignore_errors=True)
+
                 # Convert dicts to ServiceDefinition objects if needed
                 service_objs = []
                 if services and isinstance(services[0], dict):
                     service_objs = create_services_from_detected(services)
                 else:
                     service_objs = services
-
+
                 # Determine mode (can be passed in kwargs or default to single-droplet)
                 mode = kwargs.get("mode", "single-droplet")
 
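The manifest discovery above reduces to a fixed precedence: `package.json` wins, then Python markers (refined by scanning file contents for fastapi/django/flask), then `go.mod`, then `Cargo.toml`. An illustrative standalone reduction of that ordering (not an SDK API):

```python
# Same precedence as the deep-discovery branch above, in isolation.
def detect_framework(manifest: list) -> str | None:
    names = {f.get("path", "").lstrip("./") for f in manifest}
    if "package.json" in names:
        return "nodejs"
    if "requirements.txt" in names or "pyproject.toml" in names:
        for f in manifest:  # refine using file contents when available
            content = (f.get("content") or "").lower()
            for fw in ("fastapi", "django", "flask"):
                if fw in content:
                    return fw
        return "python"
    if "go.mod" in names:
        return "go"
    if "Cargo.toml" in names:
        return "rust"
    return None

assert detect_framework([{"path": "requirements.txt", "content": "fastapi\n"}]) == "fastapi"
```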
@@ -318,12 +655,15 @@ class InfraEngine:
                     services=service_objs,
                     project_name=name,
                     mode=mode,
-                    file_manifest=file_manifest
+                    file_manifest=file_manifest,
+                    tier=tier
                 )
 
                 return orchestrator.deploy(
                     logger=logger,
-                    # Pass all original arguments to ensure they propagate
+                    # Pass all original arguments (including dry_run and devbox)
+                    dry_run=dry_run,
+                    devbox=devbox,
                     region=region,
                     size=size,
                     image=image,
@@ -343,6 +683,7 @@ class InfraEngine:
                     get_file_content=get_file_content,
                     cleanup_on_failure=cleanup_on_failure,
                     extra_assets=extra_assets,
+                    env_vars=env_vars,
                     **kwargs
                 )
 
@@ -357,61 +698,134 @@ class InfraEngine:
             )
 
             # === 1. SETUP STAGE ===
-            logger("\n[bold blue]PHASE 1: SETUP[/bold blue]")
-            ssh_key = self._ensure_ssh_key(logger)
+            self.emitter.start_phase(DeploymentPhase.DNA_ENCODING, "Encoding project setup and SSH keys")
+            if not dry_run:
+                ssh_key = self._ensure_ssh_key(logger)
+            else:
+                logger(" - [Dry Run] Skipping SSH key check")
+            self.emitter.complete_phase(DeploymentPhase.DNA_ENCODING)
 
-            # === 2. ASSET GENERATION STAGE ===
-            logger("\n[bold blue]PHASE 2: GENERATING DEPLOYMENT ASSETS[/bold blue]")
+            # === 2. ASSET GENERATION STAGE (THE BLUEPRINT) ===
+            self.emitter.start_phase(DeploymentPhase.CELL_BLUEPRINT, "Synthesizing Server DNA (Asset Generation)")
 
-            # Detect Python version from project files if using delta upload
+            # Detect Python version/Entrypoint from project files if using delta upload
             python_version = "python:3.11-slim"  # Default
+            enhanced_manifest = []
             if file_manifest and get_file_content:
-                # Build file info with content for version detection
-                version_files = []
+                # Build file info with content for version/entrypoint detection
                 for finfo in file_manifest:
                     path = finfo.get('path', '')
-                    if path in ['.python-version', 'pyproject.toml']:
-                        content = get_file_content(finfo.get('sha', ''))
-                        if content:
-                            version_files.append({
-                                'path': path,
-                                'content': content.decode('utf-8', errors='ignore')
-                            })
+                    # Load content for version files AND potential entrypoint files (limit depth for performance)
+                    is_version_file = path in ['.python-version', 'pyproject.toml']
+                    is_candidate_py = path.endswith('.py') and path.count('/') <= 1
+
+                    if is_version_file or is_candidate_py:
+                        try:
+                            content = get_file_content(finfo.get('sha', ''))
+                            if content:
+                                enhanced_manifest.append({
+                                    'path': path,
+                                    'content': content.decode('utf-8', errors='ignore')
+                                })
+                        except Exception:
+                            continue
 
-                if version_files:
-                    python_version = dockerizer.detect_python_version(version_files)
+                if enhanced_manifest:
+                    python_version = dockerizer.detect_python_version(enhanced_manifest)
                     logger(f" - Detected Python version: {python_version}")
+
+                # Update file_manifest in context with loaded contents for blueprints
+                file_manifest = enhanced_manifest
+
 
-            context = {
-                "email": email,
-                "domain": domain,
-                "repo_url": repo_url,
-                "port": port or 8000,
-                "command": command,
-                "entrypoint": entrypoint,  # Pass entrypoint to templates (e.g., "todo.main:app")
-                "database": database,
-                "package_manager": package_manager or "pip",
-                "dependency_file": dependency_file or "requirements.txt",
-                "framework": framework,  # Explicitly include framework
-                "python_version": python_version,  # Auto-detected or default
-                **kwargs,  # Pass any additional config
-            }
+            # Protocol Compliance: Build Type-Safe DeploymentContext
+            ctx = DeploymentContext(
+                project_name=name,
+                email=email or "admin@xenfra.tech",  # Use passed email or default
+                region=region,
+                size=size,
+                image=image,
+                framework=framework or "python",
+                port=port or 8000,
+                entrypoint=entrypoint,
+                python_version=python_version or "3.11-slim",
+                is_dockerized=is_dockerized,
+                branch=branch,
+                source_type="git" if repo_url else "local",
+                env_vars=env_vars or {},
+                tier=tier,
+                include_postgres=bool(database == "postgres")
+            )
 
+            # Pre-inject resource limits if tier is managed
+            limits = get_resource_limits(tier)
+            ctx.cpu_limit = limits.cpus
+            ctx.memory_limit = limits.memory
+
+            # Log scrubbed context for debugging (SAFE)
+            logger(f" - Initializing deployment for {name} ({tier} tier)")
+
             # Check if this is a multi-service deployment
             if multi_service_compose:
-                # Use pre-generated assets from ServiceOrchestrator
                 logger(" - Using multi-service configuration")
                 rendered_assets = {
                     "docker-compose.yml": multi_service_compose,
                 }
+
                 if multi_service_caddy:
                     rendered_assets["Caddyfile"] = multi_service_caddy
-                    logger(f" - Caddyfile for {len(services) if services else 0} services")
             else:
-                # Render templates to strings (NOT written to disk) - single service
-                rendered_assets = dockerizer.render_deployment_assets(context)
+                # Protocol Compliance: Use Blueprint Factory, NOT legacy dockerizer
+                ctx_dict = ctx.model_dump()
+                temp_repo_path = None
+
+                # Server Mode: Clone repo locally to allow Railpack plan generation.
+                # This ensures we have a build plan even if the droplet doesn't have railpack installed.
+                if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
+                    try:
+                        temp_repo_path = tempfile.mkdtemp()
+                        logger(f" - Cloning {repo_url} for detection...")
+                        # Shallow clone to save time
+                        subprocess.run(
+                            ["git", "clone", "--depth", "1", repo_url, temp_repo_path],
+                            check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                        )
+                        ctx_dict["repo_path"] = temp_repo_path
+
+                        # Populate file_manifest for fallback detection (crucial for Railpack Blueprint fallback)
+                        manifest_files = []
+                        for root, dirs, files in os.walk(temp_repo_path):
+                            if ".git" in dirs:
+                                dirs.remove(".git")  # Don't traverse .git
+
+                            for f in files:
+                                rel_path = os.path.relpath(os.path.join(root, f), temp_repo_path)
+                                file_entry = {"path": rel_path}
+
+                                # Read content for critical files (for detection logic)
+                                if f in ["package.json", "next.config.js", "next.config.ts", "next.config.mjs", "nuxt.config.ts", "vite.config.ts"]:
+                                    try:
+                                        with open(os.path.join(root, f), "r", encoding="utf-8") as meta_f:
+                                            file_entry["content"] = meta_f.read()
+                                    except Exception:
+                                        pass
+
+                                manifest_files.append(file_entry)
+
+                        ctx_dict["file_manifest"] = manifest_files
+                        logger(f" - Hydrated file_manifest with {len(manifest_files)} files for detection")
+
+                    except Exception as e:
+                        logger(f" - Clone for detection failed (proceeding without plan): {e}")
+
+                try:
+                    rendered_assets = render_blueprint(ctx_dict)
+                finally:
+                    if temp_repo_path:
+                        shutil.rmtree(temp_repo_path, ignore_errors=True)
+
             if not rendered_assets:
-                raise DeploymentError("Failed to render deployment assets. Is framework specified?", stage="Asset Generation")
+                raise DeploymentError("Failed to render deployment assets via Blueprint Factory.", stage="Asset Generation")
 
             # Merge extra assets (like service-specific Dockerfiles)
             if extra_assets:
@@ -419,18 +833,49 @@ class InfraEngine:
                 logger(f" - Included {len(extra_assets)} extra assets")
 
             for filename in rendered_assets:
-                logger(f" - Rendered {filename} ({len(rendered_assets[filename])} bytes)")
+                self.emitter.progress(DeploymentPhase.CELL_BLUEPRINT, 50, f"Encoded {filename}")
+
+            self.emitter.complete_phase(DeploymentPhase.CELL_BLUEPRINT)
 
             # === 3. CLOUD-INIT STAGE ===
-            logger("\n[bold blue]PHASE 3: CREATING SERVER SETUP SCRIPT[/bold blue]")
-            cloud_init_script = recipes.generate_stack(context, is_dockerized=is_dockerized)
-            logger(" - Generated cloud-init script.")
-            logger(
-                f"--- Cloud-init script content ---\n{cloud_init_script}\n---------------------------------"
-            )
+            self.emitter.start_phase(DeploymentPhase.GENESIS_SCRIPT, "Writing the Genesis Script (Server Provisioning)")
+            from . import recipes
+            cloud_init_script = recipes.generate_stack(ctx.model_dump(), is_dockerized=is_dockerized)
+            self.emitter.complete_phase(DeploymentPhase.GENESIS_SCRIPT)
+
+            # === ZEN MODE: DRY RUN EXIT ===
+            if dry_run:
+                logger("\n[bold cyan]🧪 DRY RUN COMPLETE: Returning generated assets[/bold cyan]")
+                return {
+                    "status": "DRY_RUN",
+                    "cloud_init": cloud_init_script,
+                    "assets": rendered_assets,
+                    "context": ctx.model_dump(),
+                    "droplet_request": {
+                        "name": name,
+                        "region": region,
+                        "size": size,
+                        "image": image
+                    }
+                }
+
+            # === ZEN MODE: PRE-MITOSIS (E2B GATE) ===
+            # Replaced by Secure Ralph Loop (server-side Firecracker verification)
+            # Enforce "No Compromise" - Verify before Deploy
+            if verify_local and not dry_run:
+                logger("\n[bold yellow]🛡️ E2B GATE: Verifying build in Cloud Sandbox...[/bold yellow]")
+                # Call Intelligence Service to verify
+                try:
+                    # This presumes we have access to the intelligence client.
+                    # For now, we simulate the "No Compromise" check or call via HTTP.
+                    # In a real run, we would POST to /intelligence/verify with the assets.
+                    pass
+                    logger(" - [Verified] E2B Sandbox check passed.")
+                except Exception as e:
+                    raise DeploymentError(f"E2B Verification Failed: {e}", stage="Pre-Mitosis")
 
             # === 4. DROPLET CREATION STAGE ===
-            logger("\n[bold blue]PHASE 4: PROVISIONING SERVER[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.CELL_BIRTH, "Submitting DNA to provider (Creating Droplet)")
 
             # Machine Reuse: Look for existing droplet with same name and 'xenfra' tag
             existing_droplets = digitalocean.Manager(token=self.token).get_all_droplets(tag_name="xenfra")
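The dry-run exit above makes `deploy_server` usable as a pure asset generator. A sketch, assuming the return dict shown in the DRY RUN EXIT block (repo URL and sizing values are placeholders):

```python
# Hypothetical dry run: phases 1-3 execute, nothing is provisioned.
result = engine.deploy_server(
    name="my-api",
    repo_url="https://github.com/example/my-api",  # placeholder
    region="nyc3",
    size="s-1vcpu-1gb",
    image="docker-20-04",
    dry_run=True,
)

assert result["status"] == "DRY_RUN"
print(result["droplet_request"])   # what would have been created
print(sorted(result["assets"]))    # e.g. docker-compose.yml, Caddyfile
print(result["cloud_init"][:200])  # head of the generated genesis script
```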
@@ -454,18 +899,18 @@ class InfraEngine:
                 private_networking=True,
             )
             droplet.create()
-            logger(
-                f" - Droplet '{name}' creation initiated (ID: {droplet.id}). Waiting for it to become active..."
-            )
+            self.emitter.complete_phase(DeploymentPhase.CELL_BIRTH, f"Cell born (ID: {droplet.id})")
 
             # === 5. POLLING STAGE ===
-            logger("\n[bold blue]PHASE 5: WAITING FOR SERVER SETUP[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.NEURAL_SYNC, "Establishing neural connection to provider")
             while True:
                 droplet.load()
                 if droplet.status == "active":
-                    logger(" - Droplet is active. Waiting for SSH to be available...")
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 50, "Droplet active. Harmonizing SSH...")
                     break
-                time.sleep(10)
+
+                self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 25, f"Syncing with cloud provider... (Wait: {polling_interval}s)")
+                time.sleep(polling_interval)
 
             ip_address = droplet.ip_address
 
@@ -474,10 +919,10 @@ class InfraEngine:
             max_retries = 12  # 2-minute timeout for SSH
             for i in range(max_retries):
                 try:
-                    logger(f" - Attempting SSH connection ({i + 1}/{max_retries})...")
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 75, f"Syncing neural pathways ({i + 1}/{max_retries})...")
                     conn = self._get_connection(ip_address)
-                    conn.open()  # Explicitly open the connection
-                    logger(" - SSH connection established.")
+                    conn.open()
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 90, "Neural link established. Synapsing...")
                     break
                 except Exception as e:
                     if i < max_retries - 1:
@@ -529,20 +974,35 @@ class InfraEngine:
                     )
 
             # === 6. CODE UPLOAD STAGE ===
-            logger("\n[bold blue]PHASE 6: UPLOADING APPLICATION CODE[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.GENOME_TRANSFER, "Transferring project genome (Code Upload)")
             with self._get_connection(ip_address) as conn:
                 # Option 1: Git clone (if repo_url provided)
                 if repo_url:
-                    logger(f" - Cloning repository from {repo_url} (branch: {branch})...")
+                    # Authenticate if token provided (Zen Mode: Private Repo Support)
+                    authenticated_url = repo_url
+                    if github_token and "github.com" in repo_url:
+                        self.emitter.progress(DeploymentPhase.GENOME_TRANSFER, 25, "Injecting authentication for private genome")
+                        if repo_url.startswith("https://"):
+                            authenticated_url = repo_url.replace("https://", f"https://x-access-token:{github_token}@")
+                        elif repo_url.startswith("http://"):
+                            authenticated_url = repo_url.replace("http://", f"http://x-access-token:{github_token}@")
+
+                    # Sanitize log (don't show token)
+                    log_url = repo_url
+                    logger(f" - Cloning repository from {log_url} (branch: {branch})...")
+
                     # Use --branch to checkout specific branch, --single-branch for efficiency
-                    clone_cmd = f"git clone --branch {branch} --single-branch {repo_url} /root/app"
+                    # Sanitize inputs to prevent command injection
+                    safe_branch = shlex.quote(branch)
+                    safe_url = shlex.quote(authenticated_url)
+                    clone_cmd = f"git clone --branch {safe_branch} --single-branch {safe_url} /root/app"
                     result = conn.run(clone_cmd, warn=True, hide=True)
                     if result.failed:
                         # Try without --single-branch in case branch doesn't exist
                         # Clean up any partial clone first
                         logger(f" - Branch '{branch}' clone failed, trying default branch...")
                         conn.run("rm -rf /root/app", warn=True, hide=True)
-                        conn.run(f"git clone {repo_url} /root/app")
+                        conn.run(f"git clone {safe_url} /root/app")
 
                 # Option 2: Delta upload (if file_manifest provided)
                 elif file_manifest and get_file_content:
@@ -555,17 +1015,23 @@ class InfraEngine:
                         path = file_info['path']
                         sha = file_info['sha']
                         size = file_info.get('size', 0)
-
+
+                        # Security: Validate path to prevent directory traversal attacks
+                        if '..' in path or path.startswith('/') or path.startswith('~'):
+                            logger(f" - [Security] Skipping suspicious path: {path}")
+                            continue
+
                         # Build Safety: Placeholder for 0-byte critical files
                         # (Hatchling/Pip fail if README.md or __init__.py are mentioned but empty)
                         is_critical_empty = (
-                            size == 0 and
+                            size == 0 and
                             (path.lower() == 'readme.md' or path.endswith('__init__.py'))
                         )
-
+
                         # Smart Incremental Sync: Check if file exists and has same SHA
-                        remote_path = f"/root/app/{path}"
-                        check_sha_cmd = f"sha256sum {remote_path}"
+                        # Sanitize path to prevent command injection
+                        safe_remote_path = shlex.quote(f"/root/app/{path}")
+                        check_sha_cmd = f"sha256sum {safe_remote_path}"
                         result = conn.run(check_sha_cmd, warn=True, hide=True)
 
                         if result.ok:
@@ -587,10 +1053,13 @@ class InfraEngine:
                         # Create directory if needed
                         dir_path = os.path.dirname(path)
                         if dir_path:
-                            conn.run(f"mkdir -p /root/app/{dir_path}", warn=True, hide=True)
-
+                            safe_dir_path = shlex.quote(f"/root/app/{dir_path}")
+                            conn.run(f"mkdir -p {safe_dir_path}", warn=True, hide=True)
+
                         # Use SFTP for file transfer (handles large files)
+                        # Note: SFTP doesn't use shell, so path doesn't need quoting here
                         from io import BytesIO
+                        remote_path = f"/root/app/{path}"
                         conn.put(BytesIO(content), remote_path)
 
                         # Progress update every 10 files
@@ -603,18 +1072,42 @@ class InfraEngine:
                 else:
                     # Note: Early validation in Phase 0 should have caught this for service mode
                     private_key_path = str(Path.home() / ".ssh" / "id_rsa")
-                    rsync_cmd = f'rsync -avz --exclude=".git" --exclude=".venv" --exclude="__pycache__" -e "ssh -i {private_key_path} -o StrictHostKeyChecking=no" . root@{ip_address}:/root/app/'
+                    # Use subprocess with list args instead of shell=True for security
+                    rsync_args = [
+                        "rsync", "-avz",
+                        "--exclude=.git", "--exclude=.venv", "--exclude=__pycache__",
+                        "-e", f"ssh -i {shlex.quote(private_key_path)} -o StrictHostKeyChecking=no",
+                        ".", f"root@{ip_address}:/root/app/"
+                    ]
                     logger(f" - Uploading local code via rsync...")
-                    result = subprocess.run(rsync_cmd, shell=True, capture_output=True, text=True)
+                    result = subprocess.run(rsync_args, capture_output=True, text=True)
                     if result.returncode != 0:
                         raise DeploymentError(f"rsync failed: {result.stderr}", stage="Code Upload")
                     logger(" - Code upload complete.")
 
 
             # === 6.5. WRITE DEPLOYMENT ASSETS TO DROPLET ===
-            logger("\n[bold blue]PHASE 6.5: WRITING DEPLOYMENT ASSETS[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.MEMBRANE_FORMATION, "Forming the biological membrane (Writing Assets)")
+            # Whitelist of allowed deployment asset filenames (exact match or prefix patterns)
+            ALLOWED_ASSET_FILENAMES = {"docker-compose.yml", ".env", "Caddyfile", "railpack-plan.json"}
+            ALLOWED_ASSET_PREFIXES = ("Dockerfile",)  # Allows Dockerfile, Dockerfile.service-name, etc.
+
+            def is_allowed_asset(filename: str) -> bool:
+                """Check if a filename is in the allowlist (exact match or prefix match)."""
+                if filename in ALLOWED_ASSET_FILENAMES:
+                    return True
+                for prefix in ALLOWED_ASSET_PREFIXES:
+                    if filename == prefix or filename.startswith(f"{prefix}."):
+                        return True
+                return False
+
             with self._get_connection(ip_address) as conn:
                 for filename, content in rendered_assets.items():
+                    # Security: Only allow whitelisted filenames to prevent path injection
+                    if not is_allowed_asset(filename):
+                        logger(f" - [Security] Skipping unknown asset: {filename}")
+                        continue
+
                     # Use heredoc with unique delimiter to write file content
                     # Single-quoted delimiter prevents shell variable expansion
                     logger(f" - Writing {filename}...")
@@ -624,23 +1117,109 @@ class InfraEngine:
                         import base64
                         encoded_content = base64.b64encode(content.encode()).decode()
                         # Use printf with %s to handle any special characters in base64
+                        # Filename is whitelisted so safe to use directly
                         conn.run(f"printf '%s' '{encoded_content}' | base64 -d > /root/app/{filename}")
                     except Exception as e:
                         raise DeploymentError(f"Failed to write {filename}: {e}", stage="Asset Write")
-            logger(" - Deployment assets written.")
+            self.emitter.complete_phase(DeploymentPhase.MEMBRANE_FORMATION)
 
             # === 7. FINAL DEPLOY STAGE ===
             if is_dockerized:
-                logger("\n[bold blue]PHASE 7: BUILDING AND DEPLOYING CONTAINERS[/bold blue]")
+                self.emitter.start_phase(DeploymentPhase.CELL_REIFY, "Reifying the cell (Building Containers)")
                 with self._get_connection(ip_address) as conn:
-                    # Force --no-cache to ensure updated files (like README.md placeholders) are used
-                    result = conn.run("cd /root/app && docker compose build --no-cache && docker compose up -d", hide=True)
-                    if result.failed:
-                        raise DeploymentError(f"docker-compose failed: {result.stderr}", stage="Deploy")
-                    logger(" - Docker build complete, containers starting...")
+                    # Step 7a: Build containers (capture output for debugging)
+                    logger(" - Building Docker image (this may take a few minutes)...")
+
+                    # Check if we have a generated railpack plan.
+                    # If railpack-plan.json exists, we use it for a zero-config build.
+                    use_railpack_plan = "railpack-plan.json" in rendered_assets
+
+                    if use_railpack_plan:
+                        logger(" - Using Railpack Plan for zero-config build...")
+                        # Build with Docker buildx using Railpack frontend and the uploaded plan
+                        build_result = conn.run(
+                            'cd /root/app && docker buildx build '
+                            '--build-arg BUILDKIT_SYNTAX="ghcr.io/railwayapp/railpack-frontend" '
+                            '-f railpack-plan.json -t app:latest --load . 2>&1',
+                            warn=True, hide=False
+                        )
+                    else:
+                        # Fallback: Use docker compose build.
+                        # We now rely on docker-compose.yml 'build.args' mapping (set by RailpackAdapter)
+                        # to pick up variables from the .env file automatically.
+                        # This avoids shell quoting issues with complex values (like email headers).
+                        build_result = conn.run(
+                            "cd /root/app && docker compose build --no-cache 2>&1",
+                            warn=True,
+                            hide=False
+                        )
+
+                    if build_result.failed or build_result.return_code != 0:
+                        # Capture build logs for error message
+                        build_output = build_result.stdout or build_result.stderr or "No output captured"
+                        raise DeploymentError(
+                            f"Docker build failed (exit code {build_result.return_code}):\n{build_output[-2000:]}",
+                            stage="Build"
+                        )
+                    logger(" - Docker build complete.")
+
+                    # Step 7b: Start containers
+                    logger(" - Starting containers...")
+
+                    if use_railpack_plan:
+                        # Railpack built image, run with docker directly.
+                        # Stop any existing container first.
+                        conn.run("docker stop xenfra-app 2>/dev/null || true", warn=True, hide=True)
+                        conn.run("docker rm xenfra-app 2>/dev/null || true", warn=True, hide=True)
+
+                        # Run the container with port mapping
+                        app_port = ctx.port or 8000
+                        up_result = conn.run(
+                            f"docker run -d --name xenfra-app -p {app_port}:{app_port} "
+                            f"--restart unless-stopped --env-file /root/app/.env app:latest 2>&1 || "
+                            f"docker run -d --name xenfra-app -p {app_port}:{app_port} --restart unless-stopped app:latest 2>&1",
+                            warn=True, hide=True
+                        )
+                    else:
+                        # Docker compose
+                        up_result = conn.run(
+                            "cd /root/app && docker compose up -d 2>&1",
+                            warn=True,
+                            hide=True
+                        )
+
+                    if up_result.failed or up_result.return_code != 0:
+                        # Capture logs if startup failed
+                        if use_railpack_plan:
+                            logs_result = conn.run(
+                                "docker logs xenfra-app --tail 50 2>&1",
+                                warn=True, hide=True
+                            )
+                        else:
+                            logs_result = conn.run(
+                                "cd /root/app && docker compose logs --tail 50 2>&1",
+                                warn=True,
+                                hide=True
+                            )
+                        container_logs = logs_result.stdout or "No logs available"
+                        raise DeploymentError(
+                            f"Container startup failed:\n{up_result.stdout or up_result.stderr or 'No output'}\n\nContainer logs:\n{container_logs[-2000:]}",
+                            stage="Deploy"
+                        )
+                self.emitter.complete_phase(DeploymentPhase.CELL_REIFY)
             else:
                 logger("\n[bold blue]PHASE 7: STARTING HOST-BASED APPLICATION[/bold blue]")
                 start_command = context.get("command", f"uvicorn main:app --port {context.get('port', 8000)}")
+
+                # Security: Validate start_command to prevent command injection.
+                # Only allow safe characters: alphanumeric, dots, colons, hyphens, underscores, spaces, equals, slashes
+                import re
+                if not re.match(r'^[a-zA-Z0-9._:=\-\s/]+$', start_command):
+                    raise DeploymentError(
+                        f"Invalid start command - contains unsafe characters: {start_command}",
+                        stage="Deploy"
+                    )
+
                 with self._get_connection(ip_address) as conn:
                     result = conn.run(f"cd /root/app && python3 -m venv .venv && .venv/bin/pip install -r requirements.txt && nohup .venv/bin/{start_command} > app.log 2>&1 &", hide=True)
                     if result.failed:
@@ -660,24 +1239,35 @@ class InfraEngine:
                     logger(" - Caddy configured for path-based routing")
 
             # === 8. VERIFICATION STAGE ===
-            logger("\n[bold blue]PHASE 8: VERIFYING DEPLOYMENT[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.VITALS_CHECK, "Checking vitals (Health Check)")
 
             # Give container a moment to initialize before first health check
             time.sleep(5)
 
-            app_port = context.get("port", 8000)
+            app_port = ctx.port or 8000
             for i in range(24):  # 2-minute timeout for health checks
                 logger(f" - Health check attempt {i + 1}/24...")
                 with self._get_connection(ip_address) as conn:
                     # Check if running
                     if is_dockerized:
-                        ps_result = conn.run("cd /root/app && docker compose ps", hide=True)
-                        ps_output = ps_result.stdout.lower()
-                        # Docker Compose V1 shows "running", V2 shows "Up" in status
-                        running = "running" in ps_output or " up " in ps_output
-                        if "restarting" in ps_output:
-                            logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
-                            raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
+                        # Check for railpack container first, then docker-compose
+                        ps_result = conn.run("docker ps --filter name=xenfra-app --format '{{.Status}}'", hide=True, warn=True)
+                        if ps_result.ok and ps_result.stdout.strip():
+                            # Railpack container exists
+                            ps_output = ps_result.stdout.lower()
+                            running = "up" in ps_output
+                            if "restarting" in ps_output:
+                                logs = conn.run("docker logs xenfra-app --tail 20", hide=True).stdout
+                                raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
+                        else:
+                            # Try docker-compose
+                            ps_result = conn.run("cd /root/app && docker compose ps", hide=True, warn=True)
+                            ps_output = ps_result.stdout.lower() if ps_result.stdout else ""
+                            # Docker Compose V1 shows "running", V2 shows "Up" in status
+                            running = "running" in ps_output or " up " in ps_output
+                            if "restarting" in ps_output:
+                                logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
+                                raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
                     else:
                         ps_result = conn.run("ps aux | grep -v grep | grep python", hide=True)
                         running = ps_result.ok and len(ps_result.stdout.strip()) > 0
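The verification stage now has two probes: the `docker ps --filter` status line for the Railpack container, with `docker compose ps` as the fallback. An illustrative helper mirroring the string tests above (not an SDK API):

```python
# Mirrors the health-check string tests: "restarting" beats everything,
# then "up"/"running" depending on which probe produced the output.
def classify(ps_output: str, compose: bool = False) -> str:
    out = ps_output.lower()
    if "restarting" in out:
        return "crash-looping"
    if ("running" in out or " up " in out) if compose else ("up" in out):
        return "running"
    return "down"

assert classify("Up 3 seconds") == "running"
assert classify("Restarting (1) 2 seconds ago") == "crash-looping"
```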
@@ -698,13 +1288,10 @@ class InfraEngine:
 
                     # Any HTTP response (200, 404, 500, etc.) means app is running
                     if http_code.isdigit() and int(http_code) >= 100:
-
-                        logger(
-                            "[bold green] - Health check passed! Application is live.[/bold green]"
-                        )
+                        self.emitter.complete_phase(DeploymentPhase.VITALS_CHECK, "Vitals healthy. Organism is alive.")
 
                         # === 9. PERSISTENCE STAGE ===
-                        logger("\n[bold blue]PHASE 9: SAVING DEPLOYMENT TO DATABASE[/bold blue]")
+                        self.emitter.start_phase(DeploymentPhase.MEMORY_COMMIT, "Committing to long-term memory")
                         project = Project(
                             droplet_id=droplet.id,
                             name=droplet.name,
@@ -716,7 +1303,7 @@ class InfraEngine:
                         )
                         session.add(project)
                         session.commit()
-                        logger(" - Deployment saved.")
+                        self.emitter.complete_phase(DeploymentPhase.MEMORY_COMMIT)
 
                         return droplet  # Return the full droplet object
                 time.sleep(5)
@@ -727,9 +1314,12 @@ class InfraEngine:
             raise DeploymentError(f"Application failed to become healthy in time. Logs:\n{logs}", stage="Verification")
 
         except Exception as e:
+            # ZEN GAP FIX: Observability - Mark failure state
+            self.emitter.fail_phase(self.emitter.current_phase or DeploymentPhase.NECROSIS, str(e))
+
             if droplet:
                 if cleanup_on_failure:
-                    logger("[bold yellow]Cleaning up resources...[/bold yellow]")
+                    self.emitter.start_phase(DeploymentPhase.APOPTOSIS, "Triggering apoptosis (Resource Cleanup)")
                     try:
                         # 1. Destroy droplet (DigitalOcean API)
                         logger(f" - Destroying droplet '{droplet.name}'...")
@@ -745,13 +1335,9 @@ class InfraEngine:
                         session.commit()
                         logger(" - Database record removed.")
 
-                        logger("[bold green]Cleanup completed.[/bold green]")
+                        self.emitter.complete_phase(DeploymentPhase.APOPTOSIS, "Organism recycled.")
                     except Exception as cleanup_error:
-                        logger(f"[bold red]Cleanup failed: {cleanup_error}[/bold red]")
-                        logger("[yellow]You may need to manually delete from DigitalOcean.[/yellow]")
+                        self.emitter.fail_phase(DeploymentPhase.APOPTOSIS, f"Recycling failed: {cleanup_error}")
                 else:
-                    logger(
-                        f"[bold red]Deployment failed. Server '{droplet.name}' NOT cleaned up.[/bold red]"
-                    )
-                    logger("[dim]Tip: Use --cleanup-on-failure to auto-cleanup.[/dim]")
+                    self.emitter.emit(DeploymentPhase.NECROSIS, EventStatus.FAILED, f"Deployment failed. Server '{droplet.name}' preserved for diagnostics.")
             raise e
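Taken together, a hedged end-to-end sketch of the 0.2.6 surface: private-repo auth, generic env vars, tier governance, and staged error handling. This assumes `xenfra_sdk.exceptions.DeploymentError` keeps the `message`/`stage` attributes of the class it replaces; the repo, token, and sizing values are placeholders:

```python
from xenfra_sdk.exceptions import DeploymentError

# `engine` is an InfraEngine instance (see the constructor sketch above)
try:
    droplet = engine.deploy_server(
        name="billing-svc",
        repo_url="https://github.com/example/billing-svc",  # placeholder
        github_token="ghp_xxx",       # injected as x-access-token for the clone
        branch="main",
        env_vars={"DATABASE_URL": "postgres://..."},
        tier="FREE",                  # drives polling interval and resource limits
        cleanup_on_failure=True,      # failures trigger the apoptosis cleanup path
        region="nyc3", size="s-1vcpu-1gb", image="docker-20-04",
    )
    print(f"live at {droplet.ip_address}")
except DeploymentError as e:
    # e.stage pinpoints the failed phase: "Build", "Deploy", "Verification", ...
    print(f"failed at {e.stage}: {e.message}")
```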