xenfra-sdk 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. The information is provided for informational purposes only.
Files changed (42)
  1. xenfra_sdk/__init__.py +46 -2
  2. xenfra_sdk/blueprints/base.py +150 -0
  3. xenfra_sdk/blueprints/factory.py +99 -0
  4. xenfra_sdk/blueprints/node.py +219 -0
  5. xenfra_sdk/blueprints/python.py +57 -0
  6. xenfra_sdk/blueprints/railpack.py +99 -0
  7. xenfra_sdk/blueprints/schema.py +70 -0
  8. xenfra_sdk/cli/main.py +175 -49
  9. xenfra_sdk/client.py +6 -2
  10. xenfra_sdk/constants.py +26 -0
  11. xenfra_sdk/db/session.py +8 -3
  12. xenfra_sdk/detection.py +262 -191
  13. xenfra_sdk/dockerizer.py +76 -120
  14. xenfra_sdk/engine.py +767 -172
  15. xenfra_sdk/events.py +254 -0
  16. xenfra_sdk/exceptions.py +9 -0
  17. xenfra_sdk/governance.py +150 -0
  18. xenfra_sdk/manifest.py +93 -138
  19. xenfra_sdk/mcp_client.py +7 -5
  20. xenfra_sdk/{models.py → models/__init__.py} +17 -1
  21. xenfra_sdk/models/context.py +61 -0
  22. xenfra_sdk/orchestrator.py +223 -99
  23. xenfra_sdk/privacy.py +11 -0
  24. xenfra_sdk/protocol.py +38 -0
  25. xenfra_sdk/railpack_adapter.py +357 -0
  26. xenfra_sdk/railpack_detector.py +587 -0
  27. xenfra_sdk/railpack_manager.py +312 -0
  28. xenfra_sdk/recipes.py +152 -19
  29. xenfra_sdk/resources/activity.py +45 -0
  30. xenfra_sdk/resources/build.py +157 -0
  31. xenfra_sdk/resources/deployments.py +22 -2
  32. xenfra_sdk/resources/intelligence.py +25 -0
  33. xenfra_sdk-0.2.7.dist-info/METADATA +118 -0
  34. xenfra_sdk-0.2.7.dist-info/RECORD +49 -0
  35. {xenfra_sdk-0.2.5.dist-info → xenfra_sdk-0.2.7.dist-info}/WHEEL +1 -1
  36. xenfra_sdk/templates/Caddyfile.j2 +0 -14
  37. xenfra_sdk/templates/Dockerfile.j2 +0 -41
  38. xenfra_sdk/templates/cloud-init.sh.j2 +0 -90
  39. xenfra_sdk/templates/docker-compose-multi.yml.j2 +0 -29
  40. xenfra_sdk/templates/docker-compose.yml.j2 +0 -30
  41. xenfra_sdk-0.2.5.dist-info/METADATA +0 -116
  42. xenfra_sdk-0.2.5.dist-info/RECORD +0 -38
xenfra_sdk/engine.py CHANGED
@@ -2,32 +2,30 @@
 
 import os
 import time
+import subprocess
+import json
+import shlex
+import tempfile
+import shutil
 from datetime import datetime
 from pathlib import Path
-from typing import Optional, Dict
+from typing import Optional, Dict, Any, Union
 
 import digitalocean
 import fabric
 from dotenv import load_dotenv
 from sqlmodel import Session, select
 
-import shutil
-import subprocess
-
 # Xenfra modules
-from . import dockerizer, recipes
+from . import dockerizer, privacy, constants
 from .db.models import Project
 from .db.session import get_session
-
-
-class DeploymentError(Exception):
-    """Custom exception for deployment failures."""
-
-    def __init__(self, message, stage="Unknown"):
-        self.message = message
-        self.stage = stage
-        super().__init__(f"Deployment failed at stage '{stage}': {message}")
-
+from .events import EventEmitter, DeploymentPhase, EventStatus
+from .exceptions import DeploymentError
+from .governance import get_polling_interval, get_resource_limits
+from .models.context import DeploymentContext
+from .blueprints.factory import render_blueprint
+from .client import XenfraClient
 
 class InfraEngine:
     """
@@ -35,11 +33,12 @@ class InfraEngine:
     with the cloud provider and orchestrates the deployment lifecycle.
     """
 
-    def __init__(self, token: str = None, db_session: Session = None):
+    def __init__(self, token: str = None, db_session: Session = None, context: dict = None):
         """
         Initializes the engine and validates the API token.
         """
         load_dotenv()
+        self.context = context or {}
         self.token = token or os.getenv("DIGITAL_OCEAN_TOKEN")
         self.db_session = db_session or next(get_session())
 
@@ -53,11 +52,20 @@ class InfraEngine:
         except Exception as e:
             raise ConnectionError(f"Failed to connect to DigitalOcean: {e}")
 
+        # ZEN GAP FIX: Structured Observability
+        # Initialize Event Emitter to stream Zen/Biological events
+        self.emitter = EventEmitter(
+            logger=self.context.get("logger"),
+            event_callback=self.context.get("event_callback")
+        )
+        # Initialize internal client for API access (Intelligence/Sandbox)
+        self.client = XenfraClient(token=self.token)
+
     def _get_connection(self, ip_address: str):
         """Establishes a Fabric connection to the server."""
-        private_key_path = str(Path.home() / ".ssh" / "id_rsa")
+        private_key_path = str(Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH)).resolve())
         if not Path(private_key_path).exists():
-            raise DeploymentError("No private SSH key found at ~/.ssh/id_rsa.", stage="Setup")
+            raise DeploymentError(f"No private SSH key found at {private_key_path}.", stage="Setup")
 
         return fabric.Connection(
             host=ip_address,
@@ -74,12 +82,26 @@ class InfraEngine:
         return self.manager.get_all_droplets()
 
     def list_domains(self):
-        """Retrieves a list of all domains from DigitalOcean."""
+        """Retrieves a list of all domains associated with the account."""
        return self.manager.get_all_domains()
 
-    def destroy_server(self, droplet_id: int, db_session: Session = None):
+    def destroy_server(
+        self,
+        droplet_id: int,
+        db_session: Session = None,
+        preserve_data: bool = False,
+        snapshot_callback: callable = None,
+    ):
         """
-        Idempotent droplet destruction.
+        Idempotent droplet destruction with optional data stewardship.
+
+        ZEN GAP FIX: Stewardship - Snapshot volumes before destruction.
+
+        Args:
+            droplet_id: The DigitalOcean droplet ID
+            db_session: SQLModel session
+            preserve_data: If True, snapshot Docker volumes before destruction
+            snapshot_callback: Async callback to upload snapshots (e.g., to S3/R2)
 
         Destroys the droplet and removes DB records. Handles 404 errors gracefully
         (if droplet already destroyed, continues to DB cleanup).
@@ -90,6 +112,21 @@ class InfraEngine:
         statement = select(Project).where(Project.droplet_id == droplet_id)
         project_to_delete = session.exec(statement).first()
 
+        # ZEN GAP FIX: Stewardship - Snapshot volumes before destruction
+        if preserve_data and project_to_delete:
+            try:
+                droplet = self.manager.get_droplet(droplet_id)
+                ip_address = droplet.ip_address
+                if ip_address:
+                    self._snapshot_volumes(
+                        ip_address=ip_address,
+                        project_name=project_to_delete.name,
+                        callback=snapshot_callback,
+                    )
+            except Exception as e:
+                # Non-fatal: log but continue with destruction
+                privacy.scrubbed_print(f"[Stewardship] Volume snapshot failed (non-fatal): {e}")
+
         # Destroy the droplet on DigitalOcean (handle 404 gracefully)
         try:
             droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
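
The `preserve_data` path above snapshots named Docker volumes (via `_snapshot_volumes`, shown in the next hunk) before the droplet is destroyed. A minimal usage sketch, assuming the `InfraEngine` constructor and `destroy_server` signature from this diff; the `upload_to_r2` callback is hypothetical:

```python
from xenfra_sdk.engine import InfraEngine

def upload_to_r2(snapshot_path: str, project_name: str) -> None:
    # Hypothetical callback: receives the remote .tar.gz path and the
    # project name, matching how _snapshot_volumes invokes it.
    print(f"would upload {snapshot_path} for {project_name}")

engine = InfraEngine(token="do_token_here")
engine.destroy_server(
    droplet_id=12345678,
    preserve_data=True,              # tar up named Docker volumes first
    snapshot_callback=upload_to_r2,  # invoked once per snapshot file
)
```
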
@@ -107,35 +144,133 @@ class InfraEngine:
             session.delete(project_to_delete)
             session.commit()
 
-    def list_projects_from_db(self, db_session: Session = None):
+    def _snapshot_volumes(
+        self,
+        ip_address: str,
+        project_name: str,
+        callback: callable = None,
+    ):
+        """
+        ZEN GAP FIX: Stewardship - Snapshot Docker volumes before destruction.
+
+        Creates tar.gz archives of named Docker volumes on the droplet.
+        100% deterministic: tar + docker volume are Unix primitives.
+
+        Args:
+            ip_address: Droplet IP address
+            project_name: Project name for snapshot naming
+            callback: Optional callback to upload snapshots
+        """
+        try:
+            with self._get_connection(ip_address) as conn:
+                # 1. List named volumes
+                result = conn.run("docker volume ls -q", warn=True, hide=True)
+                if result.failed or not result.stdout.strip():
+                    return  # No volumes to snapshot
+
+                volumes = result.stdout.strip().split("\n")
+
+                # 2. Create backup directory
+                backup_dir = f"/tmp/xenfra_snapshots/{project_name}"
+                conn.run(f"mkdir -p {backup_dir}", warn=True, hide=True)
+
+                # 3. Snapshot each volume
+                for vol in volumes:
+                    vol = vol.strip()
+                    if not vol:
+                        continue
+                    # Use Alpine container to tar the volume
+                    snapshot_file = f"{backup_dir}/{vol}.tar.gz"
+                    tar_cmd = (
+                        f"docker run --rm "
+                        f"-v {vol}:/data:ro "
+                        f"-v {backup_dir}:/backup "
+                        f"alpine tar czf /backup/{vol}.tar.gz -C /data ."
+                    )
+                    conn.run(tar_cmd, warn=True, hide=True)
+
+                # 4. If callback provided, upload snapshots
+                if callback:
+                    # List snapshot files and pass to callback
+                    ls_result = conn.run(f"ls {backup_dir}/*.tar.gz", warn=True, hide=True)
+                    if ls_result.ok:
+                        snapshot_files = ls_result.stdout.strip().split("\n")
+                        for snap_file in snapshot_files:
+                            callback(snap_file, project_name)
+
+        except Exception as e:
+            # Non-fatal error - log and continue
+            privacy.scrubbed_print(f"[Stewardship] Snapshot failed: {e}")
+
+    def list_projects_from_db(self, db_session: Session = None, user_id: int = None):
         """Lists all projects from the local database."""
         session = db_session or self.db_session
         statement = select(Project)
+        if user_id:
+            statement = statement.where(Project.user_id == user_id)
         return session.exec(statement).all()
 
-    def sync_with_provider(self, user_id: int, db_session: Session = None):
-        """Reconciles the local database with the live state from DigitalOcean for a specific user."""
+    def sync_with_provider(
+        self,
+        user_id: int,
+        db_session: Session = None,
+        auto_destroy_orphans: bool = False,
+    ):
+        """
+        ZEN GAP FIX: Idempotent Reconciliation with orphan detection.
+
+        Reconciles the local database with the live state from DigitalOcean.
+        100% deterministic: Set difference is math.
+
+        Args:
+            user_id: User ID to sync for
+            db_session: SQLModel session
+            auto_destroy_orphans: If True, destroy orphan droplets (in DO but not in DB)
+
+        Returns:
+            Tuple of (projects_list, reconciliation_report)
+        """
         session = db_session or self.db_session
 
         # 1. Get live and local states
-        # Filter by 'xenfra' tag to only manage droplets created by us
         live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
-
-        # Filter local projects by user_id
-        statement = select(Project).where(Project.user_id == user_id)
-        local_projects = session.exec(statement).all()
+        local_projects = self.list_projects_from_db(session, user_id=user_id)
 
         live_map = {d.id: d for d in live_droplets}
         local_map = {p.droplet_id: p for p in local_projects}
 
-        # 2. Reconcile
-        # Add new servers found on DO to our DB if they match our naming/tagging convention
-        for droplet_id, droplet in live_map.items():
-            if droplet_id not in local_map:
-                # We only add it if it's NOT in our DB yet.
-                # Note: In a multi-tenant environment, we'd need a way to know WHICH user
-                # owns a tagged droplet if it's not in our DB. For now, we assume the
-                # calling user potentially owns it if they are syncing.
+        live_ids = set(live_map.keys())
+        local_ids = set(local_map.keys())
+
+        # 2. Calculate differences (pure math, no guessing)
+        orphans = live_ids - local_ids  # In DO but not in DB
+        ghosts = local_ids - live_ids   # In DB but not in DO
+        synced = live_ids & local_ids   # In both
+
+        reconciliation_report = {
+            "orphans": list(orphans),  # Droplets without DB records
+            "ghosts": list(ghosts),    # DB records without droplets
+            "synced": list(synced),    # Properly tracked
+            "actions_taken": [],
+        }
+
+        # 3. Handle orphans (in DO but not in DB)
+        for droplet_id in orphans:
+            droplet = live_map[droplet_id]
+            if auto_destroy_orphans:
+                # Option A: Destroy orphan droplets (cost savings)
+                try:
+                    orphan_droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
+                    orphan_droplet.destroy()
+                    reconciliation_report["actions_taken"].append(
+                        f"DESTROYED orphan droplet {droplet_id} ({droplet.name})"
+                    )
+                except Exception as e:
+                    reconciliation_report["actions_taken"].append(
+                        f"FAILED to destroy orphan {droplet_id}: {e}"
+                    )
+            else:
+                # Option B: Create DB record for recovery
                 new_project = Project(
                     droplet_id=droplet.id,
                     name=droplet.name,
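
The reconciliation above is three set operations, as the docstring says. A standalone sketch of the same math, with plain integers in place of droplet IDs:

```python
live_ids = {101, 102, 103}   # droplets the provider reports (tagged "xenfra")
local_ids = {102, 103, 104}  # droplet_ids recorded in the local DB

orphans = live_ids - local_ids  # {101}: billing with no DB record
ghosts = local_ids - live_ids   # {104}: DB record with no droplet
synced = live_ids & local_ids   # {102, 103}: refresh status/IP from provider

assert orphans == {101} and ghosts == {104} and synced == {102, 103}
```
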
@@ -146,17 +281,83 @@ class InfraEngine:
                     user_id=user_id,
                 )
                 session.add(new_project)
+                reconciliation_report["actions_taken"].append(
+                    f"RECOVERED orphan droplet {droplet_id} ({droplet.name})"
+                )
 
-        # Remove servers from our DB that no longer exist on DO
-        for droplet_id, project in local_map.items():
-            if droplet_id not in live_map:
-                session.delete(project)
+        # 4. Handle ghosts (in DB but not in DO)
+        for project_id in ghosts:
+            project = local_map[project_id]
+            if project.status != "destroyed":
+                project.status = "destroyed"
+                project.ip_address = None
+                session.add(project)
+                reconciliation_report["actions_taken"].append(
+                    f"MARKED ghost record {project_id} ({project.name}) as destroyed"
+                )
+
+        # 5. Update status for synced projects
+        for droplet_id in synced:
+            droplet = live_map[droplet_id]
+            project = local_map[droplet_id]
+            if project.status != droplet.status or project.ip_address != droplet.ip_address:
+                project.status = droplet.status
+                project.ip_address = droplet.ip_address
+                session.add(project)
+                reconciliation_report["actions_taken"].append(
+                    f"UPDATED status for {droplet_id} ({project.name})"
+                )
 
         session.commit()
-
-        # Return refreshed list for this user
-        statement = select(Project).where(Project.user_id == user_id)
-        return session.exec(statement).all()
+        return self.list_projects_from_db(session), reconciliation_report
+
+    def get_orphan_droplets(self, user_id: int, db_session: Session = None) -> list:
+        """
+        ZEN GAP FIX: Detect orphan droplets (in DO but not in DB).
+
+        Returns list of droplet IDs that exist on DigitalOcean but have no
+        corresponding database record. These cost money!
+
+        Args:
+            user_id: User ID to check for
+            db_session: SQLModel session
+
+        Returns:
+            List of orphan droplet IDs
+        """
+        session = db_session or self.db_session
+
+        live_droplets = self.manager.get_all_droplets(tag_name="xenfra")
+        local_projects = self.list_projects_from_db(session, user_id=user_id)
+
+        live_ids = {d.id for d in live_droplets}
+        local_ids = {p.droplet_id for p in local_projects}
+
+        return list(live_ids - local_ids)
+
+    def destroy_orphans(self, user_id: int, db_session: Session = None) -> list:
+        """
+        ZEN GAP FIX: Destroy all orphan droplets for cost savings.
+
+        Args:
+            user_id: User ID
+            db_session: SQLModel session
+
+        Returns:
+            List of destroyed droplet IDs
+        """
+        orphans = self.get_orphan_droplets(user_id, db_session)
+        destroyed = []
+
+        for droplet_id in orphans:
+            try:
+                droplet = digitalocean.Droplet(token=self.token, id=droplet_id)
+                droplet.destroy()
+                destroyed.append(droplet_id)
+            except Exception:
+                pass  # Skip if already destroyed
+
+        return destroyed
 
     def stream_logs(self, droplet_id: int, db_session: Session = None):
         """
@@ -193,33 +394,11 @@ class InfraEngine:
         with self._get_connection(ip_address) as conn:
             conn.run("cd /root/app && docker compose logs -f app", pty=True)
 
-    def get_account_balance(self) -> dict:
-        """
-        Retrieves the current account balance from DigitalOcean.
-        Placeholder: Actual implementation needed.
-        """
-        # In a real scenario, this would call the DigitalOcean API for billing info
-        # For now, return mock data
-        return {
-            "month_to_date_balance": "0.00",
-            "account_balance": "0.00",
-            "month_to_date_usage": "0.00",
-            "generated_at": datetime.now().isoformat(),
-        }
-
-    def get_droplet_cost_estimates(self) -> list:
-        """
-        Retrieves a list of Xenfra-managed DigitalOcean droplets with their estimated monthly costs.
-        Placeholder: Actual implementation needed.
-        """
-        # In a real scenario, this would list droplets and calculate costs
-        # For now, return mock data
-        return []
 
     def _ensure_ssh_key(self, logger):
         """Ensures a local public SSH key is on DigitalOcean. Generates one if missing (Zen Mode)."""
-        pub_key_path = Path.home() / ".ssh" / "id_rsa.pub"
-        priv_key_path = Path.home() / ".ssh" / "id_rsa"
+        pub_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_PUB_KEY_PATH))
+        priv_key_path = Path(os.path.expanduser(constants.DEFAULT_SSH_KEY_PATH))
 
         if not pub_key_path.exists():
             logger(" - [Zen Mode] No SSH key found at ~/.ssh/id_rsa.pub. Generating a new one...")
@@ -259,7 +438,7 @@ class InfraEngine:
             )
             key.create()
             return key
-
+
     def deploy_server(
         self,
         name: str,
@@ -287,29 +466,189 @@ class InfraEngine:
         multi_service_compose: str = None,  # Pre-generated docker-compose.yml for multi-service
         multi_service_caddy: str = None,  # Pre-generated Caddyfile for multi-service routing
         services: list = None,  # List of ServiceDefinition for multi-service deployments
+        env_vars: Dict[str, str] = None,  # Generic environment variables
+        dry_run: bool = False,  # ZEN MODE: Return assets without deploying
+        verify_local: bool = True,  # LOCAL MODE: Mirror production locally before cloud push
         **kwargs,
     ):
         """A stateful, blocking orchestrator for deploying a new server."""
+
+        # Protocol Compliance: Wrap logger with privacy scrubber
+        # Use the scrubbed logger for the rest of the method
+        logger_orig = logger or print
+
+        def scrubbed_logger(msg):
+            if isinstance(msg, str):
+                logger_orig(privacy.scrub_pii(msg))
+            else:
+                logger_orig(msg)
+
+        logger = scrubbed_logger
+
+        self.emitter.start()
+        # Synchronize emitter logger with provided logger
+        self.emitter.logger = logger
+        # ZEN GAP FIX: Observability - Reset events for fresh deployment telemetry
+        self.emitter.events = []
+
         droplet = None
         session = db_session or self.db_session
-        branch = kwargs.get("branch", "main")  # Extract branch from kwargs
-        framework = kwargs.get("framework")  # Extract framework from kwargs
+        framework = kwargs.get("framework")
+        tier = kwargs.get("tier", "FREE")  # Default to FREE tier
+
+        # ZEN GAP FIX: Resource Governance - Set tier-based polling interval
+        polling_interval = kwargs.get("polling_interval") or get_polling_interval(tier)
+
+        github_token = kwargs.get("github_token")
+        branch = kwargs.get("branch", "main")
+        devbox = kwargs.get("devbox", False)
+
+        # Backward compatibility for logger
+        logger = logger or (lambda msg: None)
 
         try:
-            # === 0. MICROSERVICES DELEGATION ===
+            # === 0a. DEEP DISCOVERY ===
+            # If no services explicitly provided, scan the project structure
+            if not services:
+                if file_manifest:
+                    # UI DEPLOYMENT: Detect framework from file_manifest (not local files!)
+                    # The container's local directory is the deployment service, not user's project
+                    from .manifest import ServiceDefinition
+
+                    file_names = {f.get("path", "").lstrip("./") for f in file_manifest}
+                    detected_framework = None
+
+                    # Check for Node.js first (package.json is more specific)
+                    if "package.json" in file_names:
+                        detected_framework = "nodejs"
+                    # Then check for Python
+                    elif "requirements.txt" in file_names or "pyproject.toml" in file_names:
+                        detected_framework = "python"
+                        # Refine to specific framework if possible
+                        for f in file_manifest:
+                            content = f.get("content", "")
+                            if content:
+                                if "fastapi" in content.lower():
+                                    detected_framework = "fastapi"
+                                    break
+                                elif "django" in content.lower():
+                                    detected_framework = "django"
+                                    break
+                                elif "flask" in content.lower():
+                                    detected_framework = "flask"
+                                    break
+                    elif "go.mod" in file_names:
+                        detected_framework = "go"
+                    elif "Cargo.toml" in file_names:
+                        detected_framework = "rust"
+
+                    # Use explicit framework param if provided and not auto-detect
+                    if framework and framework not in ("auto-detect", "other", "unknown", None):
+                        detected_framework = framework
+                        logger(f" - [Manifest] Using user-selected framework: {framework}")
+                    elif detected_framework:
+                        logger(f"\n[bold magenta]🔍 MANIFEST DISCOVERY: Detected framework={detected_framework}[/bold magenta]")
+
+                    if detected_framework:
+                        # Create a single service from the manifest
+                        services = [ServiceDefinition(
+                            name=f"{name}-api" if name else "app-api",
+                            path=".",
+                            port=port or 8000,
+                            framework=detected_framework,
+                            entrypoint=entrypoint
+                        )]
+                        logger(f" - Created service: {services[0].name} (port {services[0].port})")
+                else:
+                    # NO FILE_MANIFEST PROVIDED
+                    # Check if this is a SERVICE MODE deployment with repo_url
+                    # If so, DO NOT scan local directory (it's the deployment service, not user's project!)
+                    if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
+                        # Service mode with repo_url but no file_manifest
+                        # Use explicit framework if provided, otherwise default to auto-detect
+                        # The actual framework will be detected later when repo is cloned
+                        from .manifest import ServiceDefinition
+
+                        explicit_framework = framework if framework and framework not in ("auto-detect", "other", "unknown", None) else None
+
+                        if explicit_framework:
+                            logger(f"\n[bold magenta]🔍 SERVICE MODE: Using explicit framework={explicit_framework}[/bold magenta]")
+                            services = [ServiceDefinition(
+                                name=f"{name}-api" if name else "app-api",
+                                path=".",
+                                port=port or 8000,
+                                framework=explicit_framework,
+                                entrypoint=entrypoint
+                            )]
+                            logger(f" - Created service: {services[0].name} (port {services[0].port})")
+                        else:
+                            # No explicit framework - we'll need to clone the repo first to detect
+                            # This is handled in the GENOME_TRANSFER stage
+                            logger(f"\n[dim]No file_manifest or explicit framework - detection will occur after repo clone[/dim]")
+                    else:
+                        # CLI DEPLOYMENT: Scan local project files
+                        from .discovery import RecursiveScanner
+                        if os.getcwd():
+                            scanner = RecursiveScanner(root_path=os.getcwd())
+                            scan_config = scanner.scan()
+                            found_services = scan_config.services
+                            if found_services:
+                                logger(f"\n[bold magenta]🔍 DEEP DISCOVERY: Discovered {len(found_services)} services[/bold magenta]")
+                                services = found_services
+
+
+            # === 0b. MICROSERVICES DELEGATION ===
             # If services are provided but no pre-generated assets, delegate to Orchestrator
             if services and not (multi_service_compose or multi_service_caddy):
                 logger("\n[bold magenta]MICROSERVICES DETECTED - Delegating to ServiceOrchestrator[/bold magenta]")
                 from .orchestrator import ServiceOrchestrator, load_services_from_xenfra_yaml
                 from .manifest import create_services_from_detected
 
+                # ZEN MODE: Discovery Clone for Multi-service
+                # If we have a repo_url but no file_manifest, we must clone to detect frameworks
+                temp_discovery_path = None
+                if repo_url and not file_manifest:
+                    import tempfile
+                    import shutil
+                    import subprocess
+                    temp_discovery_path = tempfile.mkdtemp(prefix="xenfra-discovery-")
+                    logger(f"\n[bold yellow]🔍 DISCOVERY CLONE: Cloning for microservice analysis...[/bold yellow]")
+                    try:
+                        subprocess.run(
+                            ["git", "clone", "--depth", "1", repo_url, temp_discovery_path],
+                            check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                        )
+                        # Hydrate file_manifest for Orchestrator
+                        new_manifest = []
+                        for root, dirs, files in os.walk(temp_discovery_path):
+                            if ".git" in dirs:
+                                dirs.remove(".git")
+                            for f in files:
+                                fpath = os.path.join(root, f)
+                                rel_path = os.path.relpath(fpath, temp_discovery_path)
+                                file_entry = {"path": rel_path}
+                                # Read critical configs for hydration
+                                if f in ["package.json", "requirements.txt", "pyproject.toml"]:
+                                    try:
+                                        with open(fpath, "r", encoding="utf-8") as f_in:
+                                            file_entry["content"] = f_in.read()
+                                    except: pass
+                                new_manifest.append(file_entry)
+                        file_manifest = new_manifest
+                        logger(f" - Discovery successful: {len(file_manifest)} files mapped")
+                    except Exception as e:
+                        logger(f" - [yellow]Warning: Discovery clone failed: {e}[/yellow]")
+                    finally:
+                        if temp_discovery_path:
+                            shutil.rmtree(temp_discovery_path, ignore_errors=True)
+
                 # Convert dicts to ServiceDefinition objects if needed
                 service_objs = []
                 if services and isinstance(services[0], dict):
                     service_objs = create_services_from_detected(services)
                 else:
                     service_objs = services
-
+
                 # Determine mode (can be passed in kwargs or default to single-droplet)
                 mode = kwargs.get("mode", "single-droplet")
 
@@ -318,12 +657,15 @@ class InfraEngine:
                     services=service_objs,
                     project_name=name,
                     mode=mode,
-                    file_manifest=file_manifest
+                    file_manifest=file_manifest,
+                    tier=tier
                 )
 
                 return orchestrator.deploy(
                     logger=logger,
-                    # Pass all original arguments to ensure they propagate
+                    # Pass all original arguments (including dry_run and devbox)
+                    dry_run=dry_run,
+                    devbox=devbox,
                     region=region,
                     size=size,
                     image=image,
@@ -343,6 +685,7 @@ class InfraEngine:
                     get_file_content=get_file_content,
                     cleanup_on_failure=cleanup_on_failure,
                     extra_assets=extra_assets,
+                    env_vars=env_vars,
                     **kwargs
                 )
 
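
The deep-discovery branch above infers a framework from the file manifest. A standalone sketch of that decision order (package.json first, then Python dependency files refined by content, then go.mod/Cargo.toml), independent of the SDK:

```python
from typing import Optional

def detect_framework(file_manifest: list) -> Optional[str]:
    names = {f.get("path", "").lstrip("./") for f in file_manifest}
    if "package.json" in names:
        return "nodejs"
    if "requirements.txt" in names or "pyproject.toml" in names:
        # Refine generic "python" using file contents, mirroring the diff.
        for f in file_manifest:
            content = (f.get("content") or "").lower()
            if "fastapi" in content:
                return "fastapi"
            if "django" in content:
                return "django"
            if "flask" in content:
                return "flask"
        return "python"
    if "go.mod" in names:
        return "go"
    if "Cargo.toml" in names:
        return "rust"
    return None

assert detect_framework([{"path": "requirements.txt", "content": "fastapi==0.110"}]) == "fastapi"
```
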
@@ -357,61 +700,134 @@ class InfraEngine:
                 )
 
             # === 1. SETUP STAGE ===
-            logger("\n[bold blue]PHASE 1: SETUP[/bold blue]")
-            ssh_key = self._ensure_ssh_key(logger)
+            self.emitter.start_phase(DeploymentPhase.DNA_ENCODING, "Encoding project setup and SSH keys")
+            if not dry_run:
+                ssh_key = self._ensure_ssh_key(logger)
+            else:
+                logger(" - [Dry Run] Skipping SSH key check")
+            self.emitter.complete_phase(DeploymentPhase.DNA_ENCODING)
 
-            # === 2. ASSET GENERATION STAGE ===
-            logger("\n[bold blue]PHASE 2: GENERATING DEPLOYMENT ASSETS[/bold blue]")
+            # === 2. ASSET GENERATION STAGE (THE BLUEPRINT) ===
+            self.emitter.start_phase(DeploymentPhase.CELL_BLUEPRINT, "Synthesizing Server DNA (Asset Generation)")
 
-            # Detect Python version from project files if using delta upload
+            # Detect Python version/Entrypoint from project files if using delta upload
             python_version = "python:3.11-slim"  # Default
+            enhanced_manifest = []
             if file_manifest and get_file_content:
-                # Build file info with content for version detection
-                version_files = []
+                # Build file info with content for version/entrypoint detection
                 for finfo in file_manifest:
                     path = finfo.get('path', '')
-                    if path in ['.python-version', 'pyproject.toml']:
-                        content = get_file_content(finfo.get('sha', ''))
-                        if content:
-                            version_files.append({
-                                'path': path,
-                                'content': content.decode('utf-8', errors='ignore')
-                            })
+                    # Load content for version files AND potential entrypoint files (limit depth for performance)
+                    is_version_file = path in ['.python-version', 'pyproject.toml']
+                    is_candidate_py = path.endswith('.py') and path.count('/') <= 1
+
+                    if is_version_file or is_candidate_py:
+                        try:
+                            content = get_file_content(finfo.get('sha', ''))
+                            if content:
+                                enhanced_manifest.append({
+                                    'path': path,
+                                    'content': content.decode('utf-8', errors='ignore')
+                                })
+                        except Exception:
+                            continue
 
-                if version_files:
-                    python_version = dockerizer.detect_python_version(version_files)
+                if enhanced_manifest:
+                    python_version = dockerizer.detect_python_version(enhanced_manifest)
                     logger(f" - Detected Python version: {python_version}")
+
+                # Update file_manifest in context with loaded contents for blueprints
+                file_manifest = enhanced_manifest
+
 
-            context = {
-                "email": email,
-                "domain": domain,
-                "repo_url": repo_url,
-                "port": port or 8000,
-                "command": command,
-                "entrypoint": entrypoint,  # Pass entrypoint to templates (e.g., "todo.main:app")
-                "database": database,
-                "package_manager": package_manager or "pip",
-                "dependency_file": dependency_file or "requirements.txt",
-                "framework": framework,  # Explicitly include framework
-                "python_version": python_version,  # Auto-detected or default
-                **kwargs,  # Pass any additional config
-            }
+            # Protocol Compliance: Build Type-Safe DeploymentContext
+            ctx = DeploymentContext(
+                project_name=name,
+                email=email or "admin@xenfra.tech",  # Use passed email or default
+                region=region,
+                size=size,
+                image=image,
+                framework=framework or "python",
+                port=port or 8000,
+                entrypoint=entrypoint,
+                python_version=python_version or "3.11-slim",
+                is_dockerized=is_dockerized,
+                branch=branch,
+                source_type="git" if repo_url else "local",
+                env_vars=env_vars or {},
+                tier=tier,
+                include_postgres=bool(database == "postgres")
+            )
 
+            # Pre-inject resource limits if tier is managed
+            limits = get_resource_limits(tier)
+            ctx.cpu_limit = limits.cpus
+            ctx.memory_limit = limits.memory
+
+            # Log scrubbed context for debugging (SAFE)
+            logger(f" - Initializing deployment for {name} ({tier} tier)")
+
             # Check if this is a multi-service deployment
             if multi_service_compose:
-                # Use pre-generated assets from ServiceOrchestrator
                 logger(" - Using multi-service configuration")
                 rendered_assets = {
                     "docker-compose.yml": multi_service_compose,
                 }
+
                 if multi_service_caddy:
                     rendered_assets["Caddyfile"] = multi_service_caddy
-                    logger(f" - Caddyfile for {len(services) if services else 0} services")
             else:
-                # Render templates to strings (NOT written to disk) - single service
-                rendered_assets = dockerizer.render_deployment_assets(context)
+                # Protocol Compliance: Use Blueprint Factory, NOT legacy dockerizer
+                ctx_dict = ctx.model_dump()
+                temp_repo_path = None
+
+                # Server Mode: Clone repo locally to allow Railpack plan generation
+                # This ensures we have a build plan even if the droplet doesn't have railpack installed
+                if os.getenv("XENFRA_SERVICE_MODE") == "true" and repo_url:
+                    try:
+                        temp_repo_path = tempfile.mkdtemp()
+                        logger(f" - Cloning {repo_url} for detection...")
+                        # Shallow clone to save time
+                        subprocess.run(
+                            ["git", "clone", "--depth", "1", repo_url, temp_repo_path],
+                            check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+                        )
+                        ctx_dict["repo_path"] = temp_repo_path
+
+                        # Populate file_manifest for fallback detection (Crucial for Railpack Blueprint fallback)
+                        manifest_files = []
+                        for root, dirs, files in os.walk(temp_repo_path):
+                            if ".git" in dirs:
+                                dirs.remove(".git")  # Don't traverse .git
+
+                            for f in files:
+                                rel_path = os.path.relpath(os.path.join(root, f), temp_repo_path)
+                                file_entry = {"path": rel_path}
+
+                                # Read content for critical files (for detection logic)
+                                if f in ["package.json", "next.config.js", "next.config.ts", "next.config.mjs", "nuxt.config.ts", "vite.config.ts"]:
+                                    try:
+                                        with open(os.path.join(root, f), "r", encoding="utf-8") as meta_f:
+                                            file_entry["content"] = meta_f.read()
+                                    except Exception:
+                                        pass
+
+                                manifest_files.append(file_entry)
+
+                        ctx_dict["file_manifest"] = manifest_files
+                        logger(f" - Hydrated file_manifest with {len(manifest_files)} files for detection")
+
+                    except Exception as e:
+                        logger(f" - Clone for detection failed (proceeding without plan): {e}")
+
+                try:
+                    rendered_assets = render_blueprint(ctx_dict)
+                finally:
+                    if temp_repo_path:
+                        shutil.rmtree(temp_repo_path, ignore_errors=True)
+
             if not rendered_assets:
-                raise DeploymentError("Failed to render deployment assets. Is framework specified?", stage="Asset Generation")
+                raise DeploymentError("Failed to render deployment assets via Blueprint Factory.", stage="Asset Generation")
 
             # Merge extra assets (like service-specific Dockerfiles)
             if extra_assets:
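
A hedged sketch of driving the Blueprint Factory directly, using only fields that appear in this diff: `DeploymentContext` behaves like a Pydantic model (it has `model_dump()`), and `render_blueprint` takes the dumped dict and returns a `{filename: content}` mapping:

```python
from xenfra_sdk.models.context import DeploymentContext
from xenfra_sdk.blueprints.factory import render_blueprint

ctx = DeploymentContext(
    project_name="todo-api",
    email="admin@xenfra.tech",
    region="nyc3",
    size="s-1vcpu-1gb",
    image="ubuntu-22-04-x64",
    framework="fastapi",
    port=8000,
    entrypoint="todo.main:app",
    python_version="3.11-slim",
    is_dockerized=True,
    branch="main",
    source_type="git",
    env_vars={"DEBUG": "0"},
    tier="FREE",
    include_postgres=False,
)

assets = render_blueprint(ctx.model_dump())
for filename, content in assets.items():
    print(filename, len(content), "bytes")
```
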
@@ -419,18 +835,56 @@ class InfraEngine:
                 logger(f" - Included {len(extra_assets)} extra assets")
 
             for filename in rendered_assets:
-                logger(f" - Rendered {filename} ({len(rendered_assets[filename])} bytes)")
+                self.emitter.progress(DeploymentPhase.CELL_BLUEPRINT, 50, f"Encoded {filename}")
+
+            self.emitter.complete_phase(DeploymentPhase.CELL_BLUEPRINT)
 
             # === 3. CLOUD-INIT STAGE ===
-            logger("\n[bold blue]PHASE 3: CREATING SERVER SETUP SCRIPT[/bold blue]")
-            cloud_init_script = recipes.generate_stack(context, is_dockerized=is_dockerized)
-            logger(" - Generated cloud-init script.")
-            logger(
-                f"--- Cloud-init script content ---\n{cloud_init_script}\n---------------------------------"
-            )
+            self.emitter.start_phase(DeploymentPhase.GENESIS_SCRIPT, "Writing the Genesis Script (Server Provisioning)")
+            from . import recipes
+            cloud_init_script = recipes.generate_stack(ctx.model_dump(), is_dockerized=is_dockerized)
+            self.emitter.complete_phase(DeploymentPhase.GENESIS_SCRIPT)
+
+            # === ZEN MODE: DRY RUN EXIT ===
+            if dry_run:
+                logger("\n[bold cyan]🧪 DRY RUN COMPLETE: Returning generated assets[/bold cyan]")
+                return {
+                    "status": "DRY_RUN",
+                    "cloud_init": cloud_init_script,
+                    "assets": rendered_assets,
+                    "context": ctx.model_dump(),
+                    "droplet_request": {
+                        "name": name,
+                        "region": region,
+                        "size": size,
+                        "image": image
+                    }
+                }
+
+            # === ZEN MODE: PRE-MITOSIS (E2B GATE) ===
+            # Replaced by Secure Ralph Loop (server-side Firecracker verification)
+            # Enforce "No Compromise" - Verify before Deploy
+            # Fix: Mandatory enforcement (no verify_local conditional)
+            if not dry_run:
+                logger("\n[bold yellow]🛡️ E2B GATE: Verifying build in Cloud Sandbox...[/bold yellow]")
+                # Call Intelligence Service to verify
+                try:
+                    # Check with the Intelligence service using the client
+                    logger(" - Submitting audit package to Intelligence Service...")
+                    result = self.client.intelligence.verify(
+                        logs="PRE-DEPLOYMENT AUDIT",
+                        code_snippets=[{"path": k, "content": v} for k, v in ctx_dict.get("file_manifest", [])]
+                    )
+
+                    if result.get("status") == "failed":
+                        raise Exception(f"Sandbox verification failed: {result.get('error')}")
+
+                    logger(" - [Verified] E2B Sandbox check passed.")
+                except Exception as e:
+                    raise DeploymentError(f"E2B Verification Failed: {e}", stage="Pre-Mitosis")
 
             # === 4. DROPLET CREATION STAGE ===
-            logger("\n[bold blue]PHASE 4: PROVISIONING SERVER[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.CELL_BIRTH, "Submitting DNA to provider (Creating Droplet)")
 
             # Machine Reuse: Look for existing droplet with same name and 'xenfra' tag
             existing_droplets = digitalocean.Manager(token=self.token).get_all_droplets(tag_name="xenfra")
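
With `dry_run=True`, `deploy_server` now returns right after asset generation, before the SSH-key check, the E2B gate, and droplet creation. A usage sketch based on the return shape above, assuming the parameters shown in this diff:

```python
engine = InfraEngine(token="do_token_here")

preview = engine.deploy_server(
    name="todo-api",
    region="nyc3",
    size="s-1vcpu-1gb",
    image="ubuntu-22-04-x64",
    repo_url="https://github.com/example/todo-api",
    port=8000,
    dry_run=True,  # nothing is provisioned
)

assert preview["status"] == "DRY_RUN"
print(preview["droplet_request"])   # {"name": ..., "region": ..., "size": ..., "image": ...}
print(sorted(preview["assets"]))    # rendered filenames, e.g. docker-compose.yml
print(preview["cloud_init"][:200])  # head of the genesis script
```
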
@@ -454,18 +908,18 @@ class InfraEngine:
                 private_networking=True,
             )
             droplet.create()
-            logger(
-                f" - Droplet '{name}' creation initiated (ID: {droplet.id}). Waiting for it to become active..."
-            )
+            self.emitter.complete_phase(DeploymentPhase.CELL_BIRTH, f"Cell born (ID: {droplet.id})")
 
             # === 5. POLLING STAGE ===
-            logger("\n[bold blue]PHASE 5: WAITING FOR SERVER SETUP[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.NEURAL_SYNC, "Establishing neural connection to provider")
             while True:
                 droplet.load()
                 if droplet.status == "active":
-                    logger(" - Droplet is active. Waiting for SSH to be available...")
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 50, "Droplet active. Harmonizing SSH...")
                     break
-                time.sleep(10)
+
+                self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 25, f"Syncing with cloud provider... (Wait: {polling_interval}s)")
+                time.sleep(polling_interval)
 
             ip_address = droplet.ip_address
 
@@ -474,10 +928,10 @@ class InfraEngine:
             max_retries = 12  # 2-minute timeout for SSH
             for i in range(max_retries):
                 try:
-                    logger(f" - Attempting SSH connection ({i + 1}/{max_retries})...")
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 75, f"Syncing neural pathways ({i + 1}/{max_retries})...")
                     conn = self._get_connection(ip_address)
-                    conn.open()  # Explicitly open the connection
-                    logger(" - SSH connection established.")
+                    conn.open()
+                    self.emitter.progress(DeploymentPhase.NEURAL_SYNC, 90, "Neural link established. Synapsing...")
                     break
                 except Exception as e:
                     if i < max_retries - 1:
@@ -529,20 +983,35 @@ class InfraEngine:
             )
 
             # === 6. CODE UPLOAD STAGE ===
-            logger("\n[bold blue]PHASE 6: UPLOADING APPLICATION CODE[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.GENOME_TRANSFER, "Transferring project genome (Code Upload)")
             with self._get_connection(ip_address) as conn:
                 # Option 1: Git clone (if repo_url provided)
                 if repo_url:
-                    logger(f" - Cloning repository from {repo_url} (branch: {branch})...")
+                    # Authenticate if token provided (Zen Mode: Private Repo Support)
+                    authenticated_url = repo_url
+                    if github_token and "github.com" in repo_url:
+                        self.emitter.progress(DeploymentPhase.GENOME_TRANSFER, 25, "Injecting authentication for private genome")
+                        if repo_url.startswith("https://"):
+                            authenticated_url = repo_url.replace("https://", f"https://x-access-token:{github_token}@")
+                        elif repo_url.startswith("http://"):
+                            authenticated_url = repo_url.replace("http://", f"http://x-access-token:{github_token}@")
+
+                    # Sanitize log (don't show token)
+                    log_url = repo_url
+                    logger(f" - Cloning repository from {log_url} (branch: {branch})...")
+
                     # Use --branch to checkout specific branch, --single-branch for efficiency
-                    clone_cmd = f"git clone --branch {branch} --single-branch {repo_url} /root/app"
+                    # Sanitize inputs to prevent command injection
+                    safe_branch = shlex.quote(branch)
+                    safe_url = shlex.quote(authenticated_url)
+                    clone_cmd = f"git clone --branch {safe_branch} --single-branch {safe_url} /root/app"
                     result = conn.run(clone_cmd, warn=True, hide=True)
                     if result.failed:
                         # Try without --single-branch in case branch doesn't exist
                         # Clean up any partial clone first
                         logger(f" - Branch '{branch}' clone failed, trying default branch...")
                         conn.run("rm -rf /root/app", warn=True, hide=True)
-                        conn.run(f"git clone {repo_url} /root/app")
+                        conn.run(f"git clone {safe_url} /root/app")
 
                 # Option 2: Delta upload (if file_manifest provided)
                 elif file_manifest and get_file_content:
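
The `shlex.quote` hardening above is easy to verify in isolation: a hostile branch name stops being a shell-injection vector once quoted, because the whole value reaches git as one literal argument:

```python
import shlex

branch = "main; rm -rf /"  # hostile input
repo_url = "https://github.com/example/app.git"

unsafe = f"git clone --branch {branch} --single-branch {repo_url} /root/app"
safe = f"git clone --branch {shlex.quote(branch)} --single-branch {shlex.quote(repo_url)} /root/app"

print(unsafe)  # the ';' terminates the git command and would run rm -rf /
print(safe)    # 'main; rm -rf /' is passed to git as a single argument
```
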
@@ -555,17 +1024,23 @@ class InfraEngine:
                         path = file_info['path']
                         sha = file_info['sha']
                         size = file_info.get('size', 0)
-
+
+                        # Security: Validate path to prevent directory traversal attacks
+                        if '..' in path or path.startswith('/') or path.startswith('~'):
+                            logger(f" - [Security] Skipping suspicious path: {path}")
+                            continue
+
                         # Build Safety: Placeholder for 0-byte critical files
                         # (Hatchling/Pip fail if README.md or __init__.py are mentioned but empty)
                         is_critical_empty = (
-                            size == 0 and
+                            size == 0 and
                             (path.lower() == 'readme.md' or path.endswith('__init__.py'))
                         )
-
+
                         # Smart Incremental Sync: Check if file exists and has same SHA
-                        remote_path = f"/root/app/{path}"
-                        check_sha_cmd = f"sha256sum {remote_path}"
+                        # Sanitize path to prevent command injection
+                        safe_remote_path = shlex.quote(f"/root/app/{path}")
+                        check_sha_cmd = f"sha256sum {safe_remote_path}"
                         result = conn.run(check_sha_cmd, warn=True, hide=True)
 
                         if result.ok:
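
The incremental sync shells out to `sha256sum` and skips files whose hash already matches. A simplified standalone sketch of that comparison, assuming a plain content hash (the SDK's manifest `sha` may instead be a git blob SHA):

```python
import hashlib

def needs_upload(local_content: bytes, remote_sha256sum_stdout: str) -> bool:
    # sha256sum prints "<hex digest>  <path>"; empty output means the
    # remote file does not exist yet.
    if not remote_sha256sum_stdout.strip():
        return True
    remote_hash = remote_sha256sum_stdout.split()[0]
    return remote_hash != hashlib.sha256(local_content).hexdigest()

content = b"print('hi')\n"
digest = hashlib.sha256(content).hexdigest()
assert needs_upload(content, "") is True
assert needs_upload(content, f"{digest}  /root/app/main.py") is False
```
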
@@ -587,10 +1062,13 @@ class InfraEngine:
                         # Create directory if needed
                         dir_path = os.path.dirname(path)
                         if dir_path:
-                            conn.run(f"mkdir -p /root/app/{dir_path}", warn=True, hide=True)
-
+                            safe_dir_path = shlex.quote(f"/root/app/{dir_path}")
+                            conn.run(f"mkdir -p {safe_dir_path}", warn=True, hide=True)
+
                         # Use SFTP for file transfer (handles large files)
+                        # Note: SFTP doesn't use shell, so path doesn't need quoting here
                         from io import BytesIO
+                        remote_path = f"/root/app/{path}"
                         conn.put(BytesIO(content), remote_path)
 
                         # Progress update every 10 files
@@ -603,18 +1081,42 @@ class InfraEngine:
                 else:
                     # Note: Early validation in Phase 0 should have caught this for service mode
                     private_key_path = str(Path.home() / ".ssh" / "id_rsa")
-                    rsync_cmd = f'rsync -avz --exclude=".git" --exclude=".venv" --exclude="__pycache__" -e "ssh -i {private_key_path} -o StrictHostKeyChecking=no" . root@{ip_address}:/root/app/'
+                    # Use subprocess with list args instead of shell=True for security
+                    rsync_args = [
+                        "rsync", "-avz",
+                        "--exclude=.git", "--exclude=.venv", "--exclude=__pycache__",
+                        "-e", f"ssh -i {shlex.quote(private_key_path)} -o StrictHostKeyChecking=no",
+                        ".", f"root@{ip_address}:/root/app/"
+                    ]
                     logger(f" - Uploading local code via rsync...")
-                    result = subprocess.run(rsync_cmd, shell=True, capture_output=True, text=True)
+                    result = subprocess.run(rsync_args, capture_output=True, text=True)
                     if result.returncode != 0:
                         raise DeploymentError(f"rsync failed: {result.stderr}", stage="Code Upload")
                     logger(" - Code upload complete.")
 
 
             # === 6.5. WRITE DEPLOYMENT ASSETS TO DROPLET ===
-            logger("\n[bold blue]PHASE 6.5: WRITING DEPLOYMENT ASSETS[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.MEMBRANE_FORMATION, "Forming the biological membrane (Writing Assets)")
+            # Whitelist of allowed deployment asset filenames (exact match or prefix patterns)
+            ALLOWED_ASSET_FILENAMES = {"docker-compose.yml", ".env", "Caddyfile", "railpack-plan.json"}
+            ALLOWED_ASSET_PREFIXES = ("Dockerfile",)  # Allows Dockerfile, Dockerfile.service-name, etc.
+
+            def is_allowed_asset(filename: str) -> bool:
+                """Check if a filename is in the allowlist (exact match or prefix match)."""
+                if filename in ALLOWED_ASSET_FILENAMES:
+                    return True
+                for prefix in ALLOWED_ASSET_PREFIXES:
+                    if filename == prefix or filename.startswith(f"{prefix}."):
+                        return True
+                return False
+
             with self._get_connection(ip_address) as conn:
                 for filename, content in rendered_assets.items():
+                    # Security: Only allow whitelisted filenames to prevent path injection
+                    if not is_allowed_asset(filename):
+                        logger(f" - [Security] Skipping unknown asset: {filename}")
+                        continue
+
                     # Use heredoc with unique delimiter to write file content
                     # Single-quoted delimiter prevents shell variable expansion
                     logger(f" - Writing {filename}...")
@@ -624,23 +1126,109 @@ class InfraEngine:
                         import base64
                        encoded_content = base64.b64encode(content.encode()).decode()
                         # Use printf with %s to handle any special characters in base64
+                        # Filename is whitelisted so safe to use directly
                         conn.run(f"printf '%s' '{encoded_content}' | base64 -d > /root/app/{filename}")
                     except Exception as e:
                         raise DeploymentError(f"Failed to write {filename}: {e}", stage="Asset Write")
-            logger(" - Deployment assets written.")
+            self.emitter.complete_phase(DeploymentPhase.MEMBRANE_FORMATION)
 
             # === 7. FINAL DEPLOY STAGE ===
             if is_dockerized:
-                logger("\n[bold blue]PHASE 7: BUILDING AND DEPLOYING CONTAINERS[/bold blue]")
+                self.emitter.start_phase(DeploymentPhase.CELL_REIFY, "Reifying the cell (Building Containers)")
                 with self._get_connection(ip_address) as conn:
-                    # Force --no-cache to ensure updated files (like README.md placeholders) are used
-                    result = conn.run("cd /root/app && docker compose build --no-cache && docker compose up -d", hide=True)
-                    if result.failed:
-                        raise DeploymentError(f"docker-compose failed: {result.stderr}", stage="Deploy")
-                    logger(" - Docker build complete, containers starting...")
+                    # Step 7a: Build containers (capture output for debugging)
+                    logger(" - Building Docker image (this may take a few minutes)...")
+
+                    # Check if we have a generated railpack plan
+                    # If railpack-plan.json exists, we use it for a zero-config build
+                    use_railpack_plan = "railpack-plan.json" in rendered_assets
+
+                    if use_railpack_plan:
+                        logger(" - Using Railpack Plan for zero-config build...")
+                        # Build with Docker buildx using Railpack frontend and the uploaded plan
+                        build_result = conn.run(
+                            'cd /root/app && docker buildx build '
+                            '--build-arg BUILDKIT_SYNTAX="ghcr.io/railwayapp/railpack-frontend" '
+                            '-f railpack-plan.json -t app:latest --load . 2>&1',
+                            warn=True, hide=False
+                        )
+                    else:
+                        # Fallback: Use docker compose build
+                        # We now rely on docker-compose.yml 'build.args' mapping (set by RailpackAdapter)
+                        # to pick up variables from the .env file automatically.
+                        # This avoids shell quoting issues with complex values (like email headers).
+                        build_result = conn.run(
+                            "cd /root/app && docker compose build --no-cache 2>&1",
+                            warn=True,
+                            hide=False
+                        )
+
+                    if build_result.failed or build_result.return_code != 0:
+                        # Capture build logs for error message
+                        build_output = build_result.stdout or build_result.stderr or "No output captured"
+                        raise DeploymentError(
+                            f"Docker build failed (exit code {build_result.return_code}):\n{build_output[-2000:]}",
+                            stage="Build"
+                        )
+                    logger(" - Docker build complete.")
+
+                    # Step 7b: Start containers
+                    logger(" - Starting containers...")
+
+                    if use_railpack_plan:
+                        # Railpack built image, run with docker directly
+                        # Stop any existing container first
+                        conn.run("docker stop xenfra-app 2>/dev/null || true", warn=True, hide=True)
+                        conn.run("docker rm xenfra-app 2>/dev/null || true", warn=True, hide=True)
+
+                        # Run the container with port mapping
+                        app_port = ctx.port or 8000
+                        up_result = conn.run(
+                            f"docker run -d --name xenfra-app -p {app_port}:{app_port} "
+                            f"--restart unless-stopped --env-file /root/app/.env app:latest 2>&1 || "
+                            f"docker run -d --name xenfra-app -p {app_port}:{app_port} --restart unless-stopped app:latest 2>&1",
+                            warn=True, hide=True
+                        )
+                    else:
+                        # Docker compose
+                        up_result = conn.run(
+                            "cd /root/app && docker compose up -d 2>&1",
+                            warn=True,
+                            hide=True
+                        )
+
+                    if up_result.failed or up_result.return_code != 0:
+                        # Capture logs if startup failed
+                        if use_railpack_plan:
+                            logs_result = conn.run(
+                                "docker logs xenfra-app --tail 50 2>&1",
+                                warn=True, hide=True
+                            )
+                        else:
+                            logs_result = conn.run(
+                                "cd /root/app && docker compose logs --tail 50 2>&1",
+                                warn=True,
+                                hide=True
+                            )
+                        container_logs = logs_result.stdout or "No logs available"
+                        raise DeploymentError(
+                            f"Container startup failed:\n{up_result.stdout or up_result.stderr or 'No output'}\n\nContainer logs:\n{container_logs[-2000:]}",
+                            stage="Deploy"
+                        )
+                self.emitter.complete_phase(DeploymentPhase.CELL_REIFY)
             else:
                 logger("\n[bold blue]PHASE 7: STARTING HOST-BASED APPLICATION[/bold blue]")
                 start_command = context.get("command", f"uvicorn main:app --port {context.get('port', 8000)}")
+
+                # Security: Validate start_command to prevent command injection
+                # Only allow safe characters: alphanumeric, dots, colons, hyphens, underscores, spaces, equals, slashes
+                import re
+                if not re.match(r'^[a-zA-Z0-9._:=\-\s/]+$', start_command):
+                    raise DeploymentError(
+                        f"Invalid start command - contains unsafe characters: {start_command}",
+                        stage="Deploy"
+                    )
+
                 with self._get_connection(ip_address) as conn:
                     result = conn.run(f"cd /root/app && python3 -m venv .venv && .venv/bin/pip install -r requirements.txt && nohup .venv/bin/{start_command} > app.log 2>&1 &", hide=True)
                     if result.failed:
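
The character allowlist for `start_command` can be exercised on its own; it permits letters, digits, dots, colons, equals, hyphens, whitespace, and slashes, and rejects shell metacharacters:

```python
import re

SAFE_COMMAND = re.compile(r'^[a-zA-Z0-9._:=\-\s/]+$')

assert SAFE_COMMAND.match("uvicorn main:app --port 8000")
assert SAFE_COMMAND.match("gunicorn todo.wsgi --workers=2")
assert not SAFE_COMMAND.match("uvicorn main:app; curl evil.sh | sh")  # ';' and '|' rejected
```
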
@@ -660,24 +1248,35 @@ class InfraEngine:
                     logger(" - Caddy configured for path-based routing")
 
             # === 8. VERIFICATION STAGE ===
-            logger("\n[bold blue]PHASE 8: VERIFYING DEPLOYMENT[/bold blue]")
+            self.emitter.start_phase(DeploymentPhase.VITALS_CHECK, "Checking vitals (Health Check)")
 
             # Give container a moment to initialize before first health check
             time.sleep(5)
 
-            app_port = context.get("port", 8000)
+            app_port = ctx.port or 8000
             for i in range(24):  # 2-minute timeout for health checks
                 logger(f" - Health check attempt {i + 1}/24...")
                 with self._get_connection(ip_address) as conn:
                     # Check if running
                     if is_dockerized:
-                        ps_result = conn.run("cd /root/app && docker compose ps", hide=True)
-                        ps_output = ps_result.stdout.lower()
-                        # Docker Compose V1 shows "running", V2 shows "Up" in status
-                        running = "running" in ps_output or " up " in ps_output
-                        if "restarting" in ps_output:
-                            logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
-                            raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
+                        # Check for railpack container first, then docker-compose
+                        ps_result = conn.run("docker ps --filter name=xenfra-app --format '{{.Status}}'", hide=True, warn=True)
+                        if ps_result.ok and ps_result.stdout.strip():
+                            # Railpack container exists
+                            ps_output = ps_result.stdout.lower()
+                            running = "up" in ps_output
+                            if "restarting" in ps_output:
+                                logs = conn.run("docker logs xenfra-app --tail 20", hide=True).stdout
+                                raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
+                        else:
+                            # Try docker-compose
+                            ps_result = conn.run("cd /root/app && docker compose ps", hide=True, warn=True)
+                            ps_output = ps_result.stdout.lower() if ps_result.stdout else ""
+                            # Docker Compose V1 shows "running", V2 shows "Up" in status
+                            running = "running" in ps_output or " up " in ps_output
+                            if "restarting" in ps_output:
+                                logs = conn.run("cd /root/app && docker compose logs --tail 20", hide=True).stdout
+                                raise DeploymentError(f"Application is crash-looping (restarting). Logs:\n{logs}", stage="Verification")
                     else:
                         ps_result = conn.run("ps aux | grep -v grep | grep python", hide=True)
                         running = ps_result.ok and len(ps_result.stdout.strip()) > 0
@@ -698,13 +1297,10 @@ class InfraEngine:
 
                     # Any HTTP response (200, 404, 500, etc.) means app is running
                     if http_code.isdigit() and int(http_code) >= 100:
-
-                        logger(
-                            "[bold green] - Health check passed! Application is live.[/bold green]"
-                        )
+                        self.emitter.complete_phase(DeploymentPhase.VITALS_CHECK, "Vitals healthy. Organism is alive.")
 
                         # === 9. PERSISTENCE STAGE ===
-                        logger("\n[bold blue]PHASE 9: SAVING DEPLOYMENT TO DATABASE[/bold blue]")
+                        self.emitter.start_phase(DeploymentPhase.MEMORY_COMMIT, "Committing to long-term memory")
                         project = Project(
                             droplet_id=droplet.id,
                             name=droplet.name,
@@ -716,7 +1312,7 @@ class InfraEngine:
                         )
                         session.add(project)
                         session.commit()
-                        logger(" - Deployment saved.")
+                        self.emitter.complete_phase(DeploymentPhase.MEMORY_COMMIT)
 
                         return droplet  # Return the full droplet object
                 time.sleep(5)
@@ -727,9 +1323,12 @@ class InfraEngine:
             raise DeploymentError(f"Application failed to become healthy in time. Logs:\n{logs}", stage="Verification")
 
         except Exception as e:
+            # ZEN GAP FIX: Observability - Mark failure state
+            self.emitter.fail_phase(self.emitter.current_phase or DeploymentPhase.NECROSIS, str(e))
+
             if droplet:
                 if cleanup_on_failure:
-                    logger("[bold yellow]Cleaning up resources...[/bold yellow]")
+                    self.emitter.start_phase(DeploymentPhase.APOPTOSIS, "Triggering apoptosis (Resource Cleanup)")
                    try:
                         # 1. Destroy droplet (DigitalOcean API)
                         logger(f" - Destroying droplet '{droplet.name}'...")
@@ -745,13 +1344,9 @@ class InfraEngine:
                         session.commit()
                         logger(" - Database record removed.")
 
-                        logger("[bold green]Cleanup completed.[/bold green]")
+                        self.emitter.complete_phase(DeploymentPhase.APOPTOSIS, "Organism recycled.")
                     except Exception as cleanup_error:
-                        logger(f"[bold red]Cleanup failed: {cleanup_error}[/bold red]")
-                        logger("[yellow]You may need to manually delete from DigitalOcean.[/yellow]")
+                        self.emitter.fail_phase(DeploymentPhase.APOPTOSIS, f"Recycling failed: {cleanup_error}")
                 else:
-                    logger(
-                        f"[bold red]Deployment failed. Server '{droplet.name}' NOT cleaned up.[/bold red]"
-                    )
-                    logger("[dim]Tip: Use --cleanup-on-failure to auto-cleanup.[/dim]")
+                    self.emitter.emit(DeploymentPhase.NECROSIS, EventStatus.FAILED, f"Deployment failed. Server '{droplet.name}' preserved for diagnostics.")
             raise e
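
Taken together, a hedged end-to-end sketch of the 0.2.7 surface shown in this diff: the engine accepts a `context` dict whose `event_callback` feeds the new `EventEmitter`, and `deploy_server` streams the biological phase events while deploying:

```python
from xenfra_sdk.engine import InfraEngine

def on_event(event) -> None:
    # Receives EventEmitter events (phase, status, message); the concrete
    # event type lives in xenfra_sdk.events.
    print(event)

engine = InfraEngine(
    token="do_token_here",
    context={"event_callback": on_event},
)

droplet = engine.deploy_server(
    name="todo-api",
    region="nyc3",
    size="s-1vcpu-1gb",
    image="ubuntu-22-04-x64",
    repo_url="https://github.com/example/todo-api",
    port=8000,
    tier="FREE",              # governs polling interval and resource limits
    cleanup_on_failure=True,  # failures trigger the APOPTOSIS phase
)
print(droplet.ip_address)
```
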