zu-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_cli/main.py ADDED
@@ -0,0 +1,1126 @@
1
+ """The `zu` command.
2
+
3
+ The thin entry point. ``run`` loads a config and a task, assembles the loop
4
+ from config (the model, the active plugins, the event sink), and executes — a
5
+ run is wired by a file, not by code, so swapping the model is a one-line edit.
6
+ ``run --every`` turns the same one-shot into a scheduled worker; ``serve``
7
+ exposes it over HTTP; ``plugins`` lists everything the registry can discover.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import os
14
+ import time
15
+
16
+ import typer
17
+
18
+ from zu_core.contracts import Result, Status
19
+ from zu_core.loop import run_task
20
+ from zu_core.registry import GROUPS, REGISTRY
21
+
22
+ from .config import ConfigError, assemble, load_agent, load_config
23
+
24
+ app = typer.Typer(help="Zu — Agent Production Runtime", no_args_is_help=True)
25
+
26
+
27
def _installed_version(dist: str) -> str | None:
    """Look up the installed version of the distribution ``dist``.

    Returns the version string reported by ``importlib.metadata`` (e.g. for
    ``zu-runtime``), or ``None`` when no such distribution is installed. Used to
    pin a generated deploy image reproducibly.
    """
    from importlib import metadata

    try:
        found = metadata.version(dist)
    except metadata.PackageNotFoundError:
        # Not installed (or not discoverable): the caller decides how to cope.
        return None
    return found
36
+
37
+
38
def _parse_duration(text: str) -> float:
    """Parse a human duration ('30s', '5m', '2h', '1d', '90') into seconds.

    A bare number is seconds; a trailing ``s``/``m``/``h``/``d`` scales it.
    Surrounding whitespace and letter case are ignored. Used by ``run --every``
    for the scheduling interval.

    Raises:
        ConfigError: if the text is not a number (with an optional unit), or if
            the resulting duration is not a positive, finite number of seconds.
    """
    import math

    text = text.strip().lower()
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    # ``text[-1:]`` (a slice, not an index) stays safe on an empty string.
    unit = units.get(text[-1:])
    try:
        value = float(text[:-1]) if unit else float(text)
    except ValueError:
        raise ConfigError(f"bad duration {text!r}; use e.g. '30s', '5m', '2h'") from None
    seconds = value * (unit or 1)
    # ``float`` accepts 'nan' / 'inf' and overflows '1e400' to inf. NaN would slip
    # past the ``<= 0`` check below (every comparison with NaN is False), and the
    # sleep in the scheduling loop rejects both — so refuse them up front.
    if not math.isfinite(seconds):
        raise ConfigError(f"duration must be finite, got {text!r}")
    if seconds <= 0:
        raise ConfigError(f"duration must be positive, got {text!r}")
    return seconds
52
+
53
+
54
+ def _append_cost_ledger(path: str, *, agent: str, status: str, replayed: bool, summary) -> None:
55
+ """Append one run's cost telemetry to a durable per-agent JSONL ledger, so spend
56
+ is tracked across runs (and record-vs-replay is comparable). Best-effort: a write
57
+ failure is swallowed — telemetry must never fail the run."""
58
+ import json
59
+ from datetime import UTC, datetime
60
+
61
+ entry = {"at": datetime.now(UTC).isoformat(), "agent": agent, "status": status,
62
+ "replayed": replayed, **summary.to_dict()}
63
+ try:
64
+ with open(path, "a", encoding="utf-8") as fh:
65
+ fh.write(json.dumps(entry) + "\n")
66
+ except OSError:
67
+ pass
68
+
69
+
70
def _execute_once(
    agent: str, *, stream: bool = True, use_track: bool = True, offline: bool = False
) -> Result:
    """Load one ``agent.yaml`` (or bundle dir), drive its task to a Result, and
    print a summary. Shared by the one-shot and the scheduled paths.

    ``stream`` (default on) attaches a live trace of the run. ``use_track``
    (default on) loads ``track.json`` from next to the agent and hands it to the
    loop when it matches the task, then re-records it after a SUCCESS run.
    ``offline`` swaps the provider for the captured fixtures bundle.

    Raises ConfigError for a bad agent file or a missing/invalid offline bundle.
    Any exception raised while driving the loop is printed and turned into a
    terminal Result instead of a traceback.
    """
    from pathlib import Path

    from zu_core.cost import summarize_cost
    from zu_core.track import Track, record_track

    spec, cfg = load_agent(agent)
    provider, registry, bus, providers = assemble(cfg)

    agent_path = Path(agent)
    agent_dir = agent_path if agent_path.is_dir() else agent_path.parent

    # Offline replay: swap the live model for the provider rebuilt from the
    # captured bundle and drop the per-tier overrides. Everything downstream
    # (the loop, track recording, cost telemetry) is unchanged.
    if offline:
        from .offline import Bundle, OfflineError, bundle_path, rebind_offline

        try:
            bundle = Bundle.load(bundle_path(agent_dir))
        except OfflineError as exc:
            raise ConfigError(str(exc)) from None
        provider = rebind_offline(registry, bundle)
        providers = {}  # no per-tier LIVE overrides offline; the script drives every tier
        typer.echo(
            f"zu run --offline: replaying {len(bundle.moves)} captured moves "
            "against fixtures (no model, no network)"
        )

    track_path = str(agent_dir / "track.json")
    cost_path = str(agent_dir / "cost.jsonl")
    track = None
    if use_track:
        track = Track.load(track_path)

    # Maturity settings (agent.yaml `replay:`): an optional budget for a replayed
    # run and an optional finisher model built from the global provider config
    # with only the model swapped. Built once, up front.
    from .config import build_provider

    replay_budget = cfg.replay.budget
    finish_provider = None
    if cfg.replay.finish_model:
        finish_provider = build_provider(
            cfg.provider.model_copy(update={"model": cfg.replay.finish_model})
        )

    if track is None or not track.matches(spec.query):
        # No usable recording for this task: run without a track.
        track = None
    else:
        notes = []
        if replay_budget is not None:
            notes.append(
                f"; budget≤{replay_budget.max_tokens:,} tok/{replay_budget.max_steps} steps"
            )
        if finish_provider is not None:
            notes.append(f"; finisher={cfg.replay.finish_model}")
        extra = "".join(notes)
        typer.echo(f"track : replaying {len(track.steps)} recorded steps "
                   f"(deterministic; model only at the frontier{extra})")

    # The same observability hook every harness uses: a live trace (when
    # streaming) plus whatever else ``cfg.observability`` configures.
    from .observe import attach_observability

    attach_observability(bus, cfg.observability, trace=stream)

    # Show a model only when the provider actually exposes one; otherwise the
    # banner carries just the provider name.
    model = getattr(provider, "model", None)
    banner = f"zu run: {agent} · provider={cfg.provider.name}"
    if model:
        banner += f" model={model}"
    typer.echo(banner)

    async def _drive() -> tuple[Result, list]:
        # Run, query, and release the bus on a *single* event loop; ``aclose`` in
        # the ``finally`` releases the sink even when the run raises, so the
        # scheduled path does not accumulate open sinks tick after tick.
        try:
            outcome = await run_task(
                spec, provider, registry, bus,
                providers=providers, containment=cfg.containment,
                max_observation_chars=cfg.max_observation_chars,
                observation_strategy=cfg.observation_strategy,
                max_context_chars=cfg.max_context_chars,
                track=track,
                replay_budget=replay_budget,
                finish_provider=finish_provider,
            )
            return outcome, await bus.query()
        finally:
            await bus.aclose()

    try:
        result, events = asyncio.run(_drive())
    except Exception as exc:  # noqa: BLE001 - a clean message beats a traceback
        # Report the failure as a terminal outcome rather than a traceback.
        failure = f"{type(exc).__name__}: {exc}"
        typer.echo(f"run failed: {failure}", err=True)
        return Result(status=Status.TERMINAL, reason=failure)

    # Record the path on success so the next run can replay it. Best-effort: an
    # OSError anywhere in this step never fails the run.
    if use_track and result.status is Status.SUCCESS:
        try:
            recorded = record_track(events, task=spec.query, model=model)
            recorded.save(track_path)
            tiers_seen = sorted({step.tier for step in recorded.steps})
            if len(tiers_seen) > 1:
                tiers = f"tiers {tiers_seen[0]}→{tiers_seen[-1]}"
            elif tiers_seen:
                tiers = f"tier {tiers_seen[0]}"
            else:
                tiers = "no tools"
            by = f", driven by {recorded.model}" if recorded.model else ""
            typer.echo(
                f"track : recorded {len(recorded.steps)} steps ({tiers}{by}) → {track_path}"
            )
        except OSError:
            pass

    typer.echo(f"status : {result.status.value}")
    if result.value is not None:
        typer.echo(f"value : {result.value}")
    if result.reason is not None:
        typer.echo(f"reason : {result.reason}")
    typer.echo(f"events : {len(events)} recorded")

    # Cost telemetry: summarize the event log, print it, and append it to the
    # per-agent ledger (itself best-effort).
    summary = summarize_cost(events)
    typer.echo(f"cost : {summary.format()}")
    _append_cost_ledger(cost_path, agent=agent, status=result.status.value,
                        replayed=track is not None, summary=summary)
    return result
211
+
212
+
213
def _egress_allowlist(cfg) -> list[str]:
    """Compute the hosts the proxy permits for a contained run.

    The result is the sorted union of the ``egress`` entries declared by every
    tool in the registry built from ``cfg``. If any tool declares ``*`` (open
    egress) a warning is printed to stderr — a real boundary wants an explicit
    host list, not 'any' — and ``*`` stays in the returned list.
    """
    from zu_core.ports import declared_envelope

    from .config import build_registry

    registry = build_registry(cfg)
    hosts: set[str] = {
        host
        for tool_name in registry.names("tools")
        for host in declared_envelope(registry.get("tools", tool_name))["egress"]
    }
    if "*" in hosts:
        typer.echo(
            "warning: a configured tool declares open egress ('*'); the proxy will "
            "permit any host. Narrow each tool's egress for a real boundary.",
            err=True,
        )
    return sorted(hosts)
232
+
233
+
234
def _model_egress(cfg) -> list[str]:
    """Return the host the contained meta-agent's model must reach, as a list.

    Resolution order:

    * a ``scripted`` provider needs no egress → ``[]``;
    * an explicit ``base_url``, else the URL held in the environment variable
      named by ``base_url_env`` → that URL's hostname (``[]`` when the URL has
      no parsable host);
    * otherwise the known default host of a built-in provider, or ``[]``.

    An empty list means the proxy denies all egress (fail-closed).
    """
    from urllib.parse import urlsplit

    provider_cfg = cfg.provider
    if provider_cfg.name == "scripted":
        return []

    url = getattr(provider_cfg, "base_url", None)
    if not url:
        # No explicit URL: fall back to the one named by the provider's env var.
        env_name = getattr(provider_cfg, "base_url_env", None)
        url = os.environ.get(env_name) if env_name else None

    if not url:
        # No base_url configured: fall back to the known default host for built-in providers.
        return {"anthropic": ["api.anthropic.com"]}.get(provider_cfg.name, [])

    host = urlsplit(url).hostname
    return [host] if host else []
253
+
254
+
255
def _execute_sandboxed(agent: str) -> Result:
    """Run the whole agent inside a container behind an egress proxy — the real
    boundary for ``containment='required'``.

    Validates the agent file, ships its raw task/config documents plus the
    bundle directory to a ``SandboxLauncher`` on the local Docker backend, and
    prints the outcome. The image comes from ``ZU_SANDBOX_IMAGE`` (default
    ``zu:latest``); the proxy allowlist is the tools' declared egress.

    Raises ConfigError for a bad agent file, or when the Docker backend package
    is not installed.
    """
    from pathlib import Path

    from .config import AGENT_FILE, _read_doc

    # Validate first; only ``cfg`` is needed afterwards (for the egress allowlist).
    _spec, cfg = load_agent(agent)

    # Raw task/config dicts to ship into the container (it assembles inside the box).
    agent_path = Path(agent)
    is_bundle = agent_path.is_dir()
    doc = _read_doc(str(agent_path / AGENT_FILE if is_bundle else agent_path))
    task_doc = doc.get("task", {})
    config_doc = {key: value for key, value in doc.items() if key != "task"}
    # The folder holding agent.yaml is handed to the launcher so the bundle's
    # own tools/ resolve inside the box.
    bundle_dir = str(agent_path if is_bundle else agent_path.parent)

    try:
        from zu_backends.local_docker import LocalDockerBackend

        from .sandbox import SandboxLauncher
    except ModuleNotFoundError as exc:
        raise ConfigError(
            "the sandboxed run needs the Docker backend: pip install 'zu-runtime[docker]'"
        ) from exc

    image = os.environ.get("ZU_SANDBOX_IMAGE", "zu:latest")
    launcher = SandboxLauncher(backend=LocalDockerBackend(), image=image)
    typer.echo(f"zu run --sandboxed: {agent} in {image} (egress via proxy)")
    result, events = asyncio.run(
        launcher.run(
            task_doc,
            config_doc,
            allowlist=_egress_allowlist(cfg),
            bundle_dir=bundle_dir,
        )
    )

    typer.echo(f"status : {result.status.value}")
    if result.value is not None:
        typer.echo(f"value : {result.value}")
    if result.reason is not None:
        typer.echo(f"reason : {result.reason}")
    typer.echo(f"events : {len(events)} recorded (contained)")
    return result
295
+
296
+
297
@app.command()
def run(
    agent: str = typer.Argument(
        "agent.yaml", help="The agent: an agent.yaml file, or a bundle directory "
        "(agent.yaml + a tools/ package)."
    ),
    every: str = typer.Option(
        None, "--every", help="Re-run on an interval (e.g. '5m', '30s', '1h') — a scheduled worker."
    ),
    max_runs: int = typer.Option(
        0, "--max-runs", help="With --every, stop after N runs (0 = run forever)."
    ),
    stream: bool = typer.Option(
        True, "--stream/--no-stream",
        help="Print a live trace of the run (train of thought, tools, escalations) as it happens.",
    ),
    sandboxed: bool = typer.Option(
        False, "--sandboxed",
        help="Run the WHOLE agent inside a hardened container behind an egress proxy "
        "(needs Docker + the zu image). The real boundary for containment='required'.",
    ),
    track: bool = typer.Option(
        True, "--track/--no-track",
        help="Replay a recorded path (track.json) deterministically — model only at "
        "the frontier — and re-record it on success. --no-track always uses the model.",
    ),
    offline: bool = typer.Option(
        False, "--offline",
        help="Replay against a captured fixtures/ bundle — no model, no network, ~$0. "
        "Proves the wiring after one `zu capture`; the keystone for cheap construction.",
    ),
) -> None:
    """Run a self-contained agent (one ``agent.yaml`` or a bundle dir) — once, or
    on a schedule with --every.

    A live trace streams to the console as the loop runs (disable with
    --no-stream). The whole agent — task, model(s), the tier ladder of tools — is
    one file; a bundle dir adds its own ``tools/`` so custom tools just resolve.
    """
    # Exit codes: 2 = configuration/usage error, 1 = a one-shot run that did not
    # end in SUCCESS, 0 otherwise.
    if sandboxed and offline:
        typer.echo("config error: --sandboxed and --offline are mutually exclusive "
                   "(one is a live contained run, the other replays fixtures).", err=True)
        raise typer.Exit(code=2)

    def _once() -> Result:
        # Single dispatch point shared by the one-shot AND the scheduled path, so
        # ``--sandboxed`` is honoured under ``--every`` too. Previously the loop
        # always took the uncontained path and silently dropped the flag.
        if sandboxed:
            return _execute_sandboxed(agent)
        return _execute_once(agent, stream=stream, use_track=track, offline=offline)

    # One-shot: run, exit non-zero on a non-success result so it composes in a
    # shell. Scheduled: loop and keep going whatever Result a single run returns.
    if not every:
        try:
            result = _once()
        except ConfigError as exc:
            typer.echo(f"config error: {exc}", err=True)
            raise typer.Exit(code=2) from None
        if result.status is not Status.SUCCESS:
            raise typer.Exit(code=1)
        return

    try:
        interval = _parse_duration(every)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    typer.echo(f"scheduling every {every} (max_runs={max_runs or '∞'}) — Ctrl-C to stop")
    n = 0
    while True:
        n += 1
        typer.echo(f"--- run {n} ---")
        try:
            # NOTE(review): unlike the uncontained path, the sandboxed path does
            # not convert launcher failures into a Result, so such an exception
            # still ends the worker — confirm whether that is the desired policy.
            _once()
        except ConfigError as exc:
            # A bad config is fatal even in a loop — it won't fix itself.
            typer.echo(f"config error: {exc}", err=True)
            raise typer.Exit(code=2) from None
        if max_runs and n >= max_runs:
            break
        time.sleep(interval)
375
+
376
+
377
@app.command()
def capture(
    agent: str = typer.Argument(
        "agent.yaml", help="The agent to capture: an agent.yaml file, or a bundle directory."
    ),
    stream: bool = typer.Option(
        True, "--stream/--no-stream", help="Print a live trace as the capture run executes."
    ),
) -> None:
    """Drive an agent LIVE once and project the run into a ``fixtures/`` bundle, so it
    can then be BUILT and HARDENED offline with ``zu run --offline`` — at ~$0, no further
    live spend.

    This is the one live step of the construction sequence: it needs the provider's keys
    and network. It records ``fixtures/capture.json`` (the model's moves + each tool's
    observations) next to the agent — the input the offline keystone replays.
    """
    from pathlib import Path

    from zu_core.loop import run_task

    from .observe import attach_observability
    from .offline import bundle_path, project_capture

    # Exit codes: 2 = bad agent file, 1 = the run raised or did not end in SUCCESS.
    try:
        spec, cfg = load_agent(agent)
        provider, registry, bus, providers = assemble(cfg)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    attach_observability(bus, cfg.observability, trace=stream)

    # Only mention a model when the provider exposes one.
    model = getattr(provider, "model", None)
    header = f"zu capture: {agent} · provider={cfg.provider.name}"
    if model:
        header += f" model={model}"
    typer.echo(header + " (LIVE — keys + network)")

    async def _drive() -> tuple[Result, list]:
        # Run, read back the events, and always release the bus — all on one
        # event loop.
        try:
            outcome = await run_task(
                spec, provider, registry, bus,
                providers=providers, containment=cfg.containment,
                max_observation_chars=cfg.max_observation_chars,
                observation_strategy=cfg.observation_strategy,
                max_context_chars=cfg.max_context_chars,
            )
            return outcome, await bus.query()
        finally:
            await bus.aclose()

    try:
        result, events = asyncio.run(_drive())
    except Exception as exc:  # noqa: BLE001 - a clean message beats a traceback
        typer.echo(f"capture failed: {type(exc).__name__}: {exc}", err=True)
        raise typer.Exit(code=1) from None

    typer.echo(f"status : {result.status.value}")
    if result.status is not Status.SUCCESS:
        # Only a successful run is written out as a fixture.
        if result.reason is not None:
            typer.echo(f"reason : {result.reason}")
        typer.echo("capture: not recorded (only a SUCCESS run is a faithful fixture).", err=True)
        raise typer.Exit(code=1)

    bundle = project_capture(events, result, task=spec.query, model=model)
    agent_path = Path(agent)
    out = bundle_path(agent_path if agent_path.is_dir() else agent_path.parent)
    out.parent.mkdir(parents=True, exist_ok=True)
    bundle.save(out)

    observation_count = sum(len(items) for items in bundle.observations.values())
    typer.echo(f"capture: recorded {len(bundle.moves)} moves + {observation_count} tool observations "
               f"→ {out}")
    typer.echo("next : `zu run --offline` replays it at ~$0 (no model, no network).")
448
+
449
+
450
@app.command()
def harden(
    agent: str = typer.Argument(
        "agent.yaml", help="The agent to harden: an agent.yaml file, or a bundle directory."
    ),
    min_score: float = typer.Option(
        1.0, "--min-score",
        help="Fail (exit 1) if the resilience score is below this (0.0–1.0).",
    ),
) -> None:
    """Stage 5 — chaos hardening. Score how brittle a captured path is, offline and free.

    Audits the captured ``fixtures/capture.json`` for single points of failure
    (single-selector steps, single-occurrence grounded values), then replays perturbed
    variants through the offline keystone: cosmetic page noise it SHOULD absorb (the
    resilience score) and value-deletions it MUST fail (proving grounding gates). Needs
    a captured bundle (run ``zu capture`` once); spends nothing — no model, no network.
    """
    from pathlib import Path

    from .harden import harden as run_harden
    from .offline import Bundle, OfflineError, bundle_path

    # Exit codes: 2 = bad agent file or missing/invalid bundle, 1 = score below
    # ``--min-score``.
    try:
        spec, cfg = load_agent(agent)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    agent_path = Path(agent)
    agent_dir = agent_path if agent_path.is_dir() else agent_path.parent
    try:
        bundle = Bundle.load(bundle_path(agent_dir))
    except OfflineError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    typer.echo(f"zu harden: {agent} (offline — no model, no network)")
    report = asyncio.run(run_harden(spec, cfg, bundle))

    # 1) Static findings: the single points of failure the audit reported.
    if not report.findings:
        typer.echo("brittle: none found (no single-selector or single-occurrence steps)")
    else:
        typer.echo(f"brittle: {len(report.findings)} single point(s) of failure")
        for finding in report.findings:
            typer.echo(f" · [{finding.kind}] {finding.where}: {finding.detail}")

    # 2) Each perturbed variant: what happened versus what was expected.
    for variant in report.variants:
        mark = "ok " if variant.ok else "!! "
        verdict = "passed" if variant.passed else "failed"
        expected = "pass" if variant.expect_pass else "fail"
        typer.echo(f" {mark}{variant.name}: {verdict} (expected {expected})")

    # 3) The gate. The grounding warning is advisory: it does not change the
    #    exit code by itself.
    score = report.resilience
    typer.echo(f"resilience: {score:.0%} of cosmetic perturbations absorbed")
    if not report.grounding_load_bearing:
        typer.echo("warning: a value-deletion variant still passed — grounding is NOT "
                   "gating this path; the score is unreliable.", err=True)
    if score < min_score:
        typer.echo(f"harden: resilience {score:.0%} below --min-score {min_score:.0%}", err=True)
        raise typer.Exit(code=1)
    typer.echo("harden: resilient enough to promote.")
509
+
510
+
511
@app.command()
def build(
    agent: str = typer.Argument(
        "agent.yaml", help="The agent to build: an agent.yaml file, or a bundle directory."
    ),
    min_score: float = typer.Option(
        1.0, "--min-score",
        help="Hold promotion (exit 1) if the hardened track's resilience is below this.",
    ),
    with_canary: bool = typer.Option(
        False, "--with-canary",
        help="Also run the live canary (stage 6) — the live lane, not built yet.",
    ),
) -> None:
    """Run the OFFLINE construction spine — build → record track → harden — and write a
    hardened ``track.json``, at $0 (no model, no network).

    Chains the offline stages of the sequence: replay the captured ``fixtures/`` bundle
    (stage 3), project the track from that clean run (stage 4), and score it against
    perturbed fixtures (stage 5), gating the track on resilience. Needs a captured bundle
    (run ``zu capture`` once); the live canary (stage 6) and promotion (stage 7) are
    separate steps.
    """
    from pathlib import Path

    from .build import _canary, build_offline
    from .offline import Bundle, OfflineError, bundle_path

    # Exit codes: 2 = canary seam not implemented / bad agent file / missing or
    # invalid bundle, 1 = a stage failed and the build is held.
    if with_canary:
        # The explicit live-lane seam: fail loudly rather than pretend it ran.
        try:
            _canary(None, None)
        except NotImplementedError as exc:
            typer.echo(f"build: {exc}", err=True)
            raise typer.Exit(code=2) from None

    try:
        spec, cfg = load_agent(agent)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    agent_path = Path(agent)
    agent_dir = agent_path if agent_path.is_dir() else agent_path.parent
    try:
        bundle = Bundle.load(bundle_path(agent_dir))
    except OfflineError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    typer.echo(f"zu build: {agent} (offline spine — no model, no network)")
    report = asyncio.run(build_offline(spec, cfg, agent_dir, bundle, min_score=min_score))

    # One line per stage; an unrecognized status is shown as "?".
    stage_marks = {"ok": "✓", "failed": "✗", "skipped": "·"}
    for stage in report.stages:
        mark = stage_marks.get(stage.status, "?")
        typer.echo(f" {mark} {stage.name}: {stage.detail}")

    if not report.ok:
        typer.echo("build: held — fix the failed stage above before promoting.", err=True)
        raise typer.Exit(code=1)
    typer.echo(f"build: hardened track ready at {report.track_path}")
    typer.echo("next : `zu run <agent>` for a live canary, then `zu pack` / `zu deploy`.")
572
+
573
+
574
@app.command()
def construct(
    agent: str = typer.Argument(
        "agent.yaml", help="The agent to construct: an agent.yaml file, or a bundle directory."
    ),
    check: bool = typer.Option(
        False, "--check",
        help="One round only: report construction-readiness (build + guardrails) and exit. "
        "No model needed — the $0 readiness gate.",
    ),
    max_rounds: int = typer.Option(
        3, "--max-rounds", help="Autonomous mode: max diagnose→edit→rebuild rounds.",
    ),
    min_resilience: float = typer.Option(
        1.0, "--min-resilience", help="Required resilience score (0.0–1.0).",
    ),
    sandboxed: bool = typer.Option(
        False, "--sandboxed",
        help="Run the autonomous loop INSIDE a hardened container (needs Docker + the zu "
        "image). Egress is limited to the model endpoint; the tools replay offline.",
    ),
) -> None:
    """The meta-agent construction loop: build → enforce the anti-hardcode guardrails →
    (autonomously) diagnose, edit, and rebuild — offline, at $0 with a scripted strategist.

    ``--check`` runs ONE round and reports readiness (the gate that enforces: alternate
    locators, a resilient track, and no hardcoded answer). The autonomous loop decides edits
    with a live model when its key is set (else it stops at the live-strategist seam).
    ``--sandboxed`` runs that loop contained — the production form of the meta-agent: zu's
    own construct() loop inside the hardened box, egress only to the model. Needs a captured
    bundle (run ``zu capture`` once).
    """
    from pathlib import Path

    from .build import build_offline
    from .construct import LiveStrategist
    from .construct import construct as run_construct
    from .guardrails import enforce_guardrails
    from .offline import Bundle, OfflineError, bundle_path

    # Three modes, tried in this order: --check, --sandboxed, then the plain
    # autonomous loop. Exit codes: 2 = configuration problem or unimplemented
    # seam, 1 = not ready / did not converge / run failed.
    try:
        spec, cfg = load_agent(agent)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    agent_path = Path(agent)
    agent_dir = agent_path if agent_path.is_dir() else agent_path.parent
    try:
        bundle = Bundle.load(bundle_path(agent_dir))
    except OfflineError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    if check:
        # Readiness gate: one offline build plus one guardrail pass, then report.
        typer.echo(f"zu construct --check: {agent} (offline readiness gate — no model)")
        build_report = asyncio.run(
            build_offline(spec, cfg, agent_dir, bundle, min_score=min_resilience))
        guards = asyncio.run(
            enforce_guardrails(spec, cfg, bundle, agent_dir, min_resilience=min_resilience))

        stage_marks = {"ok": "✓", "failed": "✗", "skipped": "·"}
        for stage in build_report.stages:
            mark = stage_marks.get(stage.status, "?")
            typer.echo(f" {mark} {stage.name}: {stage.detail}")

        if not guards.passed:
            typer.echo(f" ✗ guardrails: {len(guards.violations)} violation(s)")
            for violation in guards.violations:
                typer.echo(f" · [{violation.rule}] {violation.detail}")
        else:
            typer.echo(f" ✓ guardrails: passed (resilience {guards.resilience:.0%})")

        if build_report.ok and guards.passed:
            typer.echo("construct: ready for review (build clean + guardrails passed).")
            return
        typer.echo("construct: not ready — fix the items above (a strategist would iterate "
                   "on these).", err=True)
        raise typer.Exit(code=1)

    if sandboxed:
        # The production form: run the autonomous loop INSIDE the hardened box,
        # with the proxy allowlist limited to the model endpoint.
        from .construct_sandbox import launch_contained_construction

        try:
            from zu_backends.local_docker import LocalDockerBackend

            from .sandbox import SandboxLauncher
        except ModuleNotFoundError:
            typer.echo("config error: sandboxed construction needs the Docker backend: "
                       "pip install 'zu-runtime[docker]'", err=True)
            raise typer.Exit(code=2) from None

        image = os.environ.get("ZU_SANDBOX_IMAGE", "zu:latest")
        allowlist = _model_egress(cfg)
        launcher = SandboxLauncher(backend=LocalDockerBackend(), image=image)
        typer.echo(f"zu construct --sandboxed: {agent} in {image} "
                   f"(contained; egress→{', '.join(allowlist) or 'none'}; up to {max_rounds} rounds)")
        try:
            payload = asyncio.run(launch_contained_construction(
                launcher, str(agent_dir), allowlist=allowlist,
                max_rounds=max_rounds, min_resilience=min_resilience))
        except Exception as exc:  # noqa: BLE001 - container/model failure: report, don't traceback
            typer.echo(f"construct: contained run failed: {type(exc).__name__}: {exc}", err=True)
            raise typer.Exit(code=1) from None

        if not payload.get("ok"):
            typer.echo(f"construct: {payload.get('error', 'contained construction failed')}", err=True)
            raise typer.Exit(code=1)

        for round_info in payload.get("rounds", []):
            typer.echo(f" round {round_info['round']}: {round_info['note']}")

        if payload.get("converged") and payload.get("track"):
            # The contained run hands the track back as text; persist it next to
            # the agent for review.
            track_path = agent_dir / "track.json"
            track_path.write_text(payload["track"], encoding="utf-8")
            typer.echo(f"construct: converged — hardened track written → {track_path} "
                       "(review before promoting; nothing auto-promoted).")
            return

        for violation in payload.get("violations", []):
            typer.echo(f" · [{violation['rule']}] {violation['detail']}")
        typer.echo("construct: did not converge — handed back for review.", err=True)
        raise typer.Exit(code=1)

    # Autonomous mode: build the agent's configured provider only when its API
    # key env var is named AND set; otherwise the strategist receives ``None``.
    from .config import build_provider

    key_env = getattr(cfg.provider, "api_key_env", None)
    provider = None
    if key_env and os.environ.get(key_env):
        provider = build_provider(cfg.provider)
    mode = "no live model" if provider is None else f"live model {cfg.provider.model}"
    typer.echo(f"zu construct: {agent} (autonomous — up to {max_rounds} rounds; {mode})")

    try:
        report = asyncio.run(run_construct(
            spec, cfg, agent_dir, bundle, LiveStrategist(provider),
            max_rounds=max_rounds, min_resilience=min_resilience))
    except NotImplementedError as exc:
        typer.echo(f"construct: {exc}", err=True)
        raise typer.Exit(code=2) from None
    except Exception as exc:  # noqa: BLE001 - a live model/network failure: report, don't traceback
        typer.echo(f"construct: live model failed: {type(exc).__name__}: {exc}", err=True)
        raise typer.Exit(code=1) from None

    if report.converged:
        typer.echo("construct: converged — ready for review (build clean + guardrails passed).")
        return

    # Did not converge — report each round's outcome and hand back for review.
    for round_report in report.rounds:
        typer.echo(f" round {round_report.round}: {round_report.note}")
    typer.echo("construct: did not converge — handed back for review (nothing auto-promoted).",
               err=True)
    raise typer.Exit(code=1)
717
+
718
+
719
@app.command()
def serve(
    config: str = typer.Option(
        "agent.yaml", "--config", "-c", help="Agent/config file for the service (task block ignored; tasks arrive per request)."
    ),
    host: str = typer.Option("127.0.0.1", help="Bind host."),
    port: int = typer.Option(8000, help="Bind port."),
) -> None:
    """Serve the runtime over HTTP (POST /run). Needs the 'serve' extra:
    pip install 'zu-runtime[serve]'.

    Binding to a non-localhost host (e.g. 0.0.0.0, as a container does) exposes
    arbitrary, budget-spending agent runs, so it requires an auth token: set
    ZU_SERVE_TOKEN and clients must send `Authorization: Bearer <token>`."""
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    import os

    # Step 1 — validate the config before anything else, so a broken file is
    # reported (exit 2) without ever binding a port.
    try:
        load_config(config)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    # Step 2 — refuse an unauthenticated, non-loopback bind. Anyone who can
    # reach such a port could start agent runs (spending the model budget) and
    # read the cross-run event feed, so a token is mandatory off-localhost.
    loopback_hosts = ("127.0.0.1", "localhost", "::1", "::ffff:127.0.0.1")
    has_token = bool(os.environ.get("ZU_SERVE_TOKEN"))
    if not (host in loopback_hosts or has_token):
        refusal = (
            f"refusing to bind {host!r} without authentication: set ZU_SERVE_TOKEN "
            "(clients then send 'Authorization: Bearer <token>'), or bind 127.0.0.1 "
            "for local-only access."
        )
        typer.echo(refusal, err=True)
        raise typer.Exit(code=2) from None

    # Step 3 — the server stack is an optional extra; turn a missing module
    # into an install hint instead of a traceback.
    try:
        import uvicorn

        from .server import create_app
    except ModuleNotFoundError:
        install_hint = (
            "the HTTP server needs FastAPI + uvicorn; install with: pip install 'zu-runtime[serve]'"
        )
        typer.echo(install_hint, err=True)
        raise typer.Exit(code=2) from None

    # Step 4 — announce the endpoints, then build the app and hand it to uvicorn.
    banner = (
        f"zu serve: http://{host}:{port} (dashboard at / · POST /run · "
        f"live feed /events · review queue /review · config={config})"
    )
    typer.echo(banner)
    application = create_app(config)
    uvicorn.run(application, host=host, port=port)
769
+
770
+
771
@app.command()
def demo(
    type: str = typer.Option(
        "web", "--type", "-t",
        help="Which demo: web (default, tier-1 real fetch) | minimal (no tools) | escalation (tier-2).",
    ),
    model: str = typer.Option(
        None, "--model", help="Model id for the real run (required unless --offline)."
    ),
    provider: str = typer.Option(
        None, "--provider", help="Provider name (required for a real run; no default)."
    ),
    api_key: str = typer.Option(
        None, "--api-key", help="API key for the real run (or set the provider's env var)."
    ),
    api_key_env: str = typer.Option(None, "--api-key-env", help="Env var holding the API key."),
    base_url_env: str = typer.Option(
        None, "--base-url-env", help="Env var holding the base URL (openai-compatible)."
    ),
    offline: bool = typer.Option(
        False, "--offline", help="Self-test with a scripted model + fixtures (proves wiring, not a real run)."
    ),
) -> None:
    """Run a demo against a real model — proving Zu actually runs, not just that
    the logic is wired. Requires --model (and a key) by default.

    --type web (default) does a real tier-1 fetch + extract (API key + network,
    no Docker); minimal is a no-tools model call (API key only); escalation is
    the tier-2 arc (needs Docker — real path not yet available; use --offline).

    --offline replays a scripted, fixtured run for CI / a wiring self-test.
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    import asyncio as _asyncio

    from . import demo as _demo

    # Guard 1: the demo kind must be one the demo module knows about.
    if type not in _demo.DEMOS:
        known = ", ".join(_demo.DEMO_TYPES)
        typer.echo(f"unknown demo type {type!r}; choose one of: {known}", err=True)
        raise typer.Exit(code=2) from None

    # Guard 2: a real (non-offline) run needs BOTH a provider and a model —
    # there is deliberately no default provider to fall back to.
    if not offline and not (model and provider):
        usage = (
            "zu demo runs against a real model to prove it works. Name the provider "
            "and model (no default provider), and set its API key — e.g.:\n"
            " export ANTHROPIC_API_KEY=...\n"
            " zu demo --provider anthropic --model claude-opus-4-8\n"
            "or, for an OpenAI-compatible endpoint (e.g. OpenRouter):\n"
            " export OPENAI_API_KEY=... # and OPENAI_BASE_URL if not api.openai.com\n"
            " zu demo --provider openai-compatible --model openai/gpt-4o-mini "
            "--api-key-env OPENAI_API_KEY --base-url-env OPENAI_BASE_URL\n"
            "Or self-test the wiring offline (no key): zu demo --offline"
        )
        typer.echo(usage, err=True)
        raise typer.Exit(code=2) from None

    # Guard 3: demos flagged "needs_web" require the web tools; surface the
    # RuntimeError text (the install hint) rather than a traceback.
    demo_spec = _demo.DEMOS[type]
    if demo_spec["needs_web"]:
        try:
            _demo.ensure_web_tools()
        except RuntimeError as exc:
            typer.echo(str(exc), err=True)
            raise typer.Exit(code=2) from None

    # Build the provider for this demo; a ConfigError is a usage error (exit 2).
    try:
        prov, label = _demo.build_provider(
            provider, model, api_key, api_key_env, base_url_env, kind=type, offline=offline
        )
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    # The demo coroutine's return value is used as the process exit code.
    exit_code = _asyncio.run(_demo.run_demo(prov, label, kind=type, offline=offline))
    raise typer.Exit(code=exit_code)
846
+
847
+
848
@app.command()
def init(
    directory: str = typer.Argument(".", help="Where to write the starter files."),
    template: str = typer.Option(
        "web", "--template", "-t", help="Agent shape: web | minimal | research."
    ),
    force: bool = typer.Option(False, "--force", help="Overwrite existing files."),
) -> None:
    """Scaffold a new Zu agent — a single starter ``agent.yaml`` you can run at once.

    Edit the provider block to choose your model, then `zu run`. Drop your own
    tools in a ``tools/`` dir beside it and list them in ``tiers``.
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    from .scaffold import TEMPLATE_NAMES, write_template

    # Unknown template name → usage error (exit 2) listing the valid choices.
    if template not in TEMPLATE_NAMES:
        choices = ", ".join(TEMPLATE_NAMES)
        typer.echo(f"unknown template {template!r}; choose: {choices}", err=True)
        raise typer.Exit(code=2) from None

    # An existing file is only replaced with --force; otherwise report which
    # path blocked the write and exit 1.
    try:
        written = write_template(directory, template, force=force)
    except FileExistsError as exc:
        typer.echo(f"refusing to overwrite: {exc} (use --force)", err=True)
        raise typer.Exit(code=1) from None

    for path in written:
        typer.echo(f"created {path}")

    next_steps = (
        "\nnext:\n"
        " 1. edit agent.yaml — set the provider/model and export its API key\n"
        " 2. zu run # runs agent.yaml with a live trace\n"
        " (add your own tools: drop a tools/ package beside it, list them in tiers)"
    )
    typer.echo(next_steps)
880
+
881
+
882
@app.command()
def deploy(
    target: str = typer.Argument("local", help="local | dockerfile | compose | fly | render"),
    config: str = typer.Option("agent.yaml", "--config", "-c", help="The agent/config file to deploy."),
    name: str = typer.Option("zu-agent", "--name", help="Image / app / container name."),
    port: int = typer.Option(8000, "--port", help="Service port."),
    extras: str = typer.Option("all", "--extras", help="zu-runtime extras to install in the image."),
    force: bool = typer.Option(False, "--force", help="Overwrite an existing Dockerfile."),
    dry_run: bool = typer.Option(False, "--dry-run", help="With target=local, print the docker commands instead of running them."),
) -> None:
    """Deploy the agent as an HTTP service. `local` builds + runs a container;
    `dockerfile`/`compose`/`fly`/`render` emit a manifest you apply yourself.

    Secrets are never baked in — the provider's key env is passed through at run
    time (local) or referenced in the manifest (cloud).
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    #
    # Exit codes: 2 for usage/environment problems (unknown target, bad config,
    # docker missing), 1 when a docker build/run step itself fails.
    from . import deploy as _deploy

    if target not in _deploy.TARGETS:
        typer.echo(f"unknown target {target!r}; choose: {', '.join(_deploy.TARGETS)}", err=True)
        raise typer.Exit(code=2) from None
    try:
        cfg = load_config(config)  # fail fast on a bad/missing config before building
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    # Pin the image to the installed zu-runtime so a rebuild is reproducible, and
    # pass through exactly the env vars THIS config's provider(s) name (plus the
    # defaults), so a custom provider's key isn't silently dropped.
    version = _installed_version("zu-runtime")
    envs = _deploy.key_envs_for_config(cfg)

    if target != "local":
        # Manifest targets only write files; applying them is left to the user.
        paths = _deploy.generate(
            target, ".", name=name, config=config, extras=extras, port=port, force=force,
            version=version, envs=envs,
        )
        for p in paths:
            typer.echo(f"wrote {p}")
        typer.echo(f"\nnext: apply the {target} manifest with your platform's tooling "
                   "(set the provider's API key as a secret there).")
        return

    # target == local: generate a Dockerfile (if absent), build, run.
    import shlex
    import shutil
    import subprocess

    df = _deploy.write_dockerfile(".", config, extras=extras, port=port, force=force, version=version)
    typer.echo(f"Dockerfile: {df}")
    build, run = _deploy.local_commands(name, config, port=port, envs=envs)
    if dry_run:
        # shlex.join quotes any argument containing whitespace or shell
        # metacharacters, so the printed lines can be pasted into a shell
        # as-is. For ordinary arguments the output equals " ".join(...).
        typer.echo("$ " + shlex.join(build))
        typer.echo("$ " + shlex.join(run))
        return
    if shutil.which("docker") is None:
        typer.echo("docker not found — install Docker, or use a manifest target (compose/fly/render).", err=True)
        raise typer.Exit(code=2) from None
    typer.echo("building image…")
    if subprocess.run(build).returncode != 0:
        typer.echo("docker build failed", err=True)
        raise typer.Exit(code=1) from None
    # Best-effort removal of a prior container with the same name; its output
    # is captured and its exit status deliberately ignored.
    subprocess.run(["docker", "rm", "-f", name], capture_output=True)
    if subprocess.run(run).returncode != 0:
        typer.echo("docker run failed", err=True)
        raise typer.Exit(code=1) from None
    typer.echo(
        f"\n✅ {name} running → http://localhost:{port}\n"
        f" POST /run · POST /run/stream (live)\n"
        f" logs: docker logs -f {name}\n"
        f" stop: docker rm -f {name}"
    )
954
+
955
+
956
@app.command()
def pack(
    bundle: str = typer.Argument(".", help="The bundle directory (agent.yaml + tools/)."),
    tag: str = typer.Option(..., "--tag", "-t", help="Image tag to build, e.g. my-agent:1."),
    base: str = typer.Option(
        "zu:latest", "--base", help="Base image with the Zu runtime to build FROM."
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Print the Dockerfile + build command instead of building."
    ),
) -> None:
    """Bake a bundle into a standalone image — agent.yaml + tools/ + its
    requirements.txt, installed at build time.

    Use this when a bundle's tools have extra pip dependencies (the `--sandboxed`
    mount only sees the base image's packages). The packed image runs the agent on
    `docker run`; point `--sandboxed` at it (ZU_SANDBOX_IMAGE) to run it contained.
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    #
    # Exit codes: 2 for a bad bundle or missing docker, 1 when the build fails.
    from . import deploy as _deploy

    try:
        load_agent(bundle)  # validate the bundle (agent.yaml present + resolves)
    except ConfigError as exc:
        typer.echo(f"config error: {exc}", err=True)
        raise typer.Exit(code=2) from None

    df = _deploy.pack_dockerfile_text(base)
    build = _deploy.pack_build_command(tag, bundle)
    if dry_run:
        import shlex

        typer.echo(df)
        # shlex.join quotes arguments that contain whitespace or shell
        # metacharacters (e.g. a bundle path with a space), so the printed
        # command is safe to paste; ordinary arguments print unchanged.
        typer.echo("$ " + shlex.join(build))
        return

    import shutil
    import subprocess

    if shutil.which("docker") is None:
        typer.echo("docker not found — install Docker to build the image.", err=True)
        raise typer.Exit(code=2) from None
    typer.echo(f"packing {bundle} → {tag} (base {base})…")
    # The generated Dockerfile text is fed to the build command on stdin.
    if subprocess.run(build, input=df.encode()).returncode != 0:
        typer.echo("docker build failed", err=True)
        raise typer.Exit(code=1) from None
    typer.echo(
        f"\n✅ built {tag}\n"
        f" run: docker run --rm -e ANTHROPIC_API_KEY {tag}\n"
        f" contained: ZU_SANDBOX_IMAGE={tag} zu run --sandboxed {bundle}"
    )
1004
+
1005
+
1006
@app.command()
def mcp() -> None:
    """Run the MCP server (stdio) so a coding agent — Claude Code, Cursor, … —
    can design, validate, run, and inspect Zu agents for you in natural language.

    You don't run this by hand: register it once (see the docs) and your harness
    launches `zu mcp` as a child process per session. Needs the 'mcp' extra:
    pip install 'zu-runtime[mcp]'.
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    #
    # The MCP SDK is an optional extra: a missing module becomes an install
    # hint and exit code 2 rather than a traceback.
    try:
        from .mcp_server import build_server
    except ModuleNotFoundError:
        hint = "zu mcp needs the MCP SDK; install it with: pip install 'zu-runtime[mcp]'"
        typer.echo(hint, err=True)
        raise typer.Exit(code=2) from None

    server = build_server()
    server.run(transport="stdio")
1023
+
1024
+
1025
+ def _resolve_package_plugins(package: str) -> tuple[list[tuple[str, str, object]], list[str]]:
1026
+ """The (kind, name, instance) Zu plugins a distribution declares via entry
1027
+ points. A plugin that needs constructor args (e.g. a sink wanting a path) is
1028
+ skipped with a note — the gate stands up what it can instantiate no-arg."""
1029
+ from importlib.metadata import PackageNotFoundError, distribution
1030
+
1031
+ groups = {
1032
+ "zu.providers": "providers", "zu.tools": "tools", "zu.detectors": "detectors",
1033
+ "zu.validators": "validators", "zu.backends": "backends", "zu.sinks": "sinks",
1034
+ }
1035
+ try:
1036
+ dist = distribution(package)
1037
+ except PackageNotFoundError:
1038
+ return [], [f"package {package!r} is not installed"]
1039
+ out: list[tuple[str, str, object]] = []
1040
+ notes: list[str] = []
1041
+ for ep in dist.entry_points:
1042
+ kind = groups.get(ep.group)
1043
+ if kind is None:
1044
+ continue
1045
+ try:
1046
+ obj = ep.load()
1047
+ inst = obj() if isinstance(obj, type) else obj
1048
+ except Exception as exc: # noqa: BLE001 - report, don't crash the gate
1049
+ notes.append(f"skipped {ep.group}:{ep.name} (needs config to instantiate: {exc})")
1050
+ continue
1051
+ out.append((kind, ep.name, inst))
1052
+ return out, notes
1053
+
1054
+
1055
+ def _find_package_dir(package: str) -> str | None:
1056
+ from pathlib import Path
1057
+
1058
+ p = Path("packages") / package
1059
+ return str(p) if (p / "tests").is_dir() else None
1060
+
1061
+
1062
@app.command(name="test-plugin")
def test_plugin(
    package: str = typer.Argument(..., help="Distribution name to gate, e.g. zu-tools."),
    no_unit: bool = typer.Option(False, "--no-unit", help="Skip the plugin's own pytest gate."),
    json_out: bool = typer.Option(False, "--json", help="Emit the full report (gates + findings) as JSON."),
    watch: bool = typer.Option(False, "--watch", help="Stream each attack live as it runs (see it happening)."),
) -> None:
    """Run a plugin package through the test gate: unit · contract · interop ·
    adversarial — the frozen red-team corpus + directed probes, judged by
    out-of-band verdict observers (the attacker never certifies). The container
    gate is the production form, reported when Docker is present. See
    the red-team docs. Exits non-zero if the envelope did not hold.
    """
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    #
    # The gate lives in a separate distribution; without it there is nothing
    # to run, so print the install hint and exit 2.
    try:
        from zu_redteam import run_gate
    except ModuleNotFoundError:
        typer.echo("zu test-plugin needs the gate: pip install zu-redteam", err=True)
        raise typer.Exit(code=2) from None

    # Resolve what the package declares; notes go to stderr regardless of
    # whether any plugin was found.
    discovered, remarks = _resolve_package_plugins(package)
    for remark in remarks:
        typer.echo(f" note: {remark}", err=True)
    if not discovered:
        nothing_found = (
            f"no Zu plugins found for {package!r} — is it installed and does it declare "
            "zu.* entry points?"
        )
        typer.echo(nothing_found, err=True)
        raise typer.Exit(code=2) from None

    # --watch attaches the live trace printer as the gate's event callback.
    if watch:
        from .trace import live_printer  # full scope: local, your own terminal

        on_event = live_printer()
    else:
        on_event = None

    gate_run = run_gate(
        package,
        plugins=discovered,
        pkg_dir=_find_package_dir(package),
        run_unit=not no_unit,
        on_event=on_event,
    )
    report = asyncio.run(gate_run)

    # Emit either the JSON form or the rendered text form of the report.
    if json_out:
        import json

        output = json.dumps(report.as_dict(), indent=2)
    else:
        output = report.render()
    typer.echo(output)

    # The process exit status mirrors the gate verdict: 0 passed, 1 otherwise.
    exit_code = 0 if report.passed else 1
    raise typer.Exit(code=exit_code)
1108
+
1109
+
1110
@app.command()
def plugins() -> None:
    """List every plugin Zu can discover (providers, tools, detectors, ...)."""
    # NOTE: the docstring above is kept verbatim — Typer shows it as the
    # command's --help text.
    #
    # Use the shared process registry so the listing matches what the loop sees
    # (entry points plus anything decorator-registered in-process).
    load_failures = REGISTRY.discover()

    # One line per plugin kind; an em dash stands in for an empty kind.
    for kind in GROUPS:
        found = REGISTRY.names(kind)
        if found:
            listed = ", ".join(found)
        else:
            listed = "—"
        typer.echo(f"{kind:11} {listed}")

    # Load failures are reported on stderr after the listing.
    for failure in load_failures:
        typer.echo(
            f" ! failed to load {failure.kind}:{failure.name} — {failure.error}", err=True
        )
1123
+
1124
+
1125
# Script entry point: run the Typer app when this module is executed directly.
if __name__ == "__main__":  # pragma: no cover
    app()