zu-cli 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. zu_cli-0.2.0/.gitignore +66 -0
  2. zu_cli-0.2.0/PKG-INFO +73 -0
  3. zu_cli-0.2.0/README.md +45 -0
  4. zu_cli-0.2.0/pyproject.toml +56 -0
  5. zu_cli-0.2.0/src/zu_cli/__init__.py +0 -0
  6. zu_cli-0.2.0/src/zu_cli/build.py +111 -0
  7. zu_cli-0.2.0/src/zu_cli/config.py +798 -0
  8. zu_cli-0.2.0/src/zu_cli/construct.py +318 -0
  9. zu_cli-0.2.0/src/zu_cli/construct_sandbox.py +139 -0
  10. zu_cli-0.2.0/src/zu_cli/contribute.py +104 -0
  11. zu_cli-0.2.0/src/zu_cli/demo.py +373 -0
  12. zu_cli-0.2.0/src/zu_cli/deploy.py +207 -0
  13. zu_cli-0.2.0/src/zu_cli/explore.py +93 -0
  14. zu_cli-0.2.0/src/zu_cli/gap_triage.py +130 -0
  15. zu_cli-0.2.0/src/zu_cli/guardrails.py +102 -0
  16. zu_cli-0.2.0/src/zu_cli/harden.py +221 -0
  17. zu_cli-0.2.0/src/zu_cli/main.py +1243 -0
  18. zu_cli-0.2.0/src/zu_cli/mcp_server.py +444 -0
  19. zu_cli-0.2.0/src/zu_cli/observe.py +69 -0
  20. zu_cli-0.2.0/src/zu_cli/offline.py +335 -0
  21. zu_cli-0.2.0/src/zu_cli/sandbox.py +276 -0
  22. zu_cli-0.2.0/src/zu_cli/scaffold.py +116 -0
  23. zu_cli-0.2.0/src/zu_cli/server.py +363 -0
  24. zu_cli-0.2.0/src/zu_cli/trace.py +111 -0
  25. zu_cli-0.2.0/tests/agents/browser-widget/README.md +17 -0
  26. zu_cli-0.2.0/tests/agents/browser-widget/agent.yaml +43 -0
  27. zu_cli-0.2.0/tests/agents/browser-widget/fixtures/capture.json +21 -0
  28. zu_cli-0.2.0/tests/agents/custom-tool/README.md +15 -0
  29. zu_cli-0.2.0/tests/agents/custom-tool/agent.yaml +26 -0
  30. zu_cli-0.2.0/tests/agents/custom-tool/tools/__init__.py +0 -0
  31. zu_cli-0.2.0/tests/agents/custom-tool/tools/greet.py +24 -0
  32. zu_cli-0.2.0/tests/agents/research-pipeline/README.md +25 -0
  33. zu_cli-0.2.0/tests/agents/research-pipeline/pipeline.py +93 -0
  34. zu_cli-0.2.0/tests/fixtures/product.html +59 -0
  35. zu_cli-0.2.0/tests/test_build.py +112 -0
  36. zu_cli-0.2.0/tests/test_config.py +695 -0
  37. zu_cli-0.2.0/tests/test_construct.py +179 -0
  38. zu_cli-0.2.0/tests/test_construct_sandbox.py +156 -0
  39. zu_cli-0.2.0/tests/test_contribute.py +79 -0
  40. zu_cli-0.2.0/tests/test_deploy.py +120 -0
  41. zu_cli-0.2.0/tests/test_e2e_agent.py +107 -0
  42. zu_cli-0.2.0/tests/test_example_agents.py +141 -0
  43. zu_cli-0.2.0/tests/test_explore.py +168 -0
  44. zu_cli-0.2.0/tests/test_gap_offer.py +70 -0
  45. zu_cli-0.2.0/tests/test_gap_triage.py +112 -0
  46. zu_cli-0.2.0/tests/test_guardrails.py +79 -0
  47. zu_cli-0.2.0/tests/test_harden.py +99 -0
  48. zu_cli-0.2.0/tests/test_killer_demo.py +109 -0
  49. zu_cli-0.2.0/tests/test_mcp_server.py +157 -0
  50. zu_cli-0.2.0/tests/test_observe.py +45 -0
  51. zu_cli-0.2.0/tests/test_offline.py +139 -0
  52. zu_cli-0.2.0/tests/test_sandbox.py +159 -0
  53. zu_cli-0.2.0/tests/test_scaffold.py +49 -0
  54. zu_cli-0.2.0/tests/test_schedule.py +56 -0
  55. zu_cli-0.2.0/tests/test_server.py +159 -0
  56. zu_cli-0.2.0/tests/test_test_plugin.py +44 -0
  57. zu_cli-0.2.0/tests/test_trace.py +46 -0
  58. zu_cli-0.2.0/tests/test_track_cli.py +132 -0
@@ -0,0 +1,66 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # uv / venv
10
+ .venv/
11
+ uv.lock.bak
12
+
13
+ # Test / type caches
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+ .coverage
18
+ htmlcov/
19
+
20
+ # Zu runtime artifacts
21
+ *.db
22
+ zu.db
23
+ zu.yaml.local
24
+ zu_review.jsonl
25
+ *.review.jsonl
26
+ # Per-agent cost telemetry ledger — machine-local run history, not source.
27
+ cost.jsonl
28
+ # A recorded replay path is learned per-run and machine-local — regenerated on
29
+ # every successful run, not source. The agent ships; its track does not.
30
+ track.json
31
+ # …except the flagship example ships its track on purpose, as a demo of the
32
+ # record/replay convergence (committed; re-runs show as ordinary modifications).
33
+ !examples/agents/vet-appointment/track.json
34
+
35
+ # Editor / OS
36
+ .idea/
37
+ .vscode/
38
+ .DS_Store
39
+
40
+ # Claude Code local session state
41
+ .claude/
42
+
43
+ # Secrets
44
+ .env
45
+ .env.*
46
+ !.env.example
47
+
48
+ # Microsoft Office temp/lock files
49
+ ~$*
50
+
51
+ # Internal design / strategy docs — kept local, never in the public repo
52
+ *.docx
53
+ *.pdf
54
+ # BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
55
+ # (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
56
+ docs/BUILD.md
57
+
58
+ # Local secret — API key for live validation, never commit
59
+ zu_demo_key.md
60
+ *_key.md
61
+
62
+ # Local PyPI publish token — never commit
63
+ /pypi
64
+
65
+ # Local Discord credentials (bot token / app secrets) — never commit
66
+ /discord
zu_cli-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: zu-cli
3
+ Version: 0.2.0
4
+ Summary: The `zu` command — Agent Production Runtime CLI
5
+ Project-URL: Homepage, https://github.com/k3-mt/zu
6
+ Project-URL: Repository, https://github.com/k3-mt/zu
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: pyyaml
18
+ Requires-Dist: typer
19
+ Requires-Dist: zu-core==0.2.0
20
+ Provides-Extra: mcp
21
+ Requires-Dist: mcp>=1.2; extra == 'mcp'
22
+ Provides-Extra: serve
23
+ Requires-Dist: fastapi>=0.110; extra == 'serve'
24
+ Requires-Dist: uvicorn>=0.27; extra == 'serve'
25
+ Provides-Extra: test
26
+ Requires-Dist: zu-redteam==0.2.0; extra == 'test'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # zu-cli
30
+
31
+ The `zu` command, the HTTP server, and the MCP server — the surfaces you *drive*
32
+ Zu through. This package wires the same runtime path the `import zu` facade uses
33
+ (config in → typed `Result` out), so the CLI, the server, and embedding are one
34
+ runtime, not three.
35
+
36
+ This package registers **no plugins**; it consumes them.
37
+
38
+ ## Commands
39
+
40
+ | Command | What it does |
41
+ |---------|--------------|
42
+ | `zu run agent.yaml` | Run one task; streams a live trace. `--every 5m` for a scheduled worker, `--no-stream` for CI, `--sandboxed` to run contained. |
43
+ | `zu init --template web` | Scaffold a starter `agent.yaml` (`minimal` / `web` / `research`). |
44
+ | `zu demo` | Prove a real run end to end (`--offline` for a scripted self-test). |
45
+ | `zu serve -c agent.yaml` | HTTP service: `POST /run`, `POST /run/stream` (SSE), `GET /healthz`. Needs `[serve]`. |
46
+ | `zu deploy local\|compose\|fly\|render\|dockerfile` · `zu pack` | Turn a config/bundle into a running/deployable service or image. |
47
+ | `zu mcp` | An MCP stdio server so coding agents (Claude Code, Cursor, Codex) drive Zu — design/run, **construct**, explore, and report capability gaps. Needs `[mcp]`. |
48
+ | `zu plugins` · `zu test-plugin <pkg>` | List discovered plugins · run a plugin through the test gate (see `zu-redteam`). |
49
+
50
+ **The construction sequence** — `task + site → production agent`, frontier spend bounded to one live capture (see [`docs/agent-construction-sequence.md`](../../docs/agent-construction-sequence.md)):
51
+
52
+ | Command | What it does |
53
+ |---------|--------------|
54
+ | `zu capture <agent>` | Drive the target **once** (live) → `fixtures/capture.json`. The one live spend. |
55
+ | `zu run <agent> --offline` | Replay the captured bundle at **~$0** (no model/network) — the free construction inner loop. |
56
+ | `zu build <agent>` | The offline spine: build → record track → harden, gated on resilience. |
57
+ | `zu harden <agent>` | Score a captured path against perturbed fixtures (offline brittleness audit + resilience). |
58
+ | `zu construct <agent> [--check\|--sandboxed]` | The anti-hardcode readiness gate (G1–G3) / the autonomous, contained construction loop. |
59
+
60
+ ## Modules
61
+
62
+ `main.py` (the Typer app), `config.py` (config/task loading + assembly + shared coercion
63
+ helpers), `server.py` (FastAPI), `mcp_server.py`, `demo.py`, `deploy.py`, `scaffold.py`,
64
+ `trace.py` (the live train-of-thought formatter). The construction surface:
65
+ `offline.py` (replay + `FixtureSessionBackend`), `build.py`, `harden.py`, `guardrails.py`,
66
+ `construct.py` (the meta-agent driver + `LiveStrategist`), `construct_sandbox.py` (contained
67
+ construction), `explore.py` (harness-driven pathfinding), `contribute.py` (capability-gap
68
+ issues).
69
+
70
+ ## Tests
71
+
72
+ `uv run pytest packages/zu-cli` — runs offline. The fixture agents the suite drives are
73
+ kept in [`tests/agents/`](tests/agents/) (the sole shipped example is `examples/agents/vet-appointment/`).
zu_cli-0.2.0/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # zu-cli
2
+
3
+ The `zu` command, the HTTP server, and the MCP server — the surfaces you *drive*
4
+ Zu through. This package wires the same runtime path the `import zu` facade uses
5
+ (config in → typed `Result` out), so the CLI, the server, and embedding are one
6
+ runtime, not three.
7
+
8
+ This package registers **no plugins**; it consumes them.
9
+
10
+ ## Commands
11
+
12
+ | Command | What it does |
13
+ |---------|--------------|
14
+ | `zu run agent.yaml` | Run one task; streams a live trace. `--every 5m` for a scheduled worker, `--no-stream` for CI, `--sandboxed` to run contained. |
15
+ | `zu init --template web` | Scaffold a starter `agent.yaml` (`minimal` / `web` / `research`). |
16
+ | `zu demo` | Prove a real run end to end (`--offline` for a scripted self-test). |
17
+ | `zu serve -c agent.yaml` | HTTP service: `POST /run`, `POST /run/stream` (SSE), `GET /healthz`. Needs `[serve]`. |
18
+ | `zu deploy local\|compose\|fly\|render\|dockerfile` · `zu pack` | Turn a config/bundle into a running/deployable service or image. |
19
+ | `zu mcp` | An MCP stdio server so coding agents (Claude Code, Cursor, Codex) drive Zu — design/run, **construct**, explore, and report capability gaps. Needs `[mcp]`. |
20
+ | `zu plugins` · `zu test-plugin <pkg>` | List discovered plugins · run a plugin through the test gate (see `zu-redteam`). |
21
+
22
+ **The construction sequence** — `task + site → production agent`, frontier spend bounded to one live capture (see [`docs/agent-construction-sequence.md`](../../docs/agent-construction-sequence.md)):
23
+
24
+ | Command | What it does |
25
+ |---------|--------------|
26
+ | `zu capture <agent>` | Drive the target **once** (live) → `fixtures/capture.json`. The one live spend. |
27
+ | `zu run <agent> --offline` | Replay the captured bundle at **~$0** (no model/network) — the free construction inner loop. |
28
+ | `zu build <agent>` | The offline spine: build → record track → harden, gated on resilience. |
29
+ | `zu harden <agent>` | Score a captured path against perturbed fixtures (offline brittleness audit + resilience). |
30
+ | `zu construct <agent> [--check\|--sandboxed]` | The anti-hardcode readiness gate (G1–G3) / the autonomous, contained construction loop. |
31
+
32
+ ## Modules
33
+
34
+ `main.py` (the Typer app), `config.py` (config/task loading + assembly + shared coercion
35
+ helpers), `server.py` (FastAPI), `mcp_server.py`, `demo.py`, `deploy.py`, `scaffold.py`,
36
+ `trace.py` (the live train-of-thought formatter). The construction surface:
37
+ `offline.py` (replay + `FixtureSessionBackend`), `build.py`, `harden.py`, `guardrails.py`,
38
+ `construct.py` (the meta-agent driver + `LiveStrategist`), `construct_sandbox.py` (contained
39
+ construction), `explore.py` (harness-driven pathfinding), `contribute.py` (capability-gap
40
+ issues).
41
+
42
+ ## Tests
43
+
44
+ `uv run pytest packages/zu-cli` — runs offline. The fixture agents the suite drives are
45
+ kept in [`tests/agents/`](tests/agents/) (the sole shipped example is `examples/agents/vet-appointment/`).
@@ -0,0 +1,56 @@
1
+ [project]
2
+ name = "zu-cli"
3
+ version = "0.2.0"
4
+ description = "The `zu` command — Agent Production Runtime CLI"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "Apache-2.0"
8
+ classifiers = [
9
+ "Development Status :: 4 - Beta",
10
+ "Intended Audience :: Developers",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Programming Language :: Python :: 3",
13
+ "Programming Language :: Python :: 3.11",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Software Development :: Libraries :: Application Frameworks",
16
+ "Typing :: Typed",
17
+ ]
18
+ # The lean engine: the `zu` command + config, and nothing but the core. Plugins
19
+ # are opt-in — installed as separate packages (zu-tools, zu-providers, …) or
20
+ # pulled by the `zu-runtime` bundle and its extras. The CLI discovers whatever
21
+ # is installed at runtime, so it never forces a plugin (or its deps) on a user.
22
+ dependencies = [
23
+ "zu-core==0.2.0",
24
+ "typer",
25
+ "pyyaml",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ # `zu serve` — the HTTP wrapper. Kept optional so the CLI/library never force a
30
+ # web framework on a user who only embeds or runs one-shot tasks.
31
+ serve = ["fastapi>=0.110", "uvicorn>=0.27"]
32
+ # `zu mcp` — the MCP server, so coding agents (Claude Code, Cursor, …) drive Zu.
33
+ mcp = ["mcp>=1.2"]
34
+ # `zu test-plugin` — the plugin-test gate + adversarial red team. Optional: a
35
+ # contributor/CI tool, not needed to run an agent.
36
+ test = ["zu-redteam==0.2.0"]
37
+
38
+ [project.urls]
39
+ Homepage = "https://github.com/k3-mt/zu"
40
+ Repository = "https://github.com/k3-mt/zu"
41
+
42
+ [project.scripts]
43
+ zu = "zu_cli.main:app"
44
+ # In-container entrypoint for the whole-agent-in-container form: the sandbox
45
+ # launcher execs this inside the hardened container (see zu_cli.sandbox).
46
+ zu-run-contained = "zu_cli.sandbox:run_contained_from_env"
47
+ # In-container entrypoint for autonomous construction: the launcher execs this in
48
+ # the same hardened box to run the construct() loop contained (see construct_sandbox).
49
+ zu-construct-contained = "zu_cli.construct_sandbox:construct_contained_from_env"
50
+
51
+ [build-system]
52
+ requires = ["hatchling"]
53
+ build-backend = "hatchling.build"
54
+
55
+ [tool.hatch.build.targets.wheel]
56
+ packages = ["src/zu_cli"]
File without changes
@@ -0,0 +1,111 @@
1
+ """The construction spine — chain the OFFLINE stages of the sequence into one run.
2
+
3
+ ``zu build`` composes what the earlier increments shipped: replay the captured bundle
4
+ offline (stage 3), project the resilient track from that clean run (stage 4), and score
5
+ it against perturbed fixtures (stage 5) — gating the track on the resilience score. The
6
+ output is a production-ready, hardened ``track.json`` next to the agent, produced at $0:
7
+ no model, no network.
8
+
9
+ The two LIVE stages and promotion are deliberately NOT in this spine — they need keys,
10
+ network, or a registry push, and are left behind explicit seams so the cheap, testable
11
+ core stands on its own:
12
+
13
+ * **Stage 2 (capture)** is the one live step; ``zu build`` requires its output
14
+ (``fixtures/capture.json``) and points at ``zu capture`` when it is missing.
15
+ * **Stage 6 (canary)** — one live validation run before promotion — is the live lane,
16
+ guarded by ``_canary`` raising ``NotImplementedError`` so ``--with-canary`` fails
17
+ loudly rather than pretending. It is the next increment.
18
+ * **Stage 7 (promote)** — ``zu pack`` / ``zu deploy`` — is left to its existing commands;
19
+ ``zu build`` prints them as the next step.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any
27
+
28
+ from .harden import HardenReport, harden
29
+ from .offline import Bundle, replay_offline
30
+
31
+
32
@dataclass
class StageResult:
    """Outcome of a single spine stage, plus a one-line detail used in the summary."""

    # Stage identifier (``build_offline`` records "build", "track" and "harden").
    name: str
    # One of "ok" | "failed" | "skipped".
    status: str
    # Human-readable one-liner describing what happened in the stage.
    detail: str
39
+
40
+
41
@dataclass
class BuildReport:
    """Accumulated result of one spine run.

    Holds the stage outcomes recorded so far, the path of the written track (``None``
    until one is saved), and the harden report (``None`` until that stage has run).
    """

    stages: list[StageResult] = field(default_factory=list)
    track_path: str | None = None
    harden: HardenReport | None = None

    @property
    def ok(self) -> bool:
        """Whether no recorded stage has status ``"failed"``.

        A report with no stages at all therefore counts as ok.
        """
        return not any(stage.status == "failed" for stage in self.stages)

    def _add(self, name: str, status: str, detail: str) -> StageResult:
        """Append a new stage outcome to ``stages`` and return the created entry."""
        entry = StageResult(name, status, detail)
        self.stages.append(entry)
        return entry
55
+
56
+
57
+ def _canary(spec: Any, cfg: Any) -> None:
58
+ """Stage 6 — the live canary. The seam for the live lane: one real run guarding
59
+ fixture drift before promotion. Not built here (needs keys + network)."""
60
+ raise NotImplementedError(
61
+ "the live canary (stage 6) is the live lane — it needs keys + network and is the "
62
+ "next increment. Validate manually for now with `zu run <agent>` (live), then "
63
+ "promote with `zu pack` / `zu deploy`."
64
+ )
65
+
66
+
67
async def build_offline(
    spec: Any, cfg: Any, agent_dir: str | Path, bundle: Bundle, *, min_score: float = 1.0,
) -> BuildReport:
    """Run the offline spine (build → record track → harden) and write the track.

    The stages run in order and an unsuccessful offline replay stops the run before
    anything is tracked. A track that misses the resilience gate has already been
    saved to ``<agent_dir>/track.json``; the report is flagged failed so promotion
    can be held.

    Args:
        spec: The task spec; its ``query`` is recorded as the track's task.
        cfg: The agent config, passed through to the replay and harden stages.
        agent_dir: Directory next to which ``track.json`` is written.
        bundle: The captured bundle to replay; its ``model`` is recorded on the track.
        min_score: Minimum resilience score the harden stage must reach.

    Returns:
        A ``BuildReport`` with one entry per stage that ran.
    """
    from zu_core.contracts import Status
    from zu_core.track import record_track

    report = BuildReport()

    # Stage 3 — build offline (the keystone). Without a clean replay nothing else runs.
    result, events = await replay_offline(spec, cfg, bundle)
    if result.status is not Status.SUCCESS:
        failure = f"offline run did not succeed ({result.status.value}: {result.reason})"
        report._add("build", "failed", failure)
        return report
    report._add("build", "ok", f"offline run succeeded → {result.value}")

    # Stage 4 — project the track from the clean offline run and persist it.
    track = record_track(events, task=spec.query, model=bundle.model)
    track_path = str(Path(agent_dir) / "track.json")
    track.save(track_path)
    report.track_path = track_path

    # Summarise the distinct tiers the steps touched for the one-line detail.
    distinct_tiers = sorted({step.tier for step in track.steps})
    if len(distinct_tiers) > 1:
        tier_summary = f"tiers {min(distinct_tiers)}→{max(distinct_tiers)}"
    elif distinct_tiers:
        tier_summary = f"tier {distinct_tiers[0]}"
    else:
        tier_summary = "no tools"
    report._add(
        "track", "ok",
        f"recorded {len(track.steps)} steps ({tier_summary}) → {track_path}",
    )

    # Stage 5 — harden: score the track against perturbed fixtures and gate on it.
    hardening = await harden(spec, cfg, bundle)
    report.harden = hardening
    resilience = hardening.resilience
    if not hardening.grounding_load_bearing:
        # The grounding check takes precedence over the numeric score.
        verdict = "failed"
        note = ("a value-deletion control passed — grounding is not gating; the "
                "resilience score is unreliable")
    elif resilience < min_score:
        verdict = "failed"
        note = (f"resilience {resilience:.0%} below --min-score {min_score:.0%} "
                f"({len(hardening.findings)} brittle step(s) to fix)")
    else:
        verdict = "ok"
        note = f"resilience {resilience:.0%}; {len(hardening.findings)} brittle step(s) noted"
    report._add("harden", verdict, note)
    return report