zu-cli 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli-0.2.0/.gitignore +66 -0
- zu_cli-0.2.0/PKG-INFO +73 -0
- zu_cli-0.2.0/README.md +45 -0
- zu_cli-0.2.0/pyproject.toml +56 -0
- zu_cli-0.2.0/src/zu_cli/__init__.py +0 -0
- zu_cli-0.2.0/src/zu_cli/build.py +111 -0
- zu_cli-0.2.0/src/zu_cli/config.py +798 -0
- zu_cli-0.2.0/src/zu_cli/construct.py +318 -0
- zu_cli-0.2.0/src/zu_cli/construct_sandbox.py +139 -0
- zu_cli-0.2.0/src/zu_cli/contribute.py +104 -0
- zu_cli-0.2.0/src/zu_cli/demo.py +373 -0
- zu_cli-0.2.0/src/zu_cli/deploy.py +207 -0
- zu_cli-0.2.0/src/zu_cli/explore.py +93 -0
- zu_cli-0.2.0/src/zu_cli/gap_triage.py +130 -0
- zu_cli-0.2.0/src/zu_cli/guardrails.py +102 -0
- zu_cli-0.2.0/src/zu_cli/harden.py +221 -0
- zu_cli-0.2.0/src/zu_cli/main.py +1243 -0
- zu_cli-0.2.0/src/zu_cli/mcp_server.py +444 -0
- zu_cli-0.2.0/src/zu_cli/observe.py +69 -0
- zu_cli-0.2.0/src/zu_cli/offline.py +335 -0
- zu_cli-0.2.0/src/zu_cli/sandbox.py +276 -0
- zu_cli-0.2.0/src/zu_cli/scaffold.py +116 -0
- zu_cli-0.2.0/src/zu_cli/server.py +363 -0
- zu_cli-0.2.0/src/zu_cli/trace.py +111 -0
- zu_cli-0.2.0/tests/agents/browser-widget/README.md +17 -0
- zu_cli-0.2.0/tests/agents/browser-widget/agent.yaml +43 -0
- zu_cli-0.2.0/tests/agents/browser-widget/fixtures/capture.json +21 -0
- zu_cli-0.2.0/tests/agents/custom-tool/README.md +15 -0
- zu_cli-0.2.0/tests/agents/custom-tool/agent.yaml +26 -0
- zu_cli-0.2.0/tests/agents/custom-tool/tools/__init__.py +0 -0
- zu_cli-0.2.0/tests/agents/custom-tool/tools/greet.py +24 -0
- zu_cli-0.2.0/tests/agents/research-pipeline/README.md +25 -0
- zu_cli-0.2.0/tests/agents/research-pipeline/pipeline.py +93 -0
- zu_cli-0.2.0/tests/fixtures/product.html +59 -0
- zu_cli-0.2.0/tests/test_build.py +112 -0
- zu_cli-0.2.0/tests/test_config.py +695 -0
- zu_cli-0.2.0/tests/test_construct.py +179 -0
- zu_cli-0.2.0/tests/test_construct_sandbox.py +156 -0
- zu_cli-0.2.0/tests/test_contribute.py +79 -0
- zu_cli-0.2.0/tests/test_deploy.py +120 -0
- zu_cli-0.2.0/tests/test_e2e_agent.py +107 -0
- zu_cli-0.2.0/tests/test_example_agents.py +141 -0
- zu_cli-0.2.0/tests/test_explore.py +168 -0
- zu_cli-0.2.0/tests/test_gap_offer.py +70 -0
- zu_cli-0.2.0/tests/test_gap_triage.py +112 -0
- zu_cli-0.2.0/tests/test_guardrails.py +79 -0
- zu_cli-0.2.0/tests/test_harden.py +99 -0
- zu_cli-0.2.0/tests/test_killer_demo.py +109 -0
- zu_cli-0.2.0/tests/test_mcp_server.py +157 -0
- zu_cli-0.2.0/tests/test_observe.py +45 -0
- zu_cli-0.2.0/tests/test_offline.py +139 -0
- zu_cli-0.2.0/tests/test_sandbox.py +159 -0
- zu_cli-0.2.0/tests/test_scaffold.py +49 -0
- zu_cli-0.2.0/tests/test_schedule.py +56 -0
- zu_cli-0.2.0/tests/test_server.py +159 -0
- zu_cli-0.2.0/tests/test_test_plugin.py +44 -0
- zu_cli-0.2.0/tests/test_trace.py +46 -0
- zu_cli-0.2.0/tests/test_track_cli.py +132 -0
zu_cli-0.2.0/.gitignore
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
|
|
9
|
+
# uv / venv
|
|
10
|
+
.venv/
|
|
11
|
+
uv.lock.bak
|
|
12
|
+
|
|
13
|
+
# Test / type caches
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.coverage
|
|
18
|
+
htmlcov/
|
|
19
|
+
|
|
20
|
+
# Zu runtime artifacts
|
|
21
|
+
*.db
|
|
22
|
+
zu.db
|
|
23
|
+
zu.yaml.local
|
|
24
|
+
zu_review.jsonl
|
|
25
|
+
*.review.jsonl
|
|
26
|
+
# Per-agent cost telemetry ledger — machine-local run history, not source.
|
|
27
|
+
cost.jsonl
|
|
28
|
+
# A recorded replay path is learned per-run and machine-local — regenerated on
|
|
29
|
+
# every successful run, not source. The agent ships; its track does not.
|
|
30
|
+
track.json
|
|
31
|
+
# …except the flagship example ships its track on purpose, as a demo of the
|
|
32
|
+
# record/replay convergence (committed; re-runs show as ordinary modifications).
|
|
33
|
+
!examples/agents/vet-appointment/track.json
|
|
34
|
+
|
|
35
|
+
# Editor / OS
|
|
36
|
+
.idea/
|
|
37
|
+
.vscode/
|
|
38
|
+
.DS_Store
|
|
39
|
+
|
|
40
|
+
# Claude Code local session state
|
|
41
|
+
.claude/
|
|
42
|
+
|
|
43
|
+
# Secrets
|
|
44
|
+
.env
|
|
45
|
+
.env.*
|
|
46
|
+
!.env.example
|
|
47
|
+
|
|
48
|
+
# Microsoft Office temp/lock files
|
|
49
|
+
~$*
|
|
50
|
+
|
|
51
|
+
# Internal design / strategy docs — kept local, never in the public repo
|
|
52
|
+
*.docx
|
|
53
|
+
*.pdf
|
|
54
|
+
# BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
|
|
55
|
+
# (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
|
|
56
|
+
docs/BUILD.md
|
|
57
|
+
|
|
58
|
+
# Local secret — API key for live validation, never commit
|
|
59
|
+
zu_demo_key.md
|
|
60
|
+
*_key.md
|
|
61
|
+
|
|
62
|
+
# Local PyPI publish token — never commit
|
|
63
|
+
/pypi
|
|
64
|
+
|
|
65
|
+
# Local Discord credentials (bot token / app secrets) — never commit
|
|
66
|
+
/discord
|
zu_cli-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zu-cli
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: The `zu` command — Agent Production Runtime CLI
|
|
5
|
+
Project-URL: Homepage, https://github.com/k3-mt/zu
|
|
6
|
+
Project-URL: Repository, https://github.com/k3-mt/zu
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: typer
|
|
19
|
+
Requires-Dist: zu-core==0.2.0
|
|
20
|
+
Provides-Extra: mcp
|
|
21
|
+
Requires-Dist: mcp>=1.2; extra == 'mcp'
|
|
22
|
+
Provides-Extra: serve
|
|
23
|
+
Requires-Dist: fastapi>=0.110; extra == 'serve'
|
|
24
|
+
Requires-Dist: uvicorn>=0.27; extra == 'serve'
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: zu-redteam==0.2.0; extra == 'test'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# zu-cli
|
|
30
|
+
|
|
31
|
+
The `zu` command, the HTTP server, and the MCP server — the surfaces you *drive*
|
|
32
|
+
Zu through. This package wires the same runtime path the `import zu` facade uses
|
|
33
|
+
(config in → typed `Result` out), so the CLI, the server, and embedding are one
|
|
34
|
+
runtime, not three.
|
|
35
|
+
|
|
36
|
+
This package registers **no plugins**; it consumes them.
|
|
37
|
+
|
|
38
|
+
## Commands
|
|
39
|
+
|
|
40
|
+
| Command | What it does |
|
|
41
|
+
|---------|--------------|
|
|
42
|
+
| `zu run agent.yaml` | Run one task; streams a live trace. `--every 5m` for a scheduled worker, `--no-stream` for CI, `--sandboxed` to run contained. |
|
|
43
|
+
| `zu init --template web` | Scaffold a starter `agent.yaml` (`minimal` / `web` / `research`). |
|
|
44
|
+
| `zu demo` | Prove a real run end to end (`--offline` for a scripted self-test). |
|
|
45
|
+
| `zu serve -c agent.yaml` | HTTP service: `POST /run`, `POST /run/stream` (SSE), `GET /healthz`. Needs `[serve]`. |
|
|
46
|
+
| `zu deploy local\|compose\|fly\|render\|dockerfile` · `zu pack` | Turn a config/bundle into a running/deployable service or image. |
|
|
47
|
+
| `zu mcp` | An MCP stdio server so coding agents (Claude Code, Cursor, Codex) drive Zu — design/run, **construct**, explore, and report capability gaps. Needs `[mcp]`. |
|
|
48
|
+
| `zu plugins` · `zu test-plugin <pkg>` | List discovered plugins · run a plugin through the test gate (see `zu-redteam`). |
|
|
49
|
+
|
|
50
|
+
**The construction sequence** — `task + site → production agent`, frontier spend bounded to one live capture (see [`docs/agent-construction-sequence.md`](../../docs/agent-construction-sequence.md)):
|
|
51
|
+
|
|
52
|
+
| Command | What it does |
|
|
53
|
+
|---------|--------------|
|
|
54
|
+
| `zu capture <agent>` | Drive the target **once** (live) → `fixtures/capture.json`. The one live spend. |
|
|
55
|
+
| `zu run <agent> --offline` | Replay the captured bundle at **~$0** (no model/network) — the free construction inner loop. |
|
|
56
|
+
| `zu build <agent>` | The offline spine: build → record track → harden, gated on resilience. |
|
|
57
|
+
| `zu harden <agent>` | Score a captured path against perturbed fixtures (offline brittleness audit + resilience). |
|
|
58
|
+
| `zu construct <agent> [--check\|--sandboxed]` | The anti-hardcode readiness gate (G1–G3) / the autonomous, contained construction loop. |
|
|
59
|
+
|
|
60
|
+
## Modules
|
|
61
|
+
|
|
62
|
+
`main.py` (the Typer app), `config.py` (config/task loading + assembly + shared coercion
|
|
63
|
+
helpers), `server.py` (FastAPI), `mcp_server.py`, `demo.py`, `deploy.py`, `scaffold.py`,
|
|
64
|
+
`trace.py` (the live train-of-thought formatter). The construction surface:
|
|
65
|
+
`offline.py` (replay + `FixtureSessionBackend`), `build.py`, `harden.py`, `guardrails.py`,
|
|
66
|
+
`construct.py` (the meta-agent driver + `LiveStrategist`), `construct_sandbox.py` (contained
|
|
67
|
+
construction), `explore.py` (harness-driven pathfinding), `contribute.py` (capability-gap
|
|
68
|
+
issues).
|
|
69
|
+
|
|
70
|
+
## Tests
|
|
71
|
+
|
|
72
|
+
`uv run pytest packages/zu-cli` — runs offline. The fixture agents the suite drives are located in
|
|
73
|
+
[`tests/agents/`](tests/agents/) (the sole shipped example is `examples/agents/vet-appointment/`).
|
zu_cli-0.2.0/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# zu-cli
|
|
2
|
+
|
|
3
|
+
The `zu` command, the HTTP server, and the MCP server — the surfaces you *drive*
|
|
4
|
+
Zu through. This package wires the same runtime path the `import zu` facade uses
|
|
5
|
+
(config in → typed `Result` out), so the CLI, the server, and embedding are one
|
|
6
|
+
runtime, not three.
|
|
7
|
+
|
|
8
|
+
This package registers **no plugins**; it consumes them.
|
|
9
|
+
|
|
10
|
+
## Commands
|
|
11
|
+
|
|
12
|
+
| Command | What it does |
|
|
13
|
+
|---------|--------------|
|
|
14
|
+
| `zu run agent.yaml` | Run one task; streams a live trace. `--every 5m` for a scheduled worker, `--no-stream` for CI, `--sandboxed` to run contained. |
|
|
15
|
+
| `zu init --template web` | Scaffold a starter `agent.yaml` (`minimal` / `web` / `research`). |
|
|
16
|
+
| `zu demo` | Prove a real run end to end (`--offline` for a scripted self-test). |
|
|
17
|
+
| `zu serve -c agent.yaml` | HTTP service: `POST /run`, `POST /run/stream` (SSE), `GET /healthz`. Needs `[serve]`. |
|
|
18
|
+
| `zu deploy local\|compose\|fly\|render\|dockerfile` · `zu pack` | Turn a config/bundle into a running/deployable service or image. |
|
|
19
|
+
| `zu mcp` | An MCP stdio server so coding agents (Claude Code, Cursor, Codex) drive Zu — design/run, **construct**, explore, and report capability gaps. Needs `[mcp]`. |
|
|
20
|
+
| `zu plugins` · `zu test-plugin <pkg>` | List discovered plugins · run a plugin through the test gate (see `zu-redteam`). |
|
|
21
|
+
|
|
22
|
+
**The construction sequence** — `task + site → production agent`, frontier spend bounded to one live capture (see [`docs/agent-construction-sequence.md`](../../docs/agent-construction-sequence.md)):
|
|
23
|
+
|
|
24
|
+
| Command | What it does |
|
|
25
|
+
|---------|--------------|
|
|
26
|
+
| `zu capture <agent>` | Drive the target **once** (live) → `fixtures/capture.json`. The one live spend. |
|
|
27
|
+
| `zu run <agent> --offline` | Replay the captured bundle at **~$0** (no model/network) — the free construction inner loop. |
|
|
28
|
+
| `zu build <agent>` | The offline spine: build → record track → harden, gated on resilience. |
|
|
29
|
+
| `zu harden <agent>` | Score a captured path against perturbed fixtures (offline brittleness audit + resilience). |
|
|
30
|
+
| `zu construct <agent> [--check\|--sandboxed]` | The anti-hardcode readiness gate (G1–G3) / the autonomous, contained construction loop. |
|
|
31
|
+
|
|
32
|
+
## Modules
|
|
33
|
+
|
|
34
|
+
`main.py` (the Typer app), `config.py` (config/task loading + assembly + shared coercion
|
|
35
|
+
helpers), `server.py` (FastAPI), `mcp_server.py`, `demo.py`, `deploy.py`, `scaffold.py`,
|
|
36
|
+
`trace.py` (the live train-of-thought formatter). The construction surface:
|
|
37
|
+
`offline.py` (replay + `FixtureSessionBackend`), `build.py`, `harden.py`, `guardrails.py`,
|
|
38
|
+
`construct.py` (the meta-agent driver + `LiveStrategist`), `construct_sandbox.py` (contained
|
|
39
|
+
construction), `explore.py` (harness-driven pathfinding), `contribute.py` (capability-gap
|
|
40
|
+
issues).
|
|
41
|
+
|
|
42
|
+
## Tests
|
|
43
|
+
|
|
44
|
+
`uv run pytest packages/zu-cli` — runs offline. The fixture agents the suite drives are located in
|
|
45
|
+
[`tests/agents/`](tests/agents/) (the sole shipped example is `examples/agents/vet-appointment/`).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "zu-cli"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "The `zu` command — Agent Production Runtime CLI"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Development Status :: 4 - Beta",
|
|
10
|
+
"Intended Audience :: Developers",
|
|
11
|
+
"License :: OSI Approved :: Apache Software License",
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: Python :: 3.11",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
16
|
+
"Typing :: Typed",
|
|
17
|
+
]
|
|
18
|
+
# The lean engine: the `zu` command + config, and nothing but the core. Plugins
|
|
19
|
+
# are opt-in — installed as separate packages (zu-tools, zu-providers, …) or
|
|
20
|
+
# pulled by the `zu-runtime` bundle and its extras. The CLI discovers whatever
|
|
21
|
+
# is installed at runtime, so it never forces a plugin (or its deps) on a user.
|
|
22
|
+
dependencies = [
|
|
23
|
+
"zu-core==0.2.0",
|
|
24
|
+
"typer",
|
|
25
|
+
"pyyaml",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
# `zu serve` — the HTTP wrapper. Kept optional so the CLI/library never force a
|
|
30
|
+
# web framework on a user who only embeds or runs one-shot tasks.
|
|
31
|
+
serve = ["fastapi>=0.110", "uvicorn>=0.27"]
|
|
32
|
+
# `zu mcp` — the MCP server, so coding agents (Claude Code, Cursor, …) drive Zu.
|
|
33
|
+
mcp = ["mcp>=1.2"]
|
|
34
|
+
# `zu test-plugin` — the plugin-test gate + adversarial red team. Optional: a
|
|
35
|
+
# contributor/CI tool, not needed to run an agent.
|
|
36
|
+
test = ["zu-redteam==0.2.0"]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Homepage = "https://github.com/k3-mt/zu"
|
|
40
|
+
Repository = "https://github.com/k3-mt/zu"
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
zu = "zu_cli.main:app"
|
|
44
|
+
# In-container entrypoint for the whole-agent-in-container form: the sandbox
|
|
45
|
+
# launcher execs this inside the hardened container (see zu_cli.sandbox).
|
|
46
|
+
zu-run-contained = "zu_cli.sandbox:run_contained_from_env"
|
|
47
|
+
# In-container entrypoint for autonomous construction: the launcher execs this in
|
|
48
|
+
# the same hardened box to run the construct() loop contained (see construct_sandbox).
|
|
49
|
+
zu-construct-contained = "zu_cli.construct_sandbox:construct_contained_from_env"
|
|
50
|
+
|
|
51
|
+
[build-system]
|
|
52
|
+
requires = ["hatchling"]
|
|
53
|
+
build-backend = "hatchling.build"
|
|
54
|
+
|
|
55
|
+
[tool.hatch.build.targets.wheel]
|
|
56
|
+
packages = ["src/zu_cli"]
|
|
File without changes
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""The construction spine — chain the OFFLINE stages of the sequence into one run.
|
|
2
|
+
|
|
3
|
+
``zu build`` composes what the earlier increments shipped: replay the captured bundle
|
|
4
|
+
offline (stage 3), project the resilient track from that clean run (stage 4), and score
|
|
5
|
+
it against perturbed fixtures (stage 5) — gating the track on the resilience score. The
|
|
6
|
+
output is a production-ready, hardened ``track.json`` next to the agent, produced at $0:
|
|
7
|
+
no model, no network.
|
|
8
|
+
|
|
9
|
+
The two LIVE stages and promotion are deliberately NOT in this spine — they need keys,
|
|
10
|
+
network, or a registry push, and are left behind explicit seams so the cheap, testable
|
|
11
|
+
core stands on its own:
|
|
12
|
+
|
|
13
|
+
* **Stage 2 (capture)** is the one live step; ``zu build`` requires its output
|
|
14
|
+
(``fixtures/capture.json``) and points at ``zu capture`` when it is missing.
|
|
15
|
+
* **Stage 6 (canary)** — one live validation run before promotion — is the live lane,
|
|
16
|
+
guarded by ``_canary`` raising ``NotImplementedError`` so ``--with-canary`` fails
|
|
17
|
+
loudly rather than pretending. It is the next increment.
|
|
18
|
+
* **Stage 7 (promote)** — ``zu pack`` / ``zu deploy`` — is left to its existing commands;
|
|
19
|
+
``zu build`` prints them as the next step.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from .harden import HardenReport, harden
|
|
29
|
+
from .offline import Bundle, replay_offline
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class StageResult:
    """Outcome of a single spine stage, plus the one-line detail shown in the summary."""

    # Stage label; build_offline records "build", "track" and "harden".
    name: str
    # One of "ok", "failed" or "skipped".
    status: str
    # Human-readable one-liner describing what happened in the stage.
    detail: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class BuildReport:
    """Running record of a spine run: the stages executed so far and their artifacts."""

    # Stage outcomes, in the order they were recorded.
    stages: list[StageResult] = field(default_factory=list)
    # Where the recorded track was written, once the track stage has run.
    track_path: str | None = None
    # The harden stage's report, once the harden stage has run.
    harden: HardenReport | None = None

    @property
    def ok(self) -> bool:
        """Whether no recorded stage has failed (an empty report counts as ok)."""
        return not any(stage.status == "failed" for stage in self.stages)

    def _add(self, name: str, status: str, detail: str) -> StageResult:
        """Record one stage outcome on this report and return the new entry."""
        entry = StageResult(name=name, status=status, detail=detail)
        self.stages.append(entry)
        return entry
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _canary(spec: Any, cfg: Any) -> None:
    """Stage 6 seam: the live canary run that would guard fixture drift before promotion.

    Not implemented in this module (it needs keys + network). ``spec`` and ``cfg``
    are accepted but unused; every call raises.

    Raises:
        NotImplementedError: always, with guidance on validating manually.
    """
    guidance = (
        "the live canary (stage 6) is the live lane — it needs keys + network and is the "
        "next increment. Validate manually for now with `zu run <agent>` (live), then "
        "promote with `zu pack` / `zu deploy`."
    )
    raise NotImplementedError(guidance)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
async def build_offline(
    spec: Any, cfg: Any, agent_dir: str | Path, bundle: Bundle, *, min_score: float = 1.0,
) -> BuildReport:
    """Chain the offline stages (build, record track, harden) and report on each.

    The stages gate one another. If the offline replay does not succeed, nothing
    else runs and no track is written. Otherwise the track is saved to
    ``<agent_dir>/track.json`` *before* hardening, so it stays on disk even when the
    harden stage is marked failed; the failed stage is what holds promotion.

    Args:
        spec: Task spec, handed to ``replay_offline`` and ``harden``; its ``query``
            is recorded as the track's task.
        cfg: Config, handed to ``replay_offline`` and ``harden``.
        agent_dir: Directory that receives ``track.json``.
        bundle: Captured bundle to replay; its ``model`` is recorded on the track.
        min_score: Resilience threshold the harden stage must reach to pass.

    Returns:
        A ``BuildReport`` with one entry per stage that ran, plus ``track_path`` and
        ``harden`` filled in once those stages have been reached.
    """
    from zu_core.contracts import Status
    from zu_core.track import record_track

    report = BuildReport()

    # Stage 3 — offline build. Everything downstream requires a clean replay.
    outcome, run_events = await replay_offline(spec, cfg, bundle)
    if outcome.status is not Status.SUCCESS:
        failure = f"offline run did not succeed ({outcome.status.value}: {outcome.reason})"
        report._add("build", "failed", failure)
        return report
    report._add("build", "ok", f"offline run succeeded → {outcome.value}")

    # Stage 4 — project the track from the clean run and persist it beside the agent.
    track = record_track(run_events, task=spec.query, model=bundle.model)
    destination = str(Path(agent_dir) / "track.json")
    track.save(destination)
    report.track_path = destination

    # Summarise which tiers the steps touched: a range, a single tier, or none at all.
    distinct_tiers = sorted({step.tier for step in track.steps})
    if len(distinct_tiers) > 1:
        tier_summary = f"tiers {distinct_tiers[0]}→{distinct_tiers[-1]}"
    elif distinct_tiers:
        tier_summary = f"tier {distinct_tiers[0]}"
    else:
        tier_summary = "no tools"
    report._add(
        "track", "ok",
        f"recorded {len(track.steps)} steps ({tier_summary}) → {destination}",
    )

    # Stage 5 — harden: score against perturbed fixtures, then gate on the result.
    audit = await harden(spec, cfg, bundle)
    report.harden = audit
    resilience = audit.resilience
    if not audit.grounding_load_bearing:
        # This check comes first: when it fails, the score itself cannot be trusted.
        verdict = "failed"
        note = ("a value-deletion control passed — grounding is not gating; the "
                "resilience score is unreliable")
    elif resilience < min_score:
        verdict = "failed"
        note = (f"resilience {resilience:.0%} below --min-score {min_score:.0%} "
                f"({len(audit.findings)} brittle step(s) to fix)")
    else:
        verdict = "ok"
        note = f"resilience {resilience:.0%}; {len(audit.findings)} brittle step(s) noted"
    report._add("harden", verdict, note)
    return report
|