zu-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli/__init__.py +0 -0
- zu_cli/build.py +111 -0
- zu_cli/config.py +738 -0
- zu_cli/construct.py +318 -0
- zu_cli/construct_sandbox.py +139 -0
- zu_cli/contribute.py +104 -0
- zu_cli/demo.py +373 -0
- zu_cli/deploy.py +207 -0
- zu_cli/explore.py +93 -0
- zu_cli/guardrails.py +102 -0
- zu_cli/harden.py +221 -0
- zu_cli/main.py +1126 -0
- zu_cli/mcp_server.py +444 -0
- zu_cli/observe.py +69 -0
- zu_cli/offline.py +335 -0
- zu_cli/sandbox.py +276 -0
- zu_cli/scaffold.py +116 -0
- zu_cli/server.py +363 -0
- zu_cli/trace.py +111 -0
- zu_cli-0.1.0.dist-info/METADATA +26 -0
- zu_cli-0.1.0.dist-info/RECORD +23 -0
- zu_cli-0.1.0.dist-info/WHEEL +4 -0
- zu_cli-0.1.0.dist-info/entry_points.txt +4 -0
zu_cli/guardrails.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Anti-hardcode guardrails — the executable gate on autonomous construction output.
|
|
2
|
+
|
|
3
|
+
The design's meta-agent is only safe if its output is held to concrete, load-bearing
|
|
4
|
+
rules: a generic, resilient agent, never one that memorised the answer. This module makes
|
|
5
|
+
those rules executable, reusing the stage-5 machinery (``harden.audit_brittleness`` and
|
|
6
|
+
``harden.harden``):
|
|
7
|
+
|
|
8
|
+
* **G1 — every targeting step has an alternate locator.** A click/fill/select with no
|
|
9
|
+
``near`` fallback is a single point of failure (one renamed selector breaks it).
|
|
10
|
+
* **G2 — the track is resilient.** It clears a resilience threshold AND grounding is
|
|
11
|
+
load-bearing (value-deletion controls fail), so the score is real.
|
|
12
|
+
* **G3 — no literal site-answer constant baked in.** None of the captured answer's
|
|
13
|
+
grounded values may appear verbatim in ``agent.yaml`` or a bundle tool's source — the
|
|
14
|
+
"never `click Chislehurst`, never emit the answer as a constant" rule. A generic agent
|
|
15
|
+
DERIVES those values; it must not ship them.
|
|
16
|
+
* **G4 — review gate** is structural, enforced by the driver (``construct``): the output
|
|
17
|
+
is a bundle + report handed back for sign-off, never auto-promoted.
|
|
18
|
+
|
|
19
|
+
This gate is intentionally STRICTER than ``zu build``: ``zu build`` *notes* single-selector
|
|
20
|
+
brittleness (a hand-authored minimal example legitimately has one); the guardrails *fail*
|
|
21
|
+
on it, because they gate autonomous output bound for production.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
from .harden import audit_brittleness, grounded_values, harden
|
|
31
|
+
from .offline import Bundle
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class GuardrailViolation:
    """A single guardrail failure: which rule was broken, and why.

    Instances are immutable (``frozen=True``). ``enforce_guardrails`` records one per
    failed check; a report holding any violation has not passed the gate.
    """

    # Rule identifier: "single-selector" | "resilience" | "hardcoded-answer".
    rule: str
    # Human-readable explanation of what tripped the rule.
    detail: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class GuardrailReport:
    """Outcome of a guardrail run: the violations found plus a resilience score."""

    # Every failed check; an empty list means the gate passed.
    violations: list[GuardrailViolation] = field(default_factory=list)
    # Resilience score carried alongside the violations (defaults to 1.0).
    resilience: float = 1.0

    @property
    def passed(self) -> bool:
        """Return True only when no violation was recorded."""
        return False if self.violations else True
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _config_text(agent_dir: str | Path) -> str:
    """Collect the agent's authored surface a hardcoded answer could hide in.

    Returns the newline-joined contents of ``agent.yaml`` / ``agent.yml`` (whichever
    exist) followed by every ``*.py`` file found recursively under ``tools/``, in sorted
    path order. A missing ``tools/`` directory is fine, and a tool file that cannot be
    opened is skipped.

    Files are decoded as UTF-8 with undecodable bytes replaced instead of raising:
    ``UnicodeDecodeError`` is not an ``OSError``, so a single mis-encoded file would
    otherwise abort the whole gate instead of being scanned.
    """
    base = Path(agent_dir)
    parts: list[str] = []
    for name in ("agent.yaml", "agent.yml"):
        p = base / name
        if p.is_file():
            parts.append(p.read_text(encoding="utf-8", errors="replace"))
    tools = base / "tools"
    if tools.is_dir():
        for py in sorted(tools.rglob("*.py")):
            try:
                # errors="replace" keeps the readable remainder of the file in the scan.
                parts.append(py.read_text(encoding="utf-8", errors="replace"))
            except OSError:
                # Best-effort: an unreadable tool file is skipped, not fatal.
                continue
    return "\n".join(parts)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def enforce_guardrails(
    spec: Any, cfg: Any, bundle: Bundle, agent_dir: str | Path, *, min_resilience: float = 1.0,
) -> GuardrailReport:
    """Run guardrails G1–G3 over a captured bundle and report every violation found.

    Args:
        spec: Passed through unchanged to ``harden``.
        cfg: Passed through unchanged to ``harden``.
        bundle: The captured bundle being gated.
        agent_dir: Directory whose ``agent.yaml`` and ``tools/`` sources are scanned
            for hardcoded answers.
        min_resilience: Lowest acceptable resilience score (keyword-only).

    Returns:
        A ``GuardrailReport`` whose ``violations`` list is empty when the gate passes;
        its ``resilience`` is the score computed by ``harden``.
    """
    # G1 — alternate locators: each single-selector audit finding becomes a violation.
    violations: list[GuardrailViolation] = [
        GuardrailViolation("single-selector", f"{finding.where}: {finding.detail}")
        for finding in audit_brittleness(bundle)
        if finding.kind == "single-selector"
    ]

    # G2 — resilience. The score is only compared to the threshold when the
    # value-deletion controls failed as they should; otherwise it is not trusted.
    hardened = await harden(spec, cfg, bundle)
    if not hardened.grounding_load_bearing:
        violations.append(GuardrailViolation(
            "resilience", "a value-deletion control passed — grounding is not gating, so "
            "the resilience score is unreliable"))
    elif hardened.resilience < min_resilience:
        violations.append(GuardrailViolation(
            "resilience",
            f"resilience {hardened.resilience:.0%} below required {min_resilience:.0%}"))

    # G3 — no hardcoded answer: a grounded value must not appear verbatim in the
    # agent config or any tool source.
    authored = _config_text(agent_dir)
    violations.extend(
        GuardrailViolation(
            "hardcoded-answer", f"the grounded value {value!r} appears verbatim in the "
            "agent config or a tool's source — a generic agent must derive it, not "
            "hardcode it")
        for value in grounded_values(bundle)
        if value in authored
    )

    return GuardrailReport(violations=violations, resilience=hardened.resilience)
|
zu_cli/harden.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Stage 5 — chaos hardening. Score how brittle a captured path is, offline and free.
|
|
2
|
+
|
|
3
|
+
Two honest, $0 signals over a ``fixtures/capture.json`` bundle (the keystone's output):
|
|
4
|
+
|
|
5
|
+
* **Static brittleness audit** — read the captured moves and flag single points of
|
|
6
|
+
failure WITHOUT a run: a click/fill/select that targets one selector with no ``near``
|
|
7
|
+
fallback, and a grounded value that appears exactly once across the fixtures (no
|
|
8
|
+
redundancy, so one wording change loses it). This is the doc's "surface brittle
|
|
9
|
+
single-selector steps", computed structurally.
|
|
10
|
+
|
|
11
|
+
* **Perturbation replay** — generate variant bundles and re-run them through the offline
|
|
12
|
+
keystone (``offline.replay_offline``), modelled on ``zu-redteam``'s out-of-band
|
|
13
|
+
verdict pattern (inspect a finished run from outside its trust boundary). The
|
|
14
|
+
**resilience score** is the fraction of *value-preserving* perturbations the path
|
|
15
|
+
still succeeds on (cosmetic page noise it should absorb). *Value-corrupting*
|
|
16
|
+
perturbations are run too, as a control: they MUST fail, proving grounding is actually
|
|
17
|
+
load-bearing and the score is not a rubber stamp.
|
|
18
|
+
|
|
19
|
+
What this does NOT measure: adaptive recovery (re-pathfinding around a renamed selector
|
|
20
|
+
or an injected interstitial). The replay drives the *captured* moves — a frozen model —
|
|
21
|
+
so a perturbation that needs a NEW decision can't be absorbed offline. Measuring that
|
|
22
|
+
needs a live model and is the next increment (a live hardening lane); it is deliberately
|
|
23
|
+
out of this $0 scope, not silently conflated with what is measured here.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import copy
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from .offline import Bundle, replay_offline
|
|
33
|
+
|
|
34
|
+
# Observation keys treated as page content: the fields the audit searches for grounded
# values and the fields the perturbation variants rewrite.
_TEXT_FIELDS = ("text", "html", "content")
# Action keys that address a single element; such an action is reported as brittle when
# it carries no `near` fallback.
_TARGETING = ("click", "fill", "select")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# --- findings + report -------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class Finding:
    """An immutable record of one single point of failure found by the static audit."""

    # Category of the finding: "single-selector" | "single-occurrence".
    kind: str
    # Where it was found (a move label or the affected value).
    where: str
    # Human-readable explanation of the weakness.
    detail: str
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class VariantResult:
    """Outcome of replaying one perturbation, paired with what was expected of it."""

    # Label of the perturbation variant.
    name: str
    # Whether the path was expected to succeed on this variant.
    expect_pass: bool
    # Whether the path actually succeeded on this variant.
    passed: bool

    @property
    def ok(self) -> bool:
        """Return True when the outcome matched the expectation.

        Value-preserving variants are expected to pass; value-corrupting ones are
        expected to fail.
        """
        return self.expect_pass == self.passed
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class HardenReport:
    """Result of a hardening run: static findings plus per-variant replay outcomes."""

    # Static brittleness findings for the captured path.
    findings: list[Finding] = field(default_factory=list)
    # One entry per replayed perturbation variant.
    variants: list[VariantResult] = field(default_factory=list)

    @property
    def resilience(self) -> float:
        """Share of value-preserving variants the path still succeeded on.

        1.0 means every cosmetic change was absorbed; below 1.0 means the path is
        brittle to page noise. With no value-preserving variants the score is 1.0.
        """
        benign = [v for v in self.variants if v.expect_pass]
        if not benign:
            return 1.0
        return sum(v.passed for v in benign) / len(benign)

    @property
    def grounding_load_bearing(self) -> bool:
        """Return True when no value-corrupting variant succeeded.

        That means the score reflects real grounding rather than a path that succeeds
        regardless of what the page says. Vacuously True when there are no
        value-corrupting variants.
        """
        return not any(v.passed for v in self.variants if not v.expect_pass)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# --- static audit ------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _value_strings(value: Any) -> list[str]:
    """Flatten a result value into the non-empty scalar strings it contains.

    Dict values and list items are walked recursively, in order. Strings, ints and
    floats are stringified and stripped; booleans, ``None``, other types and blank
    strings contribute nothing.
    """
    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, list):
        children = value
    else:
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(value, bool) or not isinstance(value, (str, int, float)):
            return []
        text = str(value).strip()
        return [text] if text else []
    return [s for child in children for s in _value_strings(child)]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _final_value(bundle: Bundle) -> Any:
    """Return the captured answer: the last move carrying non-empty ``text``.

    The text is decoded as JSON when possible, otherwise returned as-is. Returns
    ``None`` when no move carries text.
    """
    import json

    for move in reversed(bundle.moves):
        raw = move.get("text")
        if not raw:
            continue
        try:
            return json.loads(raw)
        except (ValueError, TypeError):
            # Not JSON (or not a string at all): the raw text is the answer.
            return raw
    return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def grounded_values(bundle: Bundle) -> list[str]:
    """Return the distinct scalar strings the captured answer commits to.

    These are the values a generic agent must DERIVE from the page, and so must never
    hardcode. First-seen order is kept and duplicates are dropped.
    """
    unique: dict[str, None] = {}
    for text in _value_strings(_final_value(bundle)):
        # A dict keeps insertion order, giving an order-preserving de-duplication.
        unique.setdefault(text, None)
    return list(unique)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def audit_brittleness(bundle: Bundle) -> list[Finding]:
    """Flag single points of failure in the captured path, no run required.

    Two checks are made:

    1. Every ``click`` / ``fill`` / ``select`` action of a ``browser`` or
       ``render_dom`` move that has no ``near`` key yields a ``single-selector``
       finding. Moves are numbered from 1, counting only those two tools.
    2. Every grounded value found in exactly one fixture observation yields a
       ``single-occurrence`` finding. Occurrences are counted per observation, not per
       text field, so a value repeated in the ``text`` and ``html`` of the same
       observation still counts as a single occurrence.

    Returns:
        The findings, selector findings first, then value findings.
    """
    findings: list[Finding] = []

    # 1. Single-selector steps: a targeting action with no `near` fallback.
    page_moves = (m for m in bundle.moves if m.get("tool") in ("browser", "render_dom"))
    for number, move in enumerate(page_moves, start=1):
        # `args` or `actions` may be missing or explicitly None in a capture.
        actions = (move.get("args") or {}).get("actions") or []
        for action in actions:
            if not isinstance(action, dict):
                continue
            verb = next((v for v in _TARGETING if v in action), None)
            if verb and "near" not in action:
                findings.append(Finding(
                    kind="single-selector",
                    where=f"{move['tool']} move #{number}",
                    detail=f"{verb} {action[verb]!r} has no `near` fallback — one renamed "
                    "selector breaks this step; add an alternate locator.",
                ))

    # 2. Single-occurrence grounded values: present in exactly one fixture observation.
    # Keep each observation's fields grouped so the unit counted is the observation.
    pages = [
        [str(obs.get(f, "")) for f in _TEXT_FIELDS]
        for obss in bundle.observations.values()
        for obs in obss
    ]
    for s in grounded_values(bundle):
        hits = sum(any(s in text for text in page) for page in pages)
        if hits == 1:
            findings.append(Finding(
                kind="single-occurrence",
                where=f"value {s!r}",
                detail="appears in only one fixture observation — a single wording change "
                "there loses the grounding for this value.",
            ))
    return findings
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# --- perturbation variants ---------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _map_observations(bundle: Bundle, fn: Any) -> Bundle:
    """Return a deep copy of ``bundle`` with ``fn`` applied to its page content.

    ``fn`` is called on every string-valued content field of every observation in the
    copy. The input bundle is not modified, and non-string fields are left alone.
    """
    variant = copy.deepcopy(bundle)
    every_observation = (
        obs for group in variant.observations.values() for obs in group
    )
    for obs in every_observation:
        for key in _TEXT_FIELDS:
            current = obs.get(key)
            if isinstance(current, str):
                obs[key] = fn(current)
    return variant
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def perturb_variants(bundle: Bundle) -> list[tuple[str, Bundle, bool]]:
    """Generate ``(name, variant, expect_pass)`` perturbations of ``bundle``.

    Only observation text is rewritten (via ``_map_observations``), so the observation
    sequence stays aligned with the captured moves.

    * Value-preserving variants (``expect_pass=True``) wrap the page text in benign
      noise; a resilient path absorbs them.
    * Value-corrupting variants (``expect_pass=False``) delete one grounded value from
      every observation; the path MUST then fail. There is one per grounded value,
      named ``drop-value:<value>``.
    """

    def _without(value: str) -> Any:
        """Build a text transform whose output is guaranteed not to contain ``value``."""

        def scrub(text: str) -> str:
            scrubbed = text.replace(value, "[removed]")
            # The placeholder, or its junction with neighbouring text, can itself
            # contain the value (e.g. "e" or "removed"). Strip any leftovers so the
            # control really deletes the value; each pass shortens the text, so the
            # loop terminates.
            while value and value in scrubbed:
                scrubbed = scrubbed.replace(value, "")
            return scrubbed

        return scrub

    # Value-preserving: cosmetic noise around the existing content (values intact).
    variants: list[tuple[str, Bundle, bool]] = [
        (
            "banner-prefix",
            _map_observations(bundle, lambda t: "[Cookie notice — we value your privacy] " + t),
            True,
        ),
        (
            "promo-suffix",
            _map_observations(bundle, lambda t: t + " — Limited-time offer, shop now!"),
            True,
        ),
    ]

    # Value-corrupting: remove each grounded value from every observation (a control —
    # these must fail, or grounding is not gating).
    for s in grounded_values(bundle):
        variants.append((f"drop-value:{s}", _map_observations(bundle, _without(s)), False))
    return variants
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# --- the harden run ----------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
async def harden(spec: Any, cfg: Any, bundle: Bundle) -> HardenReport:
    """Audit the captured path statically, then replay each perturbation variant.

    Every variant from ``perturb_variants`` is replayed sequentially through
    ``replay_offline``; a variant counts as passed only when the replay status is
    ``Status.SUCCESS``.

    Returns:
        A ``HardenReport`` with the static findings and one ``VariantResult`` per
        variant.
    """
    from zu_core.contracts import Status

    static_findings = audit_brittleness(bundle)
    outcomes: list[VariantResult] = []
    for label, perturbed, should_pass in perturb_variants(bundle):
        replayed, _ = await replay_offline(spec, cfg, perturbed)
        succeeded = replayed.status is Status.SUCCESS
        outcomes.append(VariantResult(name=label, expect_pass=should_pass, passed=succeeded))
    return HardenReport(findings=static_findings, variants=outcomes)
|