zu-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli/__init__.py +0 -0
- zu_cli/build.py +111 -0
- zu_cli/config.py +738 -0
- zu_cli/construct.py +318 -0
- zu_cli/construct_sandbox.py +139 -0
- zu_cli/contribute.py +104 -0
- zu_cli/demo.py +373 -0
- zu_cli/deploy.py +207 -0
- zu_cli/explore.py +93 -0
- zu_cli/guardrails.py +102 -0
- zu_cli/harden.py +221 -0
- zu_cli/main.py +1126 -0
- zu_cli/mcp_server.py +444 -0
- zu_cli/observe.py +69 -0
- zu_cli/offline.py +335 -0
- zu_cli/sandbox.py +276 -0
- zu_cli/scaffold.py +116 -0
- zu_cli/server.py +363 -0
- zu_cli/trace.py +111 -0
- zu_cli-0.1.0.dist-info/METADATA +26 -0
- zu_cli-0.1.0.dist-info/RECORD +23 -0
- zu_cli-0.1.0.dist-info/WHEEL +4 -0
- zu_cli-0.1.0.dist-info/entry_points.txt +4 -0
zu_cli/construct.py
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""The meta-agent construction driver — the diagnose → edit → rebuild loop.
|
|
2
|
+
|
|
3
|
+
The headline of the construction sequence: capture a site once, then iterate the agent
|
|
4
|
+
OFFLINE and free until it builds clean AND clears the anti-hardcode guardrails — reading
|
|
5
|
+
each round's diagnosis to decide the next edit. The orchestration is real and fully
|
|
6
|
+
exercised offline; the one inherently-live part (capturing a site) stays a seam.
|
|
7
|
+
|
|
8
|
+
* The **strategist** decides the next edit from a diagnosis. ``ScriptedStrategist`` replays
|
|
9
|
+
a fixed list (tests, and a deterministic offline demo); ``LiveStrategist`` asks a model —
|
|
10
|
+
given a provider it hardens the single-selector steps (adds a ``near`` alternate locator
|
|
11
|
+
drawn from the captured page text); given none it stays a seam, so ``zu construct``
|
|
12
|
+
without a live model still stops cleanly.
|
|
13
|
+
* **Live capture** (stage 2) is the seam ``live_capture``; ``construct`` takes an already
|
|
14
|
+
captured bundle, exactly as ``zu capture`` produces.
|
|
15
|
+
|
|
16
|
+
The driver NEVER promotes (guardrail G4): it returns a bundle + report for review. Reuses
|
|
17
|
+
``build.build_offline`` (the offline spine) and ``guardrails.enforce_guardrails`` (the
|
|
18
|
+
gate) — no new offline machinery.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import copy
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Protocol, runtime_checkable
|
|
27
|
+
|
|
28
|
+
from zu_core.ports import ModelProvider, ModelRequest
|
|
29
|
+
|
|
30
|
+
from .build import BuildReport, build_offline
|
|
31
|
+
from .guardrails import GuardrailReport, enforce_guardrails
|
|
32
|
+
from .offline import Bundle
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class Edit:
    """One change proposed by a strategist.

    Pairs the already-mutated bundle that should be tried on the next round
    with a short, human-readable reason for the change.
    """

    # The mutated bundle to build and gate next.
    bundle: Bundle
    # Why the strategist proposed this change.
    note: str
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class Diagnosis:
    """The state of a failing round, as handed to a strategist.

    Bundles together everything needed to choose the next edit: which round
    this is, the build report, the guardrail report and the bundle that was
    tried.
    """

    # Round number the diagnosis belongs to.
    round: int
    # Build report for this round.
    build: BuildReport
    # Guardrail report for this round.
    guardrails: GuardrailReport
    # The bundle that was built and gated this round.
    bundle: Bundle
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@runtime_checkable
|
|
54
|
+
class Strategist(Protocol):
|
|
55
|
+
"""Decides the next edit from a diagnosis, or ``None`` to give up."""
|
|
56
|
+
|
|
57
|
+
async def propose(self, diagnosis: Diagnosis) -> Edit | None: ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class ScriptedStrategist:
    """A strategist that hands out a pre-written list of edits in order.

    Each call to ``propose`` returns the next entry of ``edits`` regardless of
    the diagnosis; once every entry has been handed out it returns ``None``.
    Intended as the deterministic driver for tests and an offline demo.
    """

    # The script: edits to replay, one per call.
    edits: list[Edit]
    # Index of the next edit to hand out.
    _i: int = 0

    async def propose(self, diagnosis: Diagnosis) -> Edit | None:
        """Return the next scripted edit, or ``None`` when the script is spent."""
        cursor = self._i
        if cursor < len(self.edits):
            chosen = self.edits[cursor]
            # Advance only after the lookup succeeded.
            self._i = cursor + 1
            return chosen
        return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# --- the live strategist: a model proposes the next edit ---------------------
|
|
77
|
+
|
|
78
|
+
# Action verbs that target a page element through a selector; the first one
# present in an action dict is treated as that action's verb.
_TARGETING = ("click", "fill", "select")


def _brittle_steps(bundle: Bundle) -> list[tuple[int, int, str, Any]]:
    """Find the targeting actions in the bundle's moves that lack a ``near`` fallback.

    These are the single-selector steps an alternate locator would harden.
    Only moves whose ``tool`` is ``"browser"`` or ``"render_dom"`` are
    inspected.

    Malformed entries are skipped instead of raising: a move that is not a
    dict, an ``args`` value that is missing, ``None`` or not a dict, an
    ``actions`` value that is not a list/tuple, and any action that is not a
    dict.

    Args:
        bundle: Object exposing ``moves``, an iterable of move dicts.

    Returns:
        One ``(move_index, action_index, verb, selector)`` tuple per brittle
        step, in bundle order, so an edit can patch each step precisely.
    """
    steps: list[tuple[int, int, str, Any]] = []
    for mi, move in enumerate(bundle.moves):
        if not isinstance(move, dict) or move.get("tool") not in ("browser", "render_dom"):
            continue
        # ``dict.get(key, default)`` only falls back when the key is absent, so
        # an explicit ``"args": null`` must be handled separately.
        args = move.get("args")
        actions = args.get("actions") if isinstance(args, dict) else None
        if not isinstance(actions, (list, tuple)):
            continue
        for ai, action in enumerate(actions):
            if not isinstance(action, dict):
                continue
            verb = next((v for v in _TARGETING if v in action), None)
            if verb and "near" not in action:
                steps.append((mi, ai, verb, action[verb]))
    return steps
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _page_text(bundle: Bundle, *, limit: int = 2000) -> str:
    """Collect the page text recorded in the bundle's browser/render observations.

    For each observation under the ``"browser"`` and ``"render_dom"`` tools
    (in that order) the ``"text"`` value is used, falling back to ``"html"``.
    Non-string and blank values are skipped. The stripped pieces are joined
    with newlines and the result is cut to at most ``limit`` characters. This
    is the context a model is given to choose ``near`` anchors from.
    """
    chunks: list[str] = []
    for tool in ("browser", "render_dom"):
        for obs in bundle.observations.get(tool, []):
            raw = obs.get("text") or obs.get("html") or ""
            if not isinstance(raw, str):
                continue
            cleaned = raw.strip()
            if cleaned:
                chunks.append(cleaned)
    joined = "\n".join(chunks)
    return joined[:limit]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _balanced_spans(text: str) -> list[str]:
    """Return the balanced top-level ``{...}`` / ``[...]`` runs found in ``text``.

    Used to recover JSON that a model wrapped in prose. The scan is aware of
    double-quoted strings and backslash escapes, so a brace inside a quoted
    value does not affect the depth count.

    Objects and arrays are scanned in two independent passes, then the spans
    are returned ordered by where they start in ``text``. An enclosing
    container therefore always precedes anything nested inside it: for
    ``'x [{"a": 1}] y'`` the array comes first and the inner object second.
    Callers that take the first parseable span thus get the outermost value.

    Args:
        text: Arbitrary text that may embed JSON.

    Returns:
        The matched substrings, earliest start first; empty if none balance.
    """
    found: list[tuple[int, str]] = []
    for open_ch, close_ch in (("{", "}"), ("[", "]")):
        depth = 0
        start = -1
        in_str = False
        esc = False
        for i, ch in enumerate(text):
            if in_str:
                # Inside a string only escapes and the closing quote matter.
                if esc:
                    esc = False
                elif ch == "\\":
                    esc = True
                elif ch == '"':
                    in_str = False
                continue
            if ch == '"':
                in_str = True
            elif ch == open_ch:
                if depth == 0:
                    start = i
                depth += 1
            elif ch == close_ch and depth:
                # A closer seen at depth 0 is stray prose and is ignored.
                depth -= 1
                if depth == 0 and start >= 0:
                    found.append((start, text[start : i + 1]))
    # Start offsets are unique (each is the position of one opening character),
    # so sorting on them gives a total order in text position.
    found.sort(key=lambda pair: pair[0])
    return [span for _start, span in found]
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _extract_json(text: str | None) -> Any:
    """Parse a model reply into JSON on a best-effort basis.

    Candidates are tried in this order and the first that ``json.loads``
    accepts is returned: the whole text, the contents of the first fenced
    block (with an optional ``json`` tag), then each span reported by
    ``_balanced_spans``. Returns ``None`` when ``text`` is empty or nothing
    parses, so the caller can give up cleanly.
    """
    if not text:
        return None
    import json
    import re

    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
    fenced_body = [fenced.group(1)] if fenced else []
    attempts = [text, *fenced_body, *_balanced_spans(text)]
    for attempt in attempts:
        try:
            parsed = json.loads(attempt)
        except (ValueError, TypeError):
            continue
        return parsed
    return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _parse_fixes(data: Any, n_steps: int) -> dict[int, str]:
    """Normalise a model reply into ``{step_index: near_label}``.

    Accepts either a bare list of items or a ``{"fixes": [...]}`` wrapper.
    An item is kept only when it is a dict whose ``step`` is a real integer
    in ``range(n_steps)`` and whose ``near`` is a non-blank string. Anything
    else is dropped, so a sloppy reply yields fewer fixes and never a crash.

    Booleans are rejected for ``step``: ``bool`` is a subclass of ``int``, so
    without the explicit check a JSON ``true`` would be applied as step 1.

    Args:
        data: The decoded reply (any JSON value, or ``None``).
        n_steps: Number of brittle steps offered to the model.

    Returns:
        Accepted fixes with labels stripped; a later duplicate ``step``
        overrides an earlier one.
    """
    items = data.get("fixes") if isinstance(data, dict) else data
    out: dict[int, str] = {}
    if not isinstance(items, list):
        return out
    for item in items:
        if not isinstance(item, dict):
            continue
        step, near = item.get("step"), item.get("near")
        if not isinstance(step, int) or isinstance(step, bool):
            continue
        if not 0 <= step < n_steps:
            continue
        if isinstance(near, str) and near.strip():
            out[step] = near.strip()
    return out
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _edit_messages(
    diagnosis: Diagnosis, steps: list[tuple[int, int, str, Any]], page_text: str
) -> list[dict]:
    """Build the two-message prompt that asks a model for ``near`` anchors.

    The system message states the hardening job and demands strict JSON. The
    user message carries the task, the guardrail violations that held the
    round (or ``- (none)``), the brittle steps numbered by their position in
    ``steps``, the page text to pick anchors from, and the reply schema. The
    wording is generic: it asks for a nearby visible label, never a site
    answer.
    """
    violation_lines = [
        f"- [{v.rule}] {v.detail}" for v in diagnosis.guardrails.violations
    ]
    violations = "\n".join(violation_lines) or "- (none)"

    step_lines = []
    for number, (_mi, _ai, verb, selector) in enumerate(steps):
        step_lines.append(
            f"  step {number}: a `{verb}` targeting {selector!r} with no `near` fallback"
        )
    listed = "\n".join(step_lines)

    system = "".join([
        "You harden a browser-automation path. A targeting step that relies on a single ",
        "selector breaks when the site renames it; adding a `near` anchor (a short, stable ",
        "VISIBLE label beside the control) lets the runtime resolve the control by ",
        "proximity as a fallback. Choose anchors from the page text only — never invent a ",
        "value, and never encode the task's answer. Reply with STRICT JSON and nothing else.",
    ])
    user = "".join([
        f"Task: {diagnosis.bundle.task}\n\n",
        f"This construction round was held:\n{violations}\n\n",
        f"Single-selector steps to harden:\n{listed}\n\n",
        f"Visible page text (choose `near` anchors from here):\n{page_text}\n\n",
        'Reply with JSON: {"fixes": [{"step": <int>, "near": "<short visible label>"}]}. ',
        "Include only the steps you can anchor; omit any you cannot.",
    ])
    return [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
class LiveStrategist:
    """Strategist whose edits come from a model — the live lane of the loop.

    With a ``provider`` it asks the model to harden the single-selector steps
    by adding a ``near`` alternate locator chosen from the captured page text,
    and applies the reply to a deep copy of the bundle. Without a provider
    ``propose`` raises ``NotImplementedError``, so ``zu construct`` without a
    live model still stops cleanly.

    Scope: only G1 (single-selector) brittleness is addressed, because that is
    what a bundle edit can fix. A G3 hardcoded answer lives in the agent
    config rather than the bundle, so this strategist returns ``None`` for it
    and leaves it for review (G4).
    """

    def __init__(self, provider: ModelProvider | None = None) -> None:
        self._provider = provider

    async def propose(self, diagnosis: Diagnosis) -> Edit | None:
        """Ask the model for ``near`` anchors and return the patched bundle.

        Returns ``None`` when there are no brittle steps to harden or when the
        reply yields no usable fix. Raises ``NotImplementedError`` when no
        provider was supplied.
        """
        provider = self._provider
        if provider is None:
            raise NotImplementedError(
                "the live strategist is the live lane — it needs a model to decide the next "
                "edit (the headline meta-agent: a Claude CLI driving the zu mcp tools in a "
                "sandbox). Pass a provider, inject a ScriptedStrategist for offline runs, or "
                "use `zu construct --check` for a one-round readiness report."
            )

        steps = _brittle_steps(diagnosis.bundle)
        if not steps:
            # Whatever held the round is not something a bundle edit can fix
            # (a G3 hardcoded answer in the config, or a build failure).
            return None

        prompt = _edit_messages(diagnosis, steps, _page_text(diagnosis.bundle))
        reply = await provider.complete(ModelRequest(messages=prompt))
        fixes = _parse_fixes(_extract_json(reply.text), len(steps))
        if not fixes:
            return None

        # Patch a copy so the diagnosed bundle itself is left untouched.
        patched = copy.deepcopy(diagnosis.bundle)
        applied: list[str] = []
        for step_index, label in fixes.items():
            move_at, action_at, verb, selector = steps[step_index]
            patched.moves[move_at]["args"]["actions"][action_at]["near"] = label
            applied.append(f"{verb} {selector!r} +near={label!r}")
        summary = "add `near` fallback(s): " + "; ".join(applied)
        return Edit(bundle=patched, note=summary)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def live_capture(spec: Any, cfg: Any, agent_dir: str | Path) -> Bundle:
    """Placeholder seam for stage-2 live capture; always raises.

    Driving the site once and projecting a bundle needs keys and network, so
    it is not implemented here. Produce ``fixtures/capture.json`` with
    ``zu capture`` first; ``construct`` then iterates on it offline.

    Raises:
        NotImplementedError: on every call.
    """
    message = (
        "live capture needs keys + network — run `zu capture <agent>` once to record "
        "fixtures/capture.json, then construct iterates it offline."
    )
    raise NotImplementedError(message)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
@dataclass
class RoundResult:
    """Outcome of a single construction round, as recorded on the report."""

    # Round number.
    round: int
    # Whether the build was ok this round.
    build_ok: bool
    # Whether the guardrails passed this round.
    guardrails_passed: bool
    # Free-text summary of what happened in the round.
    note: str
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@dataclass
class ConstructionReport:
    """Accumulated result of a construction run, returned for review."""

    # One entry per round.
    rounds: list[RoundResult] = field(default_factory=list)
    # Build report from the most recent round, if any round ran.
    final_build: BuildReport | None = None
    # Guardrail report from the most recent round, if any round ran.
    final_guardrails: GuardrailReport | None = None
    # The working bundle as last tried — handed back for review.
    bundle: Bundle | None = None

    @property
    def converged(self) -> bool:
        """True only when the last build is ok and the last guardrails passed."""
        build, guards = self.final_build, self.final_guardrails
        if not build or not build.ok:
            return False
        return bool(guards and guards.passed)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
async def construct(
    spec: Any, cfg: Any, agent_dir: str | Path, bundle: Bundle, strategist: Strategist,
    *, max_rounds: int = 3, min_resilience: float = 1.0,
) -> ConstructionReport:
    """Run the offline build/gate/edit loop and return a report for review.

    Each round builds with ``build_offline`` and gates with
    ``enforce_guardrails``. If both succeed the run has converged. Otherwise
    the strategist is asked for an edit: ``None`` ends the run, anything else
    supplies the bundle for the next round. The loop also ends after
    ``max_rounds`` rounds. Nothing is promoted (G4); the report carries the
    last bundle that was actually built and gated.
    """
    report = ConstructionReport(bundle=bundle)
    current = bundle
    for round_no in range(1, max_rounds + 1):
        build = await build_offline(
            spec, cfg, agent_dir, current, min_score=min_resilience)
        guards = await enforce_guardrails(
            spec, cfg, current, agent_dir, min_resilience=min_resilience)
        report.final_build, report.final_guardrails = build, guards
        report.bundle = current

        if build.ok and guards.passed:
            report.rounds.append(RoundResult(round_no, True, True, "converged"))
            break

        # Describe what held the round: the build, the guardrails, or both.
        reasons: list[str] = []
        if not build.ok:
            reasons.append("build held")
        if not guards.passed:
            reasons.append(f"{len(guards.violations)} guardrail violation(s)")
        held = "; ".join(reasons)

        edit = await strategist.propose(Diagnosis(round_no, build, guards, current))
        outcome = "gave up" if edit is None else f"edit: {edit.note}"
        report.rounds.append(
            RoundResult(round_no, build.ok, guards.passed, f"{held}; {outcome}"))
        if edit is None:
            break
        current = edit.bundle

    # Converged, gave up, or out of rounds: the last attempt is already on the report.
    return report
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""In-container construction entrypoint — the autonomous brain, contained.
|
|
2
|
+
|
|
3
|
+
The production form of the meta-agent (the headline) is *zu's own ``construct()`` loop run
|
|
4
|
+
INSIDE the hardened container* ``SandboxLauncher`` builds — not an external CLI binary. The
|
|
5
|
+
meta-agent is just another contained zu run: caps dropped, blocking seccomp, and its only
|
|
6
|
+
egress the model endpoint (construction is offline except the strategist's model calls).
|
|
7
|
+
That reuses everything — the offline spine (build → record track → harden), the
|
|
8
|
+
``LiveStrategist`` brain, the anti-hardcode guardrails, cost telemetry, and the event log
|
|
9
|
+
(so the meta-agent's every step is observable) — instead of bolting on a binary that drives
|
|
10
|
+
zu over stdio and reasons outside the log.
|
|
11
|
+
|
|
12
|
+
Two halves, like ``zu_cli.sandbox``:
|
|
13
|
+
|
|
14
|
+
* :func:`construct_contained_from_env` — the in-container entrypoint (console script
|
|
15
|
+
``zu-construct-contained``). Reads the mounted agent, runs construction, and writes one
|
|
16
|
+
JSON object (the report + the hardened track it produced) on stdout.
|
|
17
|
+
* the host-side launcher (the next increment) execs this inside the same hardened container,
|
|
18
|
+
with the model endpoint on the egress allowlist, and parses the report back.
|
|
19
|
+
|
|
20
|
+
:func:`run_contained_construction` is the testable core — it runs the loop on a *writable
|
|
21
|
+
copy* of the agent (the bundle is mounted read-only, but the offline spine writes
|
|
22
|
+
``track.json``) with no Docker and no env, so the orchestration is verified the way the rest
|
|
23
|
+
of zu is: fakes/scripted providers, offline, ~$0.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import json
|
|
30
|
+
import os
|
|
31
|
+
import shutil
|
|
32
|
+
import sys
|
|
33
|
+
import tempfile
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Any
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _report_to_dict(report: Any, track_text: str | None) -> dict:
    """Flatten a construction report into a JSON-able dict.

    The payload holds the convergence flag (also exposed as ``ready``), one
    entry per round, the violations from the final guardrail report, its
    resilience value, and ``track_text`` under ``"track"``. This is the
    stdout contract the host-side launcher parses.
    """
    guards = report.final_guardrails
    standing = guards.violations if guards else []
    resilience = guards.resilience if guards else None

    round_rows = []
    for entry in report.rounds:
        round_rows.append({
            "round": entry.round,
            "build_ok": entry.build_ok,
            "guardrails_passed": entry.guardrails_passed,
            "note": entry.note,
        })
    violation_rows = [{"rule": v.rule, "detail": v.detail} for v in standing]

    payload: dict = {"ok": True}
    payload["converged"] = report.converged
    # converged == build clean AND guardrails passed (G1–G3)
    payload["ready"] = report.converged
    payload["rounds"] = round_rows
    payload["violations"] = violation_rows
    payload["resilience"] = resilience
    # The deliverable, handed back for review (G4 — never auto-promoted).
    payload["track"] = track_text
    return payload
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def _run(agent_dir: str | Path, *, max_rounds: int, min_resilience: float) -> dict:
    """Run construction against a temporary copy of ``agent_dir``.

    The agent directory is copied into a fresh temporary directory (leaving
    out ``track.json`` and ``cost.jsonl``), the agent and its bundle are
    loaded from the copy, and ``construct`` is driven by a ``LiveStrategist``
    built from the agent's configured provider. The copy's ``track.json`` is
    read back only when the run converged and the file exists. The result is
    the dict produced by ``_report_to_dict``.
    """
    from .config import build_provider, load_agent
    from .construct import LiveStrategist, construct
    from .offline import Bundle, bundle_path

    src = Path(agent_dir)
    # Skip prior runtime artifacts so the working copy starts clean.
    skip_artifacts = shutil.ignore_patterns("track.json", "cost.jsonl")
    # Work on a WRITABLE copy: the bundle is mounted read-only in the
    # container, but the offline spine writes track.json.
    with tempfile.TemporaryDirectory(prefix="zu-construct-") as scratch:
        work = Path(scratch) / "agent"
        shutil.copytree(src, work, ignore=skip_artifacts)
        spec, cfg = load_agent(str(work))
        bundle = Bundle.load(bundle_path(work))
        # The brain is the agent's configured model; the offline replay
        # replays the bundle, so the model is spent only on the strategist's
        # edits — the one thing that needs egress.
        strategist = LiveStrategist(build_provider(cfg.provider))
        report = await construct(
            spec, cfg, str(work), bundle, strategist,
            max_rounds=max_rounds, min_resilience=min_resilience,
        )
        track = work / "track.json"
        track_text = None
        if report.converged and track.is_file():
            track_text = track.read_text(encoding="utf-8")
        return _report_to_dict(report, track_text)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def run_contained_construction(
    agent_dir: str | Path, *, max_rounds: int = 3, min_resilience: float = 1.0
) -> dict:
    """Synchronous wrapper that runs ``_run`` to completion with ``asyncio.run``.

    Returns the JSON-able report for ``agent_dir``: convergence, rounds,
    remaining violations, resilience, and the hardened track if the run
    converged. This is the testable core of the contained entrypoint — no
    Docker, no env.
    """
    job = _run(agent_dir, max_rounds=max_rounds, min_resilience=min_resilience)
    return asyncio.run(job)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def construct_contained_from_env(argv: list[str] | None = None) -> int:
    """Console-script entrypoint (``zu-construct-contained``) run INSIDE the container.

    Reads the mounted agent directory from ``ZU_BUNDLE`` plus the optional
    ``ZU_CONSTRUCT_MAX_ROUNDS`` (default ``3``) and
    ``ZU_CONSTRUCT_MIN_RESILIENCE`` (default ``1.0``), runs construction, and
    writes exactly one JSON object followed by a newline to stdout.

    Configuration problems are reported through that same stdout contract as
    ``{"ok": false, "error": ...}`` rather than as a traceback, so the reader
    of stdout always gets parseable JSON: this covers a missing ``ZU_BUNDLE``
    and a non-numeric value for either ``ZU_CONSTRUCT_*`` setting.

    Args:
        argv: Accepted for console-script compatibility; not used.

    Returns:
        ``0`` after emitting a construction report, ``1`` after emitting a
        configuration error.
    """

    def _fail(message: str) -> int:
        # Emit the error in the same one-JSON-object-per-run shape as a report.
        json.dump({"ok": False, "error": message}, sys.stdout)
        sys.stdout.write("\n")
        return 1

    bundle = os.environ.get("ZU_BUNDLE")
    if not bundle:
        return _fail("ZU_BUNDLE (the mounted agent dir) is not set")

    # The mounted bundle carries its own gitignored .env (the brain's model
    # key); load it so the strategist's model is reachable inside the box.
    from .config import load_dotenv

    # Loaded before the ZU_CONSTRUCT_* settings are read, so any values it
    # places in the environment are seen.
    # NOTE(review): assumes load_dotenv populates os.environ — confirm.
    load_dotenv(Path(bundle) / ".env")
    try:
        max_rounds = int(os.environ.get("ZU_CONSTRUCT_MAX_ROUNDS", "3"))
        min_resilience = float(os.environ.get("ZU_CONSTRUCT_MIN_RESILIENCE", "1.0"))
    except ValueError as exc:
        return _fail(f"invalid ZU_CONSTRUCT_* setting: {exc}")

    payload = run_contained_construction(
        bundle, max_rounds=max_rounds, min_resilience=min_resilience)
    # default=str stringifies any value json cannot serialise natively.
    json.dump(payload, sys.stdout, default=str)
    sys.stdout.write("\n")
    return 0
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
async def launch_contained_construction(
    launcher: Any, agent_dir: str | Path, *, allowlist: list[str],
    max_rounds: int = 3, min_resilience: float = 1.0,
) -> dict:
    """Host-side half: run construction through ``launcher.run_entrypoint``.

    Invokes the ``zu-construct-contained`` entrypoint, passing the round
    budget and resilience threshold as string environment values, with
    ``allowlist`` as the egress allowlist and ``agent_dir`` as the bundle
    directory. Whatever ``run_entrypoint`` returns is returned unchanged.
    Never auto-promotes (G4).
    """
    command = ["zu-construct-contained"]
    settings = {
        "ZU_CONSTRUCT_MAX_ROUNDS": str(max_rounds),
        "ZU_CONSTRUCT_MIN_RESILIENCE": str(min_resilience),
    }
    result = await launcher.run_entrypoint(
        command, settings, allowlist=allowlist, bundle_dir=str(agent_dir))
    return result
|
zu_cli/contribute.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Capability gaps → strong, reproducible issues.
|
|
2
|
+
|
|
3
|
+
zu's discipline is: when you hit a wall you don't hardcode around it — you build a GENERIC
|
|
4
|
+
capability (the model reasons, the tool exposes a primitive). This extends that to everyone
|
|
5
|
+
using zu. When a harness hits something zu genuinely can't do — a missing primitive, a
|
|
6
|
+
detector that won't fire, a selector zu can't resolve, a soft miss it mishandles — that's a
|
|
7
|
+
**capability gap in zu, not a bug in the user's agent**, and the fix belongs upstream.
|
|
8
|
+
|
|
9
|
+
The hard part of a good bug report is a reliable repro. Here it is **free**: a captured
|
|
10
|
+
``fixtures/`` bundle reproduces the run deterministically at $0, so the maintainers' agent can
|
|
11
|
+
``zu run --offline`` the attached bundle, reproduce the gap exactly, and build the generic
|
|
12
|
+
capability that closes it. This module turns a gap into that issue — agent config + the
|
|
13
|
+
repeatable example + expected/observed + a proposed generic capability — ready to file.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import shlex
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
# Repository that capability-gap issues are filed against. Read once at import
# time from ZU_CONTRIBUTE_REPO so a fork/mirror can retarget it.
ZU_REPO = os.getenv("ZU_CONTRIBUTE_REPO", "k3-mt/zu")
# Issue label attached to every capability-gap report.
GAP_LABEL = "capability-gap"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _zu_version() -> str:
    """Return the installed zu version, or ``"unknown"``.

    Looks up the ``zu-runtime``, ``zu-cli`` and ``zu-core`` distributions in
    that order and returns the version of the first one that is installed.
    """
    from importlib.metadata import PackageNotFoundError, version

    candidates = ("zu-runtime", "zu-cli", "zu-core")
    for dist in candidates:
        try:
            installed = version(dist)
        except PackageNotFoundError:
            continue
        else:
            return installed
    return "unknown"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class GapReport:
    """A ready-to-file capability-gap issue.

    Holds the issue ``title``, the markdown ``body``, whether a deterministic
    ``fixtures/`` repro is attached (``has_repro``), and the path of that
    repro (``repro_path``, ``None`` when there is none).
    """

    title: str
    body: str
    has_repro: bool
    repro_path: str | None

    def gh_command(self, body_file: str, *, repo: str = ZU_REPO) -> str:
        """Return a ready ``gh issue create`` command line.

        The body is passed by file because it is multi-line and embeds YAML;
        the caller writes it to ``body_file`` first.

        Every argument is shell-quoted, including ``repo`` and the label.
        ``repo`` can come from the environment or the caller, so interpolating
        it raw would let whitespace or shell metacharacters break or alter the
        command. Values made only of shell-safe characters, such as the
        default ``k3-mt/zu``, are emitted unchanged.

        Args:
            body_file: Path of the file holding the issue body.
            repo: Target repository in ``owner/name`` form.

        Returns:
            The command as a single shell-safe string.
        """
        argv = [
            "gh", "issue", "create",
            "--repo", repo,
            "--label", GAP_LABEL,
            "--title", self.title,
            "--body-file", body_file,
        ]
        return shlex.join(argv)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def build_gap_report(
    agent_dir: str | Path, *, summary: str, expected: str, observed: str,
    proposed: str | None = None, zu_version: str | None = None,
) -> GapReport:
    """Assemble a capability-gap issue for the agent at ``agent_dir``.

    The body embeds the agent's ``agent.yaml`` (or ``agent.yml``; empty when
    neither exists). If the agent's bundle file exists it is presented as the
    repeatable example, reproduced with ``zu run --offline``. Otherwise the
    report still builds but warns that a repro must be captured first. The
    version defaults to ``_zu_version()`` when ``zu_version`` is not given.
    """
    from .offline import FIXTURES_DIR, bundle_path

    base = Path(agent_dir)
    # First config file that exists wins.
    config_file = next(
        (base / name for name in ("agent.yaml", "agent.yml") if (base / name).is_file()),
        None,
    )
    cfg_text = config_file.read_text(encoding="utf-8") if config_file is not None else ""

    repro = bundle_path(base)
    has_repro = repro.is_file()
    version = zu_version or _zu_version()
    title = f"Capability gap: {summary}"

    if has_repro:
        repro_section = (
            f"This agent ships `{FIXTURES_DIR}/capture.json` — a deterministic, $0 reproduction.\n"
            f"Reproduce the gap with **no model and no network**:\n\n"
            f"```\nzu run <agent> --offline\n```\n"
        )
    else:
        repro_section = (
            "⚠️ **No fixtures bundle attached.** A capability gap needs a repeatable example so it "
            "can be picked up. Capture one first — drive the path with `zu_explore` (your harness) "
            "or `zu capture` (once, live) to record `fixtures/capture.json`, then re-run this.\n"
        )

    if proposed:
        proposed_section = f"## Proposed generic capability\n{proposed}\n\n"
    else:
        proposed_section = (
            "## Proposed generic capability\n_(none suggested — describe the smallest GENERIC "
            "primitive that would close this, in zu's no-hardcoding spirit.)_\n\n"
        )

    sections = [
        f"## What I was building\n{summary}\n\n",
        f"## What I expected\n{expected}\n\n",
        f"## What zu did (the gap)\n{observed}\n\n",
        f"## Repeatable example\n{repro_section}\n",
        f"<details><summary>agent.yaml</summary>\n\n```yaml\n{cfg_text.rstrip()}\n```\n</details>\n\n",
        f"{proposed_section}",
        f"## Environment\n- zu {version}\n\n",
        f"---\n_Filed via `zu_report_gap`. The fix should be a generic capability (no ",
        f"site-specific hardcoding); the attached bundle replays the gap deterministically._\n",
    ]
    body = "".join(sections)

    repro_path = str(repro) if has_repro else None
    return GapReport(title=title, body=body, has_repro=has_repro, repro_path=repro_path)
|