zu-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_cli/construct.py ADDED
@@ -0,0 +1,318 @@
1
+ """The meta-agent construction driver — the diagnose → edit → rebuild loop.
2
+
3
+ The headline of the construction sequence: capture a site once, then iterate the agent
4
+ OFFLINE and free until it builds clean AND clears the anti-hardcode guardrails — reading
5
+ each round's diagnosis to decide the next edit. The orchestration is real and fully
6
+ exercised offline; the one inherently-live part (capturing a site) stays a seam.
7
+
8
+ * The **strategist** decides the next edit from a diagnosis. ``ScriptedStrategist`` replays
9
+ a fixed list (tests, and a deterministic offline demo); ``LiveStrategist`` asks a model —
10
+ given a provider it hardens the single-selector steps (adds a ``near`` alternate locator
11
+ drawn from the captured page text); given none it stays a seam, so ``zu construct``
12
+ without a live model still stops cleanly.
13
+ * **Live capture** (stage 2) is the seam ``live_capture``; ``construct`` takes an already
14
+ captured bundle, exactly as ``zu capture`` produces.
15
+
16
+ The driver NEVER promotes (guardrail G4): it returns a bundle + report for review. Reuses
17
+ ``build.build_offline`` (the offline spine) and ``guardrails.enforce_guardrails`` (the
18
+ gate) — no new offline machinery.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import copy
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Protocol, runtime_checkable
27
+
28
+ from zu_core.ports import ModelProvider, ModelRequest
29
+
30
+ from .build import BuildReport, build_offline
31
+ from .guardrails import GuardrailReport, enforce_guardrails
32
+ from .offline import Bundle
33
+
34
+
35
@dataclass
class Edit:
    """One change proposed by a strategist.

    Pairs the mutated bundle that should be tried next with a short explanation of why
    the change was made.
    """

    bundle: Bundle  # the candidate bundle the next round is run against
    note: str  # human-readable reason for the change
41
+
42
+
43
@dataclass
class Diagnosis:
    """The state of a held round, as shown to a strategist so it can pick the next edit."""

    round: int  # 1-based round number
    build: BuildReport  # result of the offline build for this round
    guardrails: GuardrailReport  # result of the guardrail gate for this round
    bundle: Bundle  # the bundle that was just tried
51
+
52
+
53
@runtime_checkable
class Strategist(Protocol):
    """Structural interface for anything that can choose the next edit.

    ``propose`` receives the diagnosis of a held round and returns the edit to try next,
    or ``None`` to give up.
    """

    async def propose(self, diagnosis: Diagnosis) -> Edit | None:
        ...
58
+
59
+
60
@dataclass
class ScriptedStrategist:
    """Hands out a pre-written list of edits, one per call to ``propose``.

    The deterministic strategist for tests and offline demos: each held round consumes
    the next scripted edit, and once the list runs out ``propose`` returns ``None``.
    """

    edits: list[Edit]
    _i: int = 0  # index of the next edit to hand out

    async def propose(self, diagnosis: Diagnosis) -> Edit | None:
        """Return the next scripted edit (the diagnosis is ignored), or ``None`` when done."""
        cursor = self._i
        if cursor < len(self.edits):
            self._i = cursor + 1
            return self.edits[cursor]
        return None
74
+
75
+
76
# --- the live strategist: a model proposes the next edit ---------------------

# Action keys whose value is the selector the action targets. ``_brittle_steps`` checks
# them in this order and reports the first one present in an action.
_TARGETING = ("click", "fill", "select")
79
+
80
+
81
def _brittle_steps(bundle: Bundle) -> list[tuple[int, int, str, Any]]:
    """Locate the targeting actions that have no ``near`` fallback.

    Only moves whose ``tool`` is ``browser`` or ``render_dom`` are inspected. Each hit is
    reported as ``(move_index, action_index, verb, selector)`` so a later edit can patch
    exactly that action. Non-dict actions and actions already carrying ``near`` are skipped.
    """
    found: list[tuple[int, int, str, Any]] = []
    for move_idx, move in enumerate(bundle.moves):
        if move.get("tool") not in ("browser", "render_dom"):
            continue
        actions = move.get("args", {}).get("actions") or []
        for action_idx, action in enumerate(actions):
            if not isinstance(action, dict) or "near" in action:
                continue
            # The first targeting verb present (in _TARGETING order) names the step.
            for verb in _TARGETING:
                if verb in action:
                    found.append((move_idx, action_idx, verb, action[verb]))
                    break
    return found
97
+
98
+
99
+ def _page_text(bundle: Bundle, *, limit: int = 2000) -> str:
100
+ """The visible text the captured browser/render observations showed — the context the
101
+ model draws a real on-page label from when choosing a ``near`` anchor."""
102
+ parts: list[str] = []
103
+ for tool in ("browser", "render_dom"):
104
+ for obs in bundle.observations.get(tool, []):
105
+ t = obs.get("text") or obs.get("html") or ""
106
+ if isinstance(t, str) and t.strip():
107
+ parts.append(t.strip())
108
+ return "\n".join(parts)[:limit]
109
+
110
+
111
+ def _balanced_spans(text: str) -> list[str]:
112
+ """Balanced ``{...}`` / ``[...]`` runs in ``text`` — to recover JSON a model wrapped in
113
+ prose. String/escape-aware, so a brace inside a quoted value doesn't fool the scan."""
114
+ spans: list[str] = []
115
+ for open_ch, close_ch in (("{", "}"), ("[", "]")):
116
+ depth = 0
117
+ start = -1
118
+ in_str = False
119
+ esc = False
120
+ for i, ch in enumerate(text):
121
+ if in_str:
122
+ if esc:
123
+ esc = False
124
+ elif ch == "\\":
125
+ esc = True
126
+ elif ch == '"':
127
+ in_str = False
128
+ continue
129
+ if ch == '"':
130
+ in_str = True
131
+ elif ch == open_ch:
132
+ if depth == 0:
133
+ start = i
134
+ depth += 1
135
+ elif ch == close_ch and depth:
136
+ depth -= 1
137
+ if depth == 0 and start >= 0:
138
+ spans.append(text[start : i + 1])
139
+ return spans
140
+
141
+
142
+ def _extract_json(text: str | None) -> Any:
143
+ """Best-effort parse of a model reply into JSON: the whole text, a fenced ```json
144
+ block, or the first balanced array/object embedded in prose (models prepend a
145
+ sentence). Returns ``None`` if nothing parses — the caller then gives up cleanly."""
146
+ if not text:
147
+ return None
148
+ import json
149
+ import re
150
+
151
+ candidates = [text]
152
+ fence = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
153
+ if fence:
154
+ candidates.append(fence.group(1))
155
+ candidates.extend(_balanced_spans(text))
156
+ for c in candidates:
157
+ try:
158
+ return json.loads(c)
159
+ except (ValueError, TypeError):
160
+ continue
161
+ return None
162
+
163
+
164
+ def _parse_fixes(data: Any, n_steps: int) -> dict[int, str]:
165
+ """Normalise the model's reply into ``{step_index: near_label}`` — accepting a bare list
166
+ or a ``{"fixes": [...]}`` wrapper, and dropping anything out of range or malformed (so a
167
+ sloppy reply yields fewer fixes, never a crash)."""
168
+ items = data.get("fixes") if isinstance(data, dict) else data
169
+ out: dict[int, str] = {}
170
+ if not isinstance(items, list):
171
+ return out
172
+ for item in items:
173
+ if not isinstance(item, dict):
174
+ continue
175
+ step, near = item.get("step"), item.get("near")
176
+ if (isinstance(step, int) and 0 <= step < n_steps
177
+ and isinstance(near, str) and near.strip()):
178
+ out[step] = near.strip()
179
+ return out
180
+
181
+
182
+ def _edit_messages(
183
+ diagnosis: Diagnosis, steps: list[tuple[int, int, str, Any]], page_text: str
184
+ ) -> list[dict]:
185
+ """The prompt: the task, why the round was held, the numbered brittle steps, and the
186
+ page text to anchor against — asking for STRICT JSON mapping each step to a ``near``
187
+ label. Deliberately generic: it asks for a nearby VISIBLE label, never a site answer."""
188
+ violations = "\n".join(
189
+ f"- [{v.rule}] {v.detail}" for v in diagnosis.guardrails.violations) or "- (none)"
190
+ listed = "\n".join(
191
+ f" step {i}: a `{verb}` targeting {selector!r} with no `near` fallback"
192
+ for i, (_mi, _ai, verb, selector) in enumerate(steps))
193
+ system = (
194
+ "You harden a browser-automation path. A targeting step that relies on a single "
195
+ "selector breaks when the site renames it; adding a `near` anchor (a short, stable "
196
+ "VISIBLE label beside the control) lets the runtime resolve the control by "
197
+ "proximity as a fallback. Choose anchors from the page text only — never invent a "
198
+ "value, and never encode the task's answer. Reply with STRICT JSON and nothing else."
199
+ )
200
+ user = (
201
+ f"Task: {diagnosis.bundle.task}\n\n"
202
+ f"This construction round was held:\n{violations}\n\n"
203
+ f"Single-selector steps to harden:\n{listed}\n\n"
204
+ f"Visible page text (choose `near` anchors from here):\n{page_text}\n\n"
205
+ 'Reply with JSON: {"fixes": [{"step": <int>, "near": "<short visible label>"}]}. '
206
+ "Include only the steps you can anchor; omit any you cannot."
207
+ )
208
+ return [{"role": "system", "content": system}, {"role": "user", "content": user}]
209
+
210
+
211
class LiveStrategist:
    """Strategist that asks a model for the next edit.

    With a ``provider`` it sends the brittle (single-selector) steps and the captured
    page text to the model, then applies the returned ``near`` labels to a deep copy of
    the diagnosed bundle. Without a provider, ``propose`` raises ``NotImplementedError``.

    It only edits the bundle. When the bundle has no brittle steps, or the model reply
    yields no usable fixes, ``propose`` returns ``None`` (gives up).
    """

    def __init__(self, provider: ModelProvider | None = None) -> None:
        self._provider = provider

    async def propose(self, diagnosis: Diagnosis) -> Edit | None:
        """Return an edit adding ``near`` fallbacks, or ``None`` if nothing can be patched.

        Raises:
            NotImplementedError: when constructed without a provider.
        """
        provider = self._provider
        if provider is None:
            raise NotImplementedError(
                "the live strategist is the live lane — it needs a model to decide the next "
                "edit (the headline meta-agent: a Claude CLI driving the zu mcp tools in a "
                "sandbox). Pass a provider, inject a ScriptedStrategist for offline runs, or "
                "use `zu construct --check` for a one-round readiness report."
            )

        brittle = _brittle_steps(diagnosis.bundle)
        if not brittle:
            # Nothing a bundle edit can fix here, so give up and leave it for review.
            return None

        prompt = _edit_messages(diagnosis, brittle, _page_text(diagnosis.bundle))
        reply = await provider.complete(ModelRequest(messages=prompt))
        fixes = _parse_fixes(_extract_json(reply.text), len(brittle))
        if not fixes:
            return None

        # Patch a copy so the diagnosed bundle itself is left untouched.
        patched = copy.deepcopy(diagnosis.bundle)
        summaries: list[str] = []
        for step_no, label in fixes.items():
            move_idx, action_idx, verb, selector = brittle[step_no]
            patched.moves[move_idx]["args"]["actions"][action_idx]["near"] = label
            summaries.append(f"{verb} {selector!r} +near={label!r}")
        return Edit(bundle=patched, note="add `near` fallback(s): " + "; ".join(summaries))
253
+
254
+
255
def live_capture(spec: Any, cfg: Any, agent_dir: str | Path) -> Bundle:
    """Placeholder for stage-2 live capture; always raises.

    Live capture is not implemented in this module. Record ``fixtures/capture.json``
    with ``zu capture`` first, then let ``construct`` iterate on it offline.

    Raises:
        NotImplementedError: on every call.
    """
    message = (
        "live capture needs keys + network — run `zu capture <agent>` once to record "
        "fixtures/capture.json, then construct iterates it offline."
    )
    raise NotImplementedError(message)
263
+
264
+
265
@dataclass
class RoundResult:
    """Outcome of a single construction round, as recorded on the report."""

    round: int  # 1-based round number
    build_ok: bool  # whether the offline build was ok this round
    guardrails_passed: bool  # whether the guardrail gate passed this round
    note: str  # "converged", or what held the round and what happened next
271
+
272
+
273
@dataclass
class ConstructionReport:
    """What ``construct`` hands back: the per-round history plus the last attempt's state."""

    rounds: list[RoundResult] = field(default_factory=list)
    final_build: BuildReport | None = None
    final_guardrails: GuardrailReport | None = None
    bundle: Bundle | None = None  # the working bundle as last tried — handed back for review

    @property
    def converged(self) -> bool:
        """True only when the last build is ok and the last guardrail check passed."""
        build, guards = self.final_build, self.final_guardrails
        if not build or not build.ok:
            return False
        return bool(guards and guards.passed)
284
+
285
+
286
async def construct(
    spec: Any, cfg: Any, agent_dir: str | Path, bundle: Bundle, strategist: Strategist,
    *, max_rounds: int = 3, min_resilience: float = 1.0,
) -> ConstructionReport:
    """Run the build → guardrails → edit loop for at most ``max_rounds`` rounds.

    Every round runs the offline build and the guardrail check on the current bundle
    and stores both results (and the bundle) on the report. If both succeed, the round
    is recorded as ``"converged"`` and the report is returned. Otherwise the strategist
    is asked for an edit: ``None`` ends the loop ("gave up"); an edit replaces the
    working bundle for the next round. Nothing is promoted — the caller reviews the
    returned report.
    """
    report = ConstructionReport(bundle=bundle)
    for round_no in range(1, max_rounds + 1):
        build = await build_offline(spec, cfg, agent_dir, bundle, min_score=min_resilience)
        guards = await enforce_guardrails(
            spec, cfg, bundle, agent_dir, min_resilience=min_resilience)
        report.final_build, report.final_guardrails, report.bundle = build, guards, bundle

        if build.ok and guards.passed:
            report.rounds.append(RoundResult(round_no, True, True, "converged"))
            return report

        # Describe what held the round: the build, the guardrails, or both.
        reasons: list[str] = []
        if not build.ok:
            reasons.append("build held")
        if not guards.passed:
            reasons.append(f"{len(guards.violations)} guardrail violation(s)")
        held = "; ".join(reasons)

        edit = await strategist.propose(Diagnosis(round_no, build, guards, bundle))
        if edit is None:
            report.rounds.append(
                RoundResult(round_no, build.ok, guards.passed, f"{held}; gave up"))
            return report
        report.rounds.append(
            RoundResult(round_no, build.ok, guards.passed, f"{held}; edit: {edit.note}"))
        bundle = edit.bundle

    # Out of rounds: the report already reflects the last attempt.
    return report
@@ -0,0 +1,139 @@
1
+ """In-container construction entrypoint — the autonomous brain, contained.
2
+
3
+ The production form of the meta-agent (the headline) is *zu's own ``construct()`` loop run
4
+ INSIDE the hardened container* ``SandboxLauncher`` builds — not an external CLI binary. The
5
+ meta-agent is just another contained zu run: caps dropped, blocking seccomp, and its only
6
+ egress the model endpoint (construction is offline except the strategist's model calls).
7
+ That reuses everything — the offline spine (build → record track → harden), the
8
+ ``LiveStrategist`` brain, the anti-hardcode guardrails, cost telemetry, and the event log
9
+ (so the meta-agent's every step is observable) — instead of bolting on a binary that drives
10
+ zu over stdio and reasons outside the log.
11
+
12
+ Two halves, like ``zu_cli.sandbox``:
13
+
14
+ * :func:`construct_contained_from_env` — the in-container entrypoint (console script
15
+ ``zu-construct-contained``). Reads the mounted agent, runs construction, and writes one
16
+ JSON object (the report + the hardened track it produced) on stdout.
17
+ * the host-side launcher (the next increment) execs this inside the same hardened container,
18
+ with the model endpoint on the egress allowlist, and parses the report back.
19
+
20
+ :func:`run_contained_construction` is the testable core — it runs the loop on a *writable
21
+ copy* of the agent (the bundle is mounted read-only, but the offline spine writes
22
+ ``track.json``) with no Docker and no env, so the orchestration is verified the way the rest
23
+ of zu is: fakes/scripted providers, offline, ~$0.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import asyncio
29
+ import json
30
+ import os
31
+ import shutil
32
+ import sys
33
+ import tempfile
34
+ from pathlib import Path
35
+ from typing import Any
36
+
37
+
38
+ def _report_to_dict(report: Any, track_text: str | None) -> dict:
39
+ """Project a ConstructionReport into a JSON-able payload — convergence, each round's
40
+ outcome, the violations still standing, and (only if it converged) the hardened track
41
+ contents to write back. The stdout contract the host-side launcher parses."""
42
+ guards = report.final_guardrails
43
+ return {
44
+ "ok": True,
45
+ "converged": report.converged,
46
+ "ready": report.converged, # converged == build clean AND guardrails passed (G1–G3)
47
+ "rounds": [
48
+ {"round": r.round, "build_ok": r.build_ok,
49
+ "guardrails_passed": r.guardrails_passed, "note": r.note}
50
+ for r in report.rounds
51
+ ],
52
+ "violations": [
53
+ {"rule": v.rule, "detail": v.detail} for v in (guards.violations if guards else [])
54
+ ],
55
+ "resilience": guards.resilience if guards else None,
56
+ # The deliverable, handed back for review (G4 — never auto-promoted): the hardened
57
+ # track.json, present only when construction converged.
58
+ "track": track_text,
59
+ }
60
+
61
+
62
async def _run(agent_dir: str | Path, *, max_rounds: int, min_resilience: float) -> dict:
    """Copy the agent to a temp dir, run ``construct`` there, and return the report dict.

    The copy skips ``track.json`` and ``cost.jsonl``. The strategist is a
    ``LiveStrategist`` backed by the provider built from the agent's config. If the
    report converged and a ``track.json`` exists in the working copy afterwards, its
    text is included in the payload; otherwise ``track`` is ``None``.
    """
    from .config import build_provider, load_agent
    from .construct import LiveStrategist, construct
    from .offline import Bundle, bundle_path

    # Work on a WRITABLE copy: the bundle is mounted read-only in the container, but the
    # offline spine writes track.json. Prior runtime artifacts are left out of the copy.
    skip_artifacts = shutil.ignore_patterns("track.json", "cost.jsonl")
    with tempfile.TemporaryDirectory(prefix="zu-construct-") as scratch:
        work = Path(scratch) / "agent"
        shutil.copytree(Path(agent_dir), work, ignore=skip_artifacts)

        spec, cfg = load_agent(str(work))
        bundle = Bundle.load(bundle_path(work))
        # The strategist's brain is the model configured for the agent.
        strategist = LiveStrategist(build_provider(cfg.provider))
        report = await construct(
            spec, cfg, str(work), bundle, strategist,
            max_rounds=max_rounds, min_resilience=min_resilience,
        )

        track_text = None
        if report.converged:
            track_file = work / "track.json"
            if track_file.is_file():
                track_text = track_file.read_text(encoding="utf-8")
        return _report_to_dict(report, track_text)
87
+
88
+
89
def run_contained_construction(
    agent_dir: str | Path, *, max_rounds: int = 3, min_resilience: float = 1.0
) -> dict:
    """Synchronous wrapper: drive ``_run`` to completion with ``asyncio.run``.

    Returns the JSON-able report dict that ``_run`` produces for a writable copy of
    ``agent_dir``.
    """
    job = _run(agent_dir, max_rounds=max_rounds, min_resilience=min_resilience)
    return asyncio.run(job)
96
+
97
+
98
def construct_contained_from_env(argv: list[str] | None = None) -> int:
    """Console-script entrypoint (``zu-construct-contained``): run construction from env.

    Reads the agent directory from ``ZU_BUNDLE`` and the optional
    ``ZU_CONSTRUCT_MAX_ROUNDS`` / ``ZU_CONSTRUCT_MIN_RESILIENCE`` settings, loads the
    bundle's ``.env``, runs construction, and writes exactly one JSON object followed by
    a newline to stdout.

    Every outcome honours that stdout contract: a missing ``ZU_BUNDLE``, a malformed
    numeric setting, or a failure during construction all produce
    ``{"ok": false, "error": ...}`` rather than a bare traceback. The traceback still
    goes to stderr for debugging.

    Args:
        argv: accepted for console-script compatibility; not used.

    Returns:
        ``0`` when a report was emitted, ``1`` when an error object was emitted.
    """

    def _emit(payload: dict) -> None:
        # One JSON document per run, newline-terminated.
        json.dump(payload, sys.stdout, default=str)
        sys.stdout.write("\n")

    bundle = os.environ.get("ZU_BUNDLE")
    if not bundle:
        _emit({"ok": False, "error": "ZU_BUNDLE (the mounted agent dir) is not set"})
        return 1

    try:
        # The mounted bundle carries its own .env (the model key); load it before the
        # settings are read so the strategist's model is reachable.
        from .config import load_dotenv

        load_dotenv(Path(bundle) / ".env")
        max_rounds = int(os.environ.get("ZU_CONSTRUCT_MAX_ROUNDS", "3"))
        min_resilience = float(os.environ.get("ZU_CONSTRUCT_MIN_RESILIENCE", "1.0"))
        payload = run_contained_construction(
            bundle, max_rounds=max_rounds, min_resilience=min_resilience)
    except Exception as exc:  # top-level boundary: report the failure, don't lose it
        import traceback

        traceback.print_exc(file=sys.stderr)
        _emit({"ok": False, "error": f"{type(exc).__name__}: {exc}"})
        return 1

    _emit(payload)
    return 0
121
+
122
+
123
async def launch_contained_construction(
    launcher: Any, agent_dir: str | Path, *, allowlist: list[str],
    max_rounds: int = 3, min_resilience: float = 1.0,
) -> dict:
    """Host-side half: run the ``zu-construct-contained`` entrypoint through ``launcher``.

    Calls ``launcher.run_entrypoint`` with the entrypoint command, the two construction
    settings as string environment values, the egress ``allowlist``, and ``agent_dir``
    (as a string) for ``bundle_dir``. Whatever ``run_entrypoint`` returns is returned
    unchanged.
    """
    command = ["zu-construct-contained"]
    env = {
        "ZU_CONSTRUCT_MAX_ROUNDS": str(max_rounds),
        "ZU_CONSTRUCT_MIN_RESILIENCE": str(min_resilience),
    }
    result = await launcher.run_entrypoint(
        command, env, allowlist=allowlist, bundle_dir=str(agent_dir))
    return result
zu_cli/contribute.py ADDED
@@ -0,0 +1,104 @@
1
+ """Capability gaps → strong, reproducible issues.
2
+
3
+ zu's discipline is: when you hit a wall you don't hardcode around it — you build a GENERIC
4
+ capability (the model reasons, the tool exposes a primitive). This extends that to everyone
5
+ using zu. When a harness hits something zu genuinely can't do — a missing primitive, a
6
+ detector that won't fire, a selector zu can't resolve, a soft miss it mishandles — that's a
7
+ **capability gap in zu, not a bug in the user's agent**, and the fix belongs upstream.
8
+
9
+ The hard part of a good bug report is a reliable repro. Here it is **free**: a captured
10
+ ``fixtures/`` bundle reproduces the run deterministically at $0, so the maintainers' agent can
11
+ ``zu run --offline`` the attached bundle, reproduce the gap exactly, and build the generic
12
+ capability that closes it. This module turns a gap into that issue — agent config + the
13
+ repeatable example + expected/observed + a proposed generic capability — ready to file.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import shlex
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+
23
# The repo a capability gap is contributed to. Overridable so a fork/mirror can retarget
# it: the ZU_CONTRIBUTE_REPO environment variable is read once, when this module is
# imported.
ZU_REPO = os.environ.get("ZU_CONTRIBUTE_REPO", "k3-mt/zu")
# Issue label that ``GapReport.gh_command`` passes to ``gh issue create --label``.
GAP_LABEL = "capability-gap"
26
+
27
+
28
+ def _zu_version() -> str:
29
+ from importlib.metadata import PackageNotFoundError, version
30
+
31
+ for dist in ("zu-runtime", "zu-cli", "zu-core"):
32
+ try:
33
+ return version(dist)
34
+ except PackageNotFoundError:
35
+ continue
36
+ return "unknown"
37
+
38
+
39
@dataclass
class GapReport:
    """A ready-to-file capability-gap issue.

    Holds the issue ``title``, the markdown ``body``, whether a ``fixtures/`` repro is
    attached (``has_repro``), and the path to that repro (``repro_path``, ``None`` when
    there is none).
    """

    title: str
    body: str
    has_repro: bool
    repro_path: str | None

    def gh_command(self, body_file: str, *, repo: str = ZU_REPO) -> str:
        """Return a ``gh issue create`` command line for this report.

        The body is passed by file (it is multi-line and embeds YAML), so the caller
        must write it to ``body_file`` first.

        Every interpolated value is shell-quoted. ``repo`` in particular can come from
        the ``ZU_CONTRIBUTE_REPO`` environment variable, so it must not be pasted into
        the command raw. Values made only of shell-safe characters (such as the default
        repo and label) are left unchanged by ``shlex.quote``.

        Args:
            body_file: path of the file holding the issue body.
            repo: ``owner/name`` of the target repository.

        Returns:
            The command as a single shell-ready string.
        """
        return (f"gh issue create --repo {shlex.quote(repo)} "
                f"--label {shlex.quote(GAP_LABEL)} "
                f"--title {shlex.quote(self.title)} --body-file {shlex.quote(body_file)}")
54
+
55
+
56
def build_gap_report(
    agent_dir: str | Path, *, summary: str, expected: str, observed: str,
    proposed: str | None = None, zu_version: str | None = None,
) -> GapReport:
    """Assemble a capability-gap issue for the agent in ``agent_dir``.

    The body embeds the text of ``agent.yaml`` (or ``agent.yml``; empty if neither
    exists) and a "Repeatable example" section. When the fixtures bundle file exists,
    that section explains how to replay it with ``zu run <agent> --offline``; otherwise
    it warns that a repro has to be captured first. ``zu_version`` defaults to the
    installed version reported by ``_zu_version``.
    """
    from .offline import FIXTURES_DIR, bundle_path

    base = Path(agent_dir)
    cfg_path = next(
        (base / name for name in ("agent.yaml", "agent.yml") if (base / name).is_file()),
        None,
    )
    cfg_text = cfg_path.read_text(encoding="utf-8") if cfg_path is not None else ""

    repro = bundle_path(base)
    has_repro = repro.is_file()
    version = zu_version or _zu_version()
    title = f"Capability gap: {summary}"

    if has_repro:
        repro_section = (
            f"This agent ships `{FIXTURES_DIR}/capture.json` — a deterministic, $0 reproduction.\n"
            f"Reproduce the gap with **no model and no network**:\n\n"
            f"```\nzu run <agent> --offline\n```\n"
        )
    else:
        repro_section = (
            "⚠️ **No fixtures bundle attached.** A capability gap needs a repeatable example so it "
            "can be picked up. Capture one first — drive the path with `zu_explore` (your harness) "
            "or `zu capture` (once, live) to record `fixtures/capture.json`, then re-run this.\n"
        )

    if proposed:
        proposed_section = f"## Proposed generic capability\n{proposed}\n\n"
    else:
        proposed_section = (
            "## Proposed generic capability\n_(none suggested — describe the smallest GENERIC "
            "primitive that would close this, in zu's no-hardcoding spirit.)_\n\n"
        )

    sections = [
        f"## What I was building\n{summary}\n\n",
        f"## What I expected\n{expected}\n\n",
        f"## What zu did (the gap)\n{observed}\n\n",
        f"## Repeatable example\n{repro_section}\n",
        f"<details><summary>agent.yaml</summary>\n\n```yaml\n{cfg_text.rstrip()}\n```\n</details>\n\n",
        f"{proposed_section}",
        f"## Environment\n- zu {version}\n\n",
        f"---\n_Filed via `zu_report_gap`. The fix should be a generic capability (no "
        f"site-specific hardcoding); the attached bundle replays the gap deterministically._\n",
    ]
    return GapReport(
        title=title,
        body="".join(sections),
        has_repro=has_repro,
        repro_path=str(repro) if has_repro else None,
    )