zu-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli/__init__.py +0 -0
- zu_cli/build.py +111 -0
- zu_cli/config.py +738 -0
- zu_cli/construct.py +318 -0
- zu_cli/construct_sandbox.py +139 -0
- zu_cli/contribute.py +104 -0
- zu_cli/demo.py +373 -0
- zu_cli/deploy.py +207 -0
- zu_cli/explore.py +93 -0
- zu_cli/guardrails.py +102 -0
- zu_cli/harden.py +221 -0
- zu_cli/main.py +1126 -0
- zu_cli/mcp_server.py +444 -0
- zu_cli/observe.py +69 -0
- zu_cli/offline.py +335 -0
- zu_cli/sandbox.py +276 -0
- zu_cli/scaffold.py +116 -0
- zu_cli/server.py +363 -0
- zu_cli/trace.py +111 -0
- zu_cli-0.1.0.dist-info/METADATA +26 -0
- zu_cli-0.1.0.dist-info/RECORD +23 -0
- zu_cli-0.1.0.dist-info/WHEEL +4 -0
- zu_cli-0.1.0.dist-info/entry_points.txt +4 -0
zu_cli/mcp_server.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""The `zu mcp` server — drive Zu from any MCP coding agent.
|
|
2
|
+
|
|
3
|
+
A developer lives in their harness of choice (Claude Code, Cursor, …), types in
|
|
4
|
+
natural language, and the agent uses these tools to *design, validate, run,
|
|
5
|
+
inspect* — and *construct* — a Zu agent on their behalf, then streams the run back
|
|
6
|
+
so they can watch it work. It is a thin wrapper over the same engine the CLI uses
|
|
7
|
+
(config, the loop, the event bus), exposed over MCP's stdio transport.
|
|
8
|
+
|
|
9
|
+
The construction tools (``zu_offline_run`` / ``zu_build`` / ``zu_harden`` /
|
|
10
|
+
``zu_construct``) expose the offline construction sequence — replay a captured
|
|
11
|
+
``fixtures/`` bundle, build a hardened track, score resilience, and run the
|
|
12
|
+
anti-hardcode readiness gate — all at ~$0 (no model, no network). They are the
|
|
13
|
+
surface the autonomous meta-agent drives: an external agent reads the readiness
|
|
14
|
+
violations, edits the agent, and re-checks until it clears the gate.
|
|
15
|
+
|
|
16
|
+
The exploration tools (``zu_explore`` / ``zu_explore_save``) let the DEVELOPER's own
|
|
17
|
+
harness model pathfind a live site step by step and capture that discovery as the
|
|
18
|
+
agent's ``fixtures/`` bundle — so the frontier reasoning is spent once, in the harness
|
|
19
|
+
they already use, and the path replays free thereafter.
|
|
20
|
+
|
|
21
|
+
The live stream-back is the point: ``zu_run`` subscribes to the event bus and
|
|
22
|
+
pushes every step — the model's train of thought, each tool call and result,
|
|
23
|
+
detector verdicts, escalations — to the client as an MCP log message *as it
|
|
24
|
+
happens*, using the same formatter as the CLI and the SSE stream. One formatter,
|
|
25
|
+
three surfaces.
|
|
26
|
+
|
|
27
|
+
Optional dependency (the ``mcp`` extra): ``pip install 'zu-runtime[mcp]'``.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import json
|
|
33
|
+
import logging
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
37
|
+
|
|
38
|
+
from zu_core.loop import run_task
|
|
39
|
+
from zu_core.registry import GROUPS, Registry
|
|
40
|
+
|
|
41
|
+
from .config import ConfigError, RunConfig, assemble, coerce_config, coerce_task
|
|
42
|
+
from .trace import format_event
|
|
43
|
+
|
|
44
|
+
log = logging.getLogger("zu.mcp")
|
|
45
|
+
|
|
46
|
+
# Config/task coercion (a tool arg may be a dict or a file path) is shared with
|
|
47
|
+
# `zu serve` and the embed facade — see zu_cli.config. The MCP tools accept a str
|
|
48
|
+
# task as a *path* (``allow_paths=True``): the agent driving these tools runs on
|
|
49
|
+
# the same host, so reading a task file it points at is intended.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _discovered() -> dict[str, list[str]]:
    """Return the names a freshly discovered registry reports for each plugin group.

    A new ``Registry`` is built and ``discover()`` is run on every call; the
    result maps each entry of ``GROUPS`` to ``registry.names(group)``.
    """
    registry = Registry()
    registry.discover()
    names_by_group: dict[str, list[str]] = {}
    for group in GROUPS:
        names_by_group[group] = registry.names(group)
    return names_by_group
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _load_for_construction(agent: str) -> tuple[Any, Any, Any, Any]:
    """Gather ``(spec, cfg, agent_dir, bundle)`` for a construction tool.

    ``spec`` and ``cfg`` come from ``load_agent(agent)``. ``agent_dir`` is
    ``agent`` itself when that path is a directory, otherwise its parent.
    ``bundle`` is loaded from ``bundle_path(agent_dir)``.

    Raises ``ConfigError`` (bad agent) or ``OfflineError`` (no ``fixtures/``
    bundle yet); the calling tool turns either into a clean
    ``{"ok": False, "error": ...}``.
    """
    from pathlib import Path

    from .config import load_agent
    from .offline import Bundle, bundle_path

    spec, cfg = load_agent(agent)

    agent_path = Path(agent)
    if agent_path.is_dir():
        agent_dir = agent_path
    else:
        agent_dir = agent_path.parent

    bundle = Bundle.load(bundle_path(agent_dir))
    return spec, cfg, agent_dir, bundle
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def build_server() -> FastMCP:
|
|
74
|
+
"""Build the FastMCP server. Factored out so tests can drive the tools
|
|
75
|
+
in-process via ``server.call_tool(...)``."""
|
|
76
|
+
mcp = FastMCP("zu-runtime")
|
|
77
|
+
|
|
78
|
+
# One live pathfinding session per server process (a developer explores one site at a
|
|
79
|
+
# time in their harness). Held in a mutable cell the explore tools share.
|
|
80
|
+
_explore: dict[str, Any] = {"session": None}
|
|
81
|
+
|
|
82
|
+
@mcp.tool()
async def zu_plugins() -> dict:
    """List every plugin Zu can discover here (providers, tools, detectors,
    validators, sinks, backends), so the agent knows what it can wire."""
    # Discovery is re-run on every call (see _discovered), so the listing is current.
    discovered = _discovered()
    return discovered
|
|
87
|
+
|
|
88
|
+
@mcp.tool()
async def zu_scaffold(directory: str = ".", template: str = "web", force: bool = False) -> dict:
    """Create a starter agent.yaml in ``directory``. Templates:
    'web' (tier-1/2 web extraction), 'minimal' (no tools), 'research'
    (multi-field article extraction)."""
    from .scaffold import TEMPLATE_NAMES, write_template

    # Guard: an unregistered template name is reported back with the valid choices.
    is_known = template in TEMPLATE_NAMES
    if not is_known:
        return {"ok": False, "error": f"unknown template {template!r}; choose: {list(TEMPLATE_NAMES)}"}

    try:
        created = write_template(directory, template, force=force)
    except FileExistsError as clash:
        # Existing files are only overwritten when the caller opts in with force.
        return {"ok": False, "error": f"files exist: {clash} (pass force=true to overwrite)"}
    else:
        outcome = {
            "ok": True,
            "template": template,
            "files": created,
            "next": "Set the provider's API key, then call zu_validate, then zu_run.",
        }
        return outcome
|
|
107
|
+
|
|
108
|
+
@mcp.tool()
async def zu_validate(config: Any = None, task: Any = None) -> dict:
    """Validate a run config (and optionally a task) without executing: load
    it, discover and select plugins, and build the provider — surfacing any
    error with a clear message. ``config``/``task`` may be a path or a dict."""
    # Every step that can raise ConfigError stays inside one try so the caller
    # always gets the same {"ok": False, "error": ...} shape.
    try:
        cfg = coerce_config(config)
        provider, registry, _unused_bus, _unused_providers = assemble(cfg)

        active_plugins = {}
        for group in ("tools", "detectors", "validators"):
            active_plugins[group] = registry.names(group)

        task_summary = None
        if task is not None:
            # A str task is treated as a path here (allow_paths=True).
            parsed = coerce_task(task, cfg.budget, allow_paths=True)
            task_summary = {
                "query": parsed.query,
                "target": parsed.target,
                "max_tier": parsed.max_tier,
            }
    except ConfigError as problem:
        return {"ok": False, "error": str(problem)}

    return {
        "ok": True,
        "provider": cfg.provider.name,
        # Not every provider object is assumed to carry a ``model`` attribute.
        "model": getattr(provider, "model", None),
        "active_plugins": active_plugins,
        "task": task_summary,
    }
|
|
130
|
+
|
|
131
|
+
@mcp.tool()
async def zu_run(task: Any, config: Any = None, ctx: Context | None = None) -> dict:
    """Run a task and STREAM the run back live — every step (train of thought,
    tool calls, detectors, escalations) is sent to you as it happens — then
    return a concise result + run_id. ``task``/``config`` may be a path or a
    dict. Use zu_traces with the run_id to read the full event log."""
    try:
        cfg = coerce_config(config)
        spec = coerce_task(task, cfg.budget, allow_paths=True)
        provider, registry, bus, providers = assemble(cfg)
    except ConfigError as problem:
        return {"ok": False, "error": str(problem)}

    async def _stream_to_client(event: Any) -> None:
        """Forward one formatted event to the MCP client, if there is one."""
        text = format_event(event)
        # Nothing to send when the formatter yields no line or no client context exists.
        if not text or ctx is None:
            return
        try:
            await ctx.info(text)  # live to the client; never break the run
        except Exception as exc:  # noqa: BLE001 - a transport hiccup must not break the run
            log.debug("ctx.info failed (dropping a live trace line): %s", exc)

    bus.subscribe(_stream_to_client)
    # The same observability hook: queue any blocked attempt for review.
    from .observe import attach_observability

    attach_observability(bus, cfg.observability)

    try:
        result = await run_task(spec, provider, registry, bus, providers=providers)
    except Exception as exc:  # noqa: BLE001 - a model/infra failure is data, not a crash
        failure = {
            "ok": False,
            "run_id": str(spec.task_id),
            "error": f"{type(exc).__name__}: {exc}",
        }
        return failure

    status = result.status.value
    return {
        "ok": status == "success",
        "run_id": str(spec.task_id),
        "status": status,
        "value": result.value,
        "reason": result.reason,
        "events": await bus.count(),
        "hint": "call zu_traces with this run_id to read the full event log",
    }
|
|
171
|
+
|
|
172
|
+
@mcp.tool()
async def zu_traces(
    db_path: str = "zu.db", run_id: str | None = None, limit: int = 100, after_seq: int = 0
) -> dict:
    """Read the event log (the always-on store) for a run — what the agent
    actually did. Filter by run_id; page forward with after_seq. Reads a
    sqlite trace sink (the default event_sink in the scaffolded config)."""
    # The sqlite sink lives in an optional package; report its absence as data.
    try:
        from zu_backends.sqlite_sink import SqliteSink
    except ModuleNotFoundError:
        return {"ok": False, "error": "reading sqlite traces needs zu-backends (in zu-runtime base)"}

    # An empty or missing run_id means "no filter".
    query_filter = None
    if run_id:
        query_filter = {"trace_id": run_id}

    store = SqliteSink(db_path)
    page = await store.query(query_filter, limit=limit, after_seq=after_seq)
    total = await store.count(query_filter)

    rows = []
    for item in page:
        rows.append(
            {
                "type": item.type,
                "source": item.source,
                "ts": item.ts.isoformat(),
                "payload": item.payload,
            }
        )
    return {"ok": True, "total": total, "returned": len(page), "events": rows}
|
|
196
|
+
|
|
197
|
+
@mcp.tool()
async def zu_offline_run(agent: str) -> dict:
    """Replay an agent against its captured ``fixtures/`` bundle — no model, no network,
    ~$0. The agent must have a ``fixtures/capture.json`` (from ``zu capture``). Returns
    the result and whether it succeeded — the cheap inner loop of construction."""
    from .offline import OfflineError, replay_offline

    try:
        loaded = _load_for_construction(agent)
    except (ConfigError, OfflineError) as problem:
        return {"ok": False, "error": str(problem)}
    spec, cfg, _agent_dir, bundle = loaded

    result, events = await replay_offline(spec, cfg, bundle)
    status = result.status.value
    return {
        "ok": status == "success",
        "status": status,
        "value": result.value,
        "reason": result.reason,
        # Only the number of replayed events is returned, not the events themselves.
        "events": len(events),
    }
|
|
216
|
+
|
|
217
|
+
@mcp.tool()
async def zu_build(agent: str, min_resilience: float = 1.0) -> dict:
    """Run the offline construction spine — build → record track → harden — at ~$0, and
    write a hardened ``track.json`` next to the agent. Returns each stage's outcome, the
    track path, and the resilience score. Needs a captured bundle."""
    from .build import build_offline
    from .offline import OfflineError

    try:
        loaded = _load_for_construction(agent)
    except (ConfigError, OfflineError) as problem:
        return {"ok": False, "error": str(problem)}
    spec, cfg, agent_dir, bundle = loaded

    report = await build_offline(spec, cfg, agent_dir, bundle, min_score=min_resilience)

    stage_rows = []
    for stage in report.stages:
        stage_rows.append({"name": stage.name, "status": stage.status, "detail": stage.detail})

    # A falsy ``report.harden`` means there is no resilience score to report.
    if report.harden:
        resilience = report.harden.resilience
    else:
        resilience = None

    return {
        "ok": report.ok,
        "stages": stage_rows,
        "track_path": report.track_path,
        "resilience": resilience,
    }
|
|
237
|
+
|
|
238
|
+
@mcp.tool()
async def zu_harden(agent: str) -> dict:
    """Score how brittle a captured path is — replay perturbed fixtures offline (~$0).
    Returns the resilience score (fraction of cosmetic page changes the path absorbs),
    whether grounding is load-bearing (the score is only meaningful if value-deletion
    controls fail), and the static brittleness findings to fix."""
    from .harden import harden
    from .offline import OfflineError

    try:
        loaded = _load_for_construction(agent)
    except (ConfigError, OfflineError) as problem:
        return {"ok": False, "error": str(problem)}
    spec, cfg, _agent_dir, bundle = loaded

    outcome = await harden(spec, cfg, bundle)

    finding_rows = []
    for finding in outcome.findings:
        finding_rows.append(
            {"kind": finding.kind, "where": finding.where, "detail": finding.detail}
        )

    # "ok" reports that the scoring ran, independent of the score itself.
    return {
        "ok": True,
        "resilience": outcome.resilience,
        "grounding_load_bearing": outcome.grounding_load_bearing,
        "findings": finding_rows,
    }
|
|
259
|
+
|
|
260
|
+
@mcp.tool()
async def zu_construct(agent: str, min_resilience: float = 1.0) -> dict:
    """The construction-readiness gate (one round, no model, ~$0): the offline build
    plus the anti-hardcode guardrails (G1 alternate locators, G2 resilience, G3 no
    hardcoded answer). Returns whether the agent is ready for promotion and, if not, the
    violations to fix — the loop an autonomous agent drives: read the violations, edit
    the agent, call again until ``ready`` is true. Never promotes (review gate G4)."""
    from .build import build_offline
    from .guardrails import enforce_guardrails
    from .offline import OfflineError

    try:
        loaded = _load_for_construction(agent)
    except (ConfigError, OfflineError) as problem:
        return {"ok": False, "error": str(problem)}
    spec, cfg, agent_dir, bundle = loaded

    # The build runs first, then the guardrails; both use the same threshold.
    build_report = await build_offline(spec, cfg, agent_dir, bundle, min_score=min_resilience)
    guard_report = await enforce_guardrails(
        spec, cfg, bundle, agent_dir, min_resilience=min_resilience
    )

    violation_rows = []
    for violation in guard_report.violations:
        violation_rows.append({"rule": violation.rule, "detail": violation.detail})

    return {
        # "ok" means the gate ran; "ready" carries the actual verdict.
        "ok": True,
        "ready": build_report.ok and guard_report.passed,
        "build_ok": build_report.ok,
        "guardrails_passed": guard_report.passed,
        "resilience": guard_report.resilience,
        "violations": violation_rows,
    }
|
|
286
|
+
|
|
287
|
+
@mcp.tool()
async def zu_explore(
    tool: str, op: str | None = None, url: str | None = None,
    actions: list | None = None, capture_network: bool = False,
    wait_until: str | None = None, html: bool = False,
) -> dict:
    """Pathfind a LIVE site one step at a time — YOU (the harness model) drive zu's
    off-box tool, see the observation, and decide the next step; the trail is recorded.
    The session persists across calls. When the path reaches the data you need, call
    ``zu_explore_save`` to capture it as the agent's fixtures.

    ``tool`` is one of: ``http_fetch`` / ``render_dom`` (one-shot — pass ``url``) or
    ``browser`` (a PERSISTENT session — pass ``op`` open/act/read/close, plus ``url`` for
    open and ``actions`` for act). Tip: fetch the page first; if it's a JS shell, drive
    the browser — that fetch step is what lets the agent escalate offline later."""
    from .explore import EXPLORABLE, new_session

    if tool not in EXPLORABLE:
        return {"ok": False, "error": f"unknown tool {tool!r}; choose one of {list(EXPLORABLE)}"}

    # Any explorable tool other than the two one-shot fetchers takes the browser path.
    one_shot = tool in ("http_fetch", "render_dom")
    if one_shot and not url:
        return {"ok": False, "error": f"{tool} needs a url"}
    if not one_shot and not op:
        return {"ok": False, "error": "browser needs an op (open/act/read/close)"}

    call_args: dict[str, Any]
    if one_shot:
        call_args = {"url": url}
        if tool == "render_dom" and wait_until:
            call_args["wait_until"] = wait_until
    else:
        call_args = {"op": op}
        if url:
            call_args["url"] = url
        if actions:
            call_args["actions"] = actions

    # Optional switches are only forwarded when set, for every tool kind.
    for flag_name, enabled in (("capture_network", capture_network), ("html", html)):
        if enabled:
            call_args[flag_name] = True

    # Lazily create the one session shared by the explore tools.
    if _explore["session"] is None:
        _explore["session"] = new_session()

    try:
        observation = await _explore["session"].step(tool, call_args)
    except Exception as exc:  # noqa: BLE001 - a tool/SSRF failure is data for the harness
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}

    # The shared cell is read again after the await, as the step count is taken from it.
    step_number = len(_explore["session"].steps)
    return {"ok": True, "step": step_number, "observation": observation}
|
|
332
|
+
|
|
333
|
+
@mcp.tool()
async def zu_explore_save(agent: str, task: str, answer: Any) -> dict:
    """Capture the current exploration as the agent's ``fixtures/capture.json`` — the
    discovered path becomes a replayable bundle (then ``zu build`` hardens it into a
    track). ``task`` is the query the agent will run; ``answer`` is the final value you
    read (what the agent should produce). Ends the session."""
    from pathlib import Path

    from .offline import bundle_path

    sess = _explore["session"]
    if sess is None or not sess.steps:
        return {"ok": False, "error": "no exploration to save — call zu_explore first"}

    # ``agent`` may be the agent directory itself or a file inside it.
    p = Path(agent)
    agent_dir = p if p.is_dir() else p.parent
    out = bundle_path(agent_dir)

    try:
        out.parent.mkdir(parents=True, exist_ok=True)
        sess.to_bundle(task=task, answer=answer).save(out)
    except OSError as exc:
        # A filesystem failure is reported in the same shape as the other tools'
        # errors. The session is deliberately kept so the caller can retry the
        # save without re-exploring.
        return {"ok": False, "error": f"could not write bundle {out}: {type(exc).__name__}: {exc}"}

    steps = len(sess.steps)
    tools_seen = sorted({s["tool"] for s in sess.steps})

    # Saving ends the session, so release its browser the same way
    # zu_explore_reset does. Once the cell below is cleared, a later reset can
    # no longer reach this session to close it.
    if "browser" in sess.tools:
        try:
            await sess.tools["browser"](sess.ctx, op="close")
        except Exception as exc:  # noqa: BLE001 - best-effort teardown; the bundle is already saved
            log.debug("browser close failed after explore save (ignored): %s", exc)

    _explore["session"] = None  # a save finalizes the session
    return {
        "ok": True, "bundle": str(out), "steps": steps, "tools": tools_seen,
        "next": "`zu run --offline` replays it at ~$0; `zu build` hardens it into a track.",
    }
|
|
358
|
+
|
|
359
|
+
@mcp.tool()
async def zu_explore_reset() -> dict:
    """Discard the current exploration (close any open browser) and start fresh."""
    sess = _explore["session"]
    if sess is not None and "browser" in sess.tools:
        try:
            await sess.tools["browser"](sess.ctx, op="close")
        except Exception as exc:  # noqa: BLE001 - best-effort teardown
            # Still best-effort: the reset proceeds regardless, but the failure
            # is left in the debug log instead of vanishing silently.
            log.debug("browser close failed during explore reset (ignored): %s", exc)
    # The session is dropped whether or not the close succeeded.
    _explore["session"] = None
    return {"ok": True}
|
|
370
|
+
|
|
371
|
+
@mcp.tool()
async def zu_report_gap(
    agent: str, summary: str, expected: str, observed: str, proposed: str | None = None,
) -> dict:
    """When zu genuinely CAN'T do something — a missing tool/primitive, a detector that
    won't fire, a selector zu can't resolve, a soft miss it mishandles — that's a
    CAPABILITY GAP in zu, not a bug in your agent. **Don't hardcode around it.** This
    builds a strong, REPEATABLE issue for the zu repo: it embeds the agent's `agent.yaml`,
    points at its `fixtures/` bundle (a $0 deterministic repro the maintainers reproduce
    with `zu run --offline`), and records expected vs observed + a proposed GENERIC
    capability. Writes `gap-report.md` next to the agent and returns a ready
    `gh issue create` command. Capture a bundle first (`zu_explore` / `zu capture`) so the
    gap reproduces — see the `zu://contributing` resource."""
    from pathlib import Path

    from .contribute import ZU_REPO, build_gap_report

    # ``agent`` may be the agent directory itself or a file inside it.
    agent_path = Path(agent)
    if agent_path.is_dir():
        agent_dir = agent_path
    else:
        agent_dir = agent_path.parent

    report = build_gap_report(
        agent_dir, summary=summary, expected=expected, observed=observed, proposed=proposed
    )

    # The issue body is written next to the agent and also returned inline.
    report_file = agent_dir / "gap-report.md"
    report_file.write_text(report.body, encoding="utf-8")

    response = {
        "ok": True,
        "title": report.title,
        "issue_markdown": report.body,
        "has_repro": report.has_repro,
        "repro": report.repro_path,
        "report_file": str(report_file),
        "repo": ZU_REPO,
        "gh_command": report.gh_command(str(report_file)),
    }

    # The follow-up hint depends on whether a fixtures repro is attached.
    if report.has_repro:
        response["next"] = (
            "A repeatable fixtures repro is attached — file it (gh_command); the "
            "maintainers' agent reproduces it with `zu run --offline` and builds the "
            "generic capability."
        )
    else:
        response["next"] = (
            "⚠️ No fixtures bundle — capture one first (zu_explore / zu capture) so the "
            "gap reproduces deterministically, then re-run zu_report_gap."
        )
    return response
|
|
411
|
+
|
|
412
|
+
@mcp.resource("zu://contributing")
def contributing_resource() -> str:
    """When and how to contribute a capability gap upstream — read this on any wall."""
    # One entry per paragraph; the paragraphs are joined with a blank line.
    paragraphs = (
        "Contributing to zu — the no-hardcoding contract.",
        (
            "zu's rule: when you hit a wall you do NOT hardcode around it. The model reasons; "
            "tools expose generic primitives. So if zu can't do something you need, that is a "
            "CAPABILITY GAP in zu — file it upstream rather than working around it locally."
        ),
        (
            "What counts as a gap (not a user error): a missing tool/primitive; a detector "
            "that should have escalated but didn't; a selector or control zu can't resolve; a "
            "soft miss the loop mishandles; a tier ladder that can't express your flow."
        ),
        (
            "How to file a strong one: capture a repeatable example first — drive the path "
            "with `zu_explore` (your harness pathfinds the live site) or `zu capture` (one "
            "live run), which records `fixtures/capture.json`. That bundle reproduces the run "
            "deterministically at $0, so the maintainers reproduce the gap with "
            "`zu run --offline` and the maintainers' agent picks it up. Then call "
            "`zu_report_gap` to build the issue (agent.yaml + the repro + expected/observed + "
            "a proposed GENERIC capability) and a ready `gh issue create` command."
        ),
        (
            "The fix that lands will be a generic capability — which then helps every zu user, "
            "the same way the existing capabilities were built."
        ),
    )
    return "\n\n".join(paragraphs)
|
|
433
|
+
|
|
434
|
+
@mcp.resource("zu://plugins")
def plugins_resource() -> str:
    """Everything Zu can discover here — context for designing a config."""
    # Same data as the zu_plugins tool, rendered as indented JSON text.
    discovered = _discovered()
    rendered = json.dumps(discovered, indent=2)
    return rendered
|
|
438
|
+
|
|
439
|
+
@mcp.resource("zu://config/schema")
def config_schema_resource() -> str:
    """The JSON schema of a Zu run config — so the agent writes valid YAML."""
    # The schema is taken from RunConfig.model_json_schema() and pretty-printed.
    schema = RunConfig.model_json_schema()
    rendered = json.dumps(schema, indent=2)
    return rendered
|
|
443
|
+
|
|
444
|
+
return mcp
|
zu_cli/observe.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""The uniform observability hook — wired the same way by every harness.
|
|
2
|
+
|
|
3
|
+
"Show me what this agent is doing, and what its guards just blocked" should be
|
|
4
|
+
identical whether you ``zu run``, embed ``import zu``, ``zu serve``, drive it over
|
|
5
|
+
MCP, or run the red-team gate. So each harness builds its bus and then calls
|
|
6
|
+
``attach_observability(bus, cfg.observability)`` — one place, one behaviour. The
|
|
7
|
+
taps it wires:
|
|
8
|
+
|
|
9
|
+
* a live trace (the console train of thought), and
|
|
10
|
+
* a defense review queue: every ``harness.defense.blocked`` event (a contained
|
|
11
|
+
attack) is appended to a JSONL file, marked ``pending``, so a blocked attempt
|
|
12
|
+
is visible and triageable in test AND in production — never a silent log line.
|
|
13
|
+
|
|
14
|
+
It is all read-side: pure subscribers on the bus, capability-free, isolated by
|
|
15
|
+
append-before-notify. Observation never participates in a run.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
from collections.abc import Callable
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from zu_core import events as ev
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def defense_record(event: Any) -> dict:
    """Build the review-queue record for one contained attempt.

    The record starts from the event's payload (a missing or falsy payload
    counts as empty) and then sets four provenance/triage fields, which win
    over same-named payload keys:

    * ``ts`` — ``event.ts.isoformat()`` when available, otherwise ``str(event.ts)``
    * ``trace_id`` / ``event_id`` — the event's ids as strings
    * ``status`` — always ``"pending"``

    Shared with the HTTP server so the queue shape is identical everywhere.
    """
    payload = getattr(event, "payload", {}) or {}
    record = {**payload}

    if hasattr(event.ts, "isoformat"):
        timestamp = event.ts.isoformat()
    else:
        timestamp = str(event.ts)

    record.update(
        ts=timestamp,
        trace_id=str(event.trace_id),
        event_id=str(event.event_id),
        status="pending",
    )
    return record
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _review_tee(path: str) -> Callable[[Any], None]:
    """Return a subscriber that appends each defense event to the JSONL review queue.

    ``path`` is the queue file; it is opened in append mode for every matching
    event. Events whose ``type`` is not ``ev.DEFENSE_BLOCKED`` are ignored.

    Queue IO never breaks a run: a failure to build, serialize or write the
    record is swallowed (and noted at debug level), so the subscriber itself
    does not raise for those cases.
    """
    import logging

    def _on(event: Any) -> None:
        if getattr(event, "type", "") != ev.DEFENSE_BLOCKED:
            return
        try:
            # Serialize before opening the file so a record that cannot be
            # encoded never touches the queue.
            line = json.dumps(defense_record(event), default=str) + "\n"
            with open(path, "a", encoding="utf-8") as fh:
                fh.write(line)
        except (OSError, TypeError, ValueError, AttributeError) as exc:
            # OSError: the file could not be opened or written.
            # TypeError / ValueError: json.dumps rejects unsupported dict keys
            # and circular references even with ``default=str``.
            # AttributeError: the event lacks a field defense_record reads.
            logging.getLogger(__name__).debug(
                "review queue append failed (dropping a defense record): %s", exc
            )

    return _on
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def attach_observability(
    bus: Any, observability: Any, *, trace: bool = False, write: Callable[[str], None] | None = None
) -> None:
    """Subscribe the standard observability taps to ``bus``.

    ``observability`` is the config block; only its ``review_queue`` attribute
    (a JSONL path or None) is read here, and a missing attribute counts as None.
    ``trace`` turns on the live console trace (the CLI sets it; embedding leaves
    it off), with ``write`` handed to ``live_printer``.
    """
    if trace:
        from .trace import live_printer

        printer = live_printer(write)
        bus.subscribe(printer)

    queue_path = getattr(observability, "review_queue", None)
    if not queue_path:
        # No review queue configured: nothing further to wire.
        return
    bus.subscribe(_review_tee(queue_path))
|