zu-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli/__init__.py +0 -0
- zu_cli/build.py +111 -0
- zu_cli/config.py +738 -0
- zu_cli/construct.py +318 -0
- zu_cli/construct_sandbox.py +139 -0
- zu_cli/contribute.py +104 -0
- zu_cli/demo.py +373 -0
- zu_cli/deploy.py +207 -0
- zu_cli/explore.py +93 -0
- zu_cli/guardrails.py +102 -0
- zu_cli/harden.py +221 -0
- zu_cli/main.py +1126 -0
- zu_cli/mcp_server.py +444 -0
- zu_cli/observe.py +69 -0
- zu_cli/offline.py +335 -0
- zu_cli/sandbox.py +276 -0
- zu_cli/scaffold.py +116 -0
- zu_cli/server.py +363 -0
- zu_cli/trace.py +111 -0
- zu_cli-0.1.0.dist-info/METADATA +26 -0
- zu_cli-0.1.0.dist-info/RECORD +23 -0
- zu_cli-0.1.0.dist-info/WHEEL +4 -0
- zu_cli-0.1.0.dist-info/entry_points.txt +4 -0
zu_cli/scaffold.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Project scaffolding — the starter `agent.yaml` behind `zu init` and the MCP
|
|
2
|
+
`zu_scaffold` tool. One source of truth so the CLI and the coding-agent
|
|
3
|
+
integration always write the same thing.
|
|
4
|
+
|
|
5
|
+
A template is a single self-contained `agent.yaml`: the model, the escalation
|
|
6
|
+
ladder (`tiers:` — the tools at each tier, yours or built-in), the checks, and
|
|
7
|
+
the task — what + how in one file. Edit the `provider` block to swap models; drop
|
|
8
|
+
your own tools in a `tools/` dir beside it and list them in `tiers` as
|
|
9
|
+
`tools.my_module:MyTool`.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
|
|
16
|
+
_PROVIDER = (
|
|
17
|
+
"provider:\n"
|
|
18
|
+
" name: anthropic # scripted | anthropic | openai-compatible | <module:Class>\n"
|
|
19
|
+
" model: claude-sonnet-4-6\n"
|
|
20
|
+
" api_key_env: ANTHROPIC_API_KEY # the env var NAME — never the key itself\n"
|
|
21
|
+
)
|
|
22
|
+
_SINK = "event_sink: { driver: sqlite, path: ./zu.db }\n"
|
|
23
|
+
_BUDGET = "budget: { max_steps: 20, max_tokens: 200000, wall_time_s: 120 }\n"
|
|
24
|
+
|
|
25
|
+
# A tier-1/2 web-extraction agent: fetch, fall back to a browser on JS, validate.
|
|
26
|
+
_WEB = (
|
|
27
|
+
_PROVIDER
|
|
28
|
+
+ "tiers: # the escalation ladder — tools at each tier\n"
|
|
29
|
+
" 1: [http_fetch, html_parse] # cheap: fetch + parse (your own tools go here too)\n"
|
|
30
|
+
" 2: [render_dom] # escalate to a real browser when a detector says so\n"
|
|
31
|
+
"plugins:\n"
|
|
32
|
+
" detectors: [empty, error, js-shell, bot-wall] # what makes a tier give up\n"
|
|
33
|
+
" validators: [schema, grounding]\n"
|
|
34
|
+
+ _SINK
|
|
35
|
+
+ _BUDGET
|
|
36
|
+
+ "task:\n"
|
|
37
|
+
" query: \"Extract the product name and price.\"\n"
|
|
38
|
+
" target: \"https://example.com/product/123\"\n"
|
|
39
|
+
" max_tier: 2\n"
|
|
40
|
+
" output_schema:\n"
|
|
41
|
+
" type: object\n"
|
|
42
|
+
" properties:\n"
|
|
43
|
+
" name: { type: string }\n"
|
|
44
|
+
" price: { type: string }\n"
|
|
45
|
+
" required: [name, price]\n"
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
# The smallest agent: a model answers, schema-validated. No tools, no network.
|
|
49
|
+
_MINIMAL = (
|
|
50
|
+
_PROVIDER
|
|
51
|
+
+ "plugins:\n validators: [schema]\n"
|
|
52
|
+
+ _SINK
|
|
53
|
+
+ "task:\n"
|
|
54
|
+
" query: \"Answer the question as JSON: {\\\"answer\\\": ...}.\"\n"
|
|
55
|
+
" output_schema:\n"
|
|
56
|
+
" type: object\n"
|
|
57
|
+
" properties: { answer: { type: string } }\n"
|
|
58
|
+
" required: [answer]\n"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# A web-research agent: extract several fields from an article page.
|
|
62
|
+
_RESEARCH = (
|
|
63
|
+
_PROVIDER
|
|
64
|
+
+ "tiers:\n"
|
|
65
|
+
" 1: [http_fetch, html_parse]\n"
|
|
66
|
+
" 2: [render_dom]\n"
|
|
67
|
+
"plugins:\n"
|
|
68
|
+
" detectors: [empty, error, js-shell, bot-wall]\n"
|
|
69
|
+
" validators: [schema, grounding]\n"
|
|
70
|
+
+ _SINK
|
|
71
|
+
+ _BUDGET
|
|
72
|
+
+ "task:\n"
|
|
73
|
+
" query: \"Extract the article's title, author, and publication date.\"\n"
|
|
74
|
+
" target: \"https://example.com/article\"\n"
|
|
75
|
+
" max_tier: 2\n"
|
|
76
|
+
" output_schema:\n"
|
|
77
|
+
" type: object\n"
|
|
78
|
+
" properties:\n"
|
|
79
|
+
" title: { type: string }\n"
|
|
80
|
+
" author: { type: string }\n"
|
|
81
|
+
" published: { type: string }\n"
|
|
82
|
+
" required: [title]\n"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Registry of starter templates: template name -> {filename: file content}.
# Every template listed here consists of a single `agent.yaml`.
TEMPLATES: dict[str, dict[str, str]] = {
    "web": {"agent.yaml": _WEB},
    "minimal": {"agent.yaml": _MINIMAL},
    "research": {"agent.yaml": _RESEARCH},
}

# The template names as a tuple, in the registry's insertion order.
TEMPLATE_NAMES = tuple(TEMPLATES)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def render(template: str) -> dict[str, str]:
    """Return a fresh ``{filename: content}`` mapping for ``template``.

    The result is a shallow copy of the registry entry, so a caller may mutate
    it without touching ``TEMPLATES``. Raises KeyError when ``template`` is not
    a registered name.
    """
    files = TEMPLATES[template]
    return files.copy()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def write_template(directory: str, template: str, *, force: bool = False) -> list[str]:
    """Write a template's files into ``directory`` and return the paths written.

    Refuses to overwrite an existing file unless ``force`` (so a stray
    `zu init` never clobbers work). A refused call has no side effects: nothing
    is written and ``directory`` is not created.

    Raises:
        KeyError: ``template`` is unknown (raised by ``render``).
        FileExistsError: a target already exists and ``force`` is false.
    """
    files = render(template)
    targets = {name: os.path.join(directory, name) for name in files}
    if not force:
        # Check every target up front, before touching the filesystem, so a
        # refusal never leaves a half-written template or a stray directory.
        existing = [name for name, path in targets.items() if os.path.exists(path)]
        if existing:
            raise FileExistsError(", ".join(existing))
    os.makedirs(directory, exist_ok=True)
    # "x" (exclusive create) closes the gap between the check above and the
    # write: a file that appears in between raises FileExistsError instead of
    # being silently truncated. ``force`` keeps plain overwrite semantics.
    mode = "w" if force else "x"
    written: list[str] = []
    for name, content in files.items():
        path = targets[name]
        with open(path, mode, encoding="utf-8") as fh:
            fh.write(content)
        written.append(path)
    return written
|
zu_cli/server.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
"""`zu serve` — a thin HTTP wrapper over the same run path as the CLI, plus a
|
|
2
|
+
live observability dashboard.
|
|
3
|
+
|
|
4
|
+
Endpoints:
|
|
5
|
+
POST /run run a task, return Result (+ events) — the core API
|
|
6
|
+
POST /run/stream run a task, stream the loop live (SSE)
|
|
7
|
+
GET / the live dashboard (HTML) — watch production
|
|
8
|
+
GET /events a global live feed of ALL runs (SSE) — what the UI consumes
|
|
9
|
+
GET /review the defense review queue (blocked attempts) — triage
|
|
10
|
+
GET /healthz liveness
|
|
11
|
+
|
|
12
|
+
It is a *wrapper*, not a second code path — it assembles the provider/registry/bus
|
|
13
|
+
from config exactly as ``zu run`` does. Every run tees its events to a broadcast
|
|
14
|
+
hub, so the dashboard sees production traffic as it happens; and every
|
|
15
|
+
``harness.defense.blocked`` event (a contained attack) is queued to a JSONL review
|
|
16
|
+
file so a blocked attempt is never invisible.
|
|
17
|
+
|
|
18
|
+
FastAPI is an optional dependency (the ``serve`` extra): the import lives inside
|
|
19
|
+
``create_app``. Install it with ``pip install 'zu-runtime[serve]'``.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import asyncio
|
|
25
|
+
import hmac
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
from pydantic import BaseModel, Field
|
|
31
|
+
|
|
32
|
+
from zu_core import events as ev
|
|
33
|
+
from zu_core.loop import run_task
|
|
34
|
+
from zu_core.view import scope_event
|
|
35
|
+
|
|
36
|
+
from .config import ConfigError, assemble, coerce_config, coerce_task
|
|
37
|
+
from .observe import defense_record
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RunRequest(BaseModel):
    """The request body shared by POST /run and POST /run/stream.

    Defined at module scope (not inside create_app) so FastAPI can resolve the
    annotation under ``from __future__ import annotations``.
    """

    # Required: the task spec. The endpoints pass it to ``coerce_task``.
    task: dict = Field(..., description="The task spec (query, target, output_schema, ...).")
    # Optional: when present the endpoints coerce it and use it instead of the
    # server's default config for this one request.
    config: dict | None = Field(
        None, description="Optional per-request config override; omit to use the server default."
    )
    # Read by POST /run only: when true the response carries an "events" list.
    include_events: bool = Field(True, description="Return the run's event log alongside the result.")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class _Hub:
|
|
52
|
+
"""A tiny in-process pub/sub: every run publishes its events here, and each
|
|
53
|
+
GET /events client subscribes a bounded queue. Bounded so a slow client is
|
|
54
|
+
dropped, never able to back up a run (a slow dashboard must not block the
|
|
55
|
+
agent)."""
|
|
56
|
+
|
|
57
|
+
def __init__(self) -> None:
|
|
58
|
+
self._subscribers: set[asyncio.Queue] = set()
|
|
59
|
+
|
|
60
|
+
def publish(self, item: tuple[str, Any]) -> None:
|
|
61
|
+
for q in list(self._subscribers):
|
|
62
|
+
try:
|
|
63
|
+
q.put_nowait(item)
|
|
64
|
+
except asyncio.QueueFull:
|
|
65
|
+
pass # drop for a slow consumer; the canonical log is unaffected
|
|
66
|
+
|
|
67
|
+
def subscribe(self) -> asyncio.Queue:
|
|
68
|
+
q: asyncio.Queue = asyncio.Queue(maxsize=1000)
|
|
69
|
+
self._subscribers.add(q)
|
|
70
|
+
return q
|
|
71
|
+
|
|
72
|
+
def unsubscribe(self, q: asyncio.Queue) -> None:
|
|
73
|
+
self._subscribers.discard(q)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def create_app(
    config: Any = None, *, title: str = "Zu",
    review_queue: str | None = None, view_scope: str | None = None,
    auth_token: str | None = None,
) -> Any:
    """Build the ASGI app. ``config`` is the server's default run config; a request
    may override it per call. ``review_queue`` (JSONL path for blocked attempts)
    and ``view_scope`` (``render`` | ``full``) default to the config's
    ``observability`` block. Fails fast if the default config can't be loaded.

    ``auth_token`` (defaulting to the ``ZU_SERVE_TOKEN`` env var) gates every
    endpoint except ``/healthz``: when set, a request must present it as an
    ``Authorization: Bearer <token>`` header — or, for the SSE/dashboard GETs
    that can't set headers, a ``?token=`` query parameter. When unset the server
    is open (the localhost-dev default); the ``zu serve`` CLI refuses to bind a
    non-localhost host without a token so an exposed deploy can't be tokenless.

    Returns the FastAPI application. Raises RuntimeError when FastAPI is not
    installed."""
    try:
        from fastapi import Depends, FastAPI, Header, HTTPException
        from fastapi.responses import HTMLResponse, StreamingResponse
    except ModuleNotFoundError as exc:  # pragma: no cover - exercised via message
        raise RuntimeError(
            "the HTTP server needs FastAPI; install it with: pip install 'zu-runtime[serve]'"
        ) from exc

    from .trace import format_event

    required_token = auth_token if auth_token is not None else os.environ.get("ZU_SERVE_TOKEN")

    def require_auth(
        authorization: str | None = Header(default=None),
        token: str | None = None,
    ) -> None:
        # No token configured -> open (localhost dev). Otherwise require the token
        # via an ``Authorization: Bearer <token>`` header, or a ``?token=`` query
        # param for the SSE/dashboard GETs that can't set headers. Applied to every
        # route except /healthz (liveness must not need a credential).
        if not required_token:
            return
        header = authorization or ""
        presented = header[7:] if header[:7].lower() == "bearer " else token
        # Constant-time compare so the bearer token can't be recovered byte-by-byte
        # via response-timing on a budget-spending endpoint. Compare as bytes:
        # hmac.compare_digest raises TypeError for str arguments that contain
        # non-ASCII characters, which would turn a malformed credential into a
        # 500 instead of a clean 401.
        if presented is None or not hmac.compare_digest(
            presented.encode("utf-8"), required_token.encode("utf-8")
        ):
            raise HTTPException(status_code=401, detail="missing or invalid bearer token")

    auth = [Depends(require_auth)]  # applied per protected route below

    default_cfg = coerce_config(config)
    # Networked surfaces are allowlist-render by default (safe to leave on in
    # prod); ``full`` shows content for local/authorized viewing.
    review_path = review_queue if review_queue is not None else default_cfg.observability.review_queue
    scope_full = (view_scope or default_cfg.observability.scope) == "full"
    hub = _Hub()
    review: list[dict] = []  # in-memory view of the review queue (recent first)

    def _append_review(record: dict) -> None:
        """Record a blocked attempt in memory and, if configured, in the JSONL file."""
        review.insert(0, record)
        del review[200:]  # keep the in-memory view bounded
        if not review_path:
            return
        try:  # persist for triage; never let queue IO break a run
            with open(review_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, default=str) + "\n")
        except OSError:
            pass

    def _tee(event: Any) -> None:
        """A per-run bus subscriber: fan the event to the dashboard and queue any
        contained attempt for review."""
        hub.publish(("event", event))
        if event.type == ev.DEFENSE_BLOCKED:
            rec = defense_record(event)
            _append_review(rec)
            hub.publish(("defense", rec))

    def sse(kind: str, data: dict) -> str:
        """Format one Server-Sent Events frame with a JSON ``data`` payload."""
        return f"event: {kind}\ndata: {json.dumps(data, default=str)}\n\n"

    def event_frame(val: Any) -> str:
        """An SSE 'event' frame — allowlist-rendered unless the scope is full."""
        return sse("event", {
            "line": format_event(val, full=scope_full),
            "event": scope_event(val, full=scope_full),
        })

    app = FastAPI(title=title, description="Zu — Agent Production Runtime")

    @app.get("/healthz")
    async def healthz() -> dict:
        return {"status": "ok"}

    @app.post("/run", dependencies=auth)
    async def run_endpoint(req: RunRequest) -> dict:
        try:
            # A per-request config arrived over the network: it may select
            # installed, named plugins but NOT name an arbitrary 'module:Attr' to
            # import (that executes code). The operator's server default is
            # trusted and keeps the full import door.
            allow_imports = req.config is None
            cfg = coerce_config(req.config) if req.config is not None else default_cfg
            spec = coerce_task(req.task, cfg.budget, allow_paths=False)
            provider, registry, bus, providers = assemble(cfg, allow_imports=allow_imports)
        except ConfigError as exc:
            raise HTTPException(status_code=422, detail=str(exc)) from exc
        bus.subscribe(_tee)  # feed the dashboard + review queue

        try:
            # NOTE(review): containment and the observation/context limits are
            # read from the server default, not the per-request ``cfg`` —
            # presumably so a networked config cannot loosen them; confirm.
            result = await run_task(spec, provider, registry, bus, providers=providers,
                                    containment=default_cfg.containment,
                                    max_observation_chars=default_cfg.max_observation_chars,
                                    observation_strategy=default_cfg.observation_strategy,
                                    max_context_chars=default_cfg.max_context_chars)
            body: dict = {"result": result.model_dump(mode="json")}
            if req.include_events:
                events = await bus.query()
                body["events"] = [e.model_dump(mode="json") for e in events]
            return body
        except Exception as exc:  # noqa: BLE001 - a model/infra failure is a 502, not a crash
            raise HTTPException(status_code=502, detail=f"{type(exc).__name__}: {exc}") from exc
        finally:
            # Release the per-request bus's sink (e.g. a sqlite connection) so a
            # long-lived server doesn't leak one connection per request.
            await bus.aclose()

    @app.post("/run/stream", dependencies=auth)
    async def run_stream(req: RunRequest) -> Any:
        """Run a task and stream the loop live as Server-Sent Events — one
        ``event`` frame per loop event, then a final ``result`` and ``done``."""
        try:
            allow_imports = req.config is None  # see /run: networked config can't import code
            cfg = coerce_config(req.config) if req.config is not None else default_cfg
            spec = coerce_task(req.task, cfg.budget, allow_paths=False)
            provider, registry, bus, providers = assemble(cfg, allow_imports=allow_imports)
        except ConfigError as exc:
            raise HTTPException(status_code=422, detail=str(exc)) from exc

        # Bounded queue with drop-on-full: a slow/disconnected SSE consumer must
        # never let events accumulate without limit (the same backpressure
        # posture the global hub takes). The producer is a sync bus subscriber,
        # so it can only put_nowait — full means drop, never block the run.
        queue: asyncio.Queue = asyncio.Queue(maxsize=1000)

        def _enqueue(event: Any) -> None:
            try:
                queue.put_nowait(("event", event))
            except asyncio.QueueFull:
                pass  # slow consumer; the canonical log is unaffected

        bus.subscribe(_enqueue)
        bus.subscribe(_tee)  # also feed the global dashboard + review queue

        async def runner() -> None:
            try:
                try:
                    result = await run_task(spec, provider, registry, bus, providers=providers,
                                            containment=default_cfg.containment,
                                            max_observation_chars=default_cfg.max_observation_chars,
                                            observation_strategy=default_cfg.observation_strategy,
                                            max_context_chars=default_cfg.max_context_chars)
                    await queue.put(("result", result))
                except asyncio.CancelledError:
                    raise
                except Exception as exc:  # noqa: BLE001 - report as a stream frame, not a 500
                    await queue.put(("error", exc))
                # Only reached when the run was not cancelled. After a
                # cancellation nobody is reading the queue, so awaiting a put on
                # a full queue would block forever, hang the generator's
                # teardown and keep the bus from ever being closed.
                await queue.put(("done", None))
            finally:
                # Release the per-request bus's sink even if the client vanished.
                await bus.aclose()

        async def gen() -> Any:
            task = asyncio.create_task(runner())
            try:
                while True:
                    kind, val = await queue.get()
                    if kind == "event":
                        yield event_frame(val)
                    elif kind == "result":
                        yield sse("result", val.model_dump(mode="json"))
                    elif kind == "error":
                        yield sse("error", {"error": f"{type(val).__name__}: {val}"})
                    elif kind == "done":
                        yield sse("done", {})
                        break
            finally:
                # If the client disconnected mid-run, the generator is closed
                # before "done": cancel the run rather than spending model tokens
                # for nobody and leaving the runner blocked on a full queue.
                if not task.done():
                    task.cancel()
                try:
                    await task
                except (asyncio.CancelledError, Exception):  # noqa: BLE001 - teardown
                    pass

        return StreamingResponse(gen(), media_type="text/event-stream")

    @app.get("/events", dependencies=auth)
    async def events_stream() -> Any:
        """A global live feed of every run's events (SSE) — what the dashboard
        consumes. ``event`` frames carry a human ``line`` and the raw event;
        ``defense`` frames carry a queued blocked attempt."""

        async def gen() -> Any:
            # Subscribe inside the generator and emit the first frame inside the
            # ``try``: a client that goes away before or during the initial frame
            # then either never subscribes or still reaches the ``finally``,
            # instead of leaving its queue registered with the hub forever.
            q = hub.subscribe()
            try:
                # An initial comment so the client connects promptly even when idle.
                yield ": connected\n\n"
                while True:
                    kind, val = await q.get()
                    if kind == "event":
                        yield event_frame(val)
                    elif kind == "defense":
                        yield sse("defense", val)
            finally:
                hub.unsubscribe(q)

        return StreamingResponse(gen(), media_type="text/event-stream")

    @app.get("/review", dependencies=auth)
    async def review_queue_endpoint() -> dict:
        """The defense review queue: contained adversarial attempts awaiting
        triage (most recent first), from the in-memory view of this process."""
        return {"pending": len(review), "items": review}

    @app.get("/", response_class=HTMLResponse, dependencies=auth)
    async def dashboard() -> Any:
        return _DASHBOARD_HTML

    return app
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
# A single self-contained page (vanilla JS, no build step): it opens the /events
|
|
307
|
+
# SSE feed and renders the live run plus a highlighted Defenses panel fed by the
|
|
308
|
+
# same stream and /review.
|
|
309
|
+
_DASHBOARD_HTML = """<!doctype html>
|
|
310
|
+
<html lang="en"><head><meta charset="utf-8"><title>Zu · live</title>
|
|
311
|
+
<style>
|
|
312
|
+
:root{--bg:#0b0e14;--fg:#cdd6f4;--dim:#6c7086;--ok:#a6e3a1;--warn:#f9e2af;--bad:#f38ba8;--esc:#89b4fa}
|
|
313
|
+
body{background:var(--bg);color:var(--fg);font:13px/1.5 ui-monospace,SFMono-Regular,Menlo,monospace;margin:0}
|
|
314
|
+
header{display:flex;align-items:center;gap:.6rem;padding:.6rem 1rem;border-bottom:1px solid #1e2230}
|
|
315
|
+
header b{font-size:15px} .dot{width:.6rem;height:.6rem;border-radius:50%;background:var(--bad)}
|
|
316
|
+
.dot.live{background:var(--ok)} .grid{display:grid;grid-template-columns:1fr 22rem;gap:1px;background:#1e2230;height:calc(100vh - 49px)}
|
|
317
|
+
.col{background:var(--bg);overflow:auto;padding:.5rem 1rem} .col h2{font-size:11px;color:var(--dim);text-transform:uppercase;letter-spacing:.1em;margin:.3rem 0 .6rem}
|
|
318
|
+
.row{white-space:pre-wrap;word-break:break-word;padding:.05rem 0} .t{color:var(--dim)}
|
|
319
|
+
.def{color:var(--bad)} .esc{color:var(--esc)} .ok{color:var(--ok)} .warn{color:var(--warn)}
|
|
320
|
+
.card{border:1px solid #1e2230;border-left:3px solid var(--bad);border-radius:4px;padding:.4rem .6rem;margin:.4rem 0}
|
|
321
|
+
.card .k{color:var(--bad);font-weight:600} .card .m{color:var(--dim)} .empty{color:var(--dim);font-style:italic}
|
|
322
|
+
</style></head><body>
|
|
323
|
+
<header><b>Zu</b><span class="dot" id="dot"></span><span id="status" class="t">connecting…</span>
|
|
324
|
+
<span style="margin-left:auto" class="t">defenses queued: <b id="dcount">0</b></span></header>
|
|
325
|
+
<div class="grid">
|
|
326
|
+
<div class="col"><h2>Live run feed</h2><div id="feed"></div></div>
|
|
327
|
+
<div class="col"><h2>Defenses — queued for review</h2><div id="defs"><div class="empty">none yet</div></div></div>
|
|
328
|
+
</div>
|
|
329
|
+
<script>
|
|
330
|
+
const feed=document.getElementById('feed'),defs=document.getElementById('defs');
|
|
331
|
+
const dot=document.getElementById('dot'),status=document.getElementById('status'),dcount=document.getElementById('dcount');
|
|
332
|
+
let nd=0;
|
|
333
|
+
function line(text,cls){const d=document.createElement('div');d.className='row'+(cls?' '+cls:'');
|
|
334
|
+
const ts=new Date().toLocaleTimeString();d.innerHTML='<span class="t">'+ts+'</span> '+text;
|
|
335
|
+
feed.appendChild(d);feed.scrollTop=feed.scrollHeight;
|
|
336
|
+
while(feed.childNodes.length>500)feed.removeChild(feed.firstChild);}
|
|
337
|
+
function esc(s){return (s||'').replace(/[&<>]/g,c=>({'&':'&','<':'<','>':'>'}[c]));}
|
|
338
|
+
// When the server requires a token, this page is opened as /?token=... — carry
|
|
339
|
+
// it to the SSE feed (query param: EventSource can't set headers) and to /review
|
|
340
|
+
// (bearer header). No token -> open server, both calls are unauthenticated.
|
|
341
|
+
const token=new URLSearchParams(location.search).get('token');
|
|
342
|
+
const authHeaders=token?{'Authorization':'Bearer '+token}:{};
|
|
343
|
+
const es=new EventSource('/events'+(token?('?token='+encodeURIComponent(token)):''));
|
|
344
|
+
es.onopen=()=>{dot.classList.add('live');status.textContent='live';};
|
|
345
|
+
es.onerror=()=>{dot.classList.remove('live');status.textContent='reconnecting…';};
|
|
346
|
+
es.addEventListener('event',e=>{const d=JSON.parse(e.data);const ev=d.event||{};
|
|
347
|
+
let cls=''; const t=ev.type||'';
|
|
348
|
+
if(t==='harness.defense.blocked')cls='def'; else if(t==='harness.task.escalated')cls='esc';
|
|
349
|
+
else if(t==='harness.task.completed')cls='ok'; else if(t==='harness.task.terminal')cls='warn';
|
|
350
|
+
line(esc(d.line||t),cls);});
|
|
351
|
+
es.addEventListener('defense',e=>{const r=JSON.parse(e.data);nd++;dcount.textContent=nd;
|
|
352
|
+
if(defs.querySelector('.empty'))defs.innerHTML='';
|
|
353
|
+
const c=document.createElement('div');c.className='card';
|
|
354
|
+
c.innerHTML='<div><span class="k">⚠ '+esc(r.kind||'blocked')+'</span> '+esc(r.tool||'')+'</div>'+
|
|
355
|
+
'<div class="m">'+esc(r.detail||'')+(r.target?' · '+esc(r.target):'')+'</div>'+
|
|
356
|
+
'<div class="m">'+esc(r.ts||'')+' · status: '+esc(r.status||'pending')+'</div>';
|
|
357
|
+
defs.insertBefore(c,defs.firstChild);});
|
|
358
|
+
fetch('/review',{headers:authHeaders}).then(r=>r.json()).then(d=>{if(d.items&&d.items.length){nd=d.items.length;dcount.textContent=nd;
|
|
359
|
+
defs.innerHTML='';for(const r of d.items){const c=document.createElement('div');c.className='card';
|
|
360
|
+
c.innerHTML='<div><span class="k">⚠ '+esc(r.kind||'blocked')+'</span> '+esc(r.tool||'')+'</div>'+
|
|
361
|
+
'<div class="m">'+esc(r.detail||'')+'</div><div class="m">'+esc(r.ts||'')+'</div>';defs.appendChild(c);}}}).catch(()=>{});
|
|
362
|
+
</script></body></html>
|
|
363
|
+
"""
|
zu_cli/trace.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Live trace — turn the event stream into a human-readable, real-time view.
|
|
2
|
+
|
|
3
|
+
The bus notifies every subscriber *as each event is appended* (append-before-
|
|
4
|
+
notify), so a subscriber that prints is a live window into the running loop: the
|
|
5
|
+
model's train of thought, every tool call and its result, detector verdicts,
|
|
6
|
+
escalations, and the final answer — streaming with no refresh, no polling, no
|
|
7
|
+
restart. The same formatter renders the CLI trace and the HTTP (SSE) stream, so
|
|
8
|
+
what you watch locally and what you watch against a container are identical.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Callable
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _truncate(value: Any, limit: int = 160) -> str:
|
|
18
|
+
text = value if isinstance(value, str) else repr(value)
|
|
19
|
+
text = " ".join(text.split())
|
|
20
|
+
return text if len(text) <= limit else text[: limit - 1] + "…"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def format_event(event: Any, *, full: bool = True) -> str | None:
|
|
24
|
+
"""A one-line view of an event, or None to omit it from the trace. Pure and
|
|
25
|
+
side-effect-free so it serves both the console and the SSE stream.
|
|
26
|
+
|
|
27
|
+
``full=True`` (the local console default) shows content — the query, the
|
|
28
|
+
model's reasoning, tool args, extracted values. ``full=False`` is the
|
|
29
|
+
allowlist-render scope for a networked window: the *actions and decisions* (so
|
|
30
|
+
you see what the agent is doing and what its guards blocked) without dumping
|
|
31
|
+
the content it read or produced."""
|
|
32
|
+
t = getattr(event, "type", "")
|
|
33
|
+
p = getattr(event, "payload", {}) or {}
|
|
34
|
+
|
|
35
|
+
if t == "harness.task.started":
|
|
36
|
+
if not full:
|
|
37
|
+
return "▶ task started"
|
|
38
|
+
target = f" → {p['target']}" if p.get("target") else ""
|
|
39
|
+
return f"▶ task: {_truncate(p.get('query', ''))}{target}"
|
|
40
|
+
if t == "harness.turn.started":
|
|
41
|
+
return f"· turn {p.get('step')}"
|
|
42
|
+
if t == "harness.turn.completed":
|
|
43
|
+
# The model's natural-language output this turn — the train of thought.
|
|
44
|
+
text = p.get("text")
|
|
45
|
+
if not text:
|
|
46
|
+
return None # a pure tool-call turn with no prose; the tool lines speak
|
|
47
|
+
if not full:
|
|
48
|
+
return f"💭 reasoning ({len(text)} chars)" # content-light: that it thought, not what
|
|
49
|
+
return f"💭 {_truncate(text, 240)}"
|
|
50
|
+
if t == "harness.tool.invoked":
|
|
51
|
+
args = {k: v for k, v in (p.get("args") or {}).items() if k != "html"}
|
|
52
|
+
if not full:
|
|
53
|
+
keys = ", ".join(args) # arg names only, never values
|
|
54
|
+
return f"🔧 {p.get('tool')}({keys})"
|
|
55
|
+
return f"🔧 {p.get('tool')}({_truncate(args, 120)})"
|
|
56
|
+
if t == "harness.tool.returned":
|
|
57
|
+
if not full:
|
|
58
|
+
return f"↩ {p.get('tool')} returned"
|
|
59
|
+
return f"↩ {p.get('tool')} → {_truncate(p.get('observation'), 140)}"
|
|
60
|
+
if t == "data.source.fetched":
|
|
61
|
+
# Already a summary (length + status), safe in either scope.
|
|
62
|
+
body = p.get("html") or p.get("text") or p.get("content") or ""
|
|
63
|
+
return f"📄 fetched {len(body)} chars (status {p.get('status', '?')})"
|
|
64
|
+
if t == "harness.detector.fired":
|
|
65
|
+
return f"🔎 detector {p.get('detector')} [{p.get('severity')}] — {_truncate(p.get('detail'), 120)}"
|
|
66
|
+
if t == "harness.defense.blocked":
|
|
67
|
+
target = f" → {p['target']}" if p.get("target") else ""
|
|
68
|
+
return f"🛡 BLOCKED {p.get('kind')}{target} ({p.get('tool')}) — {_truncate(p.get('detail'), 120)}"
|
|
69
|
+
if t == "harness.task.escalated":
|
|
70
|
+
if p.get("exhausted"):
|
|
71
|
+
return f"⛔ escalation exhausted at tier {p.get('tier')}: {p.get('reason')}"
|
|
72
|
+
return f"⬆️ ESCALATE {p.get('from_tier')}→{p.get('to_tier')}: {p.get('reason')} — climbing a tier"
|
|
73
|
+
if t == "harness.validation.failed":
|
|
74
|
+
return f"❌ validation {p.get('detector')} [{p.get('severity')}] — {_truncate(p.get('detail'), 120)}"
|
|
75
|
+
if t == "data.record.extracted":
|
|
76
|
+
value = p.get("value")
|
|
77
|
+
if not full:
|
|
78
|
+
n = len(value) if isinstance(value, dict) else 1
|
|
79
|
+
return f"📦 extracted ({n} field{'s' if n != 1 else ''})" # shape, not content
|
|
80
|
+
return f"📦 extracted: {_truncate(value, 200)}"
|
|
81
|
+
if t == "harness.task.completed":
|
|
82
|
+
return "✅ completed"
|
|
83
|
+
if t == "harness.task.terminal":
|
|
84
|
+
return f"🛑 terminal: {p.get('reason')}"
|
|
85
|
+
return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def live_printer(
    write: Callable[[str], None] | None = None, *, clock: bool = True
) -> Callable[[Any], None]:
    """Build a bus subscriber that prints one trace line per event.

    ``write`` receives each finished line; when it is None the line is printed
    to stdout with an immediate flush. ``clock`` prefixes the event's ``ts``
    formatted as ``%H:%M:%S`` (an empty stamp when the event has no ``ts``).
    Events that ``format_event`` renders as None are skipped.
    """

    def _emit(text: str) -> None:
        if write is None:
            print(text, flush=True)  # flush so the trace is truly real-time
        else:
            write(text)

    def _on_event(event: Any) -> None:
        rendered = format_event(event)
        if rendered is None:
            return
        if not clock:
            _emit(f" {rendered}")
            return
        when = getattr(event, "ts", None)
        stamp = "" if when is None else when.strftime("%H:%M:%S")
        _emit(f" {stamp} {rendered}")

    return _on_event
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zu-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The `zu` command — Agent Production Runtime CLI
|
|
5
|
+
Project-URL: Homepage, https://github.com/k3-mt/zu
|
|
6
|
+
Project-URL: Repository, https://github.com/k3-mt/zu
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: typer
|
|
19
|
+
Requires-Dist: zu-core==0.1.0
|
|
20
|
+
Provides-Extra: mcp
|
|
21
|
+
Requires-Dist: mcp>=1.2; extra == 'mcp'
|
|
22
|
+
Provides-Extra: serve
|
|
23
|
+
Requires-Dist: fastapi>=0.110; extra == 'serve'
|
|
24
|
+
Requires-Dist: uvicorn>=0.27; extra == 'serve'
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: zu-redteam==0.1.0; extra == 'test'
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
zu_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
zu_cli/build.py,sha256=oogoaVsJrjvMlTmD-Hm4P2EyovAZBroaVaX0BoptcF8,4772
|
|
3
|
+
zu_cli/config.py,sha256=8LcXK-DdfZFEbk6rcAKvu2A2RVYrujL3OOAmgCfuqV4,33565
|
|
4
|
+
zu_cli/construct.py,sha256=DhkiLDzqf562xjByRbUrtHAkqdVp-y_QEiBPB9eDh30,13692
|
|
5
|
+
zu_cli/construct_sandbox.py,sha256=0ExeeN3no7KFMdlNE5b0Wgz7T7j5htQ7UmfdqBs17_g,6977
|
|
6
|
+
zu_cli/contribute.py,sha256=-fdOIfFqptcf6qQyqOW0tW1pLt0TwXCQhcN1gBu6iiY,4732
|
|
7
|
+
zu_cli/demo.py,sha256=x1WqOMmD9dqBn3zjAV31FVgNctnSVjQTLnX9VVqfmX4,13735
|
|
8
|
+
zu_cli/deploy.py,sha256=rJ0wPRo4Qjnv8y2xdjvKh9ME_PEZFwSUp-1Hbng4zvw,8698
|
|
9
|
+
zu_cli/explore.py,sha256=0MpfvYrrq-VmoFDgtNOcCRHRetxNy5HJGRhGZ2LbLk4,4695
|
|
10
|
+
zu_cli/guardrails.py,sha256=wC9BrCbOqB7sgFsT-5mdbbLzlnuoPt5OvIvSc40if3A,4446
|
|
11
|
+
zu_cli/harden.py,sha256=75C-cbh3Hb9IvVeZMvsA88FJXjitCm4v-EBHoOsnCa0,9066
|
|
12
|
+
zu_cli/main.py,sha256=cunMeo72fb6sAEOavJEm_o0ac8mFw3VnyQHeyY6mUW4,49233
|
|
13
|
+
zu_cli/mcp_server.py,sha256=ZSmMWAoxwzys0QRiytkTaxXHQu8fsz8s3AN85O5PYCs,21406
|
|
14
|
+
zu_cli/observe.py,sha256=sO96g6Yq39I8veC2y9kPE1RF9HIiuIQMRuVBPHfTPQw,2674
|
|
15
|
+
zu_cli/offline.py,sha256=FiZAgGDKFPSTUjRcJKb2pqTLGSbrX5M2k6p-rujSD7I,14261
|
|
16
|
+
zu_cli/sandbox.py,sha256=Ad8S1MXKUcr9P_pHDn3VLhrDtxLBdwMJrDaa0eVUCP8,13242
|
|
17
|
+
zu_cli/scaffold.py,sha256=-iFZwy9Xqn3c8yh8xosScp3LHRd6GhkZhRBbns0RI6c,4203
|
|
18
|
+
zu_cli/server.py,sha256=As5kYW1cBNXvZ8csEXeAmTcMiV-ZnMfjiqxSrzGQz24,18473
|
|
19
|
+
zu_cli/trace.py,sha256=nl1C94oyu-dcwuJJ2eaazIj8aHu2zGVliccxjiX_V_4,5113
|
|
20
|
+
zu_cli-0.1.0.dist-info/METADATA,sha256=cPkqqAswrsZku5AeLWTRBiP3vlSCLlj4408vQdIkIkM,998
|
|
21
|
+
zu_cli-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
22
|
+
zu_cli-0.1.0.dist-info/entry_points.txt,sha256=4TWH-lwe2t8CcPuXaxW-m6iqT8CKrS_SP0blHDwLZAk,175
|
|
23
|
+
zu_cli-0.1.0.dist-info/RECORD,,
|