zu-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_cli/__init__.py +0 -0
- zu_cli/build.py +111 -0
- zu_cli/config.py +738 -0
- zu_cli/construct.py +318 -0
- zu_cli/construct_sandbox.py +139 -0
- zu_cli/contribute.py +104 -0
- zu_cli/demo.py +373 -0
- zu_cli/deploy.py +207 -0
- zu_cli/explore.py +93 -0
- zu_cli/guardrails.py +102 -0
- zu_cli/harden.py +221 -0
- zu_cli/main.py +1126 -0
- zu_cli/mcp_server.py +444 -0
- zu_cli/observe.py +69 -0
- zu_cli/offline.py +335 -0
- zu_cli/sandbox.py +276 -0
- zu_cli/scaffold.py +116 -0
- zu_cli/server.py +363 -0
- zu_cli/trace.py +111 -0
- zu_cli-0.1.0.dist-info/METADATA +26 -0
- zu_cli-0.1.0.dist-info/RECORD +23 -0
- zu_cli-0.1.0.dist-info/WHEEL +4 -0
- zu_cli-0.1.0.dist-info/entry_points.txt +4 -0
zu_cli/config.py
ADDED
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
"""The config system (build step 8).
|
|
2
|
+
|
|
3
|
+
One declarative file (`agent.yaml`) wires a run: which model the provider calls,
|
|
4
|
+
which plugins are active, where events are stored, and the default budget. The
|
|
5
|
+
headline promise is that **swapping the model is a one-line edit** — point the
|
|
6
|
+
``provider`` block at Anthropic, OpenRouter, or a local server and nothing in
|
|
7
|
+
the code changes, because the loop only ever speaks to the ``ModelProvider``
|
|
8
|
+
port.
|
|
9
|
+
|
|
10
|
+
The wiring stays faithful to the architecture's two rules:
|
|
11
|
+
|
|
12
|
+
* **The core never special-cases a provider.** Plugins (providers, tools,
|
|
13
|
+
detectors, validators, sinks, backends) are looked up *by name* in the same
|
|
14
|
+
registry the loop reads, and constructed by passing only the config fields
|
|
15
|
+
their constructor actually accepts (signature-filtered). A new provider that
|
|
16
|
+
follows the port needs no change here.
|
|
17
|
+
* **Secrets stay in the environment.** Config names the *environment variable*
|
|
18
|
+
that holds a key (``api_key_env``), never the key itself — resolved inside
|
|
19
|
+
the adapter at call time, never placed in config or the model's context.
|
|
20
|
+
|
|
21
|
+
Plugins enter the run registry three ways (the architecture's three doors): a
|
|
22
|
+
discovered built-in named by its short name (``http_fetch``), a pip-installed
|
|
23
|
+
third-party plugin (same path — it is discovered too), or **by reference** as a
|
|
24
|
+
``module:Attr`` import path, which activates a plugin with no packaging at all.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import importlib
|
|
30
|
+
import inspect
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
from pydantic import BaseModel, Field, field_validator
|
|
35
|
+
|
|
36
|
+
from zu_core.bus import EventBus
|
|
37
|
+
from zu_core.contracts import Budget, TaskSpec
|
|
38
|
+
from zu_core.ports import ModelProvider
|
|
39
|
+
from zu_core.registry import GROUPS, Registry
|
|
40
|
+
|
|
41
|
+
# --- the parsed config shape --------------------------------------------------
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ProviderConfig(BaseModel):
    """The model the run calls — the one block you edit to swap models.

    ``name`` is a registry name (``anthropic``, ``openai-compatible``,
    ``scripted``) or a ``module:Attr`` import path for a custom provider. The
    remaining fields are the neutral knobs the built-in adapters accept; only
    those an adapter's constructor declares are passed to it, so this stays
    provider-agnostic. ``script`` is used only by the offline ``scripted``
    provider (a list of fake moves) so a run is testable with no live model.
    ``options`` carries any further keyword arguments; ``build_provider``
    spreads it over the named fields, so an option with the same key as a
    named field replaces that field's value.
    """

    # Registry short name, or a ``module:Attr`` import reference.
    name: str
    # Model identifier handed to the adapter; None leaves it to the adapter.
    model: str | None = None
    # Names of the environment variables holding the key / base URL (never the
    # values themselves).
    api_key_env: str | None = None
    base_url_env: str | None = None
    # Direct key/URL for *programmatic* use (a key your app already holds). Prefer
    # the *_env forms in files so a secret is never committed; an explicit api_key
    # here is meant for in-memory config dicts, not checked-in YAML.
    api_key: str | None = None
    base_url: str | None = None
    max_tokens: int | None = None
    # Fake moves replayed by the ``scripted`` provider; ignored by the others.
    script: list[dict] | None = None
    # Extra constructor keyword arguments, filtered against the adapter's signature.
    options: dict[str, Any] = Field(default_factory=dict)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class PluginsConfig(BaseModel):
    """Which plugins are active, by name (or ``module:Attr`` reference).

    Listing a plugin here is what activates it — the run registry contains
    exactly these, never everything installed, so a config controls (and
    orders) plugins per run without touching code.

    ``validators`` defaults to ``[schema, grounding]`` — **correct by default**: a
    run is held to its output schema *and* every reported value must appear in the
    content it actually fetched, so a fabricated answer is refused rather than
    returned as success. Dropping ``grounding`` is opting out of the
    anti-hallucination check; a legitimately non-fetching agent (pure Q&A from the
    model's own knowledge — e.g. the ``minimal`` template) must set
    ``validators: [schema]`` explicitly, because grounding has no retrieved content
    to check against.
    """

    # Tools registered with their class-default tier (see ``RunConfig.tiers``
    # for assigning a tier from config).
    tools: list[str] = Field(default_factory=list)
    detectors: list[str] = Field(default_factory=list)
    # A factory (not a shared literal) so each instance gets its own list.
    validators: list[str] = Field(default_factory=lambda: ["schema", "grounding"])
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class EventSinkConfig(BaseModel):
    """Where the canonical event log is written.

    ``driver`` is a sink name (built-in: ``sqlite``); omit the whole block to
    keep the in-memory default.

    ``encryption`` opts the payload into encryption-at-rest (needs
    ``zu-backends[encryption]`` and a key in the environment):

    * ``none`` (default) — plaintext, fully queryable on disk.
    * ``aesgcm`` — AES-256-GCM with a single key (``ZU_EVENT_KEY``).
    * ``managed`` — AES-256-GCM with a rotatable, KMS-pluggable ``KeyProvider``
      (``EnvKeyProvider`` by default; the KMS is the deployment's choice).
    """

    # Sink name looked up in the ``sinks`` group of the plugin catalog.
    driver: str
    # Filesystem location for file-backed sinks; refused on surfaces that
    # build sinks with ``allow_paths=False``.
    path: str | None = None
    # NOTE(review): not validated in this model — presumably checked where the
    # codec is built; confirm.
    encryption: str = "none"
    # Extra constructor keyword arguments, filtered against the sink's signature.
    options: dict[str, Any] = Field(default_factory=dict)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class ObservabilityConfig(BaseModel):
    """How a run is made watchable — the same hook for every harness.

    ``review_queue`` is the JSONL path that contained attacks
    (``harness.defense.blocked``) are appended to for triage; set it to null to
    disable. ``scope`` is the default view scope for *networked* surfaces (the
    SSE feed and dashboard): ``render`` (allowlist-render, safe to leave on in
    production) or ``full`` (show content — for local/authorized viewing). The
    local console trace is always full.
    """

    # JSONL triage file; None disables the review queue.
    review_queue: str | None = "zu_review.jsonl"
    # ``render`` or ``full`` (not validated by this model).
    scope: str = "render"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class ReplayConfig(BaseModel):
    """Maturity settings for a recorded-track replay.

    These describe how a run behaves once it has a deterministic path and the
    model is reserved for the frontier. All optional: omit the block and replay
    uses the normal budget and the global provider.

    * ``budget`` — REPLACES the run budget when a matching track replays. The
      navigation is solved, so it can be tight; a broken track then fails fast and
      cheap (a tripwire to re-record) instead of silently re-pathfinding at full
      cost. (The top-level ``budget`` still governs a fresh / --no-track run.)
    * ``finish_model`` — a cheap model id for the post-replay frontier (usually just
      the final extraction). It REUSES the global provider's endpoint/key, swapping
      only the model. Used solely when replay reaches the frontier without diverging;
      a divergence keeps the strong global model to re-pathfind.
    """

    # None means "use the run's normal budget".
    budget: Budget | None = None
    # None means "keep the global provider's model".
    finish_model: str | None = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class RunConfig(BaseModel):
    """A whole `agent.yaml` (or `zu.yaml`-style config), parsed and validated.

    Only ``provider`` is required; every other field has a default.
    ``observation_strategy`` and ``containment`` are restricted to their known
    values by the field validators below.
    """

    # The agent's GLOBAL provider — required. An agent with no provider cannot
    # operate, so there is deliberately no default: a config that omits it fails
    # to validate rather than silently assuming one.
    provider: ProviderConfig
    # Optional PER-TIER provider overrides, keyed by tier number. The global
    # ``provider`` runs every tier unless overridden here; when the loop escalates
    # to a tier listed below, that provider takes over mid-run (the neutral
    # message format lets a different adapter continue the same conversation). The
    # canonical use: a cheap/fast model at tier 1, a frontier/vision model unlocked
    # on escalation to tier 2 — e.g. ``providers: {2: {name: anthropic, model: ...}}``.
    providers: dict[int, ProviderConfig] = Field(default_factory=dict)
    plugins: PluginsConfig = Field(default_factory=PluginsConfig)
    # The escalation ladder, OWNED BY THE AGENT AUTHOR: tier number -> the tools
    # offered at that tier (by built-in name or ``module:Attr`` import-ref). This
    # is where you mix Zu's tools and your own and decide which sits at which tier —
    # the config's choice OVERRIDES a tool class's own default ``tier``. Tools also
    # listed in ``plugins.tools`` (without a tier here) keep their class default.
    # ``max_tier`` on the task still caps how high the loop climbs.
    tiers: dict[int, list[str]] = Field(default_factory=dict)
    # Optional backend plugin name; when set it is resolved once and offered to
    # plugins whose constructor declares a ``backend`` parameter.
    backend: str | None = None
    # The canonical store (the single source of truth for the run).
    event_sink: EventSinkConfig | None = None
    # How the run is surfaced (live trace + defense review queue), the same hook
    # for every harness — see zu_cli.observe.attach_observability.
    observability: ObservabilityConfig = Field(default_factory=ObservabilityConfig)
    # Secondary trace destinations — events are shipped to each *in addition* to
    # the canonical store, isolated (a failing sink never breaks the run). This is
    # how a run emits to local files or cloud storage for observability.
    trace_sinks: list[EventSinkConfig] = Field(default_factory=list)
    budget: Budget = Field(default_factory=Budget)
    # Maturity settings for a recorded-track replay: a tight replay budget and an
    # optional cheap finisher model for the frontier (see ReplayConfig).
    replay: ReplayConfig = Field(default_factory=ReplayConfig)
    # Optional cap (chars per content field) on how much of a tool observation the
    # MODEL sees — OFF by default (the model gets the full page). Set it when an
    # agent fetches big pages on a small-context model: a tier-2 rendered DOM can
    # be hundreds of KB and a few pages overflow the context window. The full
    # content always stays on the event log (grounding reads that), so the cap is
    # a context-fit measure, not a provenance loss. A large-context model leaves
    # it unset and keeps everything.
    max_observation_chars: int | None = None
    # How an over-cap content field is shaped for the model (only when
    # ``max_observation_chars`` is set). Both are LOSSLESS — the full content stays
    # on the event log either way:
    #   * ``truncate`` (default) — elide it to a ``recall`` pointer (cheap, no
    #     calls); the model pulls back the part it needs on demand. (Despite the
    #     name it does NOT cut the tail — it defers to recall.)
    #   * ``extract`` — map-reduce: scan the whole field in chunks and pull the
    #     task-relevant parts now (one model call per chunk).
    observation_strategy: str = "truncate"

    @field_validator("observation_strategy")
    @classmethod
    def _known_strategy(cls, v: str) -> str:
        """Reject any ``observation_strategy`` other than the two known values."""
        if v not in ("truncate", "extract"):
            raise ValueError(f"observation_strategy must be 'truncate' or 'extract', got {v!r}")
        return v

    # Optional bound on the TOTAL conversation the model sees (chars across all
    # messages) — OFF by default. Where ``max_observation_chars`` caps a single
    # tool result, this caps their SUM across a long multi-step run (e.g. driving a
    # browser for many turns), eliding old tool observations so the running context
    # never overflows the model's window. Set it for long agentic runs on a
    # finite-context model; leave it unset for short runs / huge-context models.
    max_context_chars: int | None = None
    # The agent's task, embedded so a single ``agent.yaml`` is the whole agent
    # (what + how in one file). The task block — query, target, output_schema,
    # max_tier — is split out into a TaskSpec by ``load_agent``. Optional: a config
    # used as a *service* default (``zu serve``) has no task (tasks arrive per
    # request); a runnable agent file carries one.
    task: dict | None = None
    # The containment posture for tool execution (see zu_core.security):
    #   * ``audit`` (default) — tools run in-process; each declared envelope and
    #     every contained block is recorded on the event log. Tier-1 tools carry
    #     their own in-process guards (the SSRF/DNS-pin in zu-tools). Right for
    #     trusted tools on a host.
    #   * ``required`` — fail closed: refuse to run any tool with off-box reach
    #     (non-empty egress/capabilities, or tier >= 2) UNLESS the run is executing
    #     inside the Zu sandbox (``ZU_SANDBOXED=1``), where the container — default-
    #     DROP network + egress proxy + dropped caps — is the real boundary. Run
    #     such a config via the sandboxed launcher; on a bare host it refuses rather
    #     than run a capability-bearing (or untrusted third-party) tool unguarded.
    containment: str = "audit"

    @field_validator("containment")
    @classmethod
    def _known_containment(cls, v: str) -> str:
        """Reject any ``containment`` other than ``audit`` or ``required``."""
        if v not in ("audit", "required"):
            raise ValueError(f"containment must be 'audit' or 'required', got {v!r}")
        return v
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# --- loading -----------------------------------------------------------------
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _read_doc(path: str) -> dict:
    """Parse a YAML (or JSON — YAML is a superset) document into a dict.

    Args:
        path: Location of the file to read; it is decoded as UTF-8.

    Returns:
        The top-level mapping of the document.

    Raises:
        ConfigError: the file is missing or cannot be opened/read (a directory,
            a permission problem, ...), is not valid UTF-8, is not valid YAML,
            or its top level is not a mapping.
    """
    import yaml

    try:
        with open(path, encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
    except FileNotFoundError as exc:
        raise ConfigError(f"file not found: {path}") from exc
    except OSError as exc:
        # Any other open/read failure (IsADirectoryError, PermissionError, ...)
        # is still a user-facing config problem, not a crash.
        raise ConfigError(f"cannot read {path}: {exc}") from exc
    except UnicodeDecodeError as exc:
        # Raised by the text layer while the parser pulls data from the file.
        raise ConfigError(f"{path}: not valid UTF-8 — {exc}") from exc
    except yaml.YAMLError as exc:
        raise ConfigError(f"{path}: invalid YAML — {exc}") from exc
    if not isinstance(data, dict):
        raise ConfigError(f"{path}: expected a mapping at the top level")
    return data
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
class ConfigError(Exception):
    """A config or task file that cannot be loaded or wired.

    Raised by the loaders and builders in this module so callers can show the
    user a clear message rather than a traceback.
    """
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def load_config(path: str) -> RunConfig:
    """Read the document at ``path`` and validate it into a ``RunConfig``.

    A read/parse failure propagates from ``_read_doc`` as ``ConfigError``; a
    schema violation is re-raised as ``ConfigError`` prefixed with the path.
    """
    from pydantic import ValidationError

    document = _read_doc(path)
    try:
        config = RunConfig.model_validate(document)
    except ValidationError as err:
        raise ConfigError(f"{path}: {err}") from err
    return config
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def load_task(path: str, *, default_budget: Budget | None = None) -> TaskSpec:
    """Read a task file and validate it into a ``TaskSpec``.

    When the file has no ``budget`` key and ``default_budget`` is given, the
    default is filled in; a budget written in the task file always wins.
    Validation failures are re-raised as ``ConfigError`` prefixed with the path.
    """
    from pydantic import ValidationError

    raw = _read_doc(path)
    inherit = default_budget is not None and "budget" not in raw
    # Build a new dict rather than mutating the parsed document.
    payload = {**raw, "budget": default_budget.model_dump()} if inherit else raw
    try:
        return TaskSpec.model_validate(payload)
    except ValidationError as err:
        raise ConfigError(f"{path}: {err}") from err
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# --- coercion (a config/task may arrive as a path, a dict, or a typed object) -
|
|
281
|
+
#
|
|
282
|
+
# The CLI surfaces — `zu serve`, `zu mcp`, and the `zu` embed facade — all accept
|
|
283
|
+
# a config/task that may be a file path, a plain dict, an already-built typed
|
|
284
|
+
# object, or None. The coercion is identical except for one axis: whether a task
|
|
285
|
+
# given as a *str path* is allowed. The HTTP server says no (a path would resolve
|
|
286
|
+
# server-side, which a client can't set); the MCP tools and the embed facade say
|
|
287
|
+
# yes. So these live here once, parameterised by ``allow_paths``, rather than
|
|
288
|
+
# being re-implemented (and drifting) in each caller.
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
AGENT_FILE = "agent.yaml"
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def load_dotenv(path: Path) -> list[str]:
    """Load ``KEY=VALUE`` lines from a bundle's ``.env`` into ``os.environ`` and
    return the names loaded. This is how a bundle carries its **secrets** — a
    gitignored ``.env`` next to ``agent.yaml`` holding ``EXA_API_KEY=…``,
    ``ANTHROPIC_API_KEY=…`` — without committing them: config still names the
    *variable* (``api_key_env``), and the value is supplied here at load time, for
    both a local run and (the file being mounted with the bundle) a contained one.

    An already-set variable is never overwritten, so an explicit environment wins
    over the file. Minimal and dependency-free: blank lines and ``#`` comments are
    skipped, an ``export`` prefix is tolerated, and surrounding quotes are stripped.
    A leading UTF-8 byte-order mark (as written by some editors) is ignored so it
    cannot become part of the first variable's name.

    Args:
        path: Location of the ``.env`` file; a path that is not a file yields ``[]``.

    Returns:
        The names that were actually set, in file order (names already present
        in the environment are not included).
    """
    import os

    if not path.is_file():
        return []
    loaded: list[str] = []
    # ``utf-8-sig`` decodes plain UTF-8 unchanged but drops a leading BOM; with
    # plain ``utf-8`` the BOM would be glued onto the first key.
    for raw in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        line = line.removeprefix("export ")
        key, sep, val = line.partition("=")
        key = key.strip()
        if not sep or not key:
            # No ``=`` at all, or nothing before it: not an assignment.
            continue
        val = val.strip()
        # Strip one pair of matching surrounding quotes, single or double.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]
        if key not in os.environ:
            os.environ[key] = val
            loaded.append(key)
    return loaded
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def load_agent(source: Any) -> tuple[TaskSpec, RunConfig]:
    """Load a single self-contained agent → ``(task, config)``.

    ``source`` may be:

    * a path to an ``agent.yaml`` — the ``.env`` beside it is loaded first;
    * a **bundle directory** — it is put on ``sys.path`` (so the agent's own
      tools, referenced in ``tiers`` as ``tools.x:MyTool``, import), its
      ``.env`` is loaded, and its ``agent.yaml`` is read;
    * a dict or an already-built ``RunConfig``;
    * None — ``./agent.yaml`` when that file exists, otherwise ``./`` as a bundle.

    The config's ``task:`` block is split out into a ``TaskSpec`` that inherits
    the config's budget. A config with no ``task:`` raises ``ConfigError``
    (it is not runnable), as does an unsupported ``source`` type.
    """
    if source is None:
        source = AGENT_FILE if Path(AGENT_FILE).is_file() else "."

    if isinstance(source, (str, Path)):
        location = Path(source)
        if location.is_dir():
            _add_bundle_to_path(location)
            env_file = location / ".env"  # the bundle's gitignored secrets
            agent_file = location / AGENT_FILE
        else:
            env_file = location.parent / ".env"
            agent_file = location
        load_dotenv(env_file)
        cfg = load_config(str(agent_file))
    elif isinstance(source, dict):
        cfg = coerce_config(source)
    elif isinstance(source, RunConfig):
        cfg = source
    else:
        raise ConfigError(f"unsupported agent source: {type(source).__name__}")

    if cfg.task is None:
        raise ConfigError(
            "agent has no `task:` block — a runnable agent file must include one "
            "(query/target/output_schema). See `zu init`."
        )
    # The embedded task is a dict, so a str "path" is never valid here.
    return coerce_task(cfg.task, cfg.budget, allow_paths=False), cfg
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _add_bundle_to_path(directory: Path) -> None:
    """Prepend the resolved bundle directory to ``sys.path`` unless it is
    already listed, so the bundle's own ``tools/`` package is importable by the
    ``module:Attr`` refs in the agent's ``tiers``."""
    import sys

    bundle_root = str(directory.resolve())
    if bundle_root in sys.path:
        return
    sys.path.insert(0, bundle_root)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def coerce_config(source: Any) -> RunConfig:
    """A RunConfig from a path (``str`` or ``pathlib.Path``), a dict, an
    existing RunConfig, or None (meaning ``./agent.yaml``).

    A malformed *dict* raises ``ConfigError`` like a malformed *file* does — so
    callers that ``except ConfigError`` get a clean message for either, never a
    raw pydantic ``ValidationError`` escaping.

    Raises:
        ConfigError: the file/dict does not validate, or ``source`` is of an
            unsupported type.
    """
    if source is None:
        # Same default file name ``load_agent`` uses.
        return load_config(AGENT_FILE)
    if isinstance(source, RunConfig):
        return source
    if isinstance(source, (str, Path)):
        # Accept Path objects too, matching what ``load_agent`` accepts.
        return load_config(str(source))
    if isinstance(source, dict):
        from pydantic import ValidationError

        try:
            return RunConfig.model_validate(source)
        except ValidationError as exc:
            raise ConfigError(f"invalid config: {exc}") from exc
    raise ConfigError(f"unsupported config type: {type(source).__name__}")
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def coerce_task(source: Any, default_budget: Budget, *, allow_paths: bool) -> TaskSpec:
    """Turn ``source`` into a ``TaskSpec``.

    Accepted forms: an existing ``TaskSpec`` (returned as-is), a dict, or — only
    when ``allow_paths`` is true — a str file path. A dict without a ``budget``
    key inherits ``default_budget``. Anything malformed or unsupported surfaces
    as ``ConfigError``.

    ``allow_paths=False`` is the server's stance: a str task is a *path*, which a
    remote client cannot meaningfully set, so it is rejected rather than read off
    the server's filesystem.
    """
    if isinstance(source, TaskSpec):
        return source

    if isinstance(source, str):
        if allow_paths:
            return load_task(source, default_budget=default_budget)
        raise ConfigError("task must be a JSON object (the task spec)")

    if not isinstance(source, dict):
        raise ConfigError(f"unsupported task type: {type(source).__name__}")

    inherited = default_budget.model_dump()
    payload = dict(source)  # copy: never mutate the caller's dict
    if "budget" not in payload:
        payload["budget"] = inherited
    try:
        return TaskSpec.model_validate(payload)
    except Exception as err:  # noqa: BLE001 - surface as a ConfigError, not a raw pydantic error
        raise ConfigError(f"invalid task: {err}") from err
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
# --- building the run --------------------------------------------------------
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _catalog() -> Registry:
    """Everything installed, as a freshly discovered ``Registry``.

    Each call builds a new ``Registry`` and runs ``discover()`` on it; callers
    that need it more than once pass the result along (the ``catalog``
    parameters of the builders) rather than calling this again. The run
    registry is built by selecting from this catalog.

    NOTE(review): the intent is that discovery failures are tolerated (a broken
    third-party plugin must not stop a run that does not use it) — that
    behaviour lives in ``Registry.discover`` and is not visible here; confirm.
    """
    reg = Registry()
    reg.discover()
    return reg
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _import_ref(ref: str) -> Any:
    """Resolve a ``module:Attr`` (or ``module:Attr.Nested``) import path — the
    'by reference in config' door. Used for both plugins and providers.

    Args:
        ref: ``"package.module:Attr"``; the part after the colon may be a
            dotted chain of attributes walked from the module.

    Returns:
        The object the reference names.

    Raises:
        ConfigError: ``ref`` lacks a module or attribute part, the module
            cannot be imported, or an attribute in the chain does not exist.
    """
    module, _, attr = ref.partition(":")
    if not module or not attr:
        raise ConfigError(f"bad import reference {ref!r}; expected 'module:Attr'")
    try:
        obj: Any = importlib.import_module(module)
        for part in attr.split("."):
            obj = getattr(obj, part)
    except (ImportError, AttributeError, TypeError, ValueError) as exc:
        # ``import_module`` raises TypeError for a relative name (".pkg") given
        # without a package; ValueError is caught as well so any other
        # malformed-name rejection is reported the same clean way.
        raise ConfigError(f"cannot import {ref!r}: {exc}") from exc
    return obj
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
def _construct(factory: Any, candidate: dict[str, Any]) -> Any:
    """Build ``factory`` passing only the kwargs its constructor declares, and
    only those with a value. This is what keeps the wiring provider-agnostic:
    config offers a neutral set of knobs and each adapter takes the subset it
    understands — no per-provider branching here.

    Args:
        factory: A class or callable to invoke.
        candidate: Keyword arguments on offer; entries whose value is ``None``
            are never passed.

    Returns:
        Whatever ``factory`` returns. If its signature cannot be inspected it
        is called with no arguments at all.
    """
    try:
        params = inspect.signature(factory).parameters
    except (TypeError, ValueError):
        return factory()
    accepts_kwargs = any(p.kind is p.VAR_KEYWORD for p in params.values())
    # Only these parameter kinds can be supplied by keyword. Matching on the
    # bare name would also match ``*args`` or positional-only parameters, and
    # passing those by keyword raises TypeError.
    by_keyword = {
        name
        for name, p in params.items()
        if p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)
    }
    kwargs = {
        k: v
        for k, v in candidate.items()
        if v is not None and (accepts_kwargs or k in by_keyword)
    }
    return factory(**kwargs)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _refuse_import(ref: str, what: str) -> None:
    """Always raise ``ConfigError`` for a ``module:Attr`` ref named on a surface
    that may not import code.

    Importing a module executes its top-level code, so a config that can name
    any ``module:Attr`` is a code-execution door — fine for the
    operator-trusted CLI, never for a config that arrived over the network.
    ``what`` is the kind of thing being refused (e.g. ``provider``) and only
    appears in the message.
    """
    message = (
        f"refusing to import {what} {ref!r}: this surface does not permit arbitrary "
        "'module:Attr' imports (a per-request config may only use installed, named "
        "plugins). Configure it on the trusted server default instead."
    )
    raise ConfigError(message)
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def build_provider(
    cfg: ProviderConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> ModelProvider:
    """Construct the configured model provider — the one-line model swap.

    Resolution order:

    1. ``scripted`` — built directly from ``cfg.script`` (it has no env/model
       to construct from; it replays a fixed list of moves for offline runs
       and tests).
    2. a ``module:Attr`` name — imported, unless ``allow_imports`` is false
       (the networked surface), in which case ``ConfigError`` is raised.
    3. any other name — looked up in the ``providers`` group of ``catalog``
       (discovered on demand when none is passed); an unknown name raises
       ``ConfigError`` listing what was discovered.

    The factory found by 2 or 3 is then called with the neutral config knobs it
    accepts, with ``cfg.options`` layered on top.
    """
    provider_name = cfg.name

    if provider_name == "scripted":
        from zu_providers.scripted import ScriptedProvider

        return ScriptedProvider.from_moves(cfg.script or [])

    if ":" in provider_name:
        if not allow_imports:
            _refuse_import(provider_name, "provider")
        factory = _import_ref(provider_name)
    else:
        catalog = catalog or _catalog()
        try:
            factory = catalog.get("providers", provider_name)
        except KeyError:
            discovered = ", ".join(catalog.names("providers")) or "none"
            raise ConfigError(
                f"unknown provider {provider_name!r}; discovered: {discovered} "
                "(is its package installed?)"
            ) from None

    neutral_fields = ("model", "api_key_env", "base_url_env", "api_key", "base_url", "max_tokens")
    knobs: dict[str, Any] = {field: getattr(cfg, field) for field in neutral_fields}
    # Options go last so an explicit option overrides a named field.
    knobs.update(cfg.options)
    return _construct(factory, knobs)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _resolve_plugin(
    kind: str, name: str, catalog: Registry, extra: dict[str, Any], *, allow_imports: bool = True
) -> Any:
    """Resolve one named plugin to the object that goes into the run registry.

    A ``module:Attr`` name is imported and returned as-is (or refused with
    ``ConfigError`` when ``allow_imports`` is false). A short name is fetched
    from ``catalog`` under ``kind``; an unknown name raises ``ConfigError``
    listing what was discovered.

    ``extra`` carries optional injected dependencies (e.g. a configured
    ``backend``). Only a catalog plugin that is a class *and* declares at least
    one of those names in its constructor is instantiated here with them;
    otherwise the object is handed back untouched for the loop to materialise.
    """
    singular = kind[:-1]  # "tools" -> "tool", for messages

    if ":" in name:
        if not allow_imports:
            _refuse_import(name, singular)
        return _import_ref(name)

    try:
        plugin = catalog.get(kind, name)
    except KeyError:
        known = ", ".join(catalog.names(kind)) or "none"
        raise ConfigError(
            f"unknown {singular} {name!r}; discovered: {known} (is its package installed?)"
        ) from None

    if not (extra and isinstance(plugin, type)):
        return plugin

    # e.g. render_dom(backend=...): pass only what the constructor declares.
    declared = inspect.signature(plugin).parameters
    injected = {key: value for key, value in extra.items() if key in declared}
    return plugin(**injected) if injected else plugin
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def build_registry(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> Registry:
    """Build a registry holding exactly the configured plugins — no more.

    This is how config activates and orders plugins per run without code
    changes. ``allow_imports=False`` forbids ``module:Attr`` plugin refs (the
    networked surface): a per-request config may only activate installed,
    named plugins.

    Registration order: tools from ``plugins.tools`` first, then tools that
    appear only in ``tiers``, then detectors, then validators. Each plugin is
    registered under its own ``name`` attribute when it has one, otherwise
    under the configured name.
    """
    catalog = catalog or _catalog()
    reg = Registry()

    # The optional backend is resolved once, instantiated if it is a class, and
    # offered to every plugin whose constructor declares ``backend``.
    backend_obj = None
    if cfg.backend is not None:
        backend_obj = _resolve_plugin(
            "backends", cfg.backend, catalog, {}, allow_imports=allow_imports
        )
        if isinstance(backend_obj, type):
            backend_obj = backend_obj()
    extra = {} if backend_obj is None else {"backend": backend_obj}

    # Tools come from the config-owned escalation ladder (``tiers``) and/or the
    # flat ``plugins.tools`` list. A name in ``tiers`` gets its effective tier
    # STAMPED on the instance (the agent author's choice overrides the tool's
    # own default); a name only in ``plugins.tools`` keeps its class default.
    # If a name is listed under several tiers, the last one iterated wins.
    tier_of: dict[str, int] = {
        name: tier for tier, names in cfg.tiers.items() for name in names
    }
    flat_tools = list(cfg.plugins.tools)
    tool_names = flat_tools + [n for n in tier_of if n not in flat_tools]
    for name in tool_names:
        tool = _resolve_plugin("tools", name, catalog, extra, allow_imports=allow_imports)
        if name in tier_of:
            # An instance is needed to stamp the tier; a class is materialised
            # here (the loop would otherwise instantiate it with no args anyway).
            if isinstance(tool, type):
                tool = tool()
            tool.tier = tier_of[name]
        reg.register("tools", getattr(tool, "name", name), tool)

    for kind in ("detectors", "validators"):
        for name in getattr(cfg.plugins, kind):
            plugin = _resolve_plugin(kind, name, catalog, extra, allow_imports=allow_imports)
            reg.register(kind, getattr(plugin, "name", name), plugin)
    return reg
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
def _refuse_path(spec: EventSinkConfig) -> None:
    """Reject a sink config that names a filesystem ``path``.

    This helper never returns: it unconditionally raises ``ConfigError``,
    quoting ``spec.path`` in the message. Callers decide *when* a path is
    unacceptable (a surface that may not write the host) and delegate the
    refusal here.

    Why it matters: a sink ``path`` is an arbitrary file the process opens for
    write (a sqlite db, a jsonl log), so a config that can name any path is a
    file-write door. That is fine for the operator-trusted CLI, never for a
    config that arrived over the network. The in-memory default (no
    ``event_sink``) and any path-free, options-only sink stay available to a
    per-request config.
    """
    reason = (
        f"refusing to open sink path {spec.path!r}: this surface does not permit "
        "writing arbitrary host paths (a per-request config may not configure a "
        "filesystem sink). Configure event_sink/trace_sinks on the trusted server "
        "default instead."
    )
    raise ConfigError(reason)
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def _build_one_sink(
    spec: EventSinkConfig, catalog: Registry, *, allow_paths: bool = True
) -> Any:
    """Construct one EventSink from its config (driver name + path/options).

    The constructor candidates are ``path`` plus every entry of
    ``spec.options`` (an ``options`` entry wins on a key clash), and a
    ``codec`` when ``spec.encryption`` selects one. The driver's factory is
    looked up by name in ``catalog`` under the ``"sinks"`` kind and built via
    ``_construct``.

    ``allow_paths=False`` forbids a sink that names a filesystem ``path`` (the
    networked surface), so a remote caller cannot drive an arbitrary file
    write. The check covers both ``spec.path`` and a ``path`` key inside
    ``spec.options``: the latter reaches the constructor under the very same
    keyword, so guarding only ``spec.path`` would leave the door open.

    Raises:
        ConfigError: a path was named on a surface that forbids it, the driver
            is not in the catalog, or the encryption codec cannot be built.
    """
    if not allow_paths:
        if spec.path is not None:
            _refuse_path(spec)
        # ``options`` is merged over ``path`` below, so a path smuggled in
        # through it must be refused as well.
        options_path = spec.options.get("path")
        if options_path is not None:
            raise ConfigError(
                f"refusing to open sink path {options_path!r} (given via options): "
                "this surface does not permit writing arbitrary host paths (a "
                "per-request config may not configure a filesystem sink). Configure "
                "event_sink/trace_sinks on the trusted server default instead."
            )
    try:
        factory = catalog.get("sinks", spec.driver)
    except KeyError:
        raise ConfigError(
            f"unknown event sink {spec.driver!r}; discovered: "
            f"{', '.join(catalog.names('sinks')) or 'none'} (is its package installed?)"
        ) from None
    candidate = {"path": spec.path, **spec.options}
    codec = _build_codec(spec.encryption)
    if codec is not None:
        # Set after the merge so the configured encryption always decides the
        # codec, whatever ``options`` contained.
        candidate["codec"] = codec
    return _construct(factory, candidate)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def _build_codec(encryption: str) -> Any:
|
|
633
|
+
"""Map the ``encryption`` config value to a payload codec instance (or None
|
|
634
|
+
for plaintext). The codec lives in ``zu-backends[encryption]`` and is imported
|
|
635
|
+
lazily, with a clear error if the extra isn't installed."""
|
|
636
|
+
mode = (encryption or "none").lower()
|
|
637
|
+
if mode in ("none", ""):
|
|
638
|
+
return None
|
|
639
|
+
try:
|
|
640
|
+
from zu_backends.encryption import AesGcmCodec, ManagedAesGcmCodec
|
|
641
|
+
except ModuleNotFoundError as exc:
|
|
642
|
+
raise ConfigError(
|
|
643
|
+
"encryption-at-rest needs the optional dependency: "
|
|
644
|
+
"pip install 'zu-backends[encryption]'"
|
|
645
|
+
) from exc
|
|
646
|
+
try:
|
|
647
|
+
if mode == "aesgcm":
|
|
648
|
+
return AesGcmCodec.from_env()
|
|
649
|
+
if mode == "managed":
|
|
650
|
+
return ManagedAesGcmCodec.from_env()
|
|
651
|
+
except RuntimeError as exc: # a missing/invalid key in the environment
|
|
652
|
+
raise ConfigError(str(exc)) from exc
|
|
653
|
+
raise ConfigError(
|
|
654
|
+
f"unknown encryption mode {encryption!r}; use 'none', 'aesgcm', or 'managed'."
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
def build_sink(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_paths: bool = True
) -> Any:
    """Build the run's canonical EventSink from ``cfg.event_sink``.

    Returns ``None`` when no ``event_sink`` is configured, which selects the
    in-memory default. Otherwise the sink is built from the given ``catalog``
    (or the discovered one when the argument is falsy), with ``allow_paths``
    forwarded to the sink builder.
    """
    sink_spec = cfg.event_sink
    if sink_spec is None:
        return None
    registry = catalog or _catalog()
    return _build_one_sink(sink_spec, registry, allow_paths=allow_paths)
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def build_trace_sinks(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_paths: bool = True
) -> list[Any]:
    """Build the secondary trace destinations (shippers).

    One EventSink is built per ``cfg.trace_sinks`` entry, in order; these are
    meant to be attached to the bus alongside the canonical store. Returns an
    empty list when no trace sinks are configured. ``allow_paths`` is
    forwarded to every sink build.
    """
    specs = cfg.trace_sinks
    if not specs:
        return []
    registry = catalog or _catalog()
    shippers = []
    for sink_spec in specs:
        shippers.append(_build_one_sink(sink_spec, registry, allow_paths=allow_paths))
    return shippers
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def build_providers_by_tier(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> dict[int, ModelProvider]:
    """Build the per-tier provider overrides declared in ``cfg.providers``.

    Returns a mapping of tier -> built ModelProvider. The mapping is empty
    when no overrides are configured, in which case the loop runs the global
    provider on every tier. ``allow_imports`` is forwarded to each provider
    build.
    """
    overrides = cfg.providers
    if not overrides:
        return {}
    registry = catalog or _catalog()
    built = {}
    for tier, provider_cfg in overrides.items():
        built[tier] = build_provider(provider_cfg, registry, allow_imports=allow_imports)
    return built
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def assemble(
    cfg: RunConfig, *, allow_imports: bool = True
) -> tuple[ModelProvider, Registry, EventBus, dict[int, ModelProvider]]:
    """Turn a parsed config into the four things ``run_task`` needs.

    Returns, in order: the global provider, the run registry, an event bus
    whose canonical sink is the configured one, and the per-tier provider
    override map. Every ``trace_sinks`` entry is attached to the bus as an
    additional destination.

    ``allow_imports`` defaults to True for the operator-trusted CLI. Pass
    False when the config arrived over the network (``zu serve`` per-request
    override) so an arbitrary ``module:Attr`` provider/plugin cannot be
    imported (and its top-level code executed) by a remote caller. The same
    flag also gates filesystem sink paths: a per-request config may not name
    an ``event_sink``/``trace_sinks`` ``path`` (an arbitrary host file the
    process would open for write).
    """
    catalog = _catalog()

    provider = build_provider(cfg.provider, catalog, allow_imports=allow_imports)
    providers_by_tier = build_providers_by_tier(cfg, catalog, allow_imports=allow_imports)
    registry = build_registry(cfg, catalog, allow_imports=allow_imports)

    # A single trust flag: a surface that may not import arbitrary modules may
    # not write arbitrary host paths either.
    canonical_sink = build_sink(cfg, catalog, allow_paths=allow_imports)
    bus = EventBus(sink=canonical_sink)
    secondary_sinks = build_trace_sinks(cfg, catalog, allow_paths=allow_imports)
    for destination in secondary_sinks:
        bus.add_destination(destination)

    return provider, registry, bus, providers_by_tier
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# Public API of this module. ``GROUPS`` is re-exported so callers can
# introspect the plugin kinds without importing the registry module directly.
# ``build_trace_sinks`` is listed alongside its sibling builders so that
# ``from zu_cli.config import *`` exposes the complete builder set.
__all__ = [
    "RunConfig",
    "ProviderConfig",
    "PluginsConfig",
    "EventSinkConfig",
    "ObservabilityConfig",
    "ConfigError",
    "load_config",
    "load_task",
    "load_agent",
    "load_dotenv",
    "coerce_config",
    "coerce_task",
    "build_provider",
    "build_providers_by_tier",
    "build_registry",
    "build_sink",
    "build_trace_sinks",
    "assemble",
    "GROUPS",
]
|