zu-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_cli/config.py ADDED
@@ -0,0 +1,738 @@
1
+ """The config system (build step 8).
2
+
3
+ One declarative file (`agent.yaml`) wires a run: which model the provider calls,
4
+ which plugins are active, where events are stored, and the default budget. The
5
+ headline promise is that **swapping the model is a one-line edit** — point the
6
+ ``provider`` block at Anthropic, OpenRouter, or a local server and nothing in
7
+ the code changes, because the loop only ever speaks to the ``ModelProvider``
8
+ port.
9
+
10
+ The wiring stays faithful to the architecture's two rules:
11
+
12
+ * **The core never special-cases a provider.** Plugins (providers, tools,
13
+ detectors, validators, sinks, backends) are looked up *by name* in the same
14
+ registry the loop reads, and constructed by passing only the config fields
15
+ their constructor actually accepts (signature-filtered). A new provider that
16
+ follows the port needs no change here.
17
+ * **Secrets stay in the environment.** Config names the *environment variable*
18
+ that holds a key (``api_key_env``), never the key itself — resolved inside
19
+ the adapter at call time, never placed in config or the model's context.
20
+
21
+ Plugins enter the run registry three ways (the architecture's three doors): a
22
+ discovered built-in named by its short name (``http_fetch``), a pip-installed
23
+ third-party plugin (same path — it is discovered too), or **by reference** as a
24
+ ``module:Attr`` import path, which activates a plugin with no packaging at all.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import importlib
30
+ import inspect
31
+ from pathlib import Path
32
+ from typing import Any
33
+
34
+ from pydantic import BaseModel, Field, field_validator
35
+
36
+ from zu_core.bus import EventBus
37
+ from zu_core.contracts import Budget, TaskSpec
38
+ from zu_core.ports import ModelProvider
39
+ from zu_core.registry import GROUPS, Registry
40
+
41
+ # --- the parsed config shape --------------------------------------------------
42
+
43
+
44
class ProviderConfig(BaseModel):
    """The model the run calls — the one block you edit to swap models.

    ``name`` is a registry name (``anthropic``, ``openai-compatible``,
    ``scripted``) or a ``module:Attr`` import path for a custom provider. The
    remaining fields are the neutral knobs the built-in adapters accept; only
    those an adapter's constructor declares are passed to it, so this stays
    provider-agnostic. ``script`` is used only by the offline ``scripted``
    provider (a list of fake moves) so a run is testable with no live model.
    """

    name: str
    model: str | None = None
    api_key_env: str | None = None
    base_url_env: str | None = None
    # Direct key/URL for *programmatic* use (a key your app already holds). Prefer
    # the *_env forms in files so a secret is never committed; an explicit api_key
    # here is meant for in-memory config dicts, not checked-in YAML.
    # ``repr=False`` keeps the secret out of ``repr()``/``str()`` of the config,
    # so printing or logging a config object does not leak it; the stored value
    # and what is handed to the provider are unchanged.
    api_key: str | None = Field(default=None, repr=False)
    base_url: str | None = None
    max_tokens: int | None = None
    script: list[dict] | None = None
    # Extra provider-specific keyword arguments, merged over the knobs above
    # when the provider is constructed.
    options: dict[str, Any] = Field(default_factory=dict)
67
+
68
+
69
class PluginsConfig(BaseModel):
    """Which plugins are active, by name (or ``module:Attr`` reference). Listing
    a plugin here is what activates it — the run registry contains exactly these,
    never everything installed, so a config controls (and orders) plugins per run
    without touching code.

    ``validators`` defaults to ``[schema, grounding]`` — **correct by default**: a
    run is held to its output schema *and* every reported value must appear in the
    content it actually fetched, so a fabricated answer is refused rather than
    returned as success. Dropping ``grounding`` is opting out of the
    anti-hallucination check; a legitimately non-fetching agent (pure Q&A from the
    model's own knowledge — e.g. the ``minimal`` template) must set
    ``validators: [schema]`` explicitly, because grounding has no retrieved content
    to check against."""

    # Tool names/refs, registered in this order; tools named only in the run
    # config's ``tiers`` are appended after them.
    tools: list[str] = Field(default_factory=list)
    # Detector names/refs; none are active unless listed.
    detectors: list[str] = Field(default_factory=list)
    # Validator names/refs; the factory hands each instance its own fresh list.
    validators: list[str] = Field(default_factory=lambda: ["schema", "grounding"])
87
+
88
+
89
class EventSinkConfig(BaseModel):
    """Where the canonical event log is written. ``driver`` is a sink name
    (built-in: ``sqlite``); omit the whole block to keep the in-memory default.

    ``encryption`` opts the payload into encryption-at-rest (needs
    ``zu-backends[encryption]`` and a key in the environment):
      * ``none`` (default) — plaintext, fully queryable on disk.
      * ``aesgcm`` — AES-256-GCM with a single key (``ZU_EVENT_KEY``).
      * ``managed`` — AES-256-GCM with a rotatable, KMS-pluggable ``KeyProvider``
        (``EnvKeyProvider`` by default; the KMS is the deployment's choice).
    """

    # Name of the sink plugin, looked up in the "sinks" registry group.
    driver: str
    # Filesystem location offered to the sink as its ``path`` argument; refused
    # on surfaces that build sinks with ``allow_paths=False``.
    path: str | None = None
    # Not validated at parse time: the value is checked (case-insensitively)
    # when the sink is built.
    encryption: str = "none"
    # Extra sink-specific keyword arguments, merged over ``path``.
    options: dict[str, Any] = Field(default_factory=dict)
105
+
106
+
107
class ObservabilityConfig(BaseModel):
    """How a run is made watchable — the same hook for every harness.

    ``review_queue`` is the JSONL path contained attacks (``harness.defense.blocked``)
    are appended to for triage; set it to null to disable. ``scope`` is the default
    view scope for *networked* surfaces (the SSE feed and dashboard): ``render``
    (allowlist-render, safe to leave on in production) or ``full`` (show content —
    for local/authorized viewing). The local console trace is always full."""

    # JSONL file for blocked-attack records; ``None`` turns the queue off.
    review_queue: str | None = "zu_review.jsonl"
    # ``render`` or ``full``. NOTE(review): this model does not validate the
    # value — confirm the consumer rejects anything else.
    scope: str = "render"
118
+
119
+
120
class ReplayConfig(BaseModel):
    """Maturity settings for a recorded-track replay — how a run behaves once it has
    a deterministic path and the model is reserved for the frontier. All optional:
    omit the block and replay uses the normal budget and the global provider.

    * ``budget`` — REPLACES the run budget when a matching track replays. The
      navigation is solved, so it can be tight; a broken track then fails fast and
      cheap (a tripwire to re-record) instead of silently re-pathfinding at full
      cost. (The top-level ``budget`` still governs a fresh / --no-track run.)
    * ``finish_model`` — a cheap model id for the post-replay frontier (usually just
      the final extraction). It REUSES the global provider's endpoint/key, swapping
      only the model. Used solely when replay reaches the frontier without diverging;
      a divergence keeps the strong global model to re-pathfind."""

    # ``None`` means "no replay-specific budget": the run budget applies.
    budget: Budget | None = None
    # ``None`` means "no cheap finisher": the global provider's model is kept.
    finish_model: str | None = None
136
+
137
+
138
class RunConfig(BaseModel):
    """One complete ``agent.yaml`` (or ``zu.yaml``-style) document after parsing
    and validation: provider(s), plugins, tiers, sinks, budgets, context caps,
    the optional embedded task and the containment posture."""

    # GLOBAL provider. Deliberately required and default-free: an agent cannot
    # run without one, so omitting it is a validation failure, not a silent guess.
    provider: ProviderConfig
    # PER-TIER overrides keyed by tier number. Tiers not listed run on the global
    # provider; when the loop escalates into a listed tier that provider takes
    # over mid-run (the neutral message format lets another adapter continue the
    # conversation). Typical: cheap model at tier 1, frontier/vision model at
    # tier 2 — e.g. ``providers: {2: {name: anthropic, model: ...}}``.
    providers: dict[int, ProviderConfig] = Field(default_factory=dict)
    plugins: PluginsConfig = Field(default_factory=PluginsConfig)
    # The escalation ladder, owned by the agent author: tier number -> tools
    # offered at that tier (built-in name or ``module:Attr`` ref). A tier given
    # here OVERRIDES the tool class's own default ``tier``; tools that appear
    # only in ``plugins.tools`` keep their class default. The task's ``max_tier``
    # still caps how far the loop climbs.
    tiers: dict[int, list[str]] = Field(default_factory=dict)
    backend: str | None = None
    # Canonical store — the run's single source of truth. ``None`` keeps the
    # in-memory default.
    event_sink: EventSinkConfig | None = None
    # Live trace + defense review queue, the same hook for every harness — see
    # zu_cli.observe.attach_observability.
    observability: ObservabilityConfig = Field(default_factory=ObservabilityConfig)
    # Secondary trace destinations: every event is shipped to each of these *in
    # addition* to the canonical store, isolated so a failing sink never breaks
    # the run. Used to emit to local files or cloud storage for observability.
    trace_sinks: list[EventSinkConfig] = Field(default_factory=list)
    budget: Budget = Field(default_factory=Budget)
    # Recorded-track replay settings: a tight replay budget and an optional cheap
    # finisher model for the frontier (see ReplayConfig).
    replay: ReplayConfig = Field(default_factory=ReplayConfig)
    # Per-content-field character cap on what the MODEL sees of a tool
    # observation. OFF by default (full page). Useful when big pages meet a
    # small-context model: a tier-2 rendered DOM can be hundreds of KB, and a few
    # of them overflow the window. The event log always keeps the full content
    # (grounding reads that), so this is a context-fit measure, not a provenance
    # loss. Large-context models leave it unset.
    max_observation_chars: int | None = None
    # Shape of an over-cap field (only relevant with ``max_observation_chars``).
    # Both options are LOSSLESS — the event log keeps everything:
    #   * ``truncate`` (default) — elide to a ``recall`` pointer (cheap, no
    #     calls); the model pulls back what it needs on demand. Despite the name
    #     it does NOT cut the tail — it defers to recall.
    #   * ``extract`` — map-reduce: scan the whole field in chunks and pull the
    #     task-relevant parts now (one model call per chunk).
    observation_strategy: str = "truncate"

    @field_validator("observation_strategy")
    @classmethod
    def _known_strategy(cls, v: str) -> str:
        # Accept only the two documented strategies; anything else is a config error.
        if v in ("truncate", "extract"):
            return v
        raise ValueError(f"observation_strategy must be 'truncate' or 'extract', got {v!r}")

    # Bound on the TOTAL conversation (chars across all messages) the model sees.
    # OFF by default. ``max_observation_chars`` caps one tool result; this caps
    # their SUM over a long multi-step run (e.g. many browser turns) by eliding
    # old tool observations so the running context never overflows the window.
    # Set it for long agentic runs on a finite-context model; leave it unset for
    # short runs / huge-context models.
    max_context_chars: int | None = None
    # The embedded task, so one ``agent.yaml`` is the whole agent (what + how).
    # ``load_agent`` splits this block — query, target, output_schema, max_tier —
    # out into a TaskSpec. Optional: a *service* default config (``zu serve``)
    # carries none because tasks arrive per request; a runnable agent file does.
    task: dict | None = None
    # Containment posture for tool execution (see zu_core.security):
    #   * ``audit`` (default) — tools run in-process; each declared envelope and
    #     every contained block is recorded on the event log. Tier-1 tools carry
    #     their own in-process guards (the SSRF/DNS-pin in zu-tools). Right for
    #     trusted tools on a host.
    #   * ``required`` — fail closed: refuse any tool with off-box reach
    #     (non-empty egress/capabilities, or tier >= 2) UNLESS the run executes
    #     inside the Zu sandbox (``ZU_SANDBOXED=1``), where the container —
    #     default-DROP network + egress proxy + dropped caps — is the real
    #     boundary. Run such a config via the sandboxed launcher; on a bare host
    #     it refuses rather than run a capability-bearing (or untrusted
    #     third-party) tool unguarded.
    containment: str = "audit"

    @field_validator("containment")
    @classmethod
    def _known_containment(cls, v: str) -> str:
        # Accept only the two documented postures; anything else is a config error.
        if v in ("audit", "required"):
            return v
        raise ValueError(f"containment must be 'audit' or 'required', got {v!r}")
231
+
232
+
233
+ # --- loading -----------------------------------------------------------------
234
+
235
+
236
def _read_doc(path: str) -> dict:
    """Parse a YAML (or JSON — YAML is a superset) document into a dict.

    Every failure to obtain a top-level mapping is reported as ``ConfigError``
    so the caller can show a message instead of a traceback: a missing file, a
    path that cannot be opened or read (a directory, a permission problem),
    bytes that are not UTF-8, malformed YAML, or a document whose top level is
    not a mapping (including an empty file, which parses to ``None``).
    """
    import yaml

    try:
        with open(path, encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
    except FileNotFoundError as exc:
        # Must precede the OSError clause: FileNotFoundError is a subclass and
        # keeps its own, more specific message.
        raise ConfigError(f"file not found: {path}") from exc
    except OSError as exc:
        raise ConfigError(f"{path}: cannot read file — {exc}") from exc
    except UnicodeDecodeError as exc:
        # The stream is decoded lazily while the parser reads it.
        raise ConfigError(f"{path}: not valid UTF-8 — {exc}") from exc
    except yaml.YAMLError as exc:
        raise ConfigError(f"{path}: invalid YAML — {exc}") from exc
    if not isinstance(data, dict):
        raise ConfigError(f"{path}: expected a mapping at the top level")
    return data
250
+
251
+
252
class ConfigError(Exception):
    """A config or task file that cannot be loaded or wired — surfaced to the
    user with a clear message rather than a traceback.

    Raised by the loaders, coercion helpers and builders in this module, which
    wrap lower-level failures (missing file, invalid YAML, validation errors,
    unknown plugin names, bad import references) in it."""
255
+
256
+
257
def load_config(path: str) -> RunConfig:
    """Read the YAML/JSON file at ``path`` and validate it as a ``RunConfig``.

    Raises ``ConfigError`` when the file cannot be read or parsed, and when its
    contents fail validation (the validation error text is included in the
    message, prefixed with the path)."""
    from pydantic import ValidationError

    document = _read_doc(path)
    try:
        config = RunConfig.model_validate(document)
    except ValidationError as exc:
        raise ConfigError(f"{path}: {exc}") from exc
    return config
264
+
265
+
266
def load_task(path: str, *, default_budget: Budget | None = None) -> TaskSpec:
    """Read a task file and validate it as a ``TaskSpec``.

    When the file has no ``budget`` key and ``default_budget`` is given, the
    default is filled in (as a plain dict) before validation; a budget written
    in the task file always takes precedence. Validation failures are raised
    as ``ConfigError`` prefixed with the path."""
    from pydantic import ValidationError

    fields = _read_doc(path)
    if default_budget is not None and "budget" not in fields:
        # Work on a copy so the parsed document itself is left untouched.
        fields = dict(fields)
        fields["budget"] = default_budget.model_dump()
    try:
        spec = TaskSpec.model_validate(fields)
    except ValidationError as exc:
        raise ConfigError(f"{path}: {exc}") from exc
    return spec
278
+
279
+
280
+ # --- coercion (a config/task may arrive as a path, a dict, or a typed object) -
281
+ #
282
+ # The CLI surfaces — `zu serve`, `zu mcp`, and the `zu` embed facade — all accept
283
+ # a config/task that may be a file path, a plain dict, an already-built typed
284
+ # object, or None. The coercion is identical except for one axis: whether a task
285
+ # given as a *str path* is allowed. The HTTP server says no (a path would resolve
286
+ # server-side, which a client can't set); the MCP tools and the embed facade say
287
+ # yes. So these live here once, parameterised by ``allow_paths``, rather than
288
+ # being re-implemented (and drifting) in each caller.
289
+
290
+
291
# Conventional file name of an agent definition: looked up inside a bundle
# directory, and tried in the working directory when no source is given.
AGENT_FILE = "agent.yaml"
292
+
293
+
294
def load_dotenv(path: Path) -> list[str]:
    """Load ``KEY=VALUE`` lines from a bundle's ``.env`` into ``os.environ`` and
    return the names loaded. This is how a bundle carries its **secrets** — a
    gitignored ``.env`` next to ``agent.yaml`` holding ``EXA_API_KEY=…``,
    ``ANTHROPIC_API_KEY=…`` — without committing them: config still names the
    *variable* (``api_key_env``), and the value is supplied here at load time, for
    both a local run and (the file being mounted with the bundle) a contained one.

    An already-set variable is never overwritten (and is not reported), so an
    explicit environment wins over the file. Minimal and dependency-free: blank
    lines and ``#`` comment lines are skipped, a leading ``export `` is
    tolerated, lines with no ``=`` or an empty key are ignored, and one pair of
    matching surrounding quotes is stripped from the value. A UTF-8 byte-order
    mark at the start of the file is ignored.

    Returns the names actually set, in file order; ``[]`` when ``path`` is not
    an existing file.
    """
    import os

    if not path.is_file():
        return []
    loaded: list[str] = []
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first key.
    for raw in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        line = line.removeprefix("export ")
        key, sep, val = line.partition("=")
        key = key.strip()
        if not sep or not key:
            continue
        val = val.strip()
        # Strip exactly one pair of matching quotes: "value" or 'value'.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]
        if key not in os.environ:
            os.environ[key] = val
            loaded.append(key)
    return loaded
328
+
329
+
330
def load_agent(source: Any) -> tuple[TaskSpec, RunConfig]:
    """Load one self-contained agent and return ``(task, config)``.

    ``source`` may be:

    * a path to an ``agent.yaml`` file — a ``.env`` beside it is loaded first;
    * a **bundle directory** holding ``agent.yaml`` (and optionally a ``tools/``
      package) — the directory is put on ``sys.path`` so tools referenced in
      ``tiers`` as ``tools.x:MyTool`` import, and its ``.env`` is loaded;
    * a dict, or an already-built ``RunConfig``;
    * ``None`` — ``./agent.yaml`` if that file exists, else ``./`` as a bundle.

    The result is one RunConfig whose ``task:`` block is split out into a
    TaskSpec (inheriting the config's budget when the task names none). A
    config with no ``task:`` block is not runnable and raises ``ConfigError``,
    as does an unsupported ``source`` type."""
    if source is None:
        source = AGENT_FILE if Path(AGENT_FILE).is_file() else "."

    if isinstance(source, RunConfig):
        cfg = source
    elif isinstance(source, dict):
        cfg = coerce_config(source)
    elif isinstance(source, (str, Path)):
        location = Path(source)
        if location.is_dir():
            _add_bundle_to_path(location)
            env_dir, agent_file = location, location / AGENT_FILE
        else:
            env_dir, agent_file = location.parent, location
        load_dotenv(env_dir / ".env")  # the bundle's gitignored secrets
        cfg = load_config(str(agent_file))
    else:
        raise ConfigError(f"unsupported agent source: {type(source).__name__}")

    if cfg.task is None:
        raise ConfigError(
            "agent has no `task:` block — a runnable agent file must include one "
            "(query/target/output_schema). See `zu init`."
        )
    # The embedded task is structured data, never a path, hence allow_paths=False.
    return coerce_task(cfg.task, cfg.budget, allow_paths=False), cfg
366
+
367
+
368
+ def _add_bundle_to_path(directory: Path) -> None:
369
+ """Put a bundle directory on ``sys.path`` (front) so its own ``tools/`` package
370
+ is importable by the ``module:Attr`` refs in the agent's ``tiers``."""
371
+ import sys
372
+
373
+ resolved = str(directory.resolve())
374
+ if resolved not in sys.path:
375
+ sys.path.insert(0, resolved)
376
+
377
+
378
def coerce_config(source: Any) -> RunConfig:
    """A RunConfig from a path (``str`` or ``pathlib.Path``), a dict, an existing
    RunConfig, or None (meaning ``./agent.yaml``).

    A malformed *dict* raises ``ConfigError`` like a malformed *file* does — so
    callers that ``except ConfigError`` get a clean message for either, never a
    raw pydantic ``ValidationError`` escaping. Any other type is rejected with
    ``ConfigError`` as well."""
    if source is None:
        return load_config("agent.yaml")
    if isinstance(source, RunConfig):
        return source
    # Path objects are accepted alongside strings, matching ``load_agent``.
    if isinstance(source, (str, Path)):
        return load_config(str(source))
    if isinstance(source, dict):
        from pydantic import ValidationError

        try:
            return RunConfig.model_validate(source)
        except ValidationError as exc:
            raise ConfigError(f"invalid config: {exc}") from exc
    raise ConfigError(f"unsupported config type: {type(source).__name__}")
397
+
398
+
399
def coerce_task(source: Any, default_budget: Budget, *, allow_paths: bool) -> TaskSpec:
    """Turn ``source`` into a TaskSpec.

    * a TaskSpec is returned unchanged;
    * a ``str`` is a file path: loaded via ``load_task`` when ``allow_paths`` is
      true, rejected otherwise;
    * a dict is copied, given ``default_budget`` if it names no budget, and
      validated.

    Anything else — and any validation failure — is raised as ``ConfigError``.

    ``allow_paths=False`` is the server's stance: a path would be resolved on
    the server's filesystem, which a remote client cannot meaningfully set, so
    it is refused rather than read."""
    if isinstance(source, TaskSpec):
        return source

    if isinstance(source, str):
        if allow_paths:
            return load_task(source, default_budget=default_budget)
        raise ConfigError("task must be a JSON object (the task spec)")

    if not isinstance(source, dict):
        raise ConfigError(f"unsupported task type: {type(source).__name__}")

    # Copy first so the caller's dict is never mutated by the budget default.
    fields = dict(source)
    fields.setdefault("budget", default_budget.model_dump())
    try:
        return TaskSpec.model_validate(fields)
    except Exception as exc:  # noqa: BLE001 - surface as a ConfigError, not a raw pydantic error
        raise ConfigError(f"invalid task: {exc}") from exc
421
+
422
+
423
+ # --- building the run --------------------------------------------------------
424
+
425
+
426
def _catalog() -> Registry:
    """Build a fresh registry and run plugin discovery on it.

    The result is the catalog of everything installed; a run registry is made
    by selecting from it. Each call performs its own discovery, so callers that
    need it more than once should build it once and pass it along. Discovery
    failures are meant to be tolerated (a broken third-party plugin must not
    stop a run that does not use it) — that handling is expected to live in
    ``Registry.discover``, which is not visible here."""
    installed = Registry()
    installed.discover()
    return installed
433
+
434
+
435
+ def _import_ref(ref: str) -> Any:
436
+ """Resolve an ``module:Attr`` (or ``module:Attr.Nested``) import path — the
437
+ 'by reference in config' door. Used for both plugins and providers."""
438
+ module, _, attr = ref.partition(":")
439
+ if not module or not attr:
440
+ raise ConfigError(f"bad import reference {ref!r}; expected 'module:Attr'")
441
+ try:
442
+ obj: Any = importlib.import_module(module)
443
+ for part in attr.split("."):
444
+ obj = getattr(obj, part)
445
+ except (ImportError, AttributeError) as exc:
446
+ raise ConfigError(f"cannot import {ref!r}: {exc}") from exc
447
+ return obj
448
+
449
+
450
+ def _construct(factory: Any, candidate: dict[str, Any]) -> Any:
451
+ """Build ``factory`` passing only the kwargs its constructor declares, and
452
+ only those with a value. This is what keeps the wiring provider-agnostic:
453
+ config offers a neutral set of knobs and each adapter takes the subset it
454
+ understands — no per-provider branching here."""
455
+ try:
456
+ params = inspect.signature(factory).parameters
457
+ except (TypeError, ValueError):
458
+ return factory()
459
+ accepts_kwargs = any(p.kind is p.VAR_KEYWORD for p in params.values())
460
+ kwargs = {
461
+ k: v
462
+ for k, v in candidate.items()
463
+ if v is not None and (accepts_kwargs or k in params)
464
+ }
465
+ return factory(**kwargs)
466
+
467
+
468
def _refuse_import(ref: str, what: str) -> None:
    """Always raise ``ConfigError``: ``ref`` is a ``module:Attr`` reference named
    on a surface that may not import code.

    Importing a module runs its top-level code, so a config able to name any
    ``module:Attr`` is a code-execution door — acceptable for the
    operator-trusted CLI, never for a config that arrived over the network.
    ``what`` is the kind of thing being refused (e.g. ``provider``), used only
    in the message."""
    message = (
        f"refusing to import {what} {ref!r}: this surface does not permit arbitrary "
        "'module:Attr' imports (a per-request config may only use installed, named "
        "plugins). Configure it on the trusted server default instead."
    )
    raise ConfigError(message)
478
+
479
+
480
def build_provider(
    cfg: ProviderConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> ModelProvider:
    """Construct the configured model provider — the one-line model swap.

    Resolution by ``cfg.name``:

    * ``scripted`` — replays ``cfg.script`` (a fixed list of moves, empty when
      unset) for offline runs and tests; it has no env/model to construct from.
    * a name containing ``:`` — imported as ``module:Attr``, unless
      ``allow_imports`` is false (the networked surface), which raises.
    * any other name — looked up in the catalog's ``providers`` group (the
      catalog is discovered here when not supplied); unknown names raise
      ``ConfigError`` listing what was discovered.

    The factory then receives only the neutral knobs it accepts, with
    ``cfg.options`` taking precedence over the named fields."""
    provider_name = cfg.name
    if provider_name == "scripted":
        from zu_providers.scripted import ScriptedProvider

        return ScriptedProvider.from_moves(cfg.script or [])

    if ":" not in provider_name:
        catalog = catalog or _catalog()
        try:
            factory = catalog.get("providers", provider_name)
        except KeyError:
            known = ", ".join(catalog.names("providers")) or "none"
            raise ConfigError(
                f"unknown provider {provider_name!r}; discovered: {known} "
                "(is its package installed?)"
            ) from None
    else:
        if not allow_imports:
            _refuse_import(provider_name, "provider")
        factory = _import_ref(provider_name)

    knobs: dict[str, Any] = {
        "model": cfg.model,
        "api_key_env": cfg.api_key_env,
        "base_url_env": cfg.base_url_env,
        "api_key": cfg.api_key,
        "base_url": cfg.base_url,
        "max_tokens": cfg.max_tokens,
    }
    # Free-form options come last so they can override a named knob.
    knobs.update(cfg.options)
    return _construct(factory, knobs)
520
+
521
+
522
def _resolve_plugin(
    kind: str, name: str, catalog: Registry, extra: dict[str, Any], *, allow_imports: bool = True
) -> Any:
    """Resolve one named plugin to the object placed in the run registry.

    A ``module:Attr`` name is imported and returned as-is (refused with
    ``ConfigError`` when ``allow_imports`` is false); a short name is fetched
    from ``catalog`` under the plural group ``kind``, and an unknown one raises
    ``ConfigError`` listing what was discovered.

    ``extra`` carries optional injected dependencies (e.g. a configured
    ``backend``). A catalog entry that is a class declaring one of them is
    instantiated here with those arguments; everything else is handed back
    untouched for the loop to materialise."""
    singular = kind[:-1]  # "tools" -> "tool", for messages
    if ":" in name:
        if not allow_imports:
            _refuse_import(name, singular)
        return _import_ref(name)

    try:
        plugin = catalog.get(kind, name)
    except KeyError:
        known = ", ".join(catalog.names(kind)) or "none"
        raise ConfigError(
            f"unknown {singular} {name!r}; discovered: {known} (is its package installed?)"
        ) from None

    # Only a class can be constructed with injected dependencies — e.g.
    # render_dom(backend=...). Instances and dependency-free classes pass through.
    if not extra or not isinstance(plugin, type):
        return plugin
    accepted = inspect.signature(plugin).parameters
    injected = {key: value for key, value in extra.items() if key in accepted}
    return plugin(**injected) if injected else plugin
550
+
551
+
552
def build_registry(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> Registry:
    """Build the run registry: exactly the plugins the config names, no more.

    This is how a config activates and orders plugins per run without code
    changes. ``allow_imports=False`` forbids ``module:Attr`` plugin refs (the
    networked surface): a per-request config may only activate installed,
    named plugins."""
    catalog = catalog or _catalog()
    run_registry = Registry()

    # A configured backend is resolved first and offered to every plugin that
    # declares a ``backend`` parameter.
    extra: dict[str, Any] = {}
    if cfg.backend is not None:
        backend = _resolve_plugin(
            "backends", cfg.backend, catalog, {}, allow_imports=allow_imports
        )
        if isinstance(backend, type):
            backend = backend()
        if backend is not None:
            extra["backend"] = backend

    # Tools come from the config-owned ladder (``tiers``) and/or the flat
    # ``plugins.tools`` list. A name in ``tiers`` has that tier STAMPED on its
    # instance (the author's choice overrides the tool's own default); a name
    # only in ``plugins.tools`` keeps its class-default tier. A name listed
    # under several tiers ends up with the last one seen.
    tier_of: dict[str, int] = {
        name: tier for tier, names in cfg.tiers.items() for name in names
    }
    listed = list(cfg.plugins.tools)
    ordered = listed + [name for name in tier_of if name not in listed]
    for name in ordered:
        tool = _resolve_plugin("tools", name, catalog, extra, allow_imports=allow_imports)
        if name in tier_of:
            # Stamping needs an instance; the loop would instantiate a class
            # with no arguments anyway.
            if isinstance(tool, type):
                tool = tool()
            tool.tier = tier_of[name]
        run_registry.register("tools", getattr(tool, "name", name), tool)

    remaining = (
        ("detectors", cfg.plugins.detectors),
        ("validators", cfg.plugins.validators),
    )
    for kind, names in remaining:
        for name in names:
            plugin = _resolve_plugin(kind, name, catalog, extra, allow_imports=allow_imports)
            run_registry.register(kind, getattr(plugin, "name", name), plugin)
    return run_registry
592
+
593
+
594
def _refuse_path(spec: EventSinkConfig) -> None:
    """Always raise ``ConfigError``: ``spec`` names a filesystem ``path`` on a
    surface that may not write the host.

    A sink ``path`` is an arbitrary file the process opens for write (a sqlite
    db, a jsonl log), so a config able to name any path is a file-write door —
    acceptable for the operator-trusted CLI, never for a config that arrived
    over the network. The in-memory default (no ``event_sink``) and any
    path-free, options-only sink stay available to a per-request config."""
    message = (
        f"refusing to open sink path {spec.path!r}: this surface does not permit "
        "writing arbitrary host paths (a per-request config may not configure a "
        "filesystem sink). Configure event_sink/trace_sinks on the trusted server "
        "default instead."
    )
    raise ConfigError(message)
607
+
608
+
609
def _build_one_sink(
    spec: EventSinkConfig, catalog: Registry, *, allow_paths: bool = True
) -> Any:
    """Construct one EventSink from its config (driver name + path/options).

    The driver is looked up in the catalog's ``sinks`` group; the factory is
    offered ``path``, the free-form ``options`` and, when encryption is
    enabled, a ``codec``, and receives whichever of those it accepts.

    ``allow_paths=False`` (the networked surface) rejects any spec that names
    a filesystem ``path``, so a remote caller cannot drive an arbitrary file
    write. Raises ``ConfigError`` for that, for an unknown driver, and for
    encryption problems."""
    if spec.path is not None and not allow_paths:
        _refuse_path(spec)

    try:
        factory = catalog.get("sinks", spec.driver)
    except KeyError:
        known = ", ".join(catalog.names("sinks")) or "none"
        raise ConfigError(
            f"unknown event sink {spec.driver!r}; discovered: {known} "
            "(is its package installed?)"
        ) from None

    kwargs: dict[str, Any] = {"path": spec.path}
    kwargs.update(spec.options)
    codec = _build_codec(spec.encryption)
    if codec is not None:
        kwargs["codec"] = codec
    return _construct(factory, kwargs)
630
+
631
+
632
+ def _build_codec(encryption: str) -> Any:
633
+ """Map the ``encryption`` config value to a payload codec instance (or None
634
+ for plaintext). The codec lives in ``zu-backends[encryption]`` and is imported
635
+ lazily, with a clear error if the extra isn't installed."""
636
+ mode = (encryption or "none").lower()
637
+ if mode in ("none", ""):
638
+ return None
639
+ try:
640
+ from zu_backends.encryption import AesGcmCodec, ManagedAesGcmCodec
641
+ except ModuleNotFoundError as exc:
642
+ raise ConfigError(
643
+ "encryption-at-rest needs the optional dependency: "
644
+ "pip install 'zu-backends[encryption]'"
645
+ ) from exc
646
+ try:
647
+ if mode == "aesgcm":
648
+ return AesGcmCodec.from_env()
649
+ if mode == "managed":
650
+ return ManagedAesGcmCodec.from_env()
651
+ except RuntimeError as exc: # a missing/invalid key in the environment
652
+ raise ConfigError(str(exc)) from exc
653
+ raise ConfigError(
654
+ f"unknown encryption mode {encryption!r}; use 'none', 'aesgcm', or 'managed'."
655
+ )
656
+
657
+
658
def build_sink(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_paths: bool = True
) -> Any:
    """Build the run's canonical EventSink from ``cfg.event_sink``.

    Returns ``None`` when no ``event_sink`` is configured, which selects the
    in-memory default; in that case no plugin discovery happens. Otherwise the
    catalog is discovered if not supplied, and ``allow_paths`` is forwarded to
    gate filesystem paths."""
    sink_cfg = cfg.event_sink
    if sink_cfg is not None:
        return _build_one_sink(sink_cfg, catalog or _catalog(), allow_paths=allow_paths)
    return None
665
+
666
+
667
def build_trace_sinks(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_paths: bool = True
) -> list[Any]:
    """Build the secondary trace destinations (shippers): one EventSink per
    ``cfg.trace_sinks`` entry, in config order, for attaching to the bus
    alongside the canonical store. Returns ``[]`` — without discovering
    plugins — when none are configured."""
    if not cfg.trace_sinks:
        return []
    catalog = catalog or _catalog()
    shippers: list[Any] = []
    for sink_cfg in cfg.trace_sinks:
        shippers.append(_build_one_sink(sink_cfg, catalog, allow_paths=allow_paths))
    return shippers
676
+
677
+
678
def build_providers_by_tier(
    cfg: RunConfig, catalog: Registry | None = None, *, allow_imports: bool = True
) -> dict[int, ModelProvider]:
    """Build the per-tier provider overrides (``cfg.providers``) into
    ModelProviders keyed by tier number.

    Returns ``{}`` — without discovering plugins — when no overrides are
    configured; the loop then runs the global provider on every tier."""
    if not cfg.providers:
        return {}
    catalog = catalog or _catalog()
    built: dict[int, ModelProvider] = {}
    for tier, provider_cfg in cfg.providers.items():
        built[tier] = build_provider(provider_cfg, catalog, allow_imports=allow_imports)
    return built
691
+
692
+
693
def assemble(
    cfg: RunConfig, *, allow_imports: bool = True
) -> tuple[ModelProvider, Registry, EventBus, dict[int, ModelProvider]]:
    """Turn a parsed config into what ``run_task`` needs.

    Returns ``(provider, registry, bus, providers_by_tier)``: the global
    provider, the run registry, a bus whose canonical sink is the configured
    one (with every ``trace_sinks`` entry attached as an isolated secondary
    destination), and the per-tier provider override map. Plugins are
    discovered once and the same catalog is shared by every builder.

    ``allow_imports`` defaults True for the operator-trusted CLI; pass False
    when the config arrived over the network (``zu serve`` per-request
    override) so an arbitrary ``module:Attr`` provider/plugin cannot be
    imported (and its top-level code executed) by a remote caller. The same
    flag gates filesystem sink paths: a per-request config may not name an
    ``event_sink``/``trace_sinks`` ``path`` (an arbitrary host file the process
    would open for write)."""
    shared = _catalog()
    global_provider = build_provider(cfg.provider, shared, allow_imports=allow_imports)
    tier_overrides = build_providers_by_tier(cfg, shared, allow_imports=allow_imports)
    run_registry = build_registry(cfg, shared, allow_imports=allow_imports)

    # One trust switch: a surface that may not import code may not name paths.
    canonical = build_sink(cfg, shared, allow_paths=allow_imports)
    bus = EventBus(sink=canonical)
    for shipper in build_trace_sinks(cfg, shared, allow_paths=allow_imports):
        bus.add_destination(shipper)
    return global_provider, run_registry, bus, tier_overrides
715
+
716
+
717
+ # Re-exported so callers can introspect the plugin kinds without importing the
718
+ # registry module directly.
719
# The module's public API. ``ReplayConfig`` and ``build_trace_sinks`` are listed
# with their siblings so a star-import exposes every config model and builder.
__all__ = [
    "RunConfig",
    "ProviderConfig",
    "PluginsConfig",
    "EventSinkConfig",
    "ObservabilityConfig",
    "ReplayConfig",
    "ConfigError",
    "load_config",
    "load_task",
    "load_agent",
    "load_dotenv",
    "coerce_config",
    "coerce_task",
    "build_provider",
    "build_providers_by_tier",
    "build_registry",
    "build_sink",
    "build_trace_sinks",
    "assemble",
    # Imported from zu_core.registry and re-exported from here.
    "GROUPS",
]