weaver-kernel 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/AGENTS.md +1 -1
- weaver_kernel-0.6.0/CHANGELOG.md +199 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/PKG-INFO +10 -1
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/invariants.md +13 -0
- weaver_kernel-0.6.0/docs/architecture.md +110 -0
- weaver_kernel-0.6.0/docs/capabilities.md +169 -0
- weaver_kernel-0.6.0/docs/context_firewall.md +120 -0
- weaver_kernel-0.6.0/docs/integrations.md +329 -0
- weaver_kernel-0.6.0/examples/policies/default.toml +68 -0
- weaver_kernel-0.6.0/examples/policies/default.yaml +64 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/pyproject.toml +29 -2
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/__init__.py +42 -6
- weaver_kernel-0.6.0/src/agent_kernel/adapters/__init__.py +35 -0
- weaver_kernel-0.6.0/src/agent_kernel/adapters/_base.py +459 -0
- weaver_kernel-0.6.0/src/agent_kernel/adapters/anthropic.py +273 -0
- weaver_kernel-0.6.0/src/agent_kernel/adapters/openai.py +358 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/mcp.py +4 -2
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/errors.py +44 -0
- weaver_kernel-0.6.0/src/agent_kernel/firewall/__init__.py +18 -0
- weaver_kernel-0.6.0/src/agent_kernel/firewall/budget_manager.py +275 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/budgets.py +5 -3
- weaver_kernel-0.6.0/src/agent_kernel/firewall/token_counting.py +41 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/kernel.py +228 -19
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/models.py +131 -1
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/policy.py +197 -2
- weaver_kernel-0.6.0/src/agent_kernel/policy_dsl.py +503 -0
- weaver_kernel-0.6.0/tests/test_adapters.py +1130 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_firewall.py +262 -1
- weaver_kernel-0.6.0/tests/test_kernel.py +734 -0
- weaver_kernel-0.6.0/tests/test_policy.py +1263 -0
- weaver_kernel-0.5.0/CHANGELOG.md +0 -81
- weaver_kernel-0.5.0/docs/architecture.md +0 -70
- weaver_kernel-0.5.0/docs/capabilities.md +0 -49
- weaver_kernel-0.5.0/docs/context_firewall.md +0 -64
- weaver_kernel-0.5.0/docs/integrations.md +0 -130
- weaver_kernel-0.5.0/src/agent_kernel/firewall/__init__.py +0 -8
- weaver_kernel-0.5.0/tests/test_kernel.py +0 -217
- weaver_kernel-0.5.0/tests/test_policy.py +0 -470
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.claude/CLAUDE.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/copilot-instructions.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/workflows/ci.yml +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/workflows/publish.yml +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.gitignore +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/CONTRIBUTING.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/LICENSE +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/Makefile +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/README.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/RELEASE.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/architecture.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/lessons-learned.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/review-checklist.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/workflows.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/security.md +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/basic_cli.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/billing_demo.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/http_driver_demo.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/__init__.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/base.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/http.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/mcp_support.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/memory.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/enums.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/redaction.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/summarize.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/transform.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/handles.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/py.typed +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/registry.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/router.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/tokens.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/trace.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/conftest.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_drivers.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_handles.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_logging.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_mcp_driver.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_models.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_redaction.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_registry.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_router.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_tokens.py +0 -0
- {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_trace.py +0 -0
|
@@ -52,7 +52,7 @@ Use these terms consistently. Never substitute synonyms:
|
|
|
52
52
|
- Error messages are part of the contract — tests must assert both exception type and message.
|
|
53
53
|
- Keep modules ≤ 300 lines. Split if needed.
|
|
54
54
|
- No randomness in matching, routing, or summarization. Deterministic outputs always.
|
|
55
|
-
- No new dependencies without justification. The dep list is intentionally minimal (`httpx`).
|
|
55
|
+
- No new dependencies without justification. The dep list is intentionally minimal (`httpx`, `pydantic`).
|
|
56
56
|
|
|
57
57
|
## Security rules
|
|
58
58
|
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.6.0] - 2026-05-19
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Cross-invocation context budget manager (`BudgetManager`) tracks cumulative token usage across
|
|
14
|
+
multiple `Kernel.invoke()` calls within a session. When attached to a `Kernel` via the new
|
|
15
|
+
`budget_manager` keyword argument, the kernel reserves a budget slice before each invocation
|
|
16
|
+
and reconciles actual frame-payload usage afterwards. As the remaining budget shrinks the
|
|
17
|
+
requested `response_mode` is auto-escalated to a more aggressive tier (> 50% remaining keeps
|
|
18
|
+
the caller's mode; 20–50% downgrades `raw` to `table`; 5–20% floors at `summary`; < 5% forces
|
|
19
|
+
`handle_only`). `Kernel.invoke(..., dry_run=True)` now also reports `budget_remaining` and the
|
|
20
|
+
escalated `response_mode` when a manager is configured. The `BudgetManager` is optional and
|
|
21
|
+
off by default — existing kernels are unchanged. (#44)
|
|
22
|
+
- `TokenCounter` protocol and `default_token_counter` (character-based `len(json.dumps(...))//4`
|
|
23
|
+
approximation) provide pluggable token counting without runtime dependencies. A new optional
|
|
24
|
+
`[tiktoken]` extra is reserved for callers that want to plug in `tiktoken`-based counting.
|
|
25
|
+
- `BudgetExhausted(AgentKernelError)` raised by `BudgetManager.allocate()` (and by
|
|
26
|
+
`Kernel.invoke()` before driver execution) when the cumulative session budget is fully spent.
|
|
27
|
+
- `BudgetConfigError(AgentKernelError)` raised by `BudgetManager` for invalid configuration or
|
|
28
|
+
validation failures (non-positive budgets, negative allocate/record/release amounts), replacing
|
|
29
|
+
bare `ValueError` so callers can catch budget mistakes via the `AgentKernelError` hierarchy
|
|
30
|
+
per `AGENTS.md` ("never raise bare ValueError to callers").
|
|
31
|
+
- New public exports: `BudgetManager`, `BudgetExhausted`, `BudgetConfigError`, `TokenCounter`,
|
|
32
|
+
`default_token_counter`, and `Kernel.budget` accessor property.
|
|
33
|
+
- LLM tool-format adapters and middleware (`agent_kernel.adapters`): `OpenAIMiddleware` (OpenAI
|
|
34
|
+
Responses API + Chat Completions, auto-detected on input) and `AnthropicMiddleware` (Anthropic
|
|
35
|
+
Messages with `cache_control` support). Both translate `Capability` objects to vendor tool
|
|
36
|
+
schemas, route tool calls through the full kernel pipeline (grant → invoke → firewall → trace),
|
|
37
|
+
and surface kernel errors (`PolicyDenied`, `CapabilityNotFound`, `DriverError`) as tool-result
|
|
38
|
+
errors so the LLM can react. Pre/post hooks (`intercept_tool_call`, `intercept_tool_result`,
|
|
39
|
+
sync or async) support logging, metrics, approval gates, and per-call justification injection.
|
|
40
|
+
Zero runtime dependency on the `openai` / `anthropic` SDK packages. (#55, #50, #40)
|
|
41
|
+
- New `Capability` fields for LLM adapters: `parameters_model: type[pydantic.BaseModel] | None`
|
|
42
|
+
(input schema source + validation), `parameters_schema: dict | None` (raw JSON Schema escape
|
|
43
|
+
hatch), and `tool_hints: ToolHints | None` (vendor hints — Anthropic `cache_control`, OpenAI
|
|
44
|
+
`strict` mode). All default to `None`; existing capabilities and tests are unaffected.
|
|
45
|
+
- New `ToolHints` dataclass and `OpenAIMiddleware` / `AnthropicMiddleware` top-level exports.
|
|
46
|
+
- New `AdapterParseError(AgentKernelError)` exception raised by adapter parse / validation
|
|
47
|
+
helpers (`tool_call_to_request`, `tool_use_to_request`, `make_namespace_safe_name`) instead
|
|
48
|
+
of bare `ValueError`. Satisfies `AGENTS.md`'s "no bare ValueError to callers" rule and
|
|
49
|
+
gives consumers a stable adapter-specific exception type. Also catches capability IDs that
|
|
50
|
+
contain the reserved OpenAI namespace separator `__` (which would otherwise produce
|
|
51
|
+
colliding tool names).
|
|
52
|
+
- `Kernel.list_capabilities()` convenience accessor returning every registered capability in
|
|
53
|
+
registration order. Used by the new adapters but generally useful for tooling that needs to
|
|
54
|
+
enumerate the registry without keyword search.
|
|
55
|
+
- Declarative policy engine (`DeclarativePolicyEngine`) that loads rules from YAML or TOML files.
|
|
56
|
+
Rules are evaluated top-down with first-match-wins semantics; supports `safety_class`, `sensitivity`,
|
|
57
|
+
`roles`, `attributes`, and `min_justification` match conditions. (#42)
|
|
58
|
+
- Policy denial explanation: `ExplainingPolicyEngine` protocol plus `DefaultPolicyEngine.explain()` and
|
|
59
|
+
`DeclarativePolicyEngine.explain()` implementations return a structured `DenialExplanation` with a
|
|
60
|
+
`FailedCondition` list for every failing check (no short-circuit), a `remediation` list, and a
|
|
61
|
+
human-readable `narrative`. (#48)
|
|
62
|
+
- Dry-run invocation mode: `kernel.invoke(..., dry_run=True)` verifies the token and resolves the
|
|
63
|
+
execution plan without calling the driver. Returns `DryRunResult` with the resolved `driver_id`,
|
|
64
|
+
`operation`, `response_mode`, and an `estimated_cost` tier (`low`/`medium`/`high`). (#43)
|
|
65
|
+
- `Kernel.explain_denial()` convenience method that calls the policy engine's `explain()` for a given
|
|
66
|
+
`CapabilityRequest` and `Principal` without requiring a token. Raises `AgentKernelError` when the
|
|
67
|
+
configured engine does not implement `explain()`.
|
|
68
|
+
- New public types exported from `agent_kernel`: `DeclarativePolicyEngine`, `ExplainingPolicyEngine`,
|
|
69
|
+
`PolicyEngine`, `PolicyMatch`, `PolicyRule`, `DenialExplanation`, `FailedCondition`, `DryRunResult`,
|
|
70
|
+
`PolicyConfigError`.
|
|
71
|
+
- `policy` optional extra (`pip install weaver-kernel[policy]`) pulls in `pyyaml` and, on Python 3.10 only, `tomli` (Python 3.11+ uses the standard-library `tomllib`).
|
|
72
|
+
- Example policy files in `examples/policies/` (YAML and TOML formats).
|
|
73
|
+
|
|
74
|
+
### Changed
|
|
75
|
+
- Runtime dependencies now include `pydantic>=2` in addition to `httpx`. Pydantic is used by the new
|
|
76
|
+
`agent_kernel.adapters` package for JSON-Schema generation and argument validation when a
|
|
77
|
+
`Capability` declares a `parameters_model`. Existing kernel behavior is unchanged; pydantic is not
|
|
78
|
+
imported at module load by anything outside the adapters.
|
|
79
|
+
- `PolicyEngine` protocol no longer requires `explain()`. Engines that need to support
|
|
80
|
+
`Kernel.explain_denial()` should implement the new `ExplainingPolicyEngine` protocol. Built-in
|
|
81
|
+
engines satisfy both. This avoids a breaking typing change for downstream implementers.
|
|
82
|
+
- `DeclarativePolicyEngine` now defers `yaml` and `tomllib`/`tomli` imports into the corresponding
|
|
83
|
+
loaders, so `import agent_kernel` works without the `policy` extra installed. Calling
|
|
84
|
+
`from_yaml`/`from_toml` without the parser surfaces a `PolicyConfigError` with an install hint.
|
|
85
|
+
- `Kernel.invoke(dry_run=True)` resolves `operation` the same way drivers do
|
|
86
|
+
(`args.get("operation", capability_id)`) so `DryRunResult.operation` matches what a driver would
|
|
87
|
+
actually receive — instead of `capability.impl.operation`, which can diverge.
|
|
88
|
+
- `Kernel.invoke(dry_run=True)` mirrors the Firewall's admin-only gate for `raw` mode: non-admin
|
|
89
|
+
principals see their requested `raw` mode downgraded to `summary` in `DryRunResult`, matching
|
|
90
|
+
what they would actually get at real-invoke time. Prevents probing for raw availability.
|
|
91
|
+
|
|
92
|
+
### Documentation
|
|
93
|
+
- `docs/architecture.md` now describes `PolicyEngine` / `ExplainingPolicyEngine` protocols,
|
|
94
|
+
`DefaultPolicyEngine` and `DeclarativePolicyEngine` (with policy-DSL semantics), and dry-run
|
|
95
|
+
mode (admin gate, operation resolution rule). Closes the canonical "Components & API
|
|
96
|
+
reference" gap flagged in audit.
|
|
97
|
+
- `docs/capabilities.md` adds a "Dry-run mode" section (semantics, the three parity rules,
|
|
98
|
+
no-side-effects guarantee), a "Declarative policies" section (loaders, match conditions,
|
|
99
|
+
optional-extra behaviour), and a "Denial explanations" section. Closes the affected-files
|
|
100
|
+
gap from issue #43.
|
|
101
|
+
|
|
102
|
+
### Fixed
|
|
103
|
+
- `DeclarativePolicyEngine._parse_rule()` now validates the types of `roles`, `attributes`,
|
|
104
|
+
`min_justification`, and `constraints` in policy files and raises `PolicyConfigError` with a
|
|
105
|
+
precise message instead of silently producing misbehaving rules or raising at evaluation time.
|
|
106
|
+
- `DeclarativePolicyEngine.explain()` now correctly reports explicit deny rules that fully match
|
|
107
|
+
(previously fell through to the misleading `no_matching_rule` fallback and dropped the rule's
|
|
108
|
+
reason). Partial-match deny rules are now skipped so the explanation focuses on actionable allow
|
|
109
|
+
rules instead of suggesting changes that would only trigger the deny.
|
|
110
|
+
- Example policy files (`examples/policies/default.{yaml,toml}`) now use the correct `default` key
|
|
111
|
+
(was `default_action`, which the parser silently ignored), express PII-with-tenant as an allow
|
|
112
|
+
rule paired with default-deny (the previous deny rule was inverted under first-match-wins), and
|
|
113
|
+
order the `allow-secrets-service` rule before the deny rule (the deny was previously unreachable).
|
|
114
|
+
- `Kernel.explain_denial()` docstring no longer contradicts itself ("never raises" vs.
|
|
115
|
+
`CapabilityNotFound`).
|
|
116
|
+
- `DryRunResult.budget_remaining` docstring no longer references the unimplemented `BudgetManager`;
|
|
117
|
+
the field is documented as reserved for a future cross-invocation budget mechanism.
|
|
118
|
+
- `drivers/mcp.py` adds an explicit `_McpError: type[BaseException] | None` annotation so mypy
|
|
119
|
+
`--strict` remains happy across the try/except import branches.
|
|
120
|
+
|
|
121
|
+
### Tests
|
|
122
|
+
- `tests/test_policy.py` adds `test_declarative_replicates_default_policy_decisions` — a
|
|
123
|
+
comparative test asserting that `DeclarativePolicyEngine` and `DefaultPolicyEngine` produce
|
|
124
|
+
the same allow/deny outcomes across a curated scenario matrix (READ × non-sensitive / PII /
|
|
125
|
+
PCI / SECRETS, WRITE/DESTRUCTIVE with and without required roles and justification). Closes
|
|
126
|
+
issue #42's "comparative test" acceptance criterion.
|
|
127
|
+
|
|
128
|
+
## [0.5.0] - 2026-04-12
|
|
129
|
+
|
|
130
|
+
### Added
|
|
131
|
+
- Built-in `MCPDriver` with stdio and Streamable HTTP transports, tool auto-discovery, normalized MCP result handling, and optional dependency guardrails.
|
|
132
|
+
- Declared weaver-spec v0.1.0 compatibility in README: invariants I-01 (firewall), I-02 (authorization + audit), and I-06 (scoped tokens) are satisfied.
|
|
133
|
+
- Added placeholder `conformance_stub` CI job that will activate once the weaver-spec conformance suite ships (dgenio/weaver-spec#4).
|
|
134
|
+
|
|
135
|
+
## [0.4.0] - 2026-03-14
|
|
136
|
+
|
|
137
|
+
### Added
|
|
138
|
+
- Sliding-window rate limiting in `DefaultPolicyEngine` per `(principal_id, capability_id)` pair (#39).
|
|
139
|
+
Default limits by safety class: 60 READ / 10 WRITE / 2 DESTRUCTIVE per 60s window.
|
|
140
|
+
Service-role principals get 10× limits. Configurable via constructor.
|
|
141
|
+
- GitHub Release step in publish workflow — creates a release with auto-generated notes and artifacts before publishing to PyPI.
|
|
142
|
+
|
|
143
|
+
### Fixed
|
|
144
|
+
- `HTTPDriver`: DELETE requests now forward args as query params instead of silently dropping them.
|
|
145
|
+
|
|
146
|
+
### Removed
|
|
147
|
+
- Dead `_truncate_str` helper in `firewall/transform.py` (defined but never called).
|
|
148
|
+
|
|
149
|
+
## [0.3.0] - 2026-03-09
|
|
150
|
+
|
|
151
|
+
### Added
|
|
152
|
+
- Structured logging at kernel decision points (invoke, grant, deny, revoke).
|
|
153
|
+
- Agent-facing documentation system: `docs/agent-context/` (architecture, workflows, invariants, lessons-learned, review-checklist).
|
|
154
|
+
- `.github/copilot-instructions.md` — review-critical projections for GitHub Copilot.
|
|
155
|
+
- `.claude/CLAUDE.md` — Claude-specific operating instructions.
|
|
156
|
+
- PyPI publish workflow (`.github/workflows/publish.yml`) with Trusted Publisher (OIDC) (#37).
|
|
157
|
+
- `RELEASE.md` documenting the full release process.
|
|
158
|
+
- `[project.urls]` in `pyproject.toml` (Homepage, Repository, Documentation, Changelog).
|
|
159
|
+
- Optional dependency groups: `mcp` and `otel` in `pyproject.toml`.
|
|
160
|
+
|
|
161
|
+
### Changed
|
|
162
|
+
- Rewrote `AGENTS.md` with full domain vocabulary, security rules, code conventions, documentation map, and weaver-spec references.
|
|
163
|
+
- Renamed PyPI package from `agent-kernel` to `weaver-kernel` to align with Weaver ecosystem.
|
|
164
|
+
- Added `workflow_call` trigger to CI workflow so publish workflow can reuse it as a gate.
|
|
165
|
+
|
|
166
|
+
### Refactored
|
|
167
|
+
- Extracted `_log_verify_failure` helper in `tokens.py`.
|
|
168
|
+
- Consolidated invoke logging with shared base dict in `kernel.py`.
|
|
169
|
+
- Extracted `_deny` static method in policy engine.
|
|
170
|
+
|
|
171
|
+
### Fixed
|
|
172
|
+
- Pinned GitHub Actions to commit SHAs in publish workflow.
|
|
173
|
+
- Added `contents:read` permission to publish job.
|
|
174
|
+
- Clarified PyPI vs import name in README Quickstart.
|
|
175
|
+
|
|
176
|
+
## [0.2.0] - 2026-03-06
|
|
177
|
+
|
|
178
|
+
### Added
|
|
179
|
+
- Token revocation support: `revoke_token()` and `revoke_all()` on `Kernel` (#33, #57).
|
|
180
|
+
- `SECRETS` sensitivity tag enforcement in policy engine and redaction (#56).
|
|
181
|
+
|
|
182
|
+
### Fixed
|
|
183
|
+
- Policy engine now strips whitespace from justification before length check.
|
|
184
|
+
- Policy engine reports both raw and stripped length in justification errors.
|
|
185
|
+
- Policy engine checks role before justification in all safety/sensitivity blocks.
|
|
186
|
+
- Redaction preserves field-name context in API key and connection string patterns.
|
|
187
|
+
- `revoke_all()` drops `_principal_tokens` entry after revoking.
|
|
188
|
+
|
|
189
|
+
## [0.1.0] - 2024-01-01
|
|
190
|
+
|
|
191
|
+
### Added
|
|
192
|
+
- Initial scaffold: `CapabilityRegistry`, `PolicyEngine`, `HMACTokenProvider`, `Kernel`.
|
|
193
|
+
- `InMemoryDriver` and `HTTPDriver` (httpx-based).
|
|
194
|
+
- Context `Firewall` with `Budgets`, redaction, and summarization.
|
|
195
|
+
- `HandleStore` with TTL, pagination, field selection, and basic filtering.
|
|
196
|
+
- `TraceStore` and `explain()` for full audit trail.
|
|
197
|
+
- Examples: `basic_cli.py`, `billing_demo.py`, `http_driver_demo.py`.
|
|
198
|
+
- Documentation: architecture, security model, integrations, capabilities, context firewall.
|
|
199
|
+
- CI pipeline for Python 3.10, 3.11, 3.12 with ruff + mypy + pytest.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: weaver-kernel
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Capability-based security kernel for AI agents operating in large tool ecosystems
|
|
5
5
|
Project-URL: Homepage, https://github.com/dgenio/agent-kernel
|
|
6
6
|
Project-URL: Repository, https://github.com/dgenio/agent-kernel
|
|
@@ -221,6 +221,7 @@ Classifier: Topic :: Security
|
|
|
221
221
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
222
222
|
Requires-Python: >=3.10
|
|
223
223
|
Requires-Dist: httpx>=0.27
|
|
224
|
+
Requires-Dist: pydantic>=2
|
|
224
225
|
Provides-Extra: dev
|
|
225
226
|
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
226
227
|
Requires-Dist: mcp>=1.6; extra == 'dev'
|
|
@@ -228,11 +229,19 @@ Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
|
228
229
|
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
229
230
|
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
230
231
|
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
232
|
+
Requires-Dist: pyyaml>=6.0; extra == 'dev'
|
|
231
233
|
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
234
|
+
Requires-Dist: tomli>=2.0; (python_version < '3.11') and extra == 'dev'
|
|
235
|
+
Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
|
|
232
236
|
Provides-Extra: mcp
|
|
233
237
|
Requires-Dist: mcp>=1.6; extra == 'mcp'
|
|
234
238
|
Provides-Extra: otel
|
|
235
239
|
Requires-Dist: opentelemetry-api>=1.20; extra == 'otel'
|
|
240
|
+
Provides-Extra: policy
|
|
241
|
+
Requires-Dist: pyyaml>=6.0; extra == 'policy'
|
|
242
|
+
Requires-Dist: tomli>=2.0; (python_version < '3.11') and extra == 'policy'
|
|
243
|
+
Provides-Extra: tiktoken
|
|
244
|
+
Requires-Dist: tiktoken>=0.6; extra == 'tiktoken'
|
|
236
245
|
Description-Content-Type: text/markdown
|
|
237
246
|
|
|
238
247
|
# agent-kernel
|
|
@@ -64,6 +64,19 @@ tag is **silently ignored** — capabilities tagged with it pass policy without
|
|
|
64
64
|
|
|
65
65
|
**Rule:** When adding a `SensitivityTag`, always add a matching policy rule and test.
|
|
66
66
|
|
|
67
|
+
### Dry-run response-mode parity
|
|
68
|
+
`Kernel.invoke(dry_run=True)` reports the response mode the caller would actually
|
|
69
|
+
get at real-invoke time. The Firewall downgrades `raw` to `summary` for non-admin
|
|
70
|
+
principals (`firewall/transform.py:108`), so dry-run must mirror that downgrade —
|
|
71
|
+
otherwise a non-admin caller can probe/assume raw-mode availability they will never
|
|
72
|
+
actually receive. The same applies to `operation`: dry-run resolves it the same way
|
|
73
|
+
drivers do (`args.get("operation", capability_id)`), so what the caller sees in
|
|
74
|
+
`DryRunResult` matches what a driver would receive.
|
|
75
|
+
|
|
76
|
+
**Rule:** Any code path that reports a response mode or driver operation back to the
|
|
77
|
+
caller must apply the same admin gate / resolution rule the real-invoke path uses,
|
|
78
|
+
including dry-run, mock, and test paths.
|
|
79
|
+
|
|
67
80
|
## Safe vs. unsafe changes
|
|
68
81
|
|
|
69
82
|
| Safe | Unsafe |
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
`agent-kernel` is a capability-based security kernel that sits **above** raw tool execution (MCP, HTTP APIs, internal services) and **below** the LLM context window.
|
|
6
|
+
|
|
7
|
+
```mermaid
|
|
8
|
+
graph TD
|
|
9
|
+
LLM["LLM / Agent"] -->|goal text| K["Kernel"]
|
|
10
|
+
K -->|search| REG["CapabilityRegistry"]
|
|
11
|
+
REG -->|CapabilityRequest| K
|
|
12
|
+
K -->|evaluate| POL["PolicyEngine"]
|
|
13
|
+
POL -->|PolicyDecision| K
|
|
14
|
+
K -->|issue| TOK["TokenProvider (HMAC)"]
|
|
15
|
+
TOK -->|CapabilityToken| K
|
|
16
|
+
K -->|route| ROU["Router"]
|
|
17
|
+
ROU -->|RoutePlan| K
|
|
18
|
+
K -->|execute| DRV["Driver (Memory / HTTP / MCP)"]
|
|
19
|
+
DRV -->|RawResult| K
|
|
20
|
+
K -->|transform| FW["Firewall"]
|
|
21
|
+
FW -->|Frame| K
|
|
22
|
+
K -->|store| HS["HandleStore"]
|
|
23
|
+
K -->|record| TS["TraceStore"]
|
|
24
|
+
K -->|Frame| LLM
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Components
|
|
28
|
+
|
|
29
|
+
### Kernel
|
|
30
|
+
The central orchestrator. Wires all components together and exposes:
|
|
31
|
+
- `request_capabilities(goal)` — discover relevant capabilities
|
|
32
|
+
- `grant_capability(request, principal, justification)` — policy check + token issuance
|
|
33
|
+
- `invoke(token, principal, args, response_mode, dry_run=False)` — execute + firewall + trace, or short-circuit before driver dispatch when `dry_run=True`
|
|
34
|
+
- `expand(handle, query)` — paginate/filter stored results
|
|
35
|
+
- `explain(action_id)` — retrieve audit trace
|
|
36
|
+
- `explain_denial(request, principal, justification)` — return a structured `DenialExplanation` instead of raising `PolicyDenied`
|
|
37
|
+
|
|
38
|
+
### CapabilityRegistry
|
|
39
|
+
A flat dict of `Capability` objects indexed by `capability_id`. Provides keyword-based search (no LLM, no vector DB — purely token overlap scoring).
|
|
40
|
+
|
|
41
|
+
### PolicyEngine
|
|
42
|
+
Two protocols and two built-in engines:
|
|
43
|
+
|
|
44
|
+
- **`PolicyEngine`** (protocol) — single required method: `evaluate(request, capability, principal, justification) -> PolicyDecision`.
|
|
45
|
+
- **`ExplainingPolicyEngine`** (protocol, extends `PolicyEngine`) — adds `explain(...) -> DenialExplanation`. Only engines that implement this protocol can be used with `Kernel.explain_denial`; otherwise that call raises `AgentKernelError` with a clear message. Splitting the contract keeps existing downstream `PolicyEngine` implementers backward-compatible.
|
|
46
|
+
|
|
47
|
+
Both built-in engines satisfy `ExplainingPolicyEngine`:
|
|
48
|
+
|
|
49
|
+
- **`DefaultPolicyEngine`** — hardcoded role-based rules:
|
|
50
|
+
1. **READ** — always allowed
|
|
51
|
+
2. **WRITE** — requires `justification ≥ 15 chars` + role `writer|admin`
|
|
52
|
+
3. **DESTRUCTIVE** — requires role `admin` + `justification ≥ 15 chars`
|
|
53
|
+
4. **PII/PCI** — requires `tenant` attribute; enforces `allowed_fields` unless `pii_reader`
|
|
54
|
+
5. **SECRETS** — requires role `admin|secrets_reader` + `justification ≥ 15 chars`
|
|
55
|
+
6. **max_rows** — 50 (user), 500 (service)
|
|
56
|
+
7. **Rate limiting** — sliding-window per `(principal_id, capability_id)` (60 READ / 10 WRITE / 2 DESTRUCTIVE per 60s; service role gets 10×)
|
|
57
|
+
- **`DeclarativePolicyEngine`** — loads rules from a YAML or TOML file (or a plain dict). Supports `safety_class`, `sensitivity`, `roles`, `attributes`, and `min_justification` match conditions; `allow`/`deny` actions; per-rule `constraints` merged into the resulting `PolicyDecision`; configurable `default` action. Rules are evaluated top-down with first-match-wins. `pyyaml` and `tomli` are optional dependencies — `import agent_kernel` works without them; calling `from_yaml`/`from_toml` without the parser raises `PolicyConfigError` with an install hint.
|
|
58
|
+
|
|
59
|
+
#### Denial explanations
|
|
60
|
+
|
|
61
|
+
`ExplainingPolicyEngine.explain()` (implemented by both built-in engines) returns a structured `DenialExplanation` with `denied`, `rule_name`, a `failed_conditions: list[FailedCondition]` describing each missing condition with `required`/`actual`/`suggestion`, a `remediation` list, and a human-readable `narrative`. Engines collect all failing conditions (no short-circuit) so callers get the full picture. For `DeclarativePolicyEngine`, an explicit deny rule that fully matches is reported as the cause; partial-match deny rules are skipped during explanation so the surfaced advice is actionable rather than self-defeating.
|
|
62
|
+
|
|
63
|
+
#### Dry-run mode
|
|
64
|
+
|
|
65
|
+
`Kernel.invoke(dry_run=True)` verifies the token and resolves the route plan but **never calls the driver**. It returns a `DryRunResult` with the resolved `driver_id`, the same `operation` a driver would receive (`args.get("operation", capability_id)`), the request constraints, the effective `response_mode` (Firewall's admin-only gate is mirrored: non-admin `raw` is downgraded to `summary`), and a coarse `estimated_cost` tier based on `SafetyClass`. Token verification still raises `TokenExpired` / `TokenInvalid` / `TokenScopeError` in dry-run, so the mode is safe as a policy/route sanity check. See [`docs/capabilities.md`](capabilities.md#dry-run-mode) for usage and [`docs/agent-context/invariants.md`](agent-context/invariants.md) for the parity rule with the real-invoke path.
|
|
66
|
+
|
|
67
|
+
### TokenProvider (HMAC)
|
|
68
|
+
Issues HMAC-SHA256 signed tokens. Each token is bound to `principal_id + capability_id + constraints`. Verification checks: expiry → signature → principal → capability.
|
|
69
|
+
|
|
70
|
+
### Router
|
|
71
|
+
`StaticRouter` maps `capability_id → [driver_id, ...]`. Drivers are tried in list order: the first one that succeeds wins, and the remaining ones serve as fallbacks.
|
|
72
|
+
|
|
73
|
+
### Drivers
|
|
74
|
+
- **InMemoryDriver** — Python callables, used for tests and demos
|
|
75
|
+
- **HTTPDriver** — `httpx`-based async HTTP client
|
|
76
|
+
- **MCPDriver** — adapter for Model Context Protocol tool servers (stdio and Streamable HTTP transports, tool auto-discovery; requires the optional `mcp` extra)
|
|
77
|
+
|
|
78
|
+
### Firewall
|
|
79
|
+
Transforms `RawResult → Frame`. Never exposes raw output to the LLM.
|
|
80
|
+
- Four response modes: `summary`, `table`, `handle_only`, `raw`
|
|
81
|
+
- Enforces `Budgets` (max_rows, max_fields, max_chars, max_depth)
|
|
82
|
+
- Redacts sensitive fields and inline PII patterns
|
|
83
|
+
- Deterministic summarisation (no LLM)
|
|
84
|
+
|
|
85
|
+
### HandleStore
|
|
86
|
+
Stores full results by opaque handle ID with TTL. `expand()` supports pagination, field selection, and basic equality filtering.
|
|
87
|
+
|
|
88
|
+
### TraceStore
|
|
89
|
+
Records every `ActionTrace`. `explain(action_id)` returns the full audit record.
|
|
90
|
+
|
|
91
|
+
### Adapters (`agent_kernel.adapters`)
|
|
92
|
+
Vendor-specific tool-format adapters that translate between `Capability` objects
|
|
93
|
+
and the tool shapes used by LLM provider APIs:
|
|
94
|
+
|
|
95
|
+
- **`OpenAIMiddleware`** — emits OpenAI tool definitions (Responses API or Chat
|
|
96
|
+
Completions shape), parses `response.output` / `message.tool_calls`, and
|
|
97
|
+
returns `function_call_output` / tool-result messages. Dotted capability IDs
|
|
98
|
+
map to `namespace__function` (OpenAI tool names cannot contain `.`).
|
|
99
|
+
- **`AnthropicMiddleware`** — emits Anthropic tool definitions with optional
|
|
100
|
+
`cache_control` blocks, parses `tool_use` content blocks, and returns
|
|
101
|
+
`tool_result` content blocks. Dotted capability IDs are preserved as-is.
|
|
102
|
+
|
|
103
|
+
Both classes share `BaseToolMiddleware`, which owns hook registration
|
|
104
|
+
(`intercept_tool_call`, `intercept_tool_result`), pre/post dispatch (sync or
|
|
105
|
+
async), and conversion of kernel exceptions (`PolicyDenied`,
|
|
106
|
+
`CapabilityNotFound`, `DriverError`) into tool-result errors the LLM can react
|
|
107
|
+
to. Input arguments are validated against `Capability.parameters_model`
|
|
108
|
+
(pydantic) when present. **Zero runtime dependency** on the `openai` /
|
|
109
|
+
`anthropic` SDK packages. See [`docs/integrations.md`](integrations.md) for
|
|
110
|
+
usage examples.
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Designing Capabilities
|
|
2
|
+
|
|
3
|
+
## Naming conventions
|
|
4
|
+
|
|
5
|
+
- Use `domain.verb_noun` format: `billing.list_invoices`, `users.get_profile`.
|
|
6
|
+
- Be specific: prefer `billing.cancel_invoice` over `billing.update`.
|
|
7
|
+
- Avoid generic names like `billing.execute` or `api.call`.
|
|
8
|
+
|
|
9
|
+
## Granularity
|
|
10
|
+
|
|
11
|
+
Each capability should map to a single, auditable action with clear side-effects.
|
|
12
|
+
|
|
13
|
+
**Good:**
|
|
14
|
+
- `billing.list_invoices` (READ, no side-effects)
|
|
15
|
+
- `billing.send_reminder` (WRITE, sends an email)
|
|
16
|
+
- `billing.void_invoice` (DESTRUCTIVE, irreversible)
|
|
17
|
+
|
|
18
|
+
**Avoid:**
|
|
19
|
+
- `billing.do_stuff` (too broad)
|
|
20
|
+
- `billing.list_or_update_invoices` (mixed safety classes)
|
|
21
|
+
|
|
22
|
+
## Safety classes
|
|
23
|
+
|
|
24
|
+
| Class | Examples | Policy |
|
|
25
|
+
|-------|---------|--------|
|
|
26
|
+
| READ | list, get, search, summarize | Always allowed |
|
|
27
|
+
| WRITE | create, update, send, approve | Justification + writer role |
|
|
28
|
+
| DESTRUCTIVE | delete, void, purge, terminate | Admin role only |
|
|
29
|
+
|
|
30
|
+
## Sensitivity tags
|
|
31
|
+
|
|
32
|
+
Use `SensitivityTag.PII` when results may contain: name, email, phone, SSN, address.
|
|
33
|
+
Use `SensitivityTag.PCI` when results may contain: card numbers, CVV, bank details.
|
|
34
|
+
Use `SensitivityTag.SECRETS` when results may contain: API keys, passwords, tokens.
|
|
35
|
+
|
|
36
|
+
Always pair sensitivity tags with `allowed_fields` to restrict which fields are returned
|
|
37
|
+
to non-privileged callers.
|
|
38
|
+
|
|
39
|
+
## Tags
|
|
40
|
+
|
|
41
|
+
Add descriptive tags to improve keyword matching:
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
Capability(
|
|
45
|
+
capability_id="billing.list_invoices",
|
|
46
|
+
tags=["billing", "invoices", "list", "finance", "accounts receivable"],
|
|
47
|
+
...
|
|
48
|
+
)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Dry-run mode
|
|
52
|
+
|
|
53
|
+
`Kernel.invoke(..., dry_run=True)` verifies the token and resolves the route
|
|
54
|
+
plan but **never calls the driver**. Use it to validate that a principal can
|
|
55
|
+
invoke a capability, inspect what a driver *would* receive, or run policy
|
|
56
|
+
checks in CI without live tool backends.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
result = await kernel.invoke(
|
|
60
|
+
token,
|
|
61
|
+
principal=principal,
|
|
62
|
+
args={"operation": "billing.list_invoices", "max_rows": 5},
|
|
63
|
+
response_mode="summary",
|
|
64
|
+
dry_run=True,
|
|
65
|
+
)
|
|
66
|
+
# result: DryRunResult(
|
|
67
|
+
# capability_id="billing.list_invoices",
|
|
68
|
+
# principal_id="user-001",
|
|
69
|
+
# policy_decision=PolicyDecision(allowed=True, ...),
|
|
70
|
+
# driver_id="billing",
|
|
71
|
+
# operation="billing.list_invoices",
|
|
72
|
+
# resolved_args={"operation": "billing.list_invoices", "max_rows": 5},
|
|
73
|
+
# response_mode="summary",
|
|
74
|
+
# budget_remaining=None,
|
|
75
|
+
# estimated_cost="low",
|
|
76
|
+
# )
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Three rules govern dry-run behaviour — keep them in sync with the real-invoke
|
|
80
|
+
path if you change either:
|
|
81
|
+
|
|
82
|
+
1. **Token verification still runs.** Expired, revoked, invalid, or scope-mismatched
|
|
83
|
+
tokens raise `TokenExpired` / `TokenRevoked` / `TokenInvalid` /
|
|
84
|
+
`TokenScopeError` exactly as they would at real-invoke. Policy is *not*
|
|
85
|
+
re-evaluated at invoke time — the granting policy decision is encoded in
|
|
86
|
+
the token at `grant_capability`.
|
|
87
|
+
2. **Operation resolution mirrors drivers.** `DryRunResult.operation` is
|
|
88
|
+
computed the same way every driver computes it:
|
|
89
|
+
`str(args.get("operation", capability_id))`. Always set `args["operation"]`
|
|
90
|
+
when you need a fixed operation; otherwise the dry-run operation is the
|
|
91
|
+
capability ID, matching what the driver would see.
|
|
92
|
+
3. **Raw-mode admin gate mirrors the Firewall.** Non-admin principals never
|
|
93
|
+
get `response_mode="raw"` at real-invoke (the Firewall downgrades it to
|
|
94
|
+
`"summary"` — see `firewall/transform.py`). Dry-run downgrades the same
|
|
95
|
+
way, so non-admin callers cannot probe for raw-mode availability via
|
|
96
|
+
`DryRunResult`.
|
|
97
|
+
|
|
98
|
+
The driver's `execute()` is never called in dry-run, so the mode is free of
|
|
99
|
+
side effects regardless of driver type (`InMemoryDriver`, `HTTPDriver`,
|
|
100
|
+
`MCPDriver`). `DryRunResult.budget_remaining` is `None` unless a `BudgetManager`
|
|
101
|
+
is attached to the kernel, in which case it reports the remaining cross-invocation budget.
|
|
102
|
+
|
|
103
|
+
## Declarative policies
|
|
104
|
+
|
|
105
|
+
`DeclarativePolicyEngine` is an alternative to `DefaultPolicyEngine` that
|
|
106
|
+
loads rules from a YAML or TOML file (or a plain dict). Rules are evaluated
|
|
107
|
+
top-down, first-match-wins; if no rule matches, the policy's `default` action
|
|
108
|
+
applies (`"deny"` unless overridden).
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from pathlib import Path
|
|
112
|
+
from agent_kernel import DeclarativePolicyEngine, Kernel
|
|
113
|
+
|
|
114
|
+
# YAML or TOML — both formats are interchangeable.
|
|
115
|
+
policy = DeclarativePolicyEngine.from_yaml(Path("examples/policies/default.yaml"))
|
|
116
|
+
|
|
117
|
+
# Or build entirely in-memory:
|
|
118
|
+
policy = DeclarativePolicyEngine.from_dict({
|
|
119
|
+
"default": "deny",
|
|
120
|
+
"rules": [
|
|
121
|
+
{"name": "allow-read", "action": "allow",
|
|
122
|
+
"match": {"safety_class": ["READ"], "sensitivity": ["NONE"]}},
|
|
123
|
+
# ...
|
|
124
|
+
],
|
|
125
|
+
})
|
|
126
|
+
|
|
127
|
+
kernel = Kernel(registry=registry, policy=policy)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
A rule's `match` block supports `safety_class`, `sensitivity`, `roles`
|
|
131
|
+
(ANY-of), `attributes` (ALL-of, with `"*"` meaning "attribute must be
|
|
132
|
+
present"), and `min_justification` (minimum stripped length). On `allow`, the
|
|
133
|
+
rule's `constraints` are merged into the resulting `PolicyDecision`. On
|
|
134
|
+
`deny`, `reason` is embedded in the raised `PolicyDenied`.
|
|
135
|
+
|
|
136
|
+
The DSL has no negation/missing-attribute operator today, so a policy that
|
|
137
|
+
should deny "when an attribute is missing" should be expressed as an allow
|
|
138
|
+
rule requiring the attribute, paired with `default: deny`. See
|
|
139
|
+
[`examples/policies/default.yaml`](../examples/policies/default.yaml) for a
|
|
140
|
+
worked example.
|
|
141
|
+
|
|
142
|
+
`pyyaml` and `tomli` are **optional** — they live behind the `[policy]`
|
|
143
|
+
extra. `import agent_kernel` always works; calling `from_yaml` / `from_toml`
|
|
144
|
+
without the parser installed raises `PolicyConfigError` with an install hint.
|
|
145
|
+
|
|
146
|
+
## Denial explanations
|
|
147
|
+
|
|
148
|
+
When a capability call is denied, `Kernel.explain_denial(request, principal,
|
|
149
|
+
justification="")` returns a structured `DenialExplanation` describing
|
|
150
|
+
**every** unmet condition (not just the first one), so the caller can see the
|
|
151
|
+
full remediation path:
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
explanation = kernel.explain_denial(
|
|
155
|
+
CapabilityRequest(capability_id="billing.update_invoice", goal="..."),
|
|
156
|
+
principal,
|
|
157
|
+
justification="too short",
|
|
158
|
+
)
|
|
159
|
+
# explanation.denied == True
|
|
160
|
+
# explanation.rule_name == "write-min_justification"
|
|
161
|
+
# explanation.failed_conditions == [FailedCondition(condition="roles", required=[...]), ...]
|
|
162
|
+
# explanation.remediation == ["Add 'writer' or 'admin' role to ...", "Provide ..."]
|
|
163
|
+
# explanation.narrative == "Request for 'billing.update_invoice' by '...' would be denied: ..."
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Both built-in engines support `explain()`. If you bring a custom policy
|
|
167
|
+
engine that implements only `PolicyEngine.evaluate`, `explain_denial` raises
|
|
168
|
+
`AgentKernelError` with guidance — implement the `ExplainingPolicyEngine`
|
|
169
|
+
protocol to enable structured explanations.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Context Firewall
|
|
2
|
+
|
|
3
|
+
## Why it exists
|
|
4
|
+
|
|
5
|
+
Large tool ecosystems produce large, verbose outputs. Passing raw tool output to an LLM
|
|
6
|
+
causes context blowup, leaks PII, and makes the agent unpredictable. The firewall
|
|
7
|
+
transforms every `RawResult` into a bounded `Frame` before the LLM sees it.
|
|
8
|
+
|
|
9
|
+
## Budgets
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from agent_kernel.firewall.budgets import Budgets
|
|
13
|
+
|
|
14
|
+
Budgets(
|
|
15
|
+
max_rows=50, # max rows in table_preview
|
|
16
|
+
max_fields=20, # max fields per row
|
|
17
|
+
max_chars=4000, # total characters across all facts
|
|
18
|
+
max_depth=3, # recursion depth for nested structures
|
|
19
|
+
)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Response modes
|
|
23
|
+
|
|
24
|
+
| Mode | What you get | When to use |
|
|
25
|
+
|------|-------------|-------------|
|
|
26
|
+
| `summary` | ≤20 fact strings + handle | Default; best for LLM context |
|
|
27
|
+
| `table` | ≤max_rows dicts + handle | When the LLM needs tabular data |
|
|
28
|
+
| `handle_only` | handle + warnings | Defer all data to an expand() call |
|
|
29
|
+
| `raw` | Full data (admin only) | Debugging; never for LLM context |
|
|
30
|
+
|
|
31
|
+
## Handles
|
|
32
|
+
|
|
33
|
+
A `Handle` is an opaque reference to the full dataset stored server-side.
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
# Stored automatically on every invoke()
|
|
37
|
+
handle = frame.handle
|
|
38
|
+
|
|
39
|
+
# Expand with pagination
|
|
40
|
+
expanded = kernel.expand(handle, query={"offset": 10, "limit": 5})
|
|
41
|
+
|
|
42
|
+
# Field selection
|
|
43
|
+
expanded = kernel.expand(handle, query={"fields": ["id", "name"]})
|
|
44
|
+
|
|
45
|
+
# Basic filtering
|
|
46
|
+
expanded = kernel.expand(handle, query={"filter": {"status": "unpaid"}})
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Redaction
|
|
50
|
+
|
|
51
|
+
When a capability has `SensitivityTag.PII` or `SensitivityTag.PCI`:
|
|
52
|
+
- Fields in `Capability.allowed_fields` are kept (others removed)
|
|
53
|
+
- Values of fields with sensitive names (`email`, `phone`, `card_number`, `ssn`, etc.) are replaced with `[REDACTED]`
|
|
54
|
+
- Inline patterns in string values (email addresses, phone numbers, SSNs, card numbers) are redacted
|
|
55
|
+
|
|
56
|
+
Principals with the `pii_reader` role bypass `allowed_fields` enforcement.
|
|
57
|
+
|
|
58
|
+
## Summarization
|
|
59
|
+
|
|
60
|
+
Summaries are produced deterministically:
|
|
61
|
+
- **list of dicts** → row count + top keys + numeric stats + categorical distributions
|
|
62
|
+
- **dict** → key list + per-value type/value
|
|
63
|
+
- **string** → truncated to 500 chars
|
|
64
|
+
- **other** → repr() truncated to 200 chars
|
|
65
|
+
|
|
66
|
+
## Cross-invocation budgets
|
|
67
|
+
|
|
68
|
+
The per-invocation `Budgets` above cap a single Frame. A separate
|
|
69
|
+
`BudgetManager` tracks cumulative token usage *across* invocations within a
|
|
70
|
+
session. It is optional — if you don't attach one, kernel behavior is
|
|
71
|
+
unchanged.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from agent_kernel import BudgetManager, Kernel
|
|
75
|
+
|
|
76
|
+
manager = BudgetManager(total_budget=100_000)
|
|
77
|
+
kernel = Kernel(registry, budget_manager=manager)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Per `invoke()` the kernel:
|
|
81
|
+
|
|
82
|
+
1. Reserves a slice of the remaining budget (default 4,000 tokens). If the
|
|
83
|
+
budget is empty, `BudgetExhausted` is raised before the driver runs.
|
|
84
|
+
2. Consults `manager.suggested_mode(requested)` to escalate the requested
|
|
85
|
+
`response_mode` to a more aggressive tier as the remaining budget shrinks.
|
|
86
|
+
3. After the firewall produces a Frame, counts the actual tokens in the
|
|
87
|
+
LLM-facing payload and reconciles them against the reservation.
|
|
88
|
+
|
|
89
|
+
Escalation table:
|
|
90
|
+
|
|
91
|
+
| Budget remaining | Suggested mode (effective `response_mode`) |
|
|
92
|
+
|-----------------:|------------------------------------------------|
|
|
93
|
+
| > 50% | Caller's requested mode (no change) |
|
|
94
|
+
| 20% – 50% | `table` (when caller requested `raw`) |
|
|
95
|
+
| 5% – 20% (inclusive) | `summary` (floor — never *relaxes* to `table`) |
|
|
96
|
+
| < 5% | `handle_only` |
|
|
97
|
+
|
|
98
|
+
Boundaries land in the more-conservative tier — exactly 50% remaining
|
|
99
|
+
downgrades `raw` to `table`, exactly 20% floors at `summary`, and only when
|
|
100
|
+
remaining drops *below* 5% does `handle_only` take over.
|
|
101
|
+
|
|
102
|
+
`Kernel.invoke(..., dry_run=True)` mirrors the escalation and reports
|
|
103
|
+
`budget_remaining` in the returned `DryRunResult`, so callers can preview
|
|
104
|
+
what their next live invocation would actually return.
|
|
105
|
+
|
|
106
|
+
Plug a different token counter (for example, a `tiktoken`-based one) via the
|
|
107
|
+
`TokenCounter` protocol:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import tiktoken # pip install weaver-kernel[tiktoken]
|
|
111
|
+
enc = tiktoken.encoding_for_model("gpt-4o")
|
|
112
|
+
|
|
113
|
+
def tiktoken_counter(value):
|
|
114
|
+
return len(enc.encode(str(value)))
|
|
115
|
+
|
|
116
|
+
manager = BudgetManager(total_budget=128_000, token_counter=tiktoken_counter)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The default counter (`default_token_counter`) is a character-based
|
|
120
|
+
`len(json.dumps(value)) // 4` approximation with no extra dependencies.
|