weaver-kernel 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/AGENTS.md +1 -1
  2. weaver_kernel-0.6.0/CHANGELOG.md +199 -0
  3. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/PKG-INFO +10 -1
  4. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/invariants.md +13 -0
  5. weaver_kernel-0.6.0/docs/architecture.md +110 -0
  6. weaver_kernel-0.6.0/docs/capabilities.md +169 -0
  7. weaver_kernel-0.6.0/docs/context_firewall.md +120 -0
  8. weaver_kernel-0.6.0/docs/integrations.md +329 -0
  9. weaver_kernel-0.6.0/examples/policies/default.toml +68 -0
  10. weaver_kernel-0.6.0/examples/policies/default.yaml +64 -0
  11. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/pyproject.toml +29 -2
  12. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/__init__.py +42 -6
  13. weaver_kernel-0.6.0/src/agent_kernel/adapters/__init__.py +35 -0
  14. weaver_kernel-0.6.0/src/agent_kernel/adapters/_base.py +459 -0
  15. weaver_kernel-0.6.0/src/agent_kernel/adapters/anthropic.py +273 -0
  16. weaver_kernel-0.6.0/src/agent_kernel/adapters/openai.py +358 -0
  17. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/mcp.py +4 -2
  18. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/errors.py +44 -0
  19. weaver_kernel-0.6.0/src/agent_kernel/firewall/__init__.py +18 -0
  20. weaver_kernel-0.6.0/src/agent_kernel/firewall/budget_manager.py +275 -0
  21. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/budgets.py +5 -3
  22. weaver_kernel-0.6.0/src/agent_kernel/firewall/token_counting.py +41 -0
  23. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/kernel.py +228 -19
  24. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/models.py +131 -1
  25. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/policy.py +197 -2
  26. weaver_kernel-0.6.0/src/agent_kernel/policy_dsl.py +503 -0
  27. weaver_kernel-0.6.0/tests/test_adapters.py +1130 -0
  28. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_firewall.py +262 -1
  29. weaver_kernel-0.6.0/tests/test_kernel.py +734 -0
  30. weaver_kernel-0.6.0/tests/test_policy.py +1263 -0
  31. weaver_kernel-0.5.0/CHANGELOG.md +0 -81
  32. weaver_kernel-0.5.0/docs/architecture.md +0 -70
  33. weaver_kernel-0.5.0/docs/capabilities.md +0 -49
  34. weaver_kernel-0.5.0/docs/context_firewall.md +0 -64
  35. weaver_kernel-0.5.0/docs/integrations.md +0 -130
  36. weaver_kernel-0.5.0/src/agent_kernel/firewall/__init__.py +0 -8
  37. weaver_kernel-0.5.0/tests/test_kernel.py +0 -217
  38. weaver_kernel-0.5.0/tests/test_policy.py +0 -470
  39. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.claude/CLAUDE.md +0 -0
  40. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/copilot-instructions.md +0 -0
  41. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/workflows/ci.yml +0 -0
  42. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.github/workflows/publish.yml +0 -0
  43. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/.gitignore +0 -0
  44. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/CONTRIBUTING.md +0 -0
  45. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/LICENSE +0 -0
  46. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/Makefile +0 -0
  47. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/README.md +0 -0
  48. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/RELEASE.md +0 -0
  49. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/architecture.md +0 -0
  50. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/lessons-learned.md +0 -0
  51. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/review-checklist.md +0 -0
  52. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/agent-context/workflows.md +0 -0
  53. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/docs/security.md +0 -0
  54. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/basic_cli.py +0 -0
  55. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/billing_demo.py +0 -0
  56. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/examples/http_driver_demo.py +0 -0
  57. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/__init__.py +0 -0
  58. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/base.py +0 -0
  59. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/http.py +0 -0
  60. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/mcp_support.py +0 -0
  61. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/drivers/memory.py +0 -0
  62. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/enums.py +0 -0
  63. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/redaction.py +0 -0
  64. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/summarize.py +0 -0
  65. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/firewall/transform.py +0 -0
  66. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/handles.py +0 -0
  67. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/py.typed +0 -0
  68. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/registry.py +0 -0
  69. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/router.py +0 -0
  70. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/tokens.py +0 -0
  71. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/src/agent_kernel/trace.py +0 -0
  72. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/conftest.py +0 -0
  73. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_drivers.py +0 -0
  74. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_handles.py +0 -0
  75. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_logging.py +0 -0
  76. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_mcp_driver.py +0 -0
  77. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_models.py +0 -0
  78. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_redaction.py +0 -0
  79. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_registry.py +0 -0
  80. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_router.py +0 -0
  81. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_tokens.py +0 -0
  82. {weaver_kernel-0.5.0 → weaver_kernel-0.6.0}/tests/test_trace.py +0 -0
@@ -52,7 +52,7 @@ Use these terms consistently. Never substitute synonyms:
52
52
  - Error messages are part of the contract — tests must assert both exception type and message.
53
53
  - Keep modules ≤ 300 lines. Split if needed.
54
54
  - No randomness in matching, routing, or summarization. Deterministic outputs always.
55
- - No new dependencies without justification. The dep list is intentionally minimal (`httpx` only).
55
+ - No new dependencies without justification. The dep list is intentionally minimal (`httpx`, `pydantic`).
56
56
 
57
57
  ## Security rules
58
58
 
@@ -0,0 +1,199 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.6.0] - 2026-05-19
11
+
12
+ ### Added
13
+ - Cross-invocation context budget manager (`BudgetManager`) tracks cumulative token usage across
14
+ multiple `Kernel.invoke()` calls within a session. When attached to a `Kernel` via the new
15
+ `budget_manager` keyword argument, the kernel reserves a budget slice before each invocation
16
+ and reconciles actual frame-payload usage afterwards. As the remaining budget shrinks the
17
+ requested `response_mode` is auto-escalated to a more aggressive tier (> 50% remaining keeps
18
+ the caller's mode; 20–50% downgrades `raw` to `table`; 5–20% floors at `summary`; < 5% forces
19
+ `handle_only`). `Kernel.invoke(..., dry_run=True)` now also reports `budget_remaining` and the
20
+ escalated `response_mode` when a manager is configured. The `BudgetManager` is optional and
21
+ off by default — existing kernels are unchanged. (#44)
22
+ - `TokenCounter` protocol and `default_token_counter` (character-based `len(json.dumps(...))//4`
23
+ approximation) provide pluggable token counting without runtime dependencies. A new optional
24
+ `[tiktoken]` extra is reserved for callers that want to plug in `tiktoken`-based counting.
25
+ - `BudgetExhausted(AgentKernelError)` raised by `BudgetManager.allocate()` (and by
26
+ `Kernel.invoke()` before driver execution) when the cumulative session budget is fully spent.
27
+ - `BudgetConfigError(AgentKernelError)` raised by `BudgetManager` for invalid configuration or
28
+ validation failures (non-positive budgets, negative allocate/record/release amounts), replacing
29
+ bare `ValueError` so callers can catch budget mistakes via the `AgentKernelError` hierarchy
30
+ per `AGENTS.md` ("never raise bare ValueError to callers").
31
+ - New public exports: `BudgetManager`, `BudgetExhausted`, `BudgetConfigError`, `TokenCounter`,
32
+ `default_token_counter`, and `Kernel.budget` accessor property.
33
+ - LLM tool-format adapters and middleware (`agent_kernel.adapters`): `OpenAIMiddleware` (OpenAI
34
+ Responses API + Chat Completions, auto-detected on input) and `AnthropicMiddleware` (Anthropic
35
+ Messages with `cache_control` support). Both translate `Capability` objects to vendor tool
36
+ schemas, route tool calls through the full kernel pipeline (grant → invoke → firewall → trace),
37
+ and surface kernel errors (`PolicyDenied`, `CapabilityNotFound`, `DriverError`) as tool-result
38
+ errors so the LLM can react. Pre/post hooks (`intercept_tool_call`, `intercept_tool_result`,
39
+ sync or async) support logging, metrics, approval gates, and per-call justification injection.
40
+ Zero runtime dependency on the `openai` / `anthropic` SDK packages. (#55, #50, #40)
41
+ - New `Capability` fields for LLM adapters: `parameters_model: type[pydantic.BaseModel] | None`
42
+ (input schema source + validation), `parameters_schema: dict | None` (raw JSON Schema escape
43
+ hatch), and `tool_hints: ToolHints | None` (vendor hints — Anthropic `cache_control`, OpenAI
44
+ `strict` mode). All default to `None`; existing capabilities and tests are unaffected.
45
+ - New `ToolHints` dataclass and `OpenAIMiddleware` / `AnthropicMiddleware` top-level exports.
46
+ - New `AdapterParseError(AgentKernelError)` exception raised by adapter parse / validation
47
+ helpers (`tool_call_to_request`, `tool_use_to_request`, `make_namespace_safe_name`) instead
48
+ of bare `ValueError`. Satisfies `AGENTS.md`'s "no bare ValueError to callers" rule and
49
+ gives consumers a stable adapter-specific exception type. Also catches capability IDs that
50
+ contain the reserved OpenAI namespace separator `__` (which would otherwise produce
51
+ colliding tool names).
52
+ - `Kernel.list_capabilities()` convenience accessor returning every registered capability in
53
+ registration order. Used by the new adapters but generally useful for tooling that needs to
54
+ enumerate the registry without keyword search.
55
+ - Declarative policy engine (`DeclarativePolicyEngine`) that loads rules from YAML or TOML files.
56
+ Rules are evaluated top-down with first-match-wins semantics; supports `safety_class`, `sensitivity`,
57
+ `roles`, `attributes`, and `min_justification` match conditions. (#42)
58
+ - Policy denial explanation: `ExplainingPolicyEngine` protocol plus `DefaultPolicyEngine.explain()` and
59
+ `DeclarativePolicyEngine.explain()` implementations return a structured `DenialExplanation` with a
60
+ `FailedCondition` list for every failing check (no short-circuit), a `remediation` list, and a
61
+ human-readable `narrative`. (#48)
62
+ - Dry-run invocation mode: `kernel.invoke(..., dry_run=True)` verifies the token and resolves the
63
+ execution plan without calling the driver. Returns `DryRunResult` with the resolved `driver_id`,
64
+ `operation`, `response_mode`, and an `estimated_cost` tier (`low`/`medium`/`high`). (#43)
65
+ - `Kernel.explain_denial()` convenience method that calls the policy engine's `explain()` for a given
66
+ `CapabilityRequest` and `Principal` without requiring a token. Raises `AgentKernelError` when the
67
+ configured engine does not implement `explain()`.
68
+ - New public types exported from `agent_kernel`: `DeclarativePolicyEngine`, `ExplainingPolicyEngine`,
69
+ `PolicyEngine`, `PolicyMatch`, `PolicyRule`, `DenialExplanation`, `FailedCondition`, `DryRunResult`,
70
+ `PolicyConfigError`.
71
+ - `policy` optional extra (`pip install weaver-kernel[policy]`) pulls in `pyyaml` and, on Python < 3.11 only, `tomli` (Python 3.11+ uses the standard-library `tomllib`).
72
+ - Example policy files in `examples/policies/` (YAML and TOML formats).
73
+
74
+ ### Changed
75
+ - Runtime dependencies now include `pydantic>=2` in addition to `httpx`. Pydantic is used by the new
76
+ `agent_kernel.adapters` package for JSON-Schema generation and argument validation when a
77
+ `Capability` declares a `parameters_model`. Existing kernel behavior is unchanged; pydantic is not
78
+ imported at module load by anything outside the adapters.
79
+ - `PolicyEngine` protocol no longer requires `explain()`. Engines that need to support
80
+ `Kernel.explain_denial()` should implement the new `ExplainingPolicyEngine` protocol. Built-in
81
+ engines satisfy both. This avoids a breaking typing change for downstream implementers.
82
+ - `DeclarativePolicyEngine` now defers `yaml` and `tomllib`/`tomli` imports into the corresponding
83
+ loaders, so `import agent_kernel` works without the `policy` extra installed. Calling
84
+ `from_yaml`/`from_toml` without the parser surfaces a `PolicyConfigError` with an install hint.
85
+ - `Kernel.invoke(dry_run=True)` resolves `operation` the same way drivers do
86
+ (`args.get("operation", capability_id)`) so `DryRunResult.operation` matches what a driver would
87
+ actually receive — instead of `capability.impl.operation`, which can diverge.
88
+ - `Kernel.invoke(dry_run=True)` mirrors the Firewall's admin-only gate for `raw` mode: non-admin
89
+ principals see their requested `raw` mode downgraded to `summary` in `DryRunResult`, matching
90
+ what they would actually get at real-invoke time. Prevents probing for raw availability.
91
+
92
+ ### Documentation
93
+ - `docs/architecture.md` now describes `PolicyEngine` / `ExplainingPolicyEngine` protocols,
94
+ `DefaultPolicyEngine` and `DeclarativePolicyEngine` (with policy-DSL semantics), and dry-run
95
+ mode (admin gate, operation resolution rule). Closes the canonical "Components & API
96
+ reference" gap flagged in audit.
97
+ - `docs/capabilities.md` adds a "Dry-run mode" section (semantics, the three parity rules,
98
+ no-side-effects guarantee), a "Declarative policies" section (loaders, match conditions,
99
+ optional-extra behaviour), and a "Denial explanations" section. Closes the affected-files
100
+ gap from issue #43.
101
+
102
+ ### Fixed
103
+ - `DeclarativePolicyEngine._parse_rule()` now validates the types of `roles`, `attributes`,
104
+ `min_justification`, and `constraints` in policy files and raises `PolicyConfigError` with a
105
+ precise message instead of silently producing misbehaving rules or raising at evaluation time.
106
+ - `DeclarativePolicyEngine.explain()` now correctly reports explicit deny rules that fully match
107
+ (previously fell through to the misleading `no_matching_rule` fallback and dropped the rule's
108
+ reason). Partial-match deny rules are now skipped so the explanation focuses on actionable allow
109
+ rules instead of suggesting changes that would only trigger the deny.
110
+ - Example policy files (`examples/policies/default.{yaml,toml}`) now use the correct `default` key
111
+ (was `default_action`, which the parser silently ignored), express PII-with-tenant as an allow
112
+ rule paired with default-deny (the previous deny rule was inverted under first-match-wins), and
113
+ order the `allow-secrets-service` rule before the deny rule (the deny was previously unreachable).
114
+ - `Kernel.explain_denial()` docstring no longer contradicts itself ("never raises" vs.
115
+ `CapabilityNotFound`).
116
+ - `DryRunResult.budget_remaining` docstring no longer references the unimplemented `BudgetManager`;
117
+ the field is documented as reserved for a future cross-invocation budget mechanism (that mechanism has since landed in this release; see the `BudgetManager` entry under "Added").
118
+ - `drivers/mcp.py` adds an explicit `_McpError: type[BaseException] | None` annotation so mypy
119
+ `--strict` remains happy across the try/except import branches.
120
+
121
+ ### Tests
122
+ - `tests/test_policy.py` adds `test_declarative_replicates_default_policy_decisions` — a
123
+ comparative test asserting that `DeclarativePolicyEngine` and `DefaultPolicyEngine` produce
124
+ the same allow/deny outcomes across a curated scenario matrix (READ × non-sensitive / PII /
125
+ PCI / SECRETS, WRITE/DESTRUCTIVE with and without required roles and justification). Closes
126
+ issue #42's "comparative test" acceptance criterion.
127
+
128
+ ## [0.5.0] - 2026-04-12
129
+
130
+ ### Added
131
+ - Built-in `MCPDriver` with stdio and Streamable HTTP transports, tool auto-discovery, normalized MCP result handling, and optional dependency guardrails.
132
+ - Declared weaver-spec v0.1.0 compatibility in README: invariants I-01 (firewall), I-02 (authorization + audit), and I-06 (scoped tokens) are satisfied.
133
+ - Added placeholder `conformance_stub` CI job that will activate once the weaver-spec conformance suite ships (dgenio/weaver-spec#4).
134
+
135
+ ## [0.4.0] - 2026-03-14
136
+
137
+ ### Added
138
+ - Sliding-window rate limiting in `DefaultPolicyEngine` per `(principal_id, capability_id)` pair (#39).
139
+ Default limits by safety class: 60 READ / 10 WRITE / 2 DESTRUCTIVE per 60s window.
140
+ Service-role principals get 10× limits. Configurable via constructor.
141
+ - GitHub Release step in publish workflow — creates a release with auto-generated notes and artifacts before publishing to PyPI.
142
+
143
+ ### Fixed
144
+ - `HTTPDriver`: DELETE requests now forward args as query params instead of silently dropping them.
145
+
146
+ ### Removed
147
+ - Dead `_truncate_str` helper in `firewall/transform.py` (defined but never called).
148
+
149
+ ## [0.3.0] - 2026-03-09
150
+
151
+ ### Added
152
+ - Structured logging at kernel decision points (invoke, grant, deny, revoke).
153
+ - Agent-facing documentation system: `docs/agent-context/` (architecture, workflows, invariants, lessons-learned, review-checklist).
154
+ - `.github/copilot-instructions.md` — review-critical projections for GitHub Copilot.
155
+ - `.claude/CLAUDE.md` — Claude-specific operating instructions.
156
+ - PyPI publish workflow (`.github/workflows/publish.yml`) with Trusted Publisher (OIDC) (#37).
157
+ - `RELEASE.md` documenting the full release process.
158
+ - `[project.urls]` in `pyproject.toml` (Homepage, Repository, Documentation, Changelog).
159
+ - Optional dependency groups: `mcp` and `otel` in `pyproject.toml`.
160
+
161
+ ### Changed
162
+ - Rewrote `AGENTS.md` with full domain vocabulary, security rules, code conventions, documentation map, and weaver-spec references.
163
+ - Renamed PyPI package from `agent-kernel` to `weaver-kernel` to align with Weaver ecosystem.
164
+ - Added `workflow_call` trigger to CI workflow so publish workflow can reuse it as a gate.
165
+
166
+ ### Refactored
167
+ - Extracted `_log_verify_failure` helper in `tokens.py`.
168
+ - Consolidated invoke logging with shared base dict in `kernel.py`.
169
+ - Extracted `_deny` static method in policy engine.
170
+
171
+ ### Fixed
172
+ - Pinned GitHub Actions to commit SHAs in publish workflow.
173
+ - Added `contents:read` permission to publish job.
174
+ - Clarified PyPI vs import name in README Quickstart.
175
+
176
+ ## [0.2.0] - 2026-03-06
177
+
178
+ ### Added
179
+ - Token revocation support: `revoke_token()` and `revoke_all()` on `Kernel` (#33, #57).
180
+ - `SECRETS` sensitivity tag enforcement in policy engine and redaction (#56).
181
+
182
+ ### Fixed
183
+ - Policy engine now strips whitespace from justification before length check.
184
+ - Policy engine reports both raw and stripped length in justification errors.
185
+ - Policy engine checks role before justification in all safety/sensitivity blocks.
186
+ - Redaction preserves field-name context in API key and connection string patterns.
187
+ - `revoke_all()` drops `_principal_tokens` entry after revoking.
188
+
189
+ ## [0.1.0] - 2024-01-01
190
+
191
+ ### Added
192
+ - Initial scaffold: `CapabilityRegistry`, `PolicyEngine`, `HMACTokenProvider`, `Kernel`.
193
+ - `InMemoryDriver` and `HTTPDriver` (httpx-based).
194
+ - Context `Firewall` with `Budgets`, redaction, and summarization.
195
+ - `HandleStore` with TTL, pagination, field selection, and basic filtering.
196
+ - `TraceStore` and `explain()` for full audit trail.
197
+ - Examples: `basic_cli.py`, `billing_demo.py`, `http_driver_demo.py`.
198
+ - Documentation: architecture, security model, integrations, capabilities, context firewall.
199
+ - CI pipeline for Python 3.10, 3.11, 3.12 with ruff + mypy + pytest.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: weaver-kernel
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Capability-based security kernel for AI agents operating in large tool ecosystems
5
5
  Project-URL: Homepage, https://github.com/dgenio/agent-kernel
6
6
  Project-URL: Repository, https://github.com/dgenio/agent-kernel
@@ -221,6 +221,7 @@ Classifier: Topic :: Security
221
221
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
222
222
  Requires-Python: >=3.10
223
223
  Requires-Dist: httpx>=0.27
224
+ Requires-Dist: pydantic>=2
224
225
  Provides-Extra: dev
225
226
  Requires-Dist: httpx>=0.27; extra == 'dev'
226
227
  Requires-Dist: mcp>=1.6; extra == 'dev'
@@ -228,11 +229,19 @@ Requires-Dist: mypy>=1.10; extra == 'dev'
228
229
  Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
229
230
  Requires-Dist: pytest-cov>=5.0; extra == 'dev'
230
231
  Requires-Dist: pytest>=8.0; extra == 'dev'
232
+ Requires-Dist: pyyaml>=6.0; extra == 'dev'
231
233
  Requires-Dist: ruff>=0.4; extra == 'dev'
234
+ Requires-Dist: tomli>=2.0; (python_version < '3.11') and extra == 'dev'
235
+ Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
232
236
  Provides-Extra: mcp
233
237
  Requires-Dist: mcp>=1.6; extra == 'mcp'
234
238
  Provides-Extra: otel
235
239
  Requires-Dist: opentelemetry-api>=1.20; extra == 'otel'
240
+ Provides-Extra: policy
241
+ Requires-Dist: pyyaml>=6.0; extra == 'policy'
242
+ Requires-Dist: tomli>=2.0; (python_version < '3.11') and extra == 'policy'
243
+ Provides-Extra: tiktoken
244
+ Requires-Dist: tiktoken>=0.6; extra == 'tiktoken'
236
245
  Description-Content-Type: text/markdown
237
246
 
238
247
  # agent-kernel
@@ -64,6 +64,19 @@ tag is **silently ignored** — capabilities tagged with it pass policy without
64
64
 
65
65
  **Rule:** When adding a `SensitivityTag`, always add a matching policy rule and test.
66
66
 
67
+ ### Dry-run response-mode parity
68
+ `Kernel.invoke(dry_run=True)` reports the response mode the caller would actually
69
+ get at real-invoke time. The Firewall downgrades `raw` to `summary` for non-admin
70
+ principals (`firewall/transform.py:108`), so dry-run must mirror that downgrade —
71
+ otherwise a non-admin caller can probe/assume raw-mode availability they will never
72
+ actually receive. The same applies to `operation`: dry-run resolves it the same way
73
+ drivers do (`args.get("operation", capability_id)`), so what the caller sees in
74
+ `DryRunResult` matches what a driver would receive.
75
+
76
+ **Rule:** Any code path that reports a response mode or driver operation back to the
77
+ caller must apply the same admin gate / resolution rule the real-invoke path uses,
78
+ including dry-run, mock, and test paths.
79
+
67
80
  ## Safe vs. unsafe changes
68
81
 
69
82
  | Safe | Unsafe |
@@ -0,0 +1,110 @@
1
+ # Architecture
2
+
3
+ ## Overview
4
+
5
+ `agent-kernel` is a capability-based security kernel that sits **above** raw tool execution (MCP, HTTP APIs, internal services) and **below** the LLM context window.
6
+
7
+ ```mermaid
8
+ graph TD
9
+ LLM["LLM / Agent"] -->|goal text| K["Kernel"]
10
+ K -->|search| REG["CapabilityRegistry"]
11
+ REG -->|CapabilityRequest| K
12
+ K -->|evaluate| POL["PolicyEngine"]
13
+ POL -->|PolicyDecision| K
14
+ K -->|issue| TOK["TokenProvider (HMAC)"]
15
+ TOK -->|CapabilityToken| K
16
+ K -->|route| ROU["Router"]
17
+ ROU -->|RoutePlan| K
18
+ K -->|execute| DRV["Driver (Memory / HTTP / MCP)"]
19
+ DRV -->|RawResult| K
20
+ K -->|transform| FW["Firewall"]
21
+ FW -->|Frame| K
22
+ K -->|store| HS["HandleStore"]
23
+ K -->|record| TS["TraceStore"]
24
+ K -->|Frame| LLM
25
+ ```
26
+
27
+ ## Components
28
+
29
+ ### Kernel
30
+ The central orchestrator. Wires all components together and exposes:
31
+ - `request_capabilities(goal)` — discover relevant capabilities
32
+ - `grant_capability(request, principal, justification)` — policy check + token issuance
33
+ - `invoke(token, principal, args, response_mode, dry_run=False)` — execute + firewall + trace, or short-circuit before driver dispatch when `dry_run=True`
34
+ - `expand(handle, query)` — paginate/filter stored results
35
+ - `explain(action_id)` — retrieve audit trace
36
+ - `explain_denial(request, principal, justification)` — return a structured `DenialExplanation` instead of raising `PolicyDenied`
37
+
38
+ ### CapabilityRegistry
39
+ A flat dict of `Capability` objects indexed by `capability_id`. Provides keyword-based search (no LLM, no vector DB — purely token overlap scoring).
40
+
41
+ ### PolicyEngine
42
+ Two protocols and two built-in engines:
43
+
44
+ - **`PolicyEngine`** (protocol) — single required method: `evaluate(request, capability, principal, justification) -> PolicyDecision`.
45
+ - **`ExplainingPolicyEngine`** (protocol, extends `PolicyEngine`) — adds `explain(...) -> DenialExplanation`. Only engines that implement this protocol can be used with `Kernel.explain_denial`; otherwise that call raises `AgentKernelError` with a clear message. Splitting the contract keeps existing downstream `PolicyEngine` implementers backward-compatible.
46
+
47
+ Both built-in engines satisfy `ExplainingPolicyEngine`:
48
+
49
+ - **`DefaultPolicyEngine`** — hardcoded role-based rules:
50
+ 1. **READ** — always allowed
51
+ 2. **WRITE** — requires `justification ≥ 15 chars` + role `writer|admin`
52
+ 3. **DESTRUCTIVE** — requires role `admin` + `justification ≥ 15 chars`
53
+ 4. **PII/PCI** — requires `tenant` attribute; enforces `allowed_fields` unless `pii_reader`
54
+ 5. **SECRETS** — requires role `admin|secrets_reader` + `justification ≥ 15 chars`
55
+ 6. **max_rows** — 50 (user), 500 (service)
56
+ 7. **Rate limiting** — sliding-window per `(principal_id, capability_id)` (60 READ / 10 WRITE / 2 DESTRUCTIVE per 60s; service role gets 10×)
57
+ - **`DeclarativePolicyEngine`** — loads rules from a YAML or TOML file (or a plain dict). Supports `safety_class`, `sensitivity`, `roles`, `attributes`, and `min_justification` match conditions; `allow`/`deny` actions; per-rule `constraints` merged into the resulting `PolicyDecision`; configurable `default` action. Rules are evaluated top-down with first-match-wins. `pyyaml` and `tomli` are optional dependencies — `import agent_kernel` works without them; calling `from_yaml`/`from_toml` without the parser raises `PolicyConfigError` with an install hint.
58
+
59
+ #### Denial explanations
60
+
61
+ `ExplainingPolicyEngine.explain()` returns a structured `DenialExplanation` with `denied`, `rule_name`, a `failed_conditions: list[FailedCondition]` describing each missing condition with `required`/`actual`/`suggestion`, a `remediation` list, and a human-readable `narrative`. Engines that implement only the base `PolicyEngine` protocol do not provide it. Engines collect all failing conditions (no short-circuit) so callers get the full picture. For `DeclarativePolicyEngine`, an explicit deny rule that fully matches is reported as the cause; partial-match deny rules are skipped during explanation so the surfaced advice is actionable rather than self-defeating.
62
+
63
+ #### Dry-run mode
64
+
65
+ `Kernel.invoke(dry_run=True)` verifies the token and resolves the route plan but **never calls the driver**. It returns a `DryRunResult` with the resolved `driver_id`, the same `operation` a driver would receive (`args.get("operation", capability_id)`), the request constraints, the effective `response_mode` (Firewall's admin-only gate is mirrored: non-admin `raw` is downgraded to `summary`), and a coarse `estimated_cost` tier based on `SafetyClass`. Token verification still raises `TokenExpired` / `TokenInvalid` / `TokenScopeError` in dry-run, so the mode is safe as a policy/route sanity check. See [`docs/capabilities.md`](capabilities.md#dry-run-mode) for usage and [`docs/agent-context/invariants.md`](agent-context/invariants.md) for the parity rule with the real-invoke path.
66
+
67
+ ### TokenProvider (HMAC)
68
+ Issues HMAC-SHA256 signed tokens. Each token is bound to `principal_id + capability_id + constraints`. Verification checks: expiry → signature → principal → capability.
69
+
70
+ ### Router
71
+ `StaticRouter` maps `capability_id → [driver_id, ...]`. First driver that succeeds wins; others are tried as fallbacks.
72
+
73
+ ### Drivers
74
+ - **InMemoryDriver** — Python callables, used for tests and demos
75
+ - **HTTPDriver** — `httpx`-based async HTTP client
76
+ - **MCPDriver** — adapter for Model Context Protocol tool servers (stdio and Streamable HTTP transports, with tool auto-discovery)
77
+
78
+ ### Firewall
79
+ Transforms `RawResult → Frame`. Never exposes raw output to the LLM.
80
+ - Four response modes: `summary`, `table`, `handle_only`, `raw`
81
+ - Enforces `Budgets` (max_rows, max_fields, max_chars, max_depth)
82
+ - Redacts sensitive fields and inline PII patterns
83
+ - Deterministic summarisation (no LLM)
84
+
85
+ ### HandleStore
86
+ Stores full results by opaque handle ID with TTL. `expand()` supports pagination, field selection, and basic equality filtering.
87
+
88
+ ### TraceStore
89
+ Records every `ActionTrace`. `explain(action_id)` returns the full audit record.
90
+
91
+ ### Adapters (`agent_kernel.adapters`)
92
+ Vendor-specific tool-format adapters that translate between `Capability` objects
93
+ and the tool shapes used by LLM provider APIs:
94
+
95
+ - **`OpenAIMiddleware`** — emits OpenAI tool definitions (Responses API or Chat
96
+ Completions shape), parses `response.output` / `message.tool_calls`, and
97
+ returns `function_call_output` / tool-result messages. Dotted capability IDs
98
+ map to `namespace__function` (OpenAI tool names cannot contain `.`).
99
+ - **`AnthropicMiddleware`** — emits Anthropic tool definitions with optional
100
+ `cache_control` blocks, parses `tool_use` content blocks, and returns
101
+ `tool_result` content blocks. Dotted capability IDs are preserved as-is.
102
+
103
+ Both classes share `BaseToolMiddleware`, which owns hook registration
104
+ (`intercept_tool_call`, `intercept_tool_result`), pre/post dispatch (sync or
105
+ async), and conversion of kernel exceptions (`PolicyDenied`,
106
+ `CapabilityNotFound`, `DriverError`) into tool-result errors the LLM can react
107
+ to. Input arguments are validated against `Capability.parameters_model`
108
+ (pydantic) when present. **Zero runtime dependency** on the `openai` /
109
+ `anthropic` SDK packages. See [`docs/integrations.md`](integrations.md) for
110
+ usage examples.
@@ -0,0 +1,169 @@
1
+ # Designing Capabilities
2
+
3
+ ## Naming conventions
4
+
5
+ - Use `domain.verb_noun` format: `billing.list_invoices`, `users.get_profile`.
6
+ - Be specific: prefer `billing.cancel_invoice` over `billing.update`.
7
+ - Avoid generic names like `billing.execute` or `api.call`.
8
+
9
+ ## Granularity
10
+
11
+ Each capability should map to a single, auditable action with clear side-effects.
12
+
13
+ **Good:**
14
+ - `billing.list_invoices` (READ, no side-effects)
15
+ - `billing.send_reminder` (WRITE, sends an email)
16
+ - `billing.void_invoice` (DESTRUCTIVE, irreversible)
17
+
18
+ **Avoid:**
19
+ - `billing.do_stuff` (too broad)
20
+ - `billing.list_or_update_invoices` (mixed safety classes)
21
+
22
+ ## Safety classes
23
+
24
+ | Class | Examples | Policy |
25
+ |-------|---------|--------|
26
+ | READ | list, get, search, summarize | Always allowed |
27
+ | WRITE | create, update, send, approve | Justification + writer role |
28
+ | DESTRUCTIVE | delete, void, purge, terminate | Admin role only |
29
+
30
+ ## Sensitivity tags
31
+
32
+ Use `SensitivityTag.PII` when results may contain: name, email, phone, SSN, address.
33
+ Use `SensitivityTag.PCI` when results may contain: card numbers, CVV, bank details.
34
+ Use `SensitivityTag.SECRETS` when results may contain: API keys, passwords, tokens.
35
+
36
+ Always pair sensitivity tags with `allowed_fields` to restrict which fields are returned
37
+ to non-privileged callers.
38
+
39
+ ## Tags
40
+
41
+ Add descriptive tags to improve keyword matching:
42
+
43
+ ```python
44
+ Capability(
45
+ capability_id="billing.list_invoices",
46
+ tags=["billing", "invoices", "list", "finance", "accounts receivable"],
47
+ ...
48
+ )
49
+ ```
50
+
51
+ ## Dry-run mode
52
+
53
+ `Kernel.invoke(..., dry_run=True)` verifies the token and resolves the route
54
+ plan but **never calls the driver**. Use it to validate that a principal can
55
+ invoke a capability, inspect what a driver *would* receive, or run policy
56
+ checks in CI without live tool backends.
57
+
58
+ ```python
59
+ result = await kernel.invoke(
60
+ token,
61
+ principal=principal,
62
+ args={"operation": "billing.list_invoices", "max_rows": 5},
63
+ response_mode="summary",
64
+ dry_run=True,
65
+ )
66
+ # result: DryRunResult(
67
+ # capability_id="billing.list_invoices",
68
+ # principal_id="user-001",
69
+ # policy_decision=PolicyDecision(allowed=True, ...),
70
+ # driver_id="billing",
71
+ # operation="billing.list_invoices",
72
+ # resolved_args={"operation": "billing.list_invoices", "max_rows": 5},
73
+ # response_mode="summary",
74
+ # budget_remaining=None,
75
+ # estimated_cost="low",
76
+ # )
77
+ ```
78
+
79
+ Three rules govern dry-run behaviour — keep them in sync with the real-invoke
80
+ path if you change either:
81
+
82
+ 1. **Token verification still runs.** Expired, revoked, or scope-mismatched
83
+ tokens raise `TokenExpired` / `TokenRevoked` / `TokenInvalid` /
84
+ `TokenScopeError` exactly as they would at real-invoke. Policy is *not*
85
+ re-evaluated at invoke time — the granting policy decision is encoded in
86
+ the token at `grant_capability`.
87
+ 2. **Operation resolution mirrors drivers.** `DryRunResult.operation` is
88
+ computed the same way every driver computes it:
89
+ `str(args.get("operation", capability_id))`. Always use `args["operation"]`
90
+ when you need a fixed operation; otherwise the dry-run operation is the
91
+ capability ID, matching what the driver would see.
92
+ 3. **Raw-mode admin gate mirrors the Firewall.** Non-admin principals never
93
+ get `response_mode="raw"` at real-invoke (the Firewall downgrades it to
94
+ `"summary"` — see `firewall/transform.py`). Dry-run downgrades the same
95
+ way, so non-admin callers cannot probe for raw-mode availability via
96
+ `DryRunResult`.
97
+
98
+ The driver's `execute()` is never called in dry-run, so the mode is free of
99
+ side effects regardless of driver type (`InMemoryDriver`, `HTTPDriver`,
100
+ `MCPDriver`). `DryRunResult.budget_remaining` is `None` unless the kernel has a
101
+ `BudgetManager` attached, in which case it reports the remaining cross-invocation budget.
102
+
103
+ ## Declarative policies
104
+
105
+ `DeclarativePolicyEngine` is an alternative to `DefaultPolicyEngine` that
106
+ loads rules from a YAML or TOML file (or a plain dict). Rules are evaluated
107
+ top-down, first-match-wins; if no rule matches, the policy's `default` action
108
+ applies (`"deny"` unless overridden).
109
+
110
+ ```python
111
+ from pathlib import Path
112
+ from agent_kernel import DeclarativePolicyEngine, Kernel
113
+
114
+ # YAML or TOML — both formats are interchangeable.
115
+ policy = DeclarativePolicyEngine.from_yaml(Path("examples/policies/default.yaml"))
116
+
117
+ # Or build entirely in-memory:
118
+ policy = DeclarativePolicyEngine.from_dict({
119
+ "default": "deny",
120
+ "rules": [
121
+ {"name": "allow-read", "action": "allow",
122
+ "match": {"safety_class": ["READ"], "sensitivity": ["NONE"]}},
123
+ # ...
124
+ ],
125
+ })
126
+
127
+ kernel = Kernel(registry=registry, policy=policy)
128
+ ```
129
+
130
+ A rule's `match` block supports `safety_class`, `sensitivity`, `roles`
131
+ (ANY-of), `attributes` (ALL-of, with `"*"` meaning "attribute must be
132
+ present"), and `min_justification` (minimum stripped length). On `allow`, the
133
+ rule's `constraints` are merged into the resulting `PolicyDecision`. On
134
+ `deny`, `reason` is embedded in the raised `PolicyDenied`.
135
+
136
+ The DSL has no negation/missing-attribute operator today, so a policy that
137
+ should deny "when an attribute is missing" should be expressed as an allow
138
+ rule requiring the attribute paired with `default: deny`. See
139
+ [`examples/policies/default.yaml`](../examples/policies/default.yaml) for a
140
+ worked example.
141
+
142
+ `pyyaml` and `tomli` are **optional** — they live behind the `[policy]`
143
+ extra. `import agent_kernel` always works; calling `from_yaml` / `from_toml`
144
+ without the parser installed raises `PolicyConfigError` with an install hint.
145
+
146
+ ## Denial explanations
147
+
148
+ When a capability call is denied, `Kernel.explain_denial(request, principal,
149
+ justification="")` returns a structured `DenialExplanation` describing
150
+ **every** unmet condition (not just the first one), so the caller can see the
151
+ full remediation path:
152
+
153
+ ```python
154
+ explanation = kernel.explain_denial(
155
+ CapabilityRequest(capability_id="billing.update_invoice", goal="..."),
156
+ principal,
157
+ justification="too short",
158
+ )
159
+ # explanation.denied == True
160
+ # explanation.rule_name == "write-min_justification"
161
+ # explanation.failed_conditions == [FailedCondition(condition="roles", required=[...]), ...]
162
+ # explanation.remediation == ["Add 'writer' or 'admin' role to ...", "Provide ..."]
163
+ # explanation.narrative == "Request for 'billing.update_invoice' by '...' would be denied: ..."
164
+ ```
165
+
166
+ Both built-in engines support `explain()`. If you bring a custom policy
167
+ engine that implements only `PolicyEngine.evaluate`, `explain_denial` raises
168
+ `AgentKernelError` with guidance — implement the `ExplainingPolicyEngine`
169
+ protocol to enable structured explanations.
@@ -0,0 +1,120 @@
1
+ # Context Firewall
2
+
3
+ ## Why it exists
4
+
5
+ Large tool ecosystems produce large, verbose outputs. Passing raw tool output to an LLM
6
+ causes context blowup, leaks PII, and makes the agent unpredictable. The firewall
7
+ transforms every `RawResult` into a bounded `Frame` before the LLM sees it.
8
+
9
+ ## Budgets
10
+
11
+ ```python
12
+ from agent_kernel.firewall.budgets import Budgets
13
+
14
+ Budgets(
15
+ max_rows=50, # max rows in table_preview
16
+ max_fields=20, # max fields per row
17
+ max_chars=4000, # total characters across all facts
18
+ max_depth=3, # recursion depth for nested structures
19
+ )
20
+ ```
21
+
22
+ ## Response modes
23
+
24
+ | Mode | What you get | When to use |
25
+ |------|-------------|-------------|
26
+ | `summary` | ≤20 fact strings + handle | Default; best for LLM context |
27
+ | `table` | ≤max_rows dicts + handle | When the LLM needs tabular data |
28
+ | `handle_only` | handle + warnings | Defer all data to an expand() call |
29
+ | `raw` | Full data (admin only) | Debugging; never for LLM context |
30
+
31
+ ## Handles
32
+
33
+ A `Handle` is an opaque reference to the full dataset stored server-side.
34
+
35
+ ```python
36
+ # Stored automatically on every invoke()
37
+ handle = frame.handle
38
+
39
+ # Expand with pagination
40
+ expanded = kernel.expand(handle, query={"offset": 10, "limit": 5})
41
+
42
+ # Field selection
43
+ expanded = kernel.expand(handle, query={"fields": ["id", "name"]})
44
+
45
+ # Basic filtering
46
+ expanded = kernel.expand(handle, query={"filter": {"status": "unpaid"}})
47
+ ```
48
+
49
+ ## Redaction
50
+
51
+ When a capability has `SensitivityTag.PII` or `SensitivityTag.PCI`:
52
+ - Fields in `Capability.allowed_fields` are kept (others removed)
53
+ - Values of fields with sensitive names (`email`, `phone`, `card_number`, `ssn`, etc.) are replaced with `[REDACTED]`
54
+ - Inline patterns in string values (email addresses, phone numbers, SSNs, card numbers) are redacted
55
+
56
+ Principals with the `pii_reader` role bypass `allowed_fields` enforcement.
57
+
58
+ ## Summarization
59
+
60
+ Summaries are produced deterministically:
61
+ - **list of dicts** → row count + top keys + numeric stats + categorical distributions
62
+ - **dict** → key list + per-value type/value
63
+ - **string** → truncated to 500 chars
64
+ - **other** → repr() truncated to 200 chars
65
+
66
+ ## Cross-invocation budgets
67
+
68
+ The per-invocation `Budgets` above cap a single Frame. A separate
69
+ `BudgetManager` tracks cumulative token usage *across* invocations within a
70
+ session. It is optional — if you don't attach one, kernel behavior is
71
+ unchanged.
72
+
73
+ ```python
74
+ from agent_kernel import BudgetManager, Kernel
75
+
76
+ manager = BudgetManager(total_budget=100_000)
77
+ kernel = Kernel(registry, budget_manager=manager)
78
+ ```
79
+
80
+ Per `invoke()` the kernel:
81
+
82
+ 1. Reserves a slice of the remaining budget (default 4,000 tokens). If the
83
+ budget is empty, `BudgetExhausted` is raised before the driver runs.
84
+ 2. Consults `manager.suggested_mode(requested)` to escalate the requested
85
+ `response_mode` to a more compact (lower-token) tier as the remaining budget shrinks.
86
+ 3. After the firewall produces a Frame, counts the actual tokens in the
87
+ LLM-facing payload and reconciles them against the reservation.
88
+
89
+ Escalation table:
90
+
91
+ | Budget remaining | Suggested mode (effective `response_mode`) |
92
+ |-----------------:|------------------------------------------------|
93
+ | > 50% | Caller's requested mode (no change) |
94
+ | > 20% – ≤ 50% | `table` (when caller requested `raw`) |
95
+ | ≥ 5% – ≤ 20% | `summary` (floor — never *relaxes* to `table`) |
96
+ | < 5% | `handle_only` |
97
+
98
+ Boundaries land in the more-conservative tier — exactly 50% remaining
99
+ downgrades `raw` to `table`, exactly 20% floors at `summary`, and only when
100
+ remaining drops *below* 5% does `handle_only` take over.
101
+
102
+ `Kernel.invoke(..., dry_run=True)` mirrors the escalation and reports
103
+ `budget_remaining` in the returned `DryRunResult`, so callers can preview
104
+ what their next live invocation would actually return.
105
+
106
+ Plug a different token counter (for example, a `tiktoken`-based one) via the
107
+ `TokenCounter` protocol:
108
+
109
+ ```python
110
+ import tiktoken # pip install weaver-kernel[tiktoken]
111
+ enc = tiktoken.encoding_for_model("gpt-4o")
112
+
113
+ def tiktoken_counter(value):
114
+ return len(enc.encode(str(value)))
115
+
116
+ manager = BudgetManager(total_budget=128_000, token_counter=tiktoken_counter)
117
+ ```
118
+
119
+ The default counter (`default_token_counter`) is a character-based
120
+ `len(json.dumps(value)) // 4` approximation with no extra dependencies.