weakincentives 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. weakincentives/__init__.py +67 -0
  2. weakincentives/adapters/__init__.py +37 -0
  3. weakincentives/adapters/_names.py +32 -0
  4. weakincentives/adapters/_provider_protocols.py +69 -0
  5. weakincentives/adapters/_tool_messages.py +80 -0
  6. weakincentives/adapters/core.py +102 -0
  7. weakincentives/adapters/litellm.py +254 -0
  8. weakincentives/adapters/openai.py +254 -0
  9. weakincentives/adapters/shared.py +1021 -0
  10. weakincentives/cli/__init__.py +23 -0
  11. weakincentives/cli/wink.py +58 -0
  12. weakincentives/dbc/__init__.py +412 -0
  13. weakincentives/deadlines.py +58 -0
  14. weakincentives/prompt/__init__.py +105 -0
  15. weakincentives/prompt/_generic_params_specializer.py +64 -0
  16. weakincentives/prompt/_normalization.py +48 -0
  17. weakincentives/prompt/_overrides_protocols.py +33 -0
  18. weakincentives/prompt/_types.py +34 -0
  19. weakincentives/prompt/chapter.py +146 -0
  20. weakincentives/prompt/composition.py +281 -0
  21. weakincentives/prompt/errors.py +57 -0
  22. weakincentives/prompt/markdown.py +108 -0
  23. weakincentives/prompt/overrides/__init__.py +59 -0
  24. weakincentives/prompt/overrides/_fs.py +164 -0
  25. weakincentives/prompt/overrides/inspection.py +141 -0
  26. weakincentives/prompt/overrides/local_store.py +275 -0
  27. weakincentives/prompt/overrides/validation.py +534 -0
  28. weakincentives/prompt/overrides/versioning.py +269 -0
  29. weakincentives/prompt/prompt.py +353 -0
  30. weakincentives/prompt/protocols.py +103 -0
  31. weakincentives/prompt/registry.py +375 -0
  32. weakincentives/prompt/rendering.py +288 -0
  33. weakincentives/prompt/response_format.py +60 -0
  34. weakincentives/prompt/section.py +166 -0
  35. weakincentives/prompt/structured_output.py +179 -0
  36. weakincentives/prompt/tool.py +397 -0
  37. weakincentives/prompt/tool_result.py +30 -0
  38. weakincentives/py.typed +0 -0
  39. weakincentives/runtime/__init__.py +82 -0
  40. weakincentives/runtime/events/__init__.py +126 -0
  41. weakincentives/runtime/events/_types.py +110 -0
  42. weakincentives/runtime/logging.py +284 -0
  43. weakincentives/runtime/session/__init__.py +46 -0
  44. weakincentives/runtime/session/_slice_types.py +24 -0
  45. weakincentives/runtime/session/_types.py +55 -0
  46. weakincentives/runtime/session/dataclasses.py +29 -0
  47. weakincentives/runtime/session/protocols.py +34 -0
  48. weakincentives/runtime/session/reducer_context.py +40 -0
  49. weakincentives/runtime/session/reducers.py +82 -0
  50. weakincentives/runtime/session/selectors.py +56 -0
  51. weakincentives/runtime/session/session.py +387 -0
  52. weakincentives/runtime/session/snapshots.py +310 -0
  53. weakincentives/serde/__init__.py +19 -0
  54. weakincentives/serde/_utils.py +240 -0
  55. weakincentives/serde/dataclass_serde.py +55 -0
  56. weakincentives/serde/dump.py +189 -0
  57. weakincentives/serde/parse.py +417 -0
  58. weakincentives/serde/schema.py +260 -0
  59. weakincentives/tools/__init__.py +154 -0
  60. weakincentives/tools/_context.py +38 -0
  61. weakincentives/tools/asteval.py +853 -0
  62. weakincentives/tools/errors.py +26 -0
  63. weakincentives/tools/planning.py +831 -0
  64. weakincentives/tools/podman.py +1655 -0
  65. weakincentives/tools/subagents.py +346 -0
  66. weakincentives/tools/vfs.py +1390 -0
  67. weakincentives/types/__init__.py +35 -0
  68. weakincentives/types/json.py +45 -0
  69. weakincentives-0.9.0.dist-info/METADATA +775 -0
  70. weakincentives-0.9.0.dist-info/RECORD +73 -0
  71. weakincentives-0.9.0.dist-info/WHEEL +4 -0
  72. weakincentives-0.9.0.dist-info/entry_points.txt +2 -0
  73. weakincentives-0.9.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,775 @@
1
+ Metadata-Version: 2.4
2
+ Name: weakincentives
3
+ Version: 0.9.0
4
+ Summary: Tools for developing and optimizing side effect free background agents
5
+ Project-URL: Homepage, https://weakincentives.com/
6
+ Project-URL: Documentation, https://github.com/weakincentives/weakincentives#readme
7
+ Project-URL: Repository, https://github.com/weakincentives/weakincentives
8
+ Project-URL: Issue Tracker, https://github.com/weakincentives/weakincentives/issues
9
+ Author-email: Andrei Savu <andrei@weakincentives.com>
10
+ License: Apache-2.0
11
+ License-File: LICENSE
12
+ Keywords: agents,ai,background-agents,optimization,side-effect-free,weak-incentives
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.14
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Classifier: Typing :: Typed
25
+ Requires-Python: >=3.12
26
+ Provides-Extra: asteval
27
+ Requires-Dist: asteval>=1.0.7; extra == 'asteval'
28
+ Provides-Extra: litellm
29
+ Requires-Dist: litellm>=1.79.3; extra == 'litellm'
30
+ Provides-Extra: openai
31
+ Requires-Dist: openai>=2.8.0; extra == 'openai'
32
+ Provides-Extra: podman
33
+ Requires-Dist: podman>=5.6.0; extra == 'podman'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # Weak Incentives (Is All You Need)
37
+
38
+ Weak Incentives is a Python library for building "background agents" (automated
39
+ AI systems). It provides lean, typed, and composable building blocks that keep
40
+ determinism, testability, and safe execution front and center without relying on
41
+ heavy dependencies or hosted services.
42
+
43
+ The core philosophy treats agent development as a structured engineering
44
+ discipline rather than an exercise in ad-hoc scripting. The library ships a set
45
+ of focused abstractions that reinforce that rigor:
46
+
47
+ - **Prompts as code.** Prompts are typed `Prompt` objects composed from sections
48
+ such as `MarkdownSection`, versioned deterministically, and scoped with
49
+ chapters so long-form directives can stay dormant until policies open them.
50
+ - **Structured I/O.** Declaring a dataclass output type for a `Prompt[OutputT]`
51
+ automatically builds a JSON schema, instructs the provider to obey it, and
52
+ parses the reply back into a typed Python object.
53
+ - **Stateful, replayable sessions.** `Session` acts as a Redux-like state
54
+ store, letting pure reducers respond to events (for example, `ToolInvoked`)
55
+ so every change is observable, replayable, and snapshot-friendly.
56
+ - **Typed and sandboxed tools.** Tools are typed callables (`Tool[ParamsT, ResultT]`) with explicit contracts for inputs and outputs, plus built-in
57
+ suites for planning, a secure in-memory VFS, and sandboxed Python evaluation.
58
+ - **Provider-agnostic adapters.** Adapters connect the framework to providers
59
+ like OpenAI or LiteLLM by handling API calls, tool negotiation, and response
60
+ parsing while keeping the agent logic model-agnostic.
61
+ - **Configuration and optimization hooks.** Structured logging via
62
+ `weakincentives.runtime.logging`, enforced deadlines, and a powerful prompt overrides system let
63
+ teams A/B test and iterate on prompts through JSON files without touching the
64
+ application source.
65
+
66
+ ## What's novel?
67
+
68
+ While other agent frameworks provide a toolbox of loose components, Weak
69
+ Incentives offers an opinionated chassis that emphasizes determinism, type
70
+ contracts, and observable workflows:
71
+
72
+ 1. **Redux-like state management with reducers.** Every state change is a
73
+ traceable consequence of a published event processed by a pure reducer. This
74
+ thread of causality delivers replayability and visibility far beyond
75
+ free-form dictionaries or mutable object properties.
76
+ 1. **Composable prompt blueprints with typed contracts.** Prompts are built
77
+ from reusable sections and chapters backed by dataclasses, so composition and
78
+ parameter binding feel like standard software engineering instead of string
79
+ concatenation.
80
+ 1. **Integrated, hash-based prompt overrides.** `PromptDescriptor` content
81
+ hashes ensure overrides only apply to the intended section version, and
82
+ `LocalPromptOverridesStore` keeps the JSON artifacts in version control so
83
+ teams can collaborate without risking stale edits.
84
+ 1. **First-class in-memory virtual filesystem.** The sandboxed VFS ships as a
85
+ core tool, giving agents a secure workspace whose state is tracked like any
86
+ other session slice and avoiding accidental host access.
87
+ 1. **Lean dependency surface.** Avoiding heavyweight stacks such as Pydantic
88
+ keeps the core lightweight. Custom serde modules provide the needed
89
+ functionality without saddling users with sprawling dependency trees.
90
+
91
+ In short, Weak Incentives favors software-engineering discipline—determinism,
92
+ type safety, testability, and clear state management—over maximizing the number
93
+ of exposed knobs.
94
+
95
+ The specs below dive into each area when you need exact contracts and deeper
96
+ context:
97
+
98
+ - **Observable session state with reducer hooks.** A Redux-like session ledger
99
+ and in-process event bus keep every tool call and prompt render replayable.
100
+ Built-in planning, virtual filesystem, and Python-evaluation sections ship
101
+ with reducers that enforce domain rules while emitting structured telemetry.
102
+ See [Session State](https://github.com/weakincentives/weakincentives/blob/main/specs/SESSIONS.md), [Prompt Event Emission](https://github.com/weakincentives/weakincentives/blob/main/specs/EVENTS.md),
103
+ [Planning Tools](https://github.com/weakincentives/weakincentives/blob/main/specs/PLANNING_TOOL.md), [Virtual Filesystem Tools](https://github.com/weakincentives/weakincentives/blob/main/specs/VFS_TOOLS.md),
104
+ and [Asteval Integration](https://github.com/weakincentives/weakincentives/blob/main/specs/ASTEVAL.md).
105
+ - **Composable prompt blueprints with strict contracts.** Dataclass-backed
106
+ sections compose into reusable blueprints that render validated Markdown and
107
+ expose tool contracts automatically. Specs: [Prompt Overview](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPTS.md),
108
+ [Prompt Composition](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPTS_COMPOSITION.md), and
109
+ [Structured Output](https://github.com/weakincentives/weakincentives/blob/main/specs/STRUCTURED_OUTPUT.md).
110
+ - **Chapter-driven visibility controls.** Chapters gate when prompt regions
111
+ enter the model context, defaulting to closed until runtime policies open
112
+ them. Expansion strategies and lifecycle guidance live in
113
+ [Chapters Specification](https://github.com/weakincentives/weakincentives/blob/main/specs/CHAPTERS.md).
114
+ - **Override-friendly workflows that scale into optimization.** Prompt
115
+ definitions ship with hash-based descriptors and on-disk overrides that stay
116
+ in sync through schema validation and Git-root discovery, laying the
117
+ groundwork for iterative optimization. Review
118
+ [Prompt Overrides](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPT_OVERRIDES.md) for the full contract.
119
+ - **Provider adapters standardize tool negotiation.** Shared conversation
120
+ loops negotiate tool calls, apply JSON-schema response formats, and normalize
121
+ structured payloads so the runtime stays model-agnostic. See
122
+ [Adapter Specification](https://github.com/weakincentives/weakincentives/blob/main/specs/ADAPTERS.md) and provider-specific docs such as
123
+ [LiteLLM Adapter](https://github.com/weakincentives/weakincentives/blob/main/specs/LITE_LLM_ADAPTER.md).
124
+ - **Local-first, deterministic execution.** Everything runs locally without
125
+ hosted dependencies, and prompt renders stay diff-friendly so version control
126
+ captures intent instead of churn. The code-review example ties it together
127
+ with override-aware prompts, session telemetry, and replayable tooling.
128
+
129
+ ## Requirements
130
+
131
+ - Python 3.12+ (the repository pins 3.12 in `.python-version` for development)
132
+ - [`uv`](https://github.com/astral-sh/uv) CLI
133
+
134
+ ## Install
135
+
136
+ ```bash
137
+ uv add weakincentives
138
+ # optional tool extras
139
+ uv add "weakincentives[asteval]"
140
+ # optional provider adapters
141
+ uv add "weakincentives[openai]"
142
+ uv add "weakincentives[litellm]"
143
+ # cloning the repo? use: uv sync --extra asteval --extra openai --extra litellm
144
+ ```
145
+
146
+ ## Tutorial: Build a Stateful Code-Reviewing Agent
147
+
148
+ Use Weak Incentives to assemble a reproducible reviewer that tracks every
149
+ decision, stages edits safely, and runs quick calculations inline. The
150
+ runtime already ships with a session ledger and override-aware prompts, so you
151
+ avoid custom state stores or ad-hoc optimizers.
152
+
153
+ ### 1. Model review data and expected outputs
154
+
155
+ Typed dataclasses keep inputs and outputs honest so adapters emit consistent
156
+ telemetry and structured responses stay predictable. See
157
+ [Dataclass Serde Utilities](https://github.com/weakincentives/weakincentives/blob/main/specs/DATACLASS_SERDE.md) and
158
+ [Structured Output via `Prompt[OutputT]`](https://github.com/weakincentives/weakincentives/blob/main/specs/STRUCTURED_OUTPUT.md) for the
159
+ validation and JSON-contract details behind this snippet.
160
+
161
+ ```python
162
+ from dataclasses import dataclass
163
+
164
+
165
+ @dataclass
166
+ class PullRequestContext:
167
+ repository: str
168
+ title: str
169
+ body: str
170
+ files_summary: str
171
+
172
+
173
+ @dataclass
174
+ class ReviewComment:
175
+ file_path: str
176
+ line: int
177
+ severity: str
178
+ summary: str
179
+ rationale: str
180
+
181
+
182
+ @dataclass
183
+ class ReviewBundle:
184
+ comments: tuple[ReviewComment, ...]
185
+ overall_assessment: str
186
+ ```
187
+
188
+ ### 2. Create a session, surface built-in tool suites, and mount diffs
189
+
190
+ Planning, virtual filesystem, and Python-evaluation sections register reducers on
191
+ the session so every run supports plans, staged edits, and quick calculations.
192
+ Mount diffs ahead of time so the agent can read them through the virtual
193
+ filesystem without extra callbacks. Install the `asteval` extra
194
+ (`uv add "weakincentives[asteval]"`) before instantiating `AstevalSection` so the
195
+ sandbox is available at runtime. Specs worth skimming:
196
+ [Session State](https://github.com/weakincentives/weakincentives/blob/main/specs/SESSIONS.md), [Prompt Event Emission](https://github.com/weakincentives/weakincentives/blob/main/specs/EVENTS.md),
197
+ [Virtual Filesystem Tools](https://github.com/weakincentives/weakincentives/blob/main/specs/VFS_TOOLS.md), [Planning Tools](https://github.com/weakincentives/weakincentives/blob/main/specs/PLANNING_TOOL.md),
198
+ and [Asteval Integration](https://github.com/weakincentives/weakincentives/blob/main/specs/ASTEVAL.md).
199
+
200
+ ```python
201
+ from pathlib import Path
202
+
203
+ from weakincentives.runtime.events import InProcessEventBus, PromptExecuted
204
+ from weakincentives.runtime.session import Session
205
+ from weakincentives.tools import (
206
+ AstevalSection,
207
+ HostMount,
208
+ PlanningToolsSection,
209
+ VfsPath,
210
+ VfsToolsSection,
211
+ )
212
+
213
+
214
+ bus = InProcessEventBus()
215
+ session = Session(bus=bus)
216
+
217
+
218
+ diff_root = Path("/srv/agent-mounts")
219
+ diff_root.mkdir(parents=True, exist_ok=True)
220
+ vfs_section = VfsToolsSection(
221
+ session=session,
222
+ allowed_host_roots=(diff_root,),
223
+ mounts=(
224
+ HostMount(
225
+ host_path="octo_widgets/cache-layer.diff",
226
+ mount_path=VfsPath(("diffs", "cache-layer.diff")),
227
+ ),
228
+ ),
229
+ )
230
+ planning_section = PlanningToolsSection(session=session)
231
+ asteval_section = AstevalSection(session=session)
232
+
233
+
234
+ def log_prompt(event: PromptExecuted) -> None:
235
+ print(
236
+ f"Prompt {event.prompt_name} completed with "
237
+ f"{len(event.result.tool_results)} tool calls"
238
+ )
239
+
240
+
241
+ bus.subscribe(PromptExecuted, log_prompt)
242
+ ```
243
+
244
+ Copy unified diff files into `/srv/agent-mounts` before launching the run. The
245
+ host mount resolves `octo_widgets/cache-layer.diff` relative to that directory
246
+ and exposes it to the agent as `diffs/cache-layer.diff` inside the virtual
247
+ filesystem snapshot. `PlanningToolsSection`, `AstevalSection`, and
248
+ `VfsToolsSection` all register reducers when constructed, so wire them up with
249
+ the `Session` you'll pass through `ToolContext` when dispatching tools.
250
+
251
+ ### 3. Define a symbol search helper tool
252
+
253
+ Tools are typed callables that return structured results. Add lightweight
254
+ helpers alongside the built-in suites—in this case, a symbol searcher that reads
255
+ from a repo mounted at `/srv/agent-repo`. Review the
256
+ [Tool Runtime Specification](https://github.com/weakincentives/weakincentives/blob/main/specs/TOOLS.md) to match the handler,
257
+ `ToolContext`, and `ToolResult` contracts.
258
+
259
+ ```python
260
+ from dataclasses import dataclass
261
+ from pathlib import Path
262
+
263
+ from weakincentives.prompt import Tool, ToolResult
264
+
265
+
266
+ @dataclass
267
+ class SymbolSearchRequest:
268
+ query: str
269
+ file_glob: str = "*.py"
270
+ max_results: int = 5
271
+
272
+
273
+ @dataclass
274
+ class SymbolMatch:
275
+ file_path: str
276
+ line: int
277
+ snippet: str
278
+
279
+
280
+ @dataclass
281
+ class SymbolSearchResult:
282
+ matches: tuple[SymbolMatch, ...]
283
+
284
+
285
+ repo_root = Path("/srv/agent-repo")
286
+
287
+
288
+ def find_symbol(params: SymbolSearchRequest) -> ToolResult[SymbolSearchResult]:
289
+ if not repo_root.exists():
290
+ raise FileNotFoundError(
291
+ "Mount a repository checkout at /srv/agent-repo before running the agent."
292
+ )
293
+
294
+ matches: list[SymbolMatch] = []
295
+ for file_path in repo_root.rglob(params.file_glob):
296
+ if not file_path.is_file():
297
+ continue
298
+ with file_path.open("r", encoding="utf-8") as handle:
299
+ for line_number, line in enumerate(handle, start=1):
300
+ if params.query in line:
301
+ matches.append(
302
+ SymbolMatch(
303
+ file_path=str(file_path.relative_to(repo_root)),
304
+ line=line_number,
305
+ snippet=line.strip(),
306
+ )
307
+ )
308
+ if len(matches) >= params.max_results:
309
+ break
310
+ if len(matches) >= params.max_results:
311
+ break
312
+
313
+ return ToolResult(
314
+ message=f"Found {len(matches)} matching snippets.",
315
+ value=SymbolSearchResult(matches=tuple(matches)),
316
+ )
317
+
318
+
319
+ symbol_search_tool = Tool[SymbolSearchRequest, SymbolSearchResult](
320
+ name="symbol_search",
321
+ description=(
322
+ "Search the repository checkout for a symbol and return file snippets."
323
+ ),
324
+ handler=find_symbol,
325
+ )
326
+ ```
327
+
328
+ Session reducers accumulate structured state across prompt and tool events.
329
+ Register a reducer that, whenever the `symbol_search` tool returns results, records
330
+ the queries the reviewer explored along with the snippets that satisfied each
331
+ one. Downstream sections can inspect this slice with
332
+ `session.select_all(ReviewedSymbol)` to summarize the investigation history.
333
+
334
+ ```python
335
+ from dataclasses import dataclass
336
+
337
+ from weakincentives.runtime.events import ToolInvoked
338
+
339
+
340
+ @dataclass
341
+ class ReviewedSymbol:
342
+ query: str
343
+ matches: tuple[SymbolMatch, ...]
344
+
345
+
346
+ def track_reviewed_symbols(
347
+ reviewed: tuple[ReviewedSymbol, ...],
348
+ event: ToolInvoked,
349
+ *,
350
+ context: object,
351
+ ) -> tuple[ReviewedSymbol, ...]:
352
+ del context
353
+ if event.value is None or not isinstance(event.value, SymbolSearchResult):
354
+ return reviewed
355
+
356
+ params = event.params
357
+ reviewed_symbol = ReviewedSymbol(
358
+ query=params.query,
359
+ matches=event.value.matches,
360
+ )
361
+ return (*reviewed, reviewed_symbol)
362
+
363
+
364
+ session.register_reducer(
365
+ SymbolSearchResult,
366
+ track_reviewed_symbols,
367
+ slice_type=ReviewedSymbol,
368
+ )
369
+ ```
370
+
371
+ Attach custom tools to sections (next step) so the adapter can call them and
372
+ record their outputs on the session alongside built-in reducers. The prompt can
373
+ now chase suspicious references without delegating work back to the orchestrator.
374
+
375
+ ### 4. Compose the prompt with deterministic sections and chapters
376
+
377
+ Sections render through `string.Template`, so keep placeholders readable and
378
+ combine guidance with the tool suites into one auditable prompt tree. Long-form
379
+ checklists or escalation playbooks often span many pages and only matter for
380
+ specialized reviews; wrap them in a chapter so adapters can toggle visibility
381
+ based on the user prompt. See the [Prompt Class](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPTS.md),
382
+ [Prompt Versioning & Persistence](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPTS_VERSIONING.md), and
383
+ [Chapters Specification](https://github.com/weakincentives/weakincentives/blob/main/specs/CHAPTERS.md) for the rendering, hashing, and
384
+ visibility rules that stabilize this structure.
385
+
386
+ ```python
387
+ from dataclasses import dataclass
388
+
389
+ from weakincentives import MarkdownSection, Prompt
390
+ from weakincentives.prompt import Chapter, ChaptersExpansionPolicy
391
+
392
+
393
+ @dataclass
394
+ class ReviewGuidance:
395
+ severity_scale: str = "minor | major | critical"
396
+ output_schema: str = "ReviewBundle with comments[] and overall_assessment"
397
+ focus_areas: str = (
398
+ "Security regressions, concurrency bugs, test coverage gaps, and"
399
+ " ambiguous logic should be escalated."
400
+ )
401
+
402
+
403
+ @dataclass
404
+ class ComplianceChapterParams:
405
+ required: bool = False
406
+ primary_jurisdictions: str = ""
407
+ regulation_matrix_summary: str = ""
408
+ escalation_contact: str = "compliance@octo.widgets"
409
+ evidence_workspace: str = "gs://audit-artifacts"
410
+
411
+
412
+ overview_section = MarkdownSection[PullRequestContext](
413
+ title="Repository Overview",
414
+ key="review.overview",
415
+ template="""
416
+ You are a principal engineer reviewing a pull request.
417
+ Repository: ${repository}
418
+ Title: ${title}
419
+
420
+ Pull request summary:
421
+ ${body}
422
+
423
+ Files touched: ${files_summary}
424
+ """,
425
+ )
426
+
427
+
428
+ analysis_section = MarkdownSection[ReviewGuidance](
429
+ title="Review Directives",
430
+ key="review.directives",
431
+ template="""
432
+ - Classify findings using this severity scale: ${severity_scale}.
433
+ - Emit output that matches ${output_schema}; missing fields fail the run.
434
+ - Investigation focus:
435
+ ${focus_areas}
436
+ - Inspect mounted diffs under `diffs/` with `vfs_read_file` before
437
+ commenting on unfamiliar hunks.
438
+ - Reach for `symbol_search` when you need surrounding context from the
439
+ repository checkout.
440
+ """,
441
+ tools=(symbol_search_tool,),
442
+ default_params=ReviewGuidance(),
443
+ )
444
+
445
+
446
+ review_prompt = Prompt[ReviewBundle](
447
+ ns="tutorial/code_review",
448
+ key="review.generate",
449
+ name="code_review_agent",
450
+ sections=(
451
+ overview_section,
452
+ planning_section,
453
+ vfs_section,
454
+ asteval_section,
455
+ analysis_section,
456
+ ),
457
+ chapters=(
458
+ Chapter[ComplianceChapterParams](
459
+ key="review.compliance",
460
+ title="Compliance Deep Dive",
461
+ description=(
462
+ "Multi-page regulations guidance that only opens when the "
463
+ "request demands a compliance audit."
464
+ ),
465
+ sections=(
466
+ MarkdownSection[ComplianceChapterParams](
467
+ title="Regulatory Background",
468
+ key="review.compliance.background",
469
+ template="""
470
+ Compliance review requested.
471
+ Focus jurisdictions: ${primary_jurisdictions}
472
+
473
+ The attached regulation matrix may span many pages. Only
474
+ cite sections that apply to this pull request.
475
+ """,
476
+ default_params=ComplianceChapterParams(),
477
+ ),
478
+ MarkdownSection[ComplianceChapterParams](
479
+ title="Compliance Checklist",
480
+ key="review.compliance.checklist",
481
+ template="""
482
+ - Summarize gaps against: ${regulation_matrix_summary}
483
+ - Escalate urgent findings to: ${escalation_contact}
484
+ - Link all evidence in: ${evidence_workspace}
485
+ """,
486
+ default_params=ComplianceChapterParams(),
487
+ ),
488
+ ),
489
+ default_params=ComplianceChapterParams(),
490
+ enabled=lambda params: params.required,
491
+ ),
492
+ ),
493
+ )
494
+
495
+
496
+ requires_compliance_review = True # derived from user metadata
497
+ compliance_params = ComplianceChapterParams(
498
+ required=requires_compliance_review,
499
+ primary_jurisdictions="SOX §404, PCI-DSS",
500
+ regulation_matrix_summary="See 12-page compliance dossier in appendix.",
501
+ evidence_workspace="gs://audit-artifacts/octo-widgets",
502
+ )
503
+
504
+ expanded_prompt = review_prompt.expand_chapters(
505
+ ChaptersExpansionPolicy.ALL_INCLUDED,
506
+ chapter_params={
507
+ "review.compliance": compliance_params,
508
+ },
509
+ )
510
+
511
+
512
+ rendered = expanded_prompt.render(
513
+ PullRequestContext(
514
+ repository="octo/widgets",
515
+ title="Add caching layer",
516
+ body="Introduces memoization to reduce redundant IO while preserving correctness.",
517
+ files_summary="loader.py, cache.py",
518
+ ),
519
+ ReviewGuidance(),
520
+ compliance_params,
521
+ )
522
+
523
+
524
+ print(rendered.text)
525
+ print([tool.name for tool in rendered.tools])
526
+ ```
527
+
528
+ Set `requires_compliance_review = False` (and skip the chapter parameters) when
529
+ the user prompt does not request a regulated-industry audit—the compliance
530
+ chapter stays closed and the oversized guidance never reaches the model.
531
+
532
+ ### 5. Evaluate the prompt with an adapter
533
+
534
+ Adapters send the rendered prompt to a provider and publish telemetry to the
535
+ event bus; the session wiring above captures `PromptExecuted` and `ToolInvoked`
536
+ events automatically. Pass the chapter-expanded prompt plus the same parameter
537
+ dataclasses you used for rendering so the adapter sees the specialized
538
+ compliance guidance. For payload formats and parsing guarantees see
539
+ [Adapter Evaluation](https://github.com/weakincentives/weakincentives/blob/main/specs/ADAPTERS.md) and
540
+ [Native OpenAI Structured Outputs](https://github.com/weakincentives/weakincentives/blob/main/specs/NATIVE_OPENAI_STRUCTURED_OUTPUTS.md).
541
+
542
+ ```python
543
+ from weakincentives.adapters.openai import OpenAIAdapter
544
+
545
+
546
+ adapter = OpenAIAdapter(
547
+ model="gpt-4o-mini",
548
+ client_kwargs={"api_key": "sk-..."},
549
+ )
550
+
551
+
552
+ response = adapter.evaluate(
553
+ expanded_prompt,
554
+ PullRequestContext(
555
+ repository="octo/widgets",
556
+ title="Add caching layer",
557
+ body="Introduces memoization to reduce redundant IO while preserving correctness.",
558
+ files_summary="loader.py, cache.py",
559
+ ),
560
+ ReviewGuidance(),
561
+ compliance_params,
562
+ bus=bus,
563
+ session=session,
564
+ )
565
+
566
+
567
+ bundle = response.output
568
+ if bundle is None:
569
+ raise RuntimeError("Structured parsing failed")
570
+
571
+
572
+ for comment in bundle.comments:
573
+ print(f"{comment.file_path}:{comment.line} → {comment.summary}")
574
+ ```
575
+
576
+ If the model omits a required field, `OpenAIAdapter` raises `PromptEvaluationError`
577
+ with provider context rather than silently degrading.
578
+
579
+ ### 6. Mine session state for downstream automation
580
+
581
+ Selectors expose reducer output so you can ship audit logs without extra
582
+ plumbing. Planning reducers keep only the latest `Plan`; register a custom
583
+ reducer before `PlanningToolsSection` if you need history. See
584
+ [Session State](https://github.com/weakincentives/weakincentives/blob/main/specs/SESSIONS.md) and
585
+ [Snapshot Capture and Rollback](https://github.com/weakincentives/weakincentives/blob/main/specs/SESSIONS.md#snapshot-capture-and-rollback)
586
+ for selector and rollback rules.
587
+
588
+ ```python
589
+ from weakincentives.runtime.session import select_latest
590
+ from weakincentives.tools import Plan, VirtualFileSystem
591
+
592
+
593
+ latest_plan = select_latest(session, Plan)
594
+ vfs_snapshot = select_latest(session, VirtualFileSystem)
595
+
596
+
597
+ if latest_plan:
598
+ print(f"Plan objective: {latest_plan.objective}")
599
+ for step in latest_plan.steps:
600
+ print(f"- [{step.status}] {step.title}")
601
+ else:
602
+ print("No plan recorded yet.")
603
+
604
+
605
+ if vfs_snapshot:
606
+ for file in vfs_snapshot.files:
607
+ print(f"Staged file {file.path.segments} (version {file.version})")
608
+ ```
609
+
610
+ ### 7. Override sections with an overrides store
611
+
612
+ Persist optimizer output so the runtime can swap in tuned sections without a
613
+ redeploy. `LocalPromptOverridesStore` is the default choice: it discovers the
614
+ workspace root, enforces descriptors, and reads JSON overrides from
615
+ `.weakincentives/prompts/overrides/`. Refer to the
616
+ [Prompt Overrides specification](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPT_OVERRIDES.md) to keep namespace,
617
+ key, and tag hashes aligned.
618
+
619
+ ```python
620
+ from pathlib import Path
621
+
622
+ from weakincentives.prompt.overrides import (
623
+ LocalPromptOverridesStore,
624
+ PromptDescriptor,
625
+ PromptOverride,
626
+ SectionOverride,
627
+ )
628
+
629
+
630
+ workspace_root = Path("/srv/agent-workspace")
631
+ overrides_store = LocalPromptOverridesStore(root_path=workspace_root)
632
+
633
+ descriptor = PromptDescriptor.from_prompt(review_prompt)
634
+ seed_override = overrides_store.seed_if_necessary(
635
+ review_prompt, tag="assertive-feedback"
636
+ )
637
+
638
+ section_path = ("review", "directives")
639
+ section_descriptor = next(
640
+ section
641
+ for section in descriptor.sections
642
+ if section.path == section_path
643
+ )
644
+
645
+ custom_override = PromptOverride(
646
+ ns=descriptor.ns,
647
+ prompt_key=descriptor.key,
648
+ tag="assertive-feedback",
649
+ sections={
650
+ **seed_override.sections,
651
+ section_path: SectionOverride(
652
+ expected_hash=section_descriptor.content_hash,
653
+ body="\n".join(
654
+ (
655
+ "- Classify findings using this severity scale: minor | major | critical.",
656
+ "- Always cite the exact diff hunk when raising a major or critical issue.",
657
+ "- Respond with ReviewBundle JSON. Missing fields terminate the run.",
658
+ )
659
+ ),
660
+ ),
661
+ },
662
+ tool_overrides=seed_override.tool_overrides,
663
+ )
664
+
665
+ persisted_override = overrides_store.upsert(descriptor, custom_override)
666
+
667
+ rendered_with_override = review_prompt.render(
668
+ PullRequestContext(
669
+ repository="octo/widgets",
670
+ title="Add caching layer",
671
+ body="Introduces memoization to reduce redundant IO while preserving correctness.",
672
+ files_summary="loader.py, cache.py",
673
+ ),
674
+ overrides_store=overrides_store,
675
+ tag=persisted_override.tag,
676
+ )
677
+
678
+
679
+ print(rendered_with_override.text)
680
+ ```
681
+
682
+ The overrides store writes atomically to
683
+ `.weakincentives/prompts/overrides/{ns}/{prompt_key}/{tag}.json` inside the
684
+ workspace described in the [Prompt Overrides specification](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPT_OVERRIDES.md).
685
+ Optimizers and prompt engineers can still drop JSON overrides into that tree by
686
+ hand—checked into source control or generated during evaluations—without
687
+ subclassing `PromptOverridesStore`. Because sections expose stable `(ns, key, path)` identifiers, overrides stay scoped to the intended content so teams can
688
+ iterate on directives without risking accidental drift elsewhere in the tree.
689
+
690
+ ### 8. Ship it
691
+
692
+ You now have a deterministic reviewer that:
693
+
694
+ 1. Enforces typed contracts for inputs, tools, and outputs.
695
+ 1. Persists plans, VFS edits, and evaluation transcripts inside a session.
696
+ 1. Supports optimizer-driven overrides that fit neatly into CI or evaluation
697
+ harnesses.
698
+
699
+ Run it inside a worker, bot, or scheduler; the captured session state keeps each
700
+ evaluation replayable. For long-lived deployments, follow the
701
+ [Prompt Overrides specification](https://github.com/weakincentives/weakincentives/blob/main/specs/PROMPT_OVERRIDES.md) to keep overrides
702
+ and tool descriptors in sync.
703
+
704
+ ## Logging
705
+
706
+ Weak Incentives ships a structured logging adapter so hosts can add contextual
707
+ metadata to every record without manual dictionary plumbing. Call
708
+ `configure_logging()` during startup to install the default handler and then
709
+ bind logger instances wherever you need telemetry:
710
+
711
+ ```python
712
+ from weakincentives.runtime.logging import configure_logging, get_logger
713
+
714
+ configure_logging(json_mode=True)
715
+ logger = get_logger("demo").bind(component="cli")
716
+ logger.info("boot", event="demo.start", context={"attempt": 1})
717
+ ```
718
+
719
+ The helper respects any existing root handlers by default, so omit `force=True` if your
720
+ application already configures logging and you only want Weak Incentives to
721
+ honor the selected level. When you do want to take over the pipeline, call
722
+ `configure_logging(..., force=True)` and then customize the root handler list
723
+ with additional sinks (for example, forwarding records to Cloud Logging or a
724
+ structured log shipper). Each emitted record contains an `event` field plus a
725
+ `context` mapping, so downstream processors can make routing decisions without
726
+ parsing raw message strings.
727
+
728
+ ## Development Setup
729
+
730
+ 1. Install Python 3.12 (for example with `pyenv install 3.12.0`).
731
+
732
+ 1. Install `uv`, then bootstrap the environment and hooks:
733
+
734
+ ```bash
735
+ uv sync
736
+ ./install-hooks.sh
737
+ ```
738
+
739
+ 1. Run checks with `uv run` (for example, `uv run make check`) so everything shares the managed virtualenv:
740
+
741
+ - `make format` / `make format-check`
742
+ - `make lint` / `make lint-fix`
743
+ - `make typecheck` (Ty + Pyright, warnings fail the build)
744
+ - `make test` (pytest via `build/run_pytest.py`, 100% coverage enforced)
745
+ - `make check` (aggregates the quiet checks above plus Bandit, Deptry, pip-audit,
746
+ and markdown linting)
747
+
748
+ ### Integration tests
749
+
750
+ Provider integrations require live credentials, so the suite stays opt-in. Export the
751
+ necessary OpenAI configuration and then run the dedicated `make` target, which disables
752
+ coverage enforcement automatically:
753
+
754
+ ```bash
755
+ export OPENAI_API_KEY="sk-your-key"
756
+ # Optionally override the default model (`gpt-4.1`).
757
+ export OPENAI_TEST_MODEL="gpt-4.1-mini"
758
+
759
+ make integration-tests
760
+ ```
761
+
762
+ `make integration-tests` forwards `--no-cov` to pytest so you can exercise the adapter
763
+ scenarios without tripping the 100% coverage gate configured for the unit test suite. The
764
+ tests remain skipped when `OPENAI_API_KEY` is not present.
765
+
766
+ ## Documentation
767
+
768
+ - `AGENTS.md` — operational handbook and contributor workflow.
769
+ - `specs/` — design docs for prompts, planning tools, and adapters.
770
+ - `ROADMAP.md` — upcoming feature sketches.
771
+ - `docs/api/` — API reference material.
772
+
773
+ ## License
774
+
775
+ Apache 2.0 • Status: Alpha (APIs may change between releases)