stratum-py 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. stratum_py-0.1.0/.claude/mcp.json +7 -0
  2. stratum_py-0.1.0/.claude/settings.local.json +43 -0
  3. stratum_py-0.1.0/.gitignore +11 -0
  4. stratum_py-0.1.0/CHANGELOG.md +73 -0
  5. stratum_py-0.1.0/CLAUDE.md +92 -0
  6. stratum_py-0.1.0/PKG-INFO +12 -0
  7. stratum_py-0.1.0/README.md +225 -0
  8. stratum_py-0.1.0/SPEC.md +847 -0
  9. stratum_py-0.1.0/blog/claude-code-tutorial.md +333 -0
  10. stratum_py-0.1.0/blog/introducing-stratum.md +261 -0
  11. stratum_py-0.1.0/blog/stratum-in-claude-code.md +238 -0
  12. stratum_py-0.1.0/blog/stratum-in-codex.md +262 -0
  13. stratum_py-0.1.0/examples/01_sentiment.py +92 -0
  14. stratum_py-0.1.0/examples/02_migrate.py +151 -0
  15. stratum_py-0.1.0/examples/03_parallel.py +154 -0
  16. stratum_py-0.1.0/examples/04_refine.py +135 -0
  17. stratum_py-0.1.0/examples/05_debate.py +145 -0
  18. stratum_py-0.1.0/examples/06_hitl.py +179 -0
  19. stratum_py-0.1.0/pyproject.toml +23 -0
  20. stratum_py-0.1.0/src/stratum/__init__.py +120 -0
  21. stratum_py-0.1.0/src/stratum/_config.py +48 -0
  22. stratum_py-0.1.0/src/stratum/budget.py +61 -0
  23. stratum_py-0.1.0/src/stratum/compiler.py +160 -0
  24. stratum_py-0.1.0/src/stratum/concurrency.py +215 -0
  25. stratum_py-0.1.0/src/stratum/contracts.py +232 -0
  26. stratum_py-0.1.0/src/stratum/decorators.py +377 -0
  27. stratum_py-0.1.0/src/stratum/exceptions.py +126 -0
  28. stratum_py-0.1.0/src/stratum/executor.py +564 -0
  29. stratum_py-0.1.0/src/stratum/exporters/__init__.py +5 -0
  30. stratum_py-0.1.0/src/stratum/exporters/otlp.py +149 -0
  31. stratum_py-0.1.0/src/stratum/flow_scope.py +31 -0
  32. stratum_py-0.1.0/src/stratum/hitl.py +170 -0
  33. stratum_py-0.1.0/src/stratum/trace.py +51 -0
  34. stratum_py-0.1.0/src/stratum/types.py +108 -0
  35. stratum_py-0.1.0/stratum-mcp/pyproject.toml +23 -0
  36. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/__init__.py +0 -0
  37. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/errors.py +84 -0
  38. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/executor.py +301 -0
  39. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/server.py +287 -0
  40. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-debug/SKILL.md +206 -0
  41. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-feature/SKILL.md +169 -0
  42. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-learn/SKILL.md +96 -0
  43. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-migrate/SKILL.md +157 -0
  44. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-refactor/SKILL.md +166 -0
  45. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-review/SKILL.md +162 -0
  46. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-test/SKILL.md +151 -0
  47. stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/spec.py +299 -0
  48. stratum_py-0.1.0/stratum-mcp/tests/__init__.py +0 -0
  49. stratum_py-0.1.0/stratum-mcp/tests/contracts/__init__.py +0 -0
  50. stratum_py-0.1.0/stratum-mcp/tests/contracts/test_errors.py +50 -0
  51. stratum_py-0.1.0/stratum-mcp/tests/contracts/test_ir_schema.py +195 -0
  52. stratum_py-0.1.0/stratum-mcp/tests/integration/__init__.py +0 -0
  53. stratum_py-0.1.0/stratum-mcp/tests/integration/test_roundtrip.py +195 -0
  54. stratum_py-0.1.0/stratum-mcp/tests/integration/test_server.py +225 -0
  55. stratum_py-0.1.0/stratum-mcp/tests/integration/test_setup.py +214 -0
  56. stratum_py-0.1.0/stratum-mcp/tests/invariants/__init__.py +0 -0
  57. stratum_py-0.1.0/stratum-mcp/tests/invariants/test_executor.py +153 -0
  58. stratum_py-0.1.0/tests/test_concurrency.py +312 -0
  59. stratum_py-0.1.0/tests/test_contracts.py +315 -0
  60. stratum_py-0.1.0/tests/test_decorators.py +325 -0
  61. stratum_py-0.1.0/tests/test_executor.py +648 -0
  62. stratum_py-0.1.0/tests/test_flow_scope.py +53 -0
  63. stratum_py-0.1.0/tests/test_hitl.py +250 -0
  64. stratum_py-0.1.0/tests/test_otlp.py +176 -0
@@ -0,0 +1,7 @@
1
+ {
2
+ "mcpServers": {
3
+ "stratum": {
4
+ "command": "stratum-mcp"
5
+ }
6
+ }
7
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(tree:*)",
5
+ "Bash(npm install:*)",
6
+ "Bash(npm run build:*)",
7
+ "Bash(node config-loader.js:*)",
8
+ "Bash(curl:*)",
9
+ "Bash(echo \"=== PRODUCT EVALUATION ===\" echo \"\" echo \"1. CONFIG HIERARCHY \\(3 levels: Home -> reg -> project\\)\" curl -s http://localhost:3333/api/configs)",
10
+ "Bash(jq -r \".[] | \"\" Level: \\\\\\(.label\\) -> MCPs: \\\\\\(.config.include | join\\(\"\", \"\"\\)\\)\"\"\" echo echo '2. AGGREGATED MCPs \\(should show 5 total from all levels\\)' curl -s http://localhost:3333/api/configs curl -s http://localhost:3333/api/registry node -e \"\nconst c = require\\(\"\"/tmp/c.json\"\"\\), r = require\\(\"\"/tmp/r.json\"\"\\);\nconst mcps = new Map\\(\\);\nc.forEach\\(cfg => {\n \\(cfg.config.include||[]\\).forEach\\(n => mcps.has\\(n\\) || mcps.set\\(n, {name:n, type:\"\"registry\"\", from:cfg.label}\\)\\);\n Object.keys\\(cfg.config.mcpServers||{}\\).forEach\\(n => mcps.has\\(n\\) || mcps.set\\(n, {name:n, type:\"\"custom\"\", from:cfg.label}\\)\\);\n}\\);\nconsole.log\\(\"\" Total:\"\", mcps.size, \"\"MCPs\"\"\\);\n[...mcps.values\\(\\)].forEach\\(m => console.log\\(\"\" -\"\", m.name, \"\"\\(\"\"+m.type+\"\"\\) from\"\", m.from\\)\\);\n\" echo echo '3. RULES' curl -s http://localhost:3333/api/rules)",
11
+ "Bash(jq -r '.success' echo \"\" echo \"5. VERIFY: Config updated\" curl -s http://localhost:3333/api/configs)",
12
+ "Bash(jq '[.[] | \\(\\(.config.include | length\\) + \\(.config.mcpServers | keys | length\\)\\)] | add' __NEW_LINE_d4c713bb8c5792e3__ echo \"\" echo \"Actual unique MCPs:\" curl -s http://localhost:3333/api/configs)",
13
+ "Bash(jq '[.[].config.include[], \\(.[].config.mcpServers | keys[]\\)] | unique | length' __NEW_LINE_d4c713bb8c5792e3__ echo \"\" echo \"Registry has total MCPs:\" curl -s http://localhost:3333/api/registry)",
14
+ "Bash(jq \".[] | {dir: .dir, include: .config.include}\" echo echo '=== Generated .mcp.json \\(should have ALL including inherited\\) ===' curl -s -X POST http://localhost:3333/api/apply -H 'Content-Type: application/json' -d {} cat /Users/ruze/reg/my/project-config-system/.mcp.json)",
15
+ "Bash(find:*)",
16
+ "Bash(npm ls:*)",
17
+ "Bash(lsof:*)",
18
+ "Bash(xargs kill -9)",
19
+ "Bash(npm rebuild:*)",
20
+ "Bash(echo:*)",
21
+ "Bash(node -e:*)",
22
+ "Bash(npm view:*)",
23
+ "Bash(npm pack:*)",
24
+ "Bash(node cli.js:*)",
25
+ "Bash(git init:*)",
26
+ "Bash(git add:*)",
27
+ "Bash(git commit:*)",
28
+ "Bash(git remote add:*)",
29
+ "Bash(git push:*)",
30
+ "Bash(cat:*)",
31
+ "Bash(head:*)",
32
+ "Bash(gh repo:*)",
33
+ "Bash(gh label:*)",
34
+ "Bash(git rm -r --cached \"src/stratum/__pycache__\" \"src/stratum/exporters/__pycache__\" \"tests/__pycache__\" 2>&1 && git add .gitignore && git commit -m \"Add .gitignore, remove cached pycache files\" 2>1&)"
35
+ ]
36
+ },
37
+ "enableAllProjectMcpServers": true,
38
+ "enabledMcpjsonServers": [
39
+ "filesystem",
40
+ "memory",
41
+ "fetch"
42
+ ]
43
+ }
@@ -0,0 +1,11 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .pytest_cache/
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ .venv/
9
+ venv/
10
+ .env
11
+ docs/
@@ -0,0 +1,73 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ### Added
6
+
7
+ **MCP server (Track 2) — `stratum-mcp`**
8
+
9
+ - `stratum_validate` — validates a `.stratum.yaml` IR spec; returns `{valid, errors}`
10
+ - `stratum_plan` — validates a spec, creates in-memory flow execution state, returns the first step to execute with resolved inputs and output contract details
11
+ - `stratum_step_done` — accepts a completed step result from Claude Code, checks `ensure` postconditions, returns next step or flow completion; handles retries and exhaustion
12
+ - `stratum_audit` — returns per-step execution trace (attempts, duration) for an active or completed flow
13
+ - MCP controller model: Claude Code is the executor; the server manages plan state and enforces contracts — no sub-LLM calls, no separate API billing
14
+ - `FlowState` — in-memory execution state per flow: ordered steps, accumulated outputs, attempt counts, dispatch timestamps, step records
15
+ - `ensure` expressions evaluated by the server against Claude Code's reported output (Python expressions, dunder-blocked, SimpleNamespace-wrapped for dict access)
16
+ - `$.input.<field>` and `$.steps.<id>.output[.<field>]` reference resolution for chaining step outputs
17
+ - Kahn's topological sort on explicit `depends_on` + implicit `$.steps.*` ref dependencies
18
+ - `stratum-mcp setup` — one-command project configuration: writes `.claude/mcp.json` (MCP server registration), appends execution model block to `CLAUDE.md`, and installs seven Claude Code skills to `~/.claude/skills/`; idempotent, finds project root via `.git` or `CLAUDE.md`
19
+ - Seven Claude Code skills installed by `setup`: `stratum-review` (three-pass code review), `stratum-feature` (read → design → implement → test), `stratum-debug` (hypothesis formation and elimination), `stratum-refactor` (extraction order planning, no broken intermediate states), `stratum-migrate` (rewrite bare LLM calls as `@infer` + `@contract`), `stratum-test` (write test suite for existing code — golden flows, error-path harness), `stratum-learn` (extract patterns from session transcripts into `MEMORY.md`)
20
+ - Each skill contains a spec template Claude adapts internally — YAML never shown to the user; Claude narrates in plain English
21
+ - All skills include a `## Memory` section: read project `MEMORY.md` before writing spec (incorporate `[stratum-<skill>]` tagged patterns); write new patterns after `stratum_audit`
22
+ - CLI triple-mode: `stratum-mcp setup`, `stratum-mcp validate <file>`, stdio MCP transport
23
+ - 66 passing tests across contracts, invariants, and integration suites
24
+
25
+ **Dependencies:** `mcp>=1.0`, `jsonschema>=4.20`, `pyyaml>=6.0` — no stratum library dependency
26
+
27
+ ### Architecture decision
28
+
29
+ The MCP server does not use the Track 1 stratum library at runtime. Executing infer steps via the library (litellm) would spawn separate billed API calls outside the Claude Code subscription. The MCP controller model keeps all execution inside the running Claude Code session: Claude Code writes the spec, reports step results, and the server tracks state and enforces contracts.
30
+
31
+ ---
32
+
33
+ ## [0.1.0] — 2026-02-23
34
+
35
+ ### Added
36
+
37
+ **Core library (Track 1)**
38
+
39
+ - `@contract` — registers a pydantic `BaseModel` subclass as a typed contract; generates JSON Schema via `model_json_schema()`, stores a 12-char content hash for drift detection
40
+ - `@infer` — LLM-backed inference step; async-first, typed return, structured retry on `ensure` failure, budget enforcement, session cache, OTLP trace records
41
+ - `@compute` — deterministic step marker; function executes normally, composes identically with `@infer` at call sites
42
+ - `@flow` — async flow wrapper; injects `flow_id` + `Budget` clone into a `ContextVar` so nested `@infer` calls inherit them without explicit passing; session cache scoped per flow execution
43
+ - `@refine` — convergence loop stacked on `@infer`; iterates with feedback context until `until(result)` passes or `max_iterations` exhausted → `ConvergenceFailure`
44
+ - `parallel(require=)` — `"all"` / `"any"` / N / `0` modes using `asyncio.TaskGroup`; `require=0` returns `list[Success | Failure]`
45
+ - `race()` — alias for `parallel(require="any")`
46
+ - `debate()` — multi-agent structured argumentation with rebuttal rounds and a synthesizer step
47
+ - `await_human()` — HITL gate; suspends flow until a `ReviewSink` resolves a `PendingReview`; supports `timeout` and `on_timeout`
48
+ - `quorum=` on `@infer` — runs N parallel calls, asserts `threshold` agreement on `agree_on` field, returns highest-confidence agreeing result
49
+ - `stable=False` on `@infer` — return type becomes `Probabilistic[T]`; caller must call `.most_likely()`, `.sample()`, or `.assert_stable()`
50
+ - `stable=True` test mode — when `stratum.configure(test_mode=True)` is set, samples `sample_n` times and raises `StabilityAssertionError` if outputs are not unanimous
51
+ - `Probabilistic[T]` — wraps a sample of LLM outputs; `.most_likely()`, `.sample()`, `.assert_stable(threshold)`
52
+ - `Budget(ms=, usd=, tokens=)` — time + cost + token envelope; enforced via `asyncio.timeout` and LiteLLM cost tracking
53
+ - OTLP trace export — built-in emitter posts spans over HTTP/JSON to any OTLP endpoint; no OTel SDK dependency; `traceId` derived from `flow_id` so all `@infer` spans in a flow share a trace
54
+ - `opaque[T]` annotation — marks fields excluded from the tool-call schema (present in output but not constrained)
55
+
56
+ **Exceptions**
57
+
58
+ - `StratumCompileError` — static violations at decoration time
59
+ - `PreconditionFailed` — `given` condition false before LLM call
60
+ - `PostconditionFailed` — `ensure` violations after all retries
61
+ - `ParseFailure` — LLM output cannot be parsed against contract schema
62
+ - `BudgetExceeded` — time or cost budget exceeded
63
+ - `ConvergenceFailure` — `@refine` exhausted `max_iterations`
64
+ - `ConsensusFailure` — `quorum` could not reach `threshold` agreement
65
+ - `ParallelValidationFailed` — `parallel` `validate` callback returned False
66
+ - `HITLTimeoutError` — `await_human` wall-clock timeout with `on_timeout="raise"`
67
+ - `StabilityAssertionError` — `Probabilistic[T].assert_stable()` below threshold
68
+
69
+ ### Dependencies
70
+
71
+ - `litellm>=1.0` — LLM client, multi-model routing, cost tracking
72
+ - `pydantic>=2.0` — required; `@contract` requires `BaseModel`
73
+ - Python 3.11+ — `asyncio.TaskGroup`, `asyncio.timeout`
@@ -0,0 +1,92 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Status
6
+
7
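+ Implemented. Track 1 (the Python library under `src/stratum/`) is implemented and tested and released as 0.1.0; Track 2 (the MCP server under `stratum-mcp/`) is implemented and tested. Files under `docs/` are design notes produced through an extended design session. Some sections below still reflect that original design and may lag the implementation; `CHANGELOG.md` records what actually shipped.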
8
+
9
+ ## What This Project Is
10
+
11
+ Stratum is a Python (and TypeScript) library where `@infer` and `@compute` functions compose identically, typed contracts flow between steps, and orchestration is always deterministic regardless of what's inside individual steps. The `.stratum.yaml` IR is what the library emits internally — developers never write it.
12
+
13
+ Two deployment tracks:
14
+ - **Track 1 — Python library**: `@infer`, `@contract`, `@flow` decorators. Two required dependencies (`litellm`, `pydantic`). Python 3.11+.
15
+ - **Track 2 — Claude Code + MCP**: Stratum as an execution runtime behind Claude Code. Two audiences: professional developers (see typed plans) and vibe coders (see plain-language summaries, get `@infer`-annotated code as output).
16
+
17
+ ## Doc Structure
18
+
19
+ ```
20
+ docs/library/ — Python library design (the primary product)
21
+ docs/claude-code/ — MCP server + Claude Code integration (Track 2)
22
+ docs/strategy/ — competitive analysis, go-to-market, implementation path
23
+ ```
24
+
25
+ **Start here for implementation:**
26
+ - `docs/library/how-to-build.md` — project structure, executor.py, compiler.py, build sequence
27
+ - `docs/library/language-design.md` — semantic model: what `@infer`, `@contract`, `@flow` mean
28
+ - `docs/library/execution-model.md` — the full execution loop, async model, LLM client config, OTel
29
+
30
+ **Key design decisions recorded in:**
31
+ - `docs/library/open-problems.md` — all 16 design problems with resolution status
32
+ - `docs/library/type-system.md` — contracts, `Probabilistic[T]`, content hash
33
+ - `docs/library/concurrency-and-agents.md` — `parallel`, `debate`, isolation model, Ray upgrade path
34
+
35
+ ## Architecture Decisions
36
+
37
+ **Contracts**: `@contract` registers a pydantic `BaseModel` subclass as a typed contract. Pydantic (`pydantic>=2.0`) is a required dependency; Stratum generates JSON Schema via `model_json_schema()`.
38
+
39
+ **Async**: runtime is async-first. `@infer` and `@flow` are async natively. Sync shim via `stratum.run()`. Uses `asyncio.TaskGroup` (Python 3.11+) for `parallel`, `asyncio.timeout` for budget enforcement.
40
+
41
+ **LLM routing**: LiteLLM is the required LLM client substrate — handles multi-model routing, fallback, cost tracking. The `model:` annotation is a hint passed through to LiteLLM.
42
+
43
+ **Observability**: internal trace records always written in-memory. OTLP export via a built-in emitter (`stratum/exporters/otlp.py`) — HTTP/JSON POST to any OTLP endpoint. No OTel SDK dependency.
44
+
45
+ **Non-determinism**: `stable=True` (default) → return type is `T`. `stable=False` → return type is `Probabilistic[T]`, caller must unwrap via `.most_likely()`, `.sample()`, or `.assert_stable()`.
46
+
47
+ **Prompt optimization**: v1 uses a deterministic prompt compiler (intent + context + inputs). DSPy-backed optimization is a Phase 3 integration for teams with labeled data.
48
+
49
+ ## v1 Dependencies
50
+
51
+ ```toml
52
+ dependencies = ["litellm>=1.0"]
53
+ requires-python = ">=3.11"
54
+
55
+ [project.optional-dependencies]
56
+ pydantic = ["pydantic>=2.0"]
57
+ all = ["stratum[pydantic]"]
58
+ ```
59
+
60
+ jsonschema and pyyaml are MCP-server-only (Phase 2) — not library dependencies.
61
+
62
+ ## Phase 2.5 — Decoration-time Static Analysis (not v1)
63
+
64
+ Introspection-based checks that run when decorators are applied, requiring no new syntax.
65
+
66
+ - **`ensure`/`given` field validation**: inspect `LOAD_ATTR` bytecode of lambda/callable against the contract's JSON schema fields. Raise `StratumCompileError` at decoration time if an accessed attribute doesn't exist on the return type.
67
+ - **Sequential independence warning**: walk the `@flow` function's AST at decoration time, identify `await` calls whose arguments have no data dependency on each other, and emit `StratumWarning` suggesting `parallel()`. Optionally auto-rewrite to `parallel()`.
68
+ - **Budget sufficiency warning**: sum `budget.ms` across `@infer` calls visible in the `@flow` AST; warn if the sum exceeds the flow's `budget` envelope.
69
+
70
+ Implementation note: all three have access to everything they need at decoration time — the decorator already holds the function object, its bytecode, its AST (via `inspect.getsource` + `ast.parse`), and the resolved contract schema.
71
+
72
+ ## Phase 3 Integrations (not v1)
73
+
74
+ Build from observed pain, not schedule: Temporal (durable execution), Ray (distributed agents), Outlines (self-hosted constrained decoding via LiteLLM → vLLM → Outlines), DSPy (prompt optimization).
75
+
76
+ ## TypeScript Library (Phase 2)
77
+
78
+ Zod for contracts, `@anthropic-ai/sdk` for LLM calls. Vercel AI SDK is an integration target for Next.js users — not the substrate.
79
+
80
+ ## The IR
81
+
82
+ `.stratum.yaml` is generated by the library and by Claude (via MCP). Developers never write it. It plays the role that LLVM IR plays for a compiler — tools target it, nobody writes it from scratch. IR parsing/validation (jsonschema + pyyaml) is a Phase 2 concern, needed for the MCP server.
83
+
84
+ ## Stratum Execution Model
85
+
86
+ For non-trivial tasks, use Stratum internally:
87
+ 1. Write a `.stratum.yaml` spec — never show it to the user
88
+ 2. Call `stratum_plan` to validate and get the first step
89
+ 3. Narrate progress in plain English as you execute each step
90
+ 4. Call `stratum_step_done` after each step — the server checks your work
91
+ 5. If a step fails postconditions, fix it silently and retry
92
+ 6. Call `stratum_audit` at the end and include the trace in the commit
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: stratum-py
3
+ Version: 0.1.0
4
+ Summary: LLM calls that behave like the rest of your code
5
+ Project-URL: Specification, https://github.com/regression-io/stratum-spec/blob/main/SPEC.md
6
+ Project-URL: Repository, https://github.com/regression-io/stratum
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: litellm>=1.0
9
+ Requires-Dist: pydantic>=2.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
12
+ Requires-Dist: pytest>=8.0; extra == 'dev'
@@ -0,0 +1,225 @@
1
+ # Stratum
2
+
3
+ [![Specification](https://img.shields.io/badge/Specification-SPEC.md-blue)](https://github.com/regression-io/stratum/blob/main/SPEC.md)
4
+ [![PyPI](https://img.shields.io/badge/PyPI-stratum--py-orange)](https://pypi.org/project/stratum-py/)
5
+
6
+ **Stop babysitting your LLM calls.**
7
+
8
+ Stratum is a Python library where `@infer` (LLM calls) and `@compute` (normal functions) compose identically. Typed contracts flow between steps. The runtime handles retry, budget enforcement, and observability — so you don't have to wire them up yourself.
9
+
10
+ ```python
11
+ @contract
12
+ class SentimentResult(BaseModel):
13
+ label: Literal["positive", "negative", "neutral"]
14
+ confidence: float
15
+ reasoning: str
16
+
17
+ @infer(
18
+ intent="Classify the emotional tone of customer feedback",
19
+ ensure=lambda r: r.confidence > 0.7,
20
+ budget=Budget(ms=500, usd=0.001),
21
+ retries=3,
22
+ )
23
+ def classify_sentiment(text: str) -> SentimentResult: ...
24
+ ```
25
+
26
+ If the LLM returns low confidence, it gets told exactly what failed and retries with that context — not a blank replay. If it hits the budget, it stops. Every call produces a structured trace record you can query.
27
+
28
+ ---
29
+
30
+ ## Two Tracks
31
+
32
+ **Track 1 — Python library** (`stratum`): `@infer`, `@contract`, `@flow` decorators for building production LLM systems. Requires Python 3.11+, `litellm`, `pydantic`.
33
+
34
+ **Track 2 — Claude Code MCP server** (`stratum-mcp`): Stratum as an execution runtime for Claude Code. Claude writes `.stratum.yaml` specs, the MCP server enforces typed contracts and postconditions, Claude narrates progress in plain English. No sub-LLM calls — all execution stays within the Claude Code session.
35
+
36
+ ---
37
+
38
+ ## Track 2: Claude Code + Stratum
39
+
40
+ ```bash
41
+ pip install "git+https://github.com/regression-io/stratum.git#subdirectory=stratum-mcp"
42
+ stratum-mcp setup
43
+ ```
44
+
45
+ `setup` configures Claude Code in one command: writes `.claude/mcp.json`, appends the execution model block to `CLAUDE.md`, and installs seven skills to `~/.claude/skills/`. Restart Claude Code and it's active.
46
+
47
+ **Seven skills installed automatically:**
48
+
49
+ | Skill | What it structures |
50
+ |---|---|
51
+ | `/stratum-review` | Three-pass code review: security → logic → performance → consolidate |
52
+ | `/stratum-feature` | Feature build: read existing patterns → design → implement → tests pass |
53
+ | `/stratum-debug` | Debug: read test → read code → check env → form hypotheses → confirm/rule out → fix |
54
+ | `/stratum-refactor` | File split: analyze → design modules → plan extraction order → extract one at a time |
55
+ | `/stratum-migrate` | Find bare LLM calls and rewrite as `@infer` + `@contract` with typed contracts and postconditions |
56
+ | `/stratum-test` | Write a test suite for existing untested code — golden flows, error-path harness, passing on first report |
57
+ | `/stratum-learn` | Review recent session transcripts — extract retry patterns, write project-specific conclusions to `MEMORY.md` |
58
+
59
+ Claude writes the `.stratum.yaml` spec internally — you never see it. You see plain English narration and the result. The MCP server enforces postconditions on every step; if a step's output fails a check, Claude fixes it and retries before reporting success.
60
+
61
+ Each skill reads project-specific patterns from `MEMORY.md` before writing its spec, and writes new patterns after `stratum_audit` — retry reasons, confirmed root causes, extraction order constraints. Run `/stratum-learn` periodically to extract conclusions from recent session transcripts and feed them back into future specs.
62
+
63
+ **MCP tools exposed:**
64
+
65
+ | Tool | What it does |
66
+ |---|---|
67
+ | `stratum_validate` | Validate a `.stratum.yaml` spec offline |
68
+ | `stratum_plan` | Validate + create execution state + return first step |
69
+ | `stratum_step_done` | Report a completed step; check postconditions; return next step or completion |
70
+ | `stratum_audit` | Return per-step trace (attempts, duration) for any flow |
71
+
72
+ ---
73
+
74
+ ## Blog
75
+
76
+ **[Introducing Stratum: LLM Calls That Behave Like the Rest of Your Code](https://github.com/regression-io/stratum/blob/main/blog/introducing-stratum.md)**
77
+ The design rationale — why `@infer` and `@compute` share a type, how structured retry works, and what contracts actually buy you.
78
+
79
+ **[Stratum as a Claude Code Execution Runtime](https://github.com/regression-io/stratum/blob/main/blog/stratum-in-claude-code.md)**
80
+ Claude Code is a capable agent improvising in a loop. This post is about giving it a formal execution model — typed plans, postcondition enforcement, auditable traces.
81
+
82
+ **[Building Software with Claude Code + Stratum: A Tutorial](https://github.com/regression-io/stratum/blob/main/blog/claude-code-tutorial.md)**
83
+ Real session transcripts: understanding a codebase, reviewing code, adding features, debugging CI failures, refactoring large files. Claude narrates in plain English throughout.
84
+
85
+ ---
86
+
87
+ ## Why
88
+
89
+ LLM calls in production share a few recurring failure modes:
90
+
91
+ - **Retry is brute force.** Most frameworks replay the full prompt on failure. Stratum injects only the specific postcondition that failed.
92
+ - **Budget is an afterthought.** Soft hints don't stop a runaway `refine` loop. Stratum enforces hard limits — `BudgetExceeded` is an exception, not a bill.
93
+ - **Flows are opaque.** When a multi-step pipeline fails, you want to know which step, with what input, after how many retries, at what cost. Stratum traces every call structurally.
94
+ - **LLM steps and regular functions don't compose.** Stratum makes `@infer` and `@compute` indistinguishable by type — swap one for the other and nothing downstream changes.
95
+ - **Agent outputs can hijack downstream agents.** `opaque[T]` fields are passed as structured data, never inlined into instruction text.
96
+ - **Human-in-the-loop is a custom build every time.** `await_human` genuinely suspends execution and returns a typed `HumanDecision[T]`.
97
+
98
+ ---
99
+
100
+ ## Core Concepts
101
+
102
+ ### `@infer` and `@compute` are the same type
103
+
104
+ ```python
105
+ # Phase 1: LLM classifies tickets
106
+ @infer(intent="Route this support ticket", model="groq/llama-3.3-70b-versatile")
107
+ def route_ticket(text: str) -> TicketRoute: ...
108
+
109
+ # Phase 2: patterns emerged — swap to rules, zero other changes
110
+ @compute
111
+ async def route_ticket(text: str) -> TicketRoute:
112
+ return TicketRoute(team=keyword_match(text), ...)
113
+ ```
114
+
115
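+ At the call site these are identical — both are awaited with the same arguments and return a `TicketRoute` — even though the `@infer` stub is written as a plain `def`. The `@flow` that calls `route_ticket` doesn't change. This means: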
116
+
117
+ - **Testing:** Replace `@infer` calls with `@compute` stubs for deterministic tests.
118
+ - **Migration:** Start with LLM, replace with rules as patterns emerge. No downstream changes.
119
+ - **Cost control:** Swap expensive inference for fast lookup when coverage allows.
120
+
121
+ ### Contracts are typed boundaries
122
+
123
+ ```python
124
+ @contract
125
+ class SentimentResult(BaseModel):
126
+ label: Literal["positive", "negative", "neutral"]
127
+ confidence: Annotated[float, Field(ge=0.0, le=1.0)]
128
+ reasoning: str
129
+ ```
130
+
131
+ A `@contract` class compiles to JSON Schema injected into the structured outputs API. The LLM's output is validated against it before your code sees it. Every contract carries a content hash — a hash change means the compiled prompt changed and LLM behavior may have drifted.
132
+
133
+ ### Retry is structured
134
+
135
+ On failure the LLM receives:
136
+
137
+ ```
138
+ Previous attempt failed:
139
+ - ensure condition 1 failed
140
+ Fix these issues specifically.
141
+ ```
142
+
143
+ Not a full prompt replay. The specific violation, nothing else.
144
+
145
+ ### Flows are deterministic
146
+
147
+ ```python
148
+ @flow(budget=Budget(ms=5000, usd=0.01))
149
+ async def process_ticket(text: str) -> Resolution:
150
+ sentiment = await classify_sentiment(text=text)
151
+ response = await draft_response(text=text, sentiment=sentiment)
152
+ return response if rule_check(response) else escalate(text)
153
+ ```
154
+
155
+ `@flow` is normal Python control flow. You can read it, test it, and trace it. The orchestration shape is known before any LLM call runs.
156
+
157
+ ---
158
+
159
+ ## Features
160
+
161
+ | Feature | Description |
162
+ |---|---|
163
+ | Structured retry | `ensure` postconditions drive retry with targeted failure feedback |
164
+ | Hard budget limits | Per-call and per-flow — `BudgetExceeded`, not a soft hint |
165
+ | `opaque[T]` | Field-level prompt injection protection |
166
+ | `await_human` | HITL as a first-class typed primitive — genuine suspension |
167
+ | `stratum.parallel` | Concurrent execution with `require: all/any/N/0` semantics |
168
+ | `quorum` | Run N times, require majority agreement |
169
+ | `stratum.debate` | Adversarial multi-agent synthesis with convergence detection |
170
+ | Full observability | Structured trace record on every call, OTLP export built-in |
171
+ | Two dependencies | `litellm` + `pydantic`. No OTel SDK. |
172
+
173
+ ---
174
+
175
+ ## Examples
176
+
177
+ Working examples in [`examples/`](https://github.com/regression-io/stratum/tree/main/examples):
178
+
179
+ | File | What it shows |
180
+ |---|---|
181
+ | [`01_sentiment.py`](https://github.com/regression-io/stratum/blob/main/examples/01_sentiment.py) | `@infer` + `@contract` + `@flow` + `@compute` end-to-end |
182
+ | [`02_migrate.py`](https://github.com/regression-io/stratum/blob/main/examples/02_migrate.py) | Migrating `@infer` → `@compute` without changing callers |
183
+ | [`03_parallel.py`](https://github.com/regression-io/stratum/blob/main/examples/03_parallel.py) | Three concurrent `@infer` calls with `parallel(require="all")` |
184
+ | [`04_refine.py`](https://github.com/regression-io/stratum/blob/main/examples/04_refine.py) | `@refine` convergence loop — iterates until quality passes |
185
+ | [`05_debate.py`](https://github.com/regression-io/stratum/blob/main/examples/05_debate.py) | `debate()` — two agents argue, synthesizer resolves |
186
+ | [`06_hitl.py`](https://github.com/regression-io/stratum/blob/main/examples/06_hitl.py) | `await_human` — human-in-the-loop approval gate |
187
+
188
+ ---
189
+
190
+ ## Install
191
+
192
+ **Track 1 — Python library:**
193
+ ```bash
194
+ pip install "git+https://github.com/regression-io/stratum.git#egg=stratum-py"
195
+ ```
196
+ Requires Python 3.11+. Set `GROQ_API_KEY`, `ANTHROPIC_API_KEY`, or any key LiteLLM supports, then specify it in `model=`.
197
+
198
+ **Track 2 — Claude Code MCP server:**
199
+ ```bash
200
+ pip install "git+https://github.com/regression-io/stratum.git#subdirectory=stratum-mcp"
201
+ stratum-mcp setup
202
+ ```
203
+ Requires Claude Code. `setup` configures everything — restart Claude Code to activate.
204
+
205
+ ---
206
+
207
+ ## Specification
208
+
209
+ [`SPEC.md`](https://github.com/regression-io/stratum/blob/main/SPEC.md) is the normative specification covering the full type system, decorator signatures, execution loop, prompt compiler, concurrency semantics, HITL protocol, budget rules, trace record schema, and error types.
210
+
211
+ ---
212
+
213
+ ## Status
214
+
215
+ **Track 1** (Python library): implemented and tested.
216
+
217
+ **Track 2** (stratum-mcp): MCP controller server implemented — `stratum_plan`, `stratum_step_done`, `stratum_audit`, `stratum_validate`. One-command setup with seven bundled skills and a memory system for project-specific pattern capture. 66 tests passing.
218
+
219
+ Questions and feedback: [GitHub Discussions](https://github.com/regression-io/stratum/discussions)
220
+
221
+ ---
222
+
223
+ ## License
224
+
225
+ [Apache 2.0](https://github.com/regression-io/stratum/blob/main/LICENSE)