stratum-py 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stratum_py-0.1.0/.claude/mcp.json +7 -0
- stratum_py-0.1.0/.claude/settings.local.json +43 -0
- stratum_py-0.1.0/.gitignore +11 -0
- stratum_py-0.1.0/CHANGELOG.md +73 -0
- stratum_py-0.1.0/CLAUDE.md +92 -0
- stratum_py-0.1.0/PKG-INFO +12 -0
- stratum_py-0.1.0/README.md +225 -0
- stratum_py-0.1.0/SPEC.md +847 -0
- stratum_py-0.1.0/blog/claude-code-tutorial.md +333 -0
- stratum_py-0.1.0/blog/introducing-stratum.md +261 -0
- stratum_py-0.1.0/blog/stratum-in-claude-code.md +238 -0
- stratum_py-0.1.0/blog/stratum-in-codex.md +262 -0
- stratum_py-0.1.0/examples/01_sentiment.py +92 -0
- stratum_py-0.1.0/examples/02_migrate.py +151 -0
- stratum_py-0.1.0/examples/03_parallel.py +154 -0
- stratum_py-0.1.0/examples/04_refine.py +135 -0
- stratum_py-0.1.0/examples/05_debate.py +145 -0
- stratum_py-0.1.0/examples/06_hitl.py +179 -0
- stratum_py-0.1.0/pyproject.toml +23 -0
- stratum_py-0.1.0/src/stratum/__init__.py +120 -0
- stratum_py-0.1.0/src/stratum/_config.py +48 -0
- stratum_py-0.1.0/src/stratum/budget.py +61 -0
- stratum_py-0.1.0/src/stratum/compiler.py +160 -0
- stratum_py-0.1.0/src/stratum/concurrency.py +215 -0
- stratum_py-0.1.0/src/stratum/contracts.py +232 -0
- stratum_py-0.1.0/src/stratum/decorators.py +377 -0
- stratum_py-0.1.0/src/stratum/exceptions.py +126 -0
- stratum_py-0.1.0/src/stratum/executor.py +564 -0
- stratum_py-0.1.0/src/stratum/exporters/__init__.py +5 -0
- stratum_py-0.1.0/src/stratum/exporters/otlp.py +149 -0
- stratum_py-0.1.0/src/stratum/flow_scope.py +31 -0
- stratum_py-0.1.0/src/stratum/hitl.py +170 -0
- stratum_py-0.1.0/src/stratum/trace.py +51 -0
- stratum_py-0.1.0/src/stratum/types.py +108 -0
- stratum_py-0.1.0/stratum-mcp/pyproject.toml +23 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/__init__.py +0 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/errors.py +84 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/executor.py +301 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/server.py +287 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-debug/SKILL.md +206 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-feature/SKILL.md +169 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-learn/SKILL.md +96 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-migrate/SKILL.md +157 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-refactor/SKILL.md +166 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-review/SKILL.md +162 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/skills/stratum-test/SKILL.md +151 -0
- stratum_py-0.1.0/stratum-mcp/src/stratum_mcp/spec.py +299 -0
- stratum_py-0.1.0/stratum-mcp/tests/__init__.py +0 -0
- stratum_py-0.1.0/stratum-mcp/tests/contracts/__init__.py +0 -0
- stratum_py-0.1.0/stratum-mcp/tests/contracts/test_errors.py +50 -0
- stratum_py-0.1.0/stratum-mcp/tests/contracts/test_ir_schema.py +195 -0
- stratum_py-0.1.0/stratum-mcp/tests/integration/__init__.py +0 -0
- stratum_py-0.1.0/stratum-mcp/tests/integration/test_roundtrip.py +195 -0
- stratum_py-0.1.0/stratum-mcp/tests/integration/test_server.py +225 -0
- stratum_py-0.1.0/stratum-mcp/tests/integration/test_setup.py +214 -0
- stratum_py-0.1.0/stratum-mcp/tests/invariants/__init__.py +0 -0
- stratum_py-0.1.0/stratum-mcp/tests/invariants/test_executor.py +153 -0
- stratum_py-0.1.0/tests/test_concurrency.py +312 -0
- stratum_py-0.1.0/tests/test_contracts.py +315 -0
- stratum_py-0.1.0/tests/test_decorators.py +325 -0
- stratum_py-0.1.0/tests/test_executor.py +648 -0
- stratum_py-0.1.0/tests/test_flow_scope.py +53 -0
- stratum_py-0.1.0/tests/test_hitl.py +250 -0
- stratum_py-0.1.0/tests/test_otlp.py +176 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(tree:*)",
|
|
5
|
+
"Bash(npm install:*)",
|
|
6
|
+
"Bash(npm run build:*)",
|
|
7
|
+
"Bash(node config-loader.js:*)",
|
|
8
|
+
"Bash(curl:*)",
|
|
9
|
+
"Bash(echo \"=== PRODUCT EVALUATION ===\" echo \"\" echo \"1. CONFIG HIERARCHY \\(3 levels: Home -> reg -> project\\)\" curl -s http://localhost:3333/api/configs)",
|
|
10
|
+
"Bash(jq -r \".[] | \"\" Level: \\\\\\(.label\\) -> MCPs: \\\\\\(.config.include | join\\(\"\", \"\"\\)\\)\"\"\" echo echo '2. AGGREGATED MCPs \\(should show 5 total from all levels\\)' curl -s http://localhost:3333/api/configs curl -s http://localhost:3333/api/registry node -e \"\nconst c = require\\(\"\"/tmp/c.json\"\"\\), r = require\\(\"\"/tmp/r.json\"\"\\);\nconst mcps = new Map\\(\\);\nc.forEach\\(cfg => {\n \\(cfg.config.include||[]\\).forEach\\(n => mcps.has\\(n\\) || mcps.set\\(n, {name:n, type:\"\"registry\"\", from:cfg.label}\\)\\);\n Object.keys\\(cfg.config.mcpServers||{}\\).forEach\\(n => mcps.has\\(n\\) || mcps.set\\(n, {name:n, type:\"\"custom\"\", from:cfg.label}\\)\\);\n}\\);\nconsole.log\\(\"\" Total:\"\", mcps.size, \"\"MCPs\"\"\\);\n[...mcps.values\\(\\)].forEach\\(m => console.log\\(\"\" -\"\", m.name, \"\"\\(\"\"+m.type+\"\"\\) from\"\", m.from\\)\\);\n\" echo echo '3. RULES' curl -s http://localhost:3333/api/rules)",
|
|
11
|
+
"Bash(jq -r '.success' echo \"\" echo \"5. VERIFY: Config updated\" curl -s http://localhost:3333/api/configs)",
|
|
12
|
+
"Bash(jq '[.[] | \\(\\(.config.include | length\\) + \\(.config.mcpServers | keys | length\\)\\)] | add' __NEW_LINE_d4c713bb8c5792e3__ echo \"\" echo \"Actual unique MCPs:\" curl -s http://localhost:3333/api/configs)",
|
|
13
|
+
"Bash(jq '[.[].config.include[], \\(.[].config.mcpServers | keys[]\\)] | unique | length' __NEW_LINE_d4c713bb8c5792e3__ echo \"\" echo \"Registry has total MCPs:\" curl -s http://localhost:3333/api/registry)",
|
|
14
|
+
"Bash(jq \".[] | {dir: .dir, include: .config.include}\" echo echo '=== Generated .mcp.json \\(should have ALL including inherited\\) ===' curl -s -X POST http://localhost:3333/api/apply -H 'Content-Type: application/json' -d {} cat /Users/ruze/reg/my/project-config-system/.mcp.json)",
|
|
15
|
+
"Bash(find:*)",
|
|
16
|
+
"Bash(npm ls:*)",
|
|
17
|
+
"Bash(lsof:*)",
|
|
18
|
+
"Bash(xargs kill -9)",
|
|
19
|
+
"Bash(npm rebuild:*)",
|
|
20
|
+
"Bash(echo:*)",
|
|
21
|
+
"Bash(node -e:*)",
|
|
22
|
+
"Bash(npm view:*)",
|
|
23
|
+
"Bash(npm pack:*)",
|
|
24
|
+
"Bash(node cli.js:*)",
|
|
25
|
+
"Bash(git init:*)",
|
|
26
|
+
"Bash(git add:*)",
|
|
27
|
+
"Bash(git commit:*)",
|
|
28
|
+
"Bash(git remote add:*)",
|
|
29
|
+
"Bash(git push:*)",
|
|
30
|
+
"Bash(cat:*)",
|
|
31
|
+
"Bash(head:*)",
|
|
32
|
+
"Bash(gh repo:*)",
|
|
33
|
+
"Bash(gh label:*)",
|
|
34
|
+
"Bash(git rm -r --cached \"src/stratum/__pycache__\" \"src/stratum/exporters/__pycache__\" \"tests/__pycache__\" 2>&1 && git add .gitignore && git commit -m \"Add .gitignore, remove cached pycache files\" 2>1&)"
|
|
35
|
+
]
|
|
36
|
+
},
|
|
37
|
+
"enableAllProjectMcpServers": true,
|
|
38
|
+
"enabledMcpjsonServers": [
|
|
39
|
+
"filesystem",
|
|
40
|
+
"memory",
|
|
41
|
+
"fetch"
|
|
42
|
+
]
|
|
43
|
+
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
**MCP server (Track 2) — `stratum-mcp`**
|
|
8
|
+
|
|
9
|
+
- `stratum_validate` — validates a `.stratum.yaml` IR spec; returns `{valid, errors}`
|
|
10
|
+
- `stratum_plan` — validates a spec, creates in-memory flow execution state, returns the first step to execute with resolved inputs and output contract details
|
|
11
|
+
- `stratum_step_done` — accepts a completed step result from Claude Code, checks `ensure` postconditions, returns next step or flow completion; handles retries and exhaustion
|
|
12
|
+
- `stratum_audit` — returns per-step execution trace (attempts, duration) for an active or completed flow
|
|
13
|
+
- MCP controller model: Claude Code is the executor; the server manages plan state and enforces contracts — no sub-LLM calls, no separate API billing
|
|
14
|
+
- `FlowState` — in-memory execution state per flow: ordered steps, accumulated outputs, attempt counts, dispatch timestamps, step records
|
|
15
|
+
- `ensure` expressions evaluated by the server against Claude Code's reported output (Python expressions, dunder-blocked, SimpleNamespace-wrapped for dict access)
|
|
16
|
+
- `$.input.<field>` and `$.steps.<id>.output[.<field>]` reference resolution for chaining step outputs
|
|
17
|
+
- Kahn's topological sort on explicit `depends_on` + implicit `$.steps.*` ref dependencies
|
|
18
|
+
- `stratum-mcp setup` — one-command project configuration: writes `.claude/mcp.json` (MCP server registration), appends execution model block to `CLAUDE.md`, and installs seven Claude Code skills to `~/.claude/skills/`; idempotent, finds project root via `.git` or `CLAUDE.md`
|
|
19
|
+
- Seven Claude Code skills installed by `setup`: `stratum-review` (three-pass code review), `stratum-feature` (read → design → implement → test), `stratum-debug` (hypothesis formation and elimination), `stratum-refactor` (extraction order planning, no broken intermediate states), `stratum-migrate` (rewrite bare LLM calls as `@infer` + `@contract`), `stratum-test` (write test suite for existing code — golden flows, error-path harness), `stratum-learn` (extract patterns from session transcripts into `MEMORY.md`)
|
|
20
|
+
- Each skill contains a spec template Claude adapts internally — YAML never shown to the user; Claude narrates in plain English
|
|
21
|
+
- All skills include a `## Memory` section: read project `MEMORY.md` before writing spec (incorporate `[stratum-<skill>]` tagged patterns); write new patterns after `stratum_audit`
|
|
22
|
+
- CLI triple-mode: `stratum-mcp setup`, `stratum-mcp validate <file>`, stdio MCP transport
|
|
23
|
+
- 66 passing tests across contracts, invariants, and integration suites
|
|
24
|
+
|
|
25
|
+
**Dependencies:** `mcp>=1.0`, `jsonschema>=4.20`, `pyyaml>=6.0` — no stratum library dependency
|
|
26
|
+
|
|
27
|
+
### Architecture decision
|
|
28
|
+
|
|
29
|
+
The MCP server does not use the Track 1 stratum library at runtime. Executing infer steps via the library (litellm) would spawn separate billed API calls outside the Claude Code subscription. The MCP controller model keeps all execution inside the running Claude Code session: Claude Code writes the spec, reports step results, and the server tracks state and enforces contracts.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## [0.1.0] — 2026-02-23
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
**Core library (Track 1)**
|
|
38
|
+
|
|
39
|
+
- `@contract` — registers a pydantic `BaseModel` subclass as a typed contract; generates JSON Schema via `model_json_schema()`, stores a 12-char content hash for drift detection
|
|
40
|
+
- `@infer` — LLM-backed inference step; async-first, typed return, structured retry on `ensure` failure, budget enforcement, session cache, OTLP trace records
|
|
41
|
+
- `@compute` — deterministic step marker; function executes normally, composes identically with `@infer` at call sites
|
|
42
|
+
- `@flow` — async flow wrapper; injects `flow_id` + `Budget` clone into a `ContextVar` so nested `@infer` calls inherit them without explicit passing; session cache scoped per flow execution
|
|
43
|
+
- `@refine` — convergence loop stacked on `@infer`; iterates with feedback context until `until(result)` passes or `max_iterations` exhausted → `ConvergenceFailure`
|
|
44
|
+
- `parallel(require=)` — `"all"` / `"any"` / N / `0` modes using `asyncio.TaskGroup`; `require=0` returns `list[Success | Failure]`
|
|
45
|
+
- `race()` — alias for `parallel(require="any")`
|
|
46
|
+
- `debate()` — multi-agent structured argumentation with rebuttal rounds and a synthesizer step
|
|
47
|
+
- `await_human()` — HITL gate; suspends flow until a `ReviewSink` resolves a `PendingReview`; supports `timeout` and `on_timeout`
|
|
48
|
+
- `quorum=` on `@infer` — runs N parallel calls, asserts `threshold` agreement on `agree_on` field, returns highest-confidence agreeing result
|
|
49
|
+
- `stable=False` on `@infer` — return type becomes `Probabilistic[T]`; caller must call `.most_likely()`, `.sample()`, or `.assert_stable()`
|
|
50
|
+
- `stable=True` test mode — when `stratum.configure(test_mode=True)` is set, samples `sample_n` times and raises `StabilityAssertionError` if outputs are not unanimous
|
|
51
|
+
- `Probabilistic[T]` — wraps a sample of LLM outputs; `.most_likely()`, `.sample()`, `.assert_stable(threshold)`
|
|
52
|
+
- `Budget(ms=, usd=, tokens=)` — time + cost + token envelope; enforced via `asyncio.timeout` and LiteLLM cost tracking
|
|
53
|
+
- OTLP trace export — built-in emitter posts spans over HTTP/JSON to any OTLP endpoint; no OTel SDK dependency; `traceId` derived from `flow_id` so all `@infer` spans in a flow share a trace
|
|
54
|
+
- `opaque[T]` annotation — marks fields excluded from the tool-call schema (present in output but not constrained)
|
|
55
|
+
|
|
56
|
+
**Exceptions**
|
|
57
|
+
|
|
58
|
+
- `StratumCompileError` — static violations at decoration time
|
|
59
|
+
- `PreconditionFailed` — `given` condition false before LLM call
|
|
60
|
+
- `PostconditionFailed` — `ensure` violations after all retries
|
|
61
|
+
- `ParseFailure` — LLM output cannot be parsed against contract schema
|
|
62
|
+
- `BudgetExceeded` — time or cost budget exceeded
|
|
63
|
+
- `ConvergenceFailure` — `@refine` exhausted `max_iterations`
|
|
64
|
+
- `ConsensusFailure` — `quorum` could not reach `threshold` agreement
|
|
65
|
+
- `ParallelValidationFailed` — `parallel` `validate` callback returned False
|
|
66
|
+
- `HITLTimeoutError` — `await_human` wall-clock timeout with `on_timeout="raise"`
|
|
67
|
+
- `StabilityAssertionError` — `Probabilistic[T].assert_stable()` below threshold
|
|
68
|
+
|
|
69
|
+
### Dependencies
|
|
70
|
+
|
|
71
|
+
- `litellm>=1.0` — LLM client, multi-model routing, cost tracking
|
|
72
|
+
- `pydantic>=2.0` — required; `@contract` requires `BaseModel`
|
|
73
|
+
- Python 3.11+ — `asyncio.TaskGroup`, `asyncio.timeout`
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Status
|
|
6
|
+
|
|
7
|
+
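Implemented. Track 1 (the Python library) and Track 2 (the `stratum-mcp` MCP server) are both implemented and tested. All files under `docs/` are design notes produced through an extended design session that preceded the implementation; where they disagree with the code, the code and `SPEC.md` are authoritative.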
|
|
8
|
+
|
|
9
|
+
## What This Project Is
|
|
10
|
+
|
|
11
|
+
Stratum is a Python (and TypeScript) library where `@infer` and `@compute` functions compose identically, typed contracts flow between steps, and orchestration is always deterministic regardless of what's inside individual steps. The `.stratum.yaml` IR is what the library emits internally — developers never write it.
|
|
12
|
+
|
|
13
|
+
Two deployment tracks:
|
|
14
|
+
- **Track 1 — Python library**: `@infer`, `@contract`, `@flow` decorators. Two required dependencies (`litellm` and `pydantic`). Python 3.11+.
|
|
15
|
+
- **Track 2 — Claude Code + MCP**: Stratum as an execution runtime behind Claude Code. Two audiences: professional developers (see typed plans) and vibe coders (see plain-language summaries, get `@infer`-annotated code as output).
|
|
16
|
+
|
|
17
|
+
## Doc Structure
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
docs/library/ — Python library design (the primary product)
|
|
21
|
+
docs/claude-code/ — MCP server + Claude Code integration (Track 2)
|
|
22
|
+
docs/strategy/ — competitive analysis, go-to-market, implementation path
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Start here for implementation:**
|
|
26
|
+
- `docs/library/how-to-build.md` — project structure, executor.py, compiler.py, build sequence
|
|
27
|
+
- `docs/library/language-design.md` — semantic model: what `@infer`, `@contract`, `@flow` mean
|
|
28
|
+
- `docs/library/execution-model.md` — the full execution loop, async model, LLM client config, OTel
|
|
29
|
+
|
|
30
|
+
**Key design decisions recorded in:**
|
|
31
|
+
- `docs/library/open-problems.md` — all 16 design problems with resolution status
|
|
32
|
+
- `docs/library/type-system.md` — contracts, `Probabilistic[T]`, content hash
|
|
33
|
+
- `docs/library/concurrency-and-agents.md` — `parallel`, `debate`, isolation model, Ray upgrade path
|
|
34
|
+
|
|
35
|
+
## Architecture Decisions
|
|
36
|
+
|
|
37
|
+
**Contracts**: `@contract` registers a pydantic `BaseModel` subclass as a typed contract. Pydantic is a required dependency, not an optional backend. Stratum generates JSON Schema via pydantic's `model_json_schema()` and stores a content hash of it for drift detection.
|
|
38
|
+
|
|
39
|
+
**Async**: runtime is async-first. `@infer` and `@flow` are async natively. Sync shim via `stratum.run()`. Uses `asyncio.TaskGroup` (Python 3.11+) for `parallel`, `asyncio.timeout` for budget enforcement.
|
|
40
|
+
|
|
41
|
+
**LLM routing**: LiteLLM is the required LLM client substrate — handles multi-model routing, fallback, cost tracking. The `model:` annotation is a hint passed through to LiteLLM.
|
|
42
|
+
|
|
43
|
+
**Observability**: internal trace records always written in-memory. OTLP export via a built-in emitter (`stratum/exporters/otlp.py`) — HTTP/JSON POST to any OTLP endpoint. No OTel SDK dependency.
|
|
44
|
+
|
|
45
|
+
**Non-determinism**: `stable=True` (default) → return type is `T`. `stable=False` → return type is `Probabilistic[T]`, caller must unwrap via `.most_likely()`, `.sample()`, or `.assert_stable()`.
|
|
46
|
+
|
|
47
|
+
**Prompt optimization**: v1 uses a deterministic prompt compiler (intent + context + inputs). DSPy-backed optimization is a Phase 3 integration for teams with labeled data.
|
|
48
|
+
|
|
49
|
+
## v1 Dependencies
|
|
50
|
+
|
|
51
|
+
```toml
|
|
52
|
+
dependencies = ["litellm>=1.0"]
|
|
53
|
+
requires-python = ">=3.11"
|
|
54
|
+
|
|
55
|
+
[project.optional-dependencies]
|
|
56
|
+
pydantic = ["pydantic>=2.0"]
|
|
57
|
+
all = ["stratum[pydantic]"]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
jsonschema and pyyaml are MCP-server-only (Phase 2) — not library dependencies.
|
|
61
|
+
|
|
62
|
+
## Phase 2.5 — Decoration-time Static Analysis (not v1)
|
|
63
|
+
|
|
64
|
+
Introspection-based checks that run when decorators are applied, requiring no new syntax.
|
|
65
|
+
|
|
66
|
+
- **`ensure`/`given` field validation**: inspect `LOAD_ATTR` bytecode of lambda/callable against the contract's JSON schema fields. Raise `StratumCompileError` at decoration time if an accessed attribute doesn't exist on the return type.
|
|
67
|
+
- **Sequential independence warning**: walk the `@flow` function's AST at decoration time, identify `await` calls whose arguments have no data dependency on each other, and emit `StratumWarning` suggesting `parallel()`. Optionally auto-rewrite to `parallel()`.
|
|
68
|
+
- **Budget sufficiency warning**: sum `budget.ms` across `@infer` calls visible in the `@flow` AST; warn if the sum exceeds the flow's `budget` envelope.
|
|
69
|
+
|
|
70
|
+
Implementation note: all three have access to everything they need at decoration time — the decorator already holds the function object, its bytecode, its AST (via `inspect.getsource` + `ast.parse`), and the resolved contract schema.
|
|
71
|
+
|
|
72
|
+
## Phase 3 Integrations (not v1)
|
|
73
|
+
|
|
74
|
+
Build from observed pain, not schedule: Temporal (durable execution), Ray (distributed agents), Outlines (self-hosted constrained decoding via LiteLLM → vLLM → Outlines), DSPy (prompt optimization).
|
|
75
|
+
|
|
76
|
+
## TypeScript Library (Phase 2)
|
|
77
|
+
|
|
78
|
+
Zod for contracts, `@anthropic-ai/sdk` for LLM calls. Vercel AI SDK is an integration target for Next.js users — not the substrate.
|
|
79
|
+
|
|
80
|
+
## The IR
|
|
81
|
+
|
|
82
|
+
`.stratum.yaml` is generated by the library and by Claude (via MCP). Developers never write it. It plays the same role that LLVM IR plays for a compiler: tools target it, but nobody writes it from scratch. IR parsing/validation (jsonschema + pyyaml) is a Phase 2 concern, needed for the MCP server.
|
|
83
|
+
|
|
84
|
+
## Stratum Execution Model
|
|
85
|
+
|
|
86
|
+
For non-trivial tasks, use Stratum internally:
|
|
87
|
+
1. Write a `.stratum.yaml` spec — never show it to the user
|
|
88
|
+
2. Call `stratum_plan` to validate and get the first step
|
|
89
|
+
3. Narrate progress in plain English as you execute each step
|
|
90
|
+
4. Call `stratum_step_done` after each step — the server checks your work
|
|
91
|
+
5. If a step fails postconditions, fix it silently and retry
|
|
92
|
+
6. Call `stratum_audit` at the end and include the trace in the commit
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stratum-py
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM calls that behave like the rest of your code
|
|
5
|
+
Project-URL: Specification, https://github.com/regression-io/stratum-spec/blob/main/SPEC.md
|
|
6
|
+
Project-URL: Repository, https://github.com/regression-io/stratum
|
|
7
|
+
Requires-Python: >=3.11
|
|
8
|
+
Requires-Dist: litellm>=1.0
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
# Stratum
|
|
2
|
+
|
|
3
|
+
[Specification](https://github.com/regression-io/stratum/blob/main/SPEC.md)
|
|
4
|
+
[PyPI](https://pypi.org/project/stratum-py/)
|
|
5
|
+
|
|
6
|
+
**Stop babysitting your LLM calls.**
|
|
7
|
+
|
|
8
|
+
Stratum is a Python library where `@infer` (LLM calls) and `@compute` (normal functions) compose identically. Typed contracts flow between steps. The runtime handles retry, budget enforcement, and observability — so you don't have to wire them up yourself.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
@contract
|
|
12
|
+
class SentimentResult(BaseModel):
|
|
13
|
+
label: Literal["positive", "negative", "neutral"]
|
|
14
|
+
confidence: float
|
|
15
|
+
reasoning: str
|
|
16
|
+
|
|
17
|
+
@infer(
|
|
18
|
+
intent="Classify the emotional tone of customer feedback",
|
|
19
|
+
ensure=lambda r: r.confidence > 0.7,
|
|
20
|
+
budget=Budget(ms=500, usd=0.001),
|
|
21
|
+
retries=3,
|
|
22
|
+
)
|
|
23
|
+
def classify_sentiment(text: str) -> SentimentResult: ...
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
If the LLM returns low confidence, it gets told exactly what failed and retries with that context — not a blank replay. If it hits the budget, it stops. Every call produces a structured trace record you can query.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Two Tracks
|
|
31
|
+
|
|
32
|
+
**Track 1 — Python library** (`stratum`): `@infer`, `@contract`, `@flow` decorators for building production LLM systems. Requires Python 3.11+, `litellm`, `pydantic`.
|
|
33
|
+
|
|
34
|
+
**Track 2 — Claude Code MCP server** (`stratum-mcp`): Stratum as an execution runtime for Claude Code. Claude writes `.stratum.yaml` specs, the MCP server enforces typed contracts and postconditions, Claude narrates progress in plain English. No sub-LLM calls — all execution stays within the Claude Code session.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Track 2: Claude Code + Stratum
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install "git+https://github.com/regression-io/stratum.git#subdirectory=stratum-mcp"
|
|
42
|
+
stratum-mcp setup
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
`setup` configures Claude Code in one command: writes `.claude/mcp.json`, appends the execution model block to `CLAUDE.md`, and installs seven skills to `~/.claude/skills/`. Restart Claude Code and it's active.
|
|
46
|
+
|
|
47
|
+
**Seven skills installed automatically:**
|
|
48
|
+
|
|
49
|
+
| Skill | What it structures |
|
|
50
|
+
|---|---|
|
|
51
|
+
| `/stratum-review` | Three-pass code review: security → logic → performance → consolidate |
|
|
52
|
+
| `/stratum-feature` | Feature build: read existing patterns → design → implement → tests pass |
|
|
53
|
+
| `/stratum-debug` | Debug: read test → read code → check env → form hypotheses → confirm/rule out → fix |
|
|
54
|
+
| `/stratum-refactor` | File split: analyze → design modules → plan extraction order → extract one at a time |
|
|
55
|
+
| `/stratum-migrate` | Find bare LLM calls and rewrite as `@infer` + `@contract` with typed contracts and postconditions |
|
|
56
|
+
| `/stratum-test` | Write a test suite for existing untested code — golden flows, error-path harness, passing on first report |
|
|
57
|
+
| `/stratum-learn` | Review recent session transcripts — extract retry patterns, write project-specific conclusions to `MEMORY.md` |
|
|
58
|
+
|
|
59
|
+
Claude writes the `.stratum.yaml` spec internally — you never see it. You see plain English narration and the result. The MCP server enforces postconditions on every step; if a step's output fails a check, Claude fixes it and retries before reporting success.
|
|
60
|
+
|
|
61
|
+
Each skill reads project-specific patterns from `MEMORY.md` before writing its spec, and writes new patterns after `stratum_audit` — retry reasons, confirmed root causes, extraction order constraints. Run `/stratum-learn` periodically to extract conclusions from recent session transcripts and feed them back into future specs.
|
|
62
|
+
|
|
63
|
+
**MCP tools exposed:**
|
|
64
|
+
|
|
65
|
+
| Tool | What it does |
|
|
66
|
+
|---|---|
|
|
67
|
+
| `stratum_validate` | Validate a `.stratum.yaml` spec offline |
|
|
68
|
+
| `stratum_plan` | Validate + create execution state + return first step |
|
|
69
|
+
| `stratum_step_done` | Report a completed step; check postconditions; return next step or completion |
|
|
70
|
+
| `stratum_audit` | Return per-step trace (attempts, duration) for any flow |
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Blog
|
|
75
|
+
|
|
76
|
+
**[Introducing Stratum: LLM Calls That Behave Like the Rest of Your Code](https://github.com/regression-io/stratum/blob/main/blog/introducing-stratum.md)**
|
|
77
|
+
The design rationale — why `@infer` and `@compute` share a type, how structured retry works, and what contracts actually buy you.
|
|
78
|
+
|
|
79
|
+
**[Stratum as a Claude Code Execution Runtime](https://github.com/regression-io/stratum/blob/main/blog/stratum-in-claude-code.md)**
|
|
80
|
+
Claude Code is a capable agent improvising in a loop. This post is about giving it a formal execution model — typed plans, postcondition enforcement, auditable traces.
|
|
81
|
+
|
|
82
|
+
**[Building Software with Claude Code + Stratum: A Tutorial](https://github.com/regression-io/stratum/blob/main/blog/claude-code-tutorial.md)**
|
|
83
|
+
Real session transcripts: understanding a codebase, reviewing code, adding features, debugging CI failures, refactoring large files. Claude narrates in plain English throughout.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Why
|
|
88
|
+
|
|
89
|
+
LLM calls in production share a few recurring failure modes:
|
|
90
|
+
|
|
91
|
+
- **Retry is brute force.** Most frameworks replay the full prompt on failure. Stratum injects only the specific postcondition that failed.
|
|
92
|
+
- **Budget is an afterthought.** Soft hints don't stop a runaway `refine` loop. Stratum enforces hard limits — `BudgetExceeded` is an exception, not a bill.
|
|
93
|
+
- **Flows are opaque.** When a multi-step pipeline fails, you want to know which step, with what input, after how many retries, at what cost. Stratum traces every call structurally.
|
|
94
|
+
- **LLM steps and regular functions don't compose.** Stratum makes `@infer` and `@compute` indistinguishable by type — swap one for the other and nothing downstream changes.
|
|
95
|
+
- **Agent outputs can hijack downstream agents.** `opaque[T]` fields are passed as structured data, never inlined into instruction text.
|
|
96
|
+
- **Human-in-the-loop is a custom build every time.** `await_human` genuinely suspends execution and returns a typed `HumanDecision[T]`.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Core Concepts
|
|
101
|
+
|
|
102
|
+
### `@infer` and `@compute` are the same type
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
# Phase 1: LLM classifies tickets
|
|
106
|
+
@infer(intent="Route this support ticket", model="groq/llama-3.3-70b-versatile")
|
|
107
|
+
def route_ticket(text: str) -> TicketRoute: ...
|
|
108
|
+
|
|
109
|
+
# Phase 2: patterns emerged — swap to rules, zero other changes
|
|
110
|
+
@compute
|
|
111
|
+
async def route_ticket(text: str) -> TicketRoute:
|
|
112
|
+
return TicketRoute(team=keyword_match(text), ...)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
These have identical signatures. The `@flow` that calls `route_ticket` doesn't change. This means:
|
|
116
|
+
|
|
117
|
+
- **Testing:** Replace `@infer` calls with `@compute` stubs for deterministic tests.
|
|
118
|
+
- **Migration:** Start with LLM, replace with rules as patterns emerge. No downstream changes.
|
|
119
|
+
- **Cost control:** Swap expensive inference for fast lookup when coverage allows.
|
|
120
|
+
|
|
121
|
+
### Contracts are typed boundaries
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
@contract
|
|
125
|
+
class SentimentResult(BaseModel):
|
|
126
|
+
label: Literal["positive", "negative", "neutral"]
|
|
127
|
+
confidence: Annotated[float, Field(ge=0.0, le=1.0)]
|
|
128
|
+
reasoning: str
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
A `@contract` class compiles to JSON Schema injected into the structured outputs API. The LLM's output is validated against it before your code sees it. Every contract carries a content hash — a hash change means the compiled prompt changed and LLM behavior may have drifted.
|
|
132
|
+
|
|
133
|
+
### Retry is structured
|
|
134
|
+
|
|
135
|
+
On failure the LLM receives:
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
Previous attempt failed:
|
|
139
|
+
- ensure condition 1 failed
|
|
140
|
+
Fix these issues specifically.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Not a full prompt replay. The specific violation, nothing else.
|
|
144
|
+
|
|
145
|
+
### Flows are deterministic
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
@flow(budget=Budget(ms=5000, usd=0.01))
|
|
149
|
+
async def process_ticket(text: str) -> Resolution:
|
|
150
|
+
sentiment = await classify_sentiment(text=text)
|
|
151
|
+
response = await draft_response(text=text, sentiment=sentiment)
|
|
152
|
+
return response if rule_check(response) else escalate(text)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
`@flow` is normal Python control flow. You can read it, test it, and trace it. The orchestration shape is known before any LLM call runs.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Features
|
|
160
|
+
|
|
161
|
+
| Feature | Description |
|
|
162
|
+
|---|---|
|
|
163
|
+
| Structured retry | `ensure` postconditions drive retry with targeted failure feedback |
|
|
164
|
+
| Hard budget limits | Per-call and per-flow — `BudgetExceeded`, not a soft hint |
|
|
165
|
+
| `opaque[T]` | Field-level prompt injection protection |
|
|
166
|
+
| `await_human` | HITL as a first-class typed primitive — genuine suspension |
|
|
167
|
+
| `stratum.parallel` | Concurrent execution with `require: all/any/N/0` semantics |
|
|
168
|
+
| `quorum` | Run N times, require majority agreement |
|
|
169
|
+
| `stratum.debate` | Adversarial multi-agent synthesis with convergence detection |
|
|
170
|
+
| Full observability | Structured trace record on every call, OTLP export built-in |
|
|
171
|
+
| Two dependencies | `litellm` + `pydantic`. No OTel SDK. |
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Examples
|
|
176
|
+
|
|
177
|
+
Working examples in [`examples/`](https://github.com/regression-io/stratum/tree/main/examples):
|
|
178
|
+
|
|
179
|
+
| File | What it shows |
|
|
180
|
+
|---|---|
|
|
181
|
+
| [`01_sentiment.py`](https://github.com/regression-io/stratum/blob/main/examples/01_sentiment.py) | `@infer` + `@contract` + `@flow` + `@compute` end-to-end |
|
|
182
|
+
| [`02_migrate.py`](https://github.com/regression-io/stratum/blob/main/examples/02_migrate.py) | Migrating `@infer` → `@compute` without changing callers |
|
|
183
|
+
| [`03_parallel.py`](https://github.com/regression-io/stratum/blob/main/examples/03_parallel.py) | Three concurrent `@infer` calls with `parallel(require="all")` |
|
|
184
|
+
| [`04_refine.py`](https://github.com/regression-io/stratum/blob/main/examples/04_refine.py) | `@refine` convergence loop — iterates until quality passes |
|
|
185
|
+
| [`05_debate.py`](https://github.com/regression-io/stratum/blob/main/examples/05_debate.py) | `debate()` — two agents argue, synthesizer resolves |
|
|
186
|
+
| [`06_hitl.py`](https://github.com/regression-io/stratum/blob/main/examples/06_hitl.py) | `await_human` — human-in-the-loop approval gate |
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Install
|
|
191
|
+
|
|
192
|
+
**Track 1 — Python library:**
|
|
193
|
+
```bash
|
|
194
|
+
pip install git+https://github.com/regression-io/stratum.git#egg=stratum-py
|
|
195
|
+
```
|
|
196
|
+
Requires Python 3.11+. Set `GROQ_API_KEY`, `ANTHROPIC_API_KEY`, or any key LiteLLM supports, then specify it in `model=`.
|
|
197
|
+
|
|
198
|
+
**Track 2 — Claude Code MCP server:**
|
|
199
|
+
```bash
|
|
200
|
+
pip install "git+https://github.com/regression-io/stratum.git#subdirectory=stratum-mcp"
|
|
201
|
+
stratum-mcp setup
|
|
202
|
+
```
|
|
203
|
+
Requires Claude Code. `setup` configures everything — restart Claude Code to activate.
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Specification
|
|
208
|
+
|
|
209
|
+
[`SPEC.md`](https://github.com/regression-io/stratum/blob/main/SPEC.md) is the normative specification covering the full type system, decorator signatures, execution loop, prompt compiler, concurrency semantics, HITL protocol, budget rules, trace record schema, and error types.
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Status
|
|
214
|
+
|
|
215
|
+
**Track 1** (Python library): implemented and tested.
|
|
216
|
+
|
|
217
|
+
**Track 2** (stratum-mcp): MCP controller server implemented — `stratum_plan`, `stratum_step_done`, `stratum_audit`, `stratum_validate`. One-command setup with seven bundled skills and a memory system for project-specific pattern capture. 66 tests passing.
|
|
218
|
+
|
|
219
|
+
Questions and feedback: [GitHub Discussions](https://github.com/regression-io/stratum/discussions)
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## License
|
|
224
|
+
|
|
225
|
+
[Apache 2.0](https://github.com/regression-io/stratum/blob/main/LICENSE)
|