PyPI - tilth - Versions diffs - 0.1.0__tar.gz - Mend

tilth 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

tilth-0.1.0/.claude/settings.json +14 -0
tilth-0.1.0/.env.example +43 -0
tilth-0.1.0/.github/workflows/docs.yml +23 -0
tilth-0.1.0/.github/workflows/release.yml +36 -0
tilth-0.1.0/.gitignore +47 -0
tilth-0.1.0/CLAUDE.md +218 -0
tilth-0.1.0/LICENSE +21 -0
tilth-0.1.0/PKG-INFO +143 -0
tilth-0.1.0/README.md +112 -0
tilth-0.1.0/docs/architecture/agent-visibility.md +56 -0
tilth-0.1.0/docs/architecture/anatomy-of-a-run.md +79 -0
tilth-0.1.0/docs/architecture/memory-channels.md +93 -0
tilth-0.1.0/docs/architecture/overview.md +78 -0
tilth-0.1.0/docs/assets/IMAGE_STYLE.md +92 -0
tilth-0.1.0/docs/assets/SITE_STYLE.md +116 -0
tilth-0.1.0/docs/assets/anatomy-of-a-run.png +0 -0
tilth-0.1.0/docs/assets/brain-hands-session.png +0 -0
tilth-0.1.0/docs/assets/harness-loop.png +0 -0
tilth-0.1.0/docs/assets/iter-cap-and-summary.png +0 -0
tilth-0.1.0/docs/assets/per-task-lifecycle.png +0 -0
tilth-0.1.0/docs/assets/resume-after-iter-cap.png +0 -0
tilth-0.1.0/docs/assets/session-end.png +0 -0
tilth-0.1.0/docs/assets/session-layout.png +0 -0
tilth-0.1.0/docs/assets/session-render.png +0 -0
tilth-0.1.0/docs/deep-dives/hyper-observability.md +134 -0
tilth-0.1.0/docs/deep-dives/index.md +14 -0
tilth-0.1.0/docs/deep-dives/session-layout.md +56 -0
tilth-0.1.0/docs/deep-dives/task-format.md +110 -0
tilth-0.1.0/docs/deep-dives/token-recording.md +88 -0
tilth-0.1.0/docs/deep-dives/two-loops.md +136 -0
tilth-0.1.0/docs/deep-dives/worker-evaluator-dialogue.md +134 -0
tilth-0.1.0/docs/getting-started/installation.md +102 -0
tilth-0.1.0/docs/getting-started/resuming-and-resetting.md +83 -0
tilth-0.1.0/docs/getting-started/running-the-demo.md +105 -0
tilth-0.1.0/docs/getting-started/visualizing.md +46 -0
tilth-0.1.0/docs/getting-started/your-own-project.md +81 -0
tilth-0.1.0/docs/index.md +59 -0
tilth-0.1.0/docs/reference/releasing.md +99 -0
tilth-0.1.0/docs/reference/safety-guards.md +47 -0
tilth-0.1.0/docs/stylesheets/extra.css +378 -0
tilth-0.1.0/documents/session-render.png +0 -0
tilth-0.1.0/mkdocs.yml +203 -0
tilth-0.1.0/proposals/completed/frictions-2026-05-26.md +214 -0
tilth-0.1.0/proposals/completed/prep-feature.md +296 -0
tilth-0.1.0/proposals/completed/v1-implementation-plan.md +389 -0
tilth-0.1.0/proposals/completed/v1-worker-evaluator-dialogue.md +198 -0
tilth-0.1.0/proposals/observability-dashboard/README.md +61 -0
tilth-0.1.0/proposals/observability-dashboard/mockup-a-ledger.html +642 -0
tilth-0.1.0/proposals/observability-dashboard/mockup-b-flight-recorder.html +638 -0
tilth-0.1.0/proposals/probes/phase1_verdict_tool_call_probe.py +319 -0
tilth-0.1.0/pyproject.toml +93 -0
tilth-0.1.0/tests/test_assistant_history_message.py +73 -0
tilth-0.1.0/tests/test_case_parsing.py +185 -0
tilth-0.1.0/tests/test_cli_router.py +156 -0
tilth-0.1.0/tests/test_client_routing.py +172 -0
tilth-0.1.0/tests/test_config.py +110 -0
tilth-0.1.0/tests/test_context_files.py +142 -0
tilth-0.1.0/tests/test_dispatch_hook_runs.py +59 -0
tilth-0.1.0/tests/test_env_template_no_drift.py +20 -0
tilth-0.1.0/tests/test_evaluator_raw_capture.py +60 -0
tilth-0.1.0/tests/test_evaluator_sees_ledger.py +69 -0
tilth-0.1.0/tests/test_evaluator_verdict_parsing.py +255 -0
tilth-0.1.0/tests/test_feature_dir.py +88 -0
tilth-0.1.0/tests/test_files_path_escape.py +61 -0
tilth-0.1.0/tests/test_finish_reason.py +50 -0
tilth-0.1.0/tests/test_info_config.py +216 -0
tilth-0.1.0/tests/test_init_cmd.py +56 -0
tilth-0.1.0/tests/test_iter_events.py +60 -0
tilth-0.1.0/tests/test_ledger_includes_case.py +36 -0
tilth-0.1.0/tests/test_ledger_io.py +80 -0
tilth-0.1.0/tests/test_ledger_resume.py +33 -0
tilth-0.1.0/tests/test_limit_utilization.py +125 -0
tilth-0.1.0/tests/test_loop_provider_health.py +255 -0
tilth-0.1.0/tests/test_loop_structured_feedback.py +92 -0
tilth-0.1.0/tests/test_memory_manifest.py +105 -0
tilth-0.1.0/tests/test_missing_config.py +67 -0
tilth-0.1.0/tests/test_no_harness_filenames_in_agent_text.py +149 -0
tilth-0.1.0/tests/test_overview_injection.py +55 -0
tilth-0.1.0/tests/test_parse_json_lenient.py +53 -0
tilth-0.1.0/tests/test_paths.py +136 -0
tilth-0.1.0/tests/test_reasoning_request.py +75 -0
tilth-0.1.0/tests/test_response_health.py +138 -0
tilth-0.1.0/tests/test_run_end_to_end.py +157 -0
tilth-0.1.0/tests/test_run_requires_tasks.py +40 -0
tilth-0.1.0/tests/test_session_source_field.py +58 -0
tilth-0.1.0/tests/test_session_status.py +62 -0
tilth-0.1.0/tests/test_session_usage.py +116 -0
tilth-0.1.0/tests/test_summary.py +313 -0
tilth-0.1.0/tests/test_summary_rejection_category_counts.py +120 -0
tilth-0.1.0/tests/test_task_status.py +62 -0
tilth-0.1.0/tests/test_tasks_loading.py +211 -0
tilth-0.1.0/tests/test_trace_span_ids.py +24 -0
tilth-0.1.0/tests/test_usage.py +140 -0
tilth-0.1.0/tests/test_visualize_dashboard.py +252 -0
tilth-0.1.0/tests/test_visualize_server.py +367 -0
tilth-0.1.0/tests/test_worker_case_loop.py +51 -0
tilth-0.1.0/tests/test_worker_prompt_contains_full_prd.py +120 -0
tilth-0.1.0/tests/test_worker_prompt_contains_own_ledger.py +85 -0
tilth-0.1.0/tests/test_workspace.py +73 -0
tilth-0.1.0/tilth/__init__.py +5 -0
tilth-0.1.0/tilth/case.py +271 -0
tilth-0.1.0/tilth/cli.py +242 -0
tilth-0.1.0/tilth/client.py +309 -0
tilth-0.1.0/tilth/data/env.example +43 -0
tilth-0.1.0/tilth/hooks/__init__.py +17 -0
tilth-0.1.0/tilth/hooks/pre_tool.py +39 -0
tilth-0.1.0/tilth/loop.py +1833 -0
tilth-0.1.0/tilth/memory.py +338 -0
tilth-0.1.0/tilth/paths.py +60 -0
tilth-0.1.0/tilth/prompts/evaluator.md +70 -0
tilth-0.1.0/tilth/prompts/system.md +33 -0
tilth-0.1.0/tilth/session.py +404 -0
tilth-0.1.0/tilth/summary.py +216 -0
tilth-0.1.0/tilth/tasks.py +291 -0
tilth-0.1.0/tilth/tools/__init__.py +78 -0
tilth-0.1.0/tilth/tools/bash.py +64 -0
tilth-0.1.0/tilth/tools/files.py +139 -0
tilth-0.1.0/tilth/tools/search.py +106 -0
tilth-0.1.0/tilth/usage.py +143 -0
tilth-0.1.0/tilth/verdict.py +311 -0
tilth-0.1.0/tilth/visualize/__init__.py +14 -0
tilth-0.1.0/tilth/visualize/app.js +736 -0
tilth-0.1.0/tilth/visualize/render.py +425 -0
tilth-0.1.0/tilth/visualize/server.py +332 -0
tilth-0.1.0/tilth/visualize/theme.css +807 -0
tilth-0.1.0/tilth/visualize/theme.py +179 -0
tilth-0.1.0/tilth/workspace.py +259 -0
tilth-0.1.0/uv.lock +987 -0

tilth-0.1.0/.claude/settings.json ADDED Viewed

@@ -0,0 +1,14 @@
+{
+  "hooks": {
+    "InstructionsLoaded": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "jq -r '({\"User\":\"👤\",\"Project\":\"📁\",\"Local\":\"📍\",\"Plugin\":\"🔌\"}[.memory_type] // \"📄\") as $e | \"\\($e) \\(.memory_type) \\(.file_path) \\(.load_reason)\"' >> instructions-loaded.log"
+          }
+        ]
+      }
+    ]
+  }
+}

tilth-0.1.0/.env.example ADDED Viewed

@@ -0,0 +1,43 @@
+# Provider configuration. Tilth talks to an OpenAI-compatible endpoint.
+# All three of these are REQUIRED — Tilth refuses to start without them.
+# Tilth is tested against OpenRouter today; other OpenAI-flavour gateways
+# should work via the OpenAI SDK but haven't been validated yet.
+TILTH_BASE_URL=https://openrouter.ai/api/v1
+TILTH_API_KEY=sk-or-your_api_key_here
+TILTH_WORKER_MODEL=deepseek/deepseek-v4-flash
+# Evaluator model. Defaults to the worker model if TILTH_EVALUATOR_MODEL is unset.
+#
+# Routing the evaluator to a *different provider* (set TILTH_EVALUATOR_BASE_URL +
+# TILTH_EVALUATOR_API_KEY) is mostly about INDEPENDENCE, not cost. Cross-family
+# judging (e.g. open-source worker, frontier closed evaluator) catches failure
+# modes a same-family evaluator will miss.
+#
+# As a rule, the evaluator should be AT LEAST as capable as the worker — for code
+# diff review, a weaker evaluator silently accepts bad work because it doesn't
+# notice the problem. "Cheap evaluator" only works for shallow checks (binary,
+# regex, policy gates), not for correctness gating in a Ralph loop.
+TILTH_EVALUATOR_MODEL=deepseek/deepseek-v4-pro
+# Project-context files. The worker and evaluator read these from the workspace
+# root (in order, concatenated) as the project-conventions channel — Tilth never
+# writes them. Comma-separated; defaults to AGENTS.md,CLAUDE.md so both the
+# emerging AGENTS.md standard and existing Claude Code repos work out of the box.
+# TILTH_CONTEXT_FILES=AGENTS.md,CLAUDE.md
+# Safety caps.
+TILTH_MAX_ITERATIONS_PER_TASK=32
+TILTH_MAX_WALL_CLOCK_MINUTES=120
+# Cumulative USD spend cap for the whole session (worker + evaluator), checked
+# between tasks. Read from the provider's own per-call `cost` figure — verified
+# against OpenRouter; gateways that don't report cost leave this at $0 spent, so
+# the dollar cap never trips for them and wall-clock is the backstop.
+TILTH_MAX_TOKEN_DOLLAR_SPEND=10.00
+# Optional cap on evaluator calls per task. The evaluator runs every time the
+# worker submits a case ("done"); a stuck task can ping-pong worker↔evaluator and
+# burn the whole spend budget on one task. Set this to halt a task after N
+# evaluator rejections (the task is marked failed, the run stops). Unset or
+# 0 = unlimited.
+# MAX_EVALUATOR_CALLS_PER_TASK=3

tilth-0.1.0/.github/workflows/docs.yml ADDED Viewed

@@ -0,0 +1,23 @@
+name: docs
+on:
+  push:
+    branches: [main]
+permissions:
+  contents: write
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Configure Git Credentials
+        run: |
+          git config user.name  "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv sync --extra docs
+      # Strict gate: broken nav refs / dead relative links fail CI here rather
+      # than publishing silently. Mirrors the local `mkdocs build --strict`.
+      - name: Strict build (gate)
+        run: uv run mkdocs build --strict --site-dir /tmp/tilth-site
+      - name: Deploy to gh-pages
+        run: uv run mkdocs gh-deploy --force

tilth-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,36 @@
+name: Release to PyPI
+# Publishes the package to PyPI when a GitHub Release is published.
+# Tag the release `vX.Y.Z` and match it to `version` in pyproject.toml.
+#
+# Authentication is PyPI Trusted Publishing (OIDC) — no API token is stored
+# anywhere. One-time setup: add this repo + workflow as a "pending publisher"
+# on PyPI before the first release. See docs/reference/releasing.md.
+on:
+  release:
+    types: [published]
+permissions:
+  id-token: write   # required for OIDC trusted publishing
+  contents: read
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi   # configure this environment on the repo (optional gate)
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+      - name: Build sdist and wheel
+        run: uv build
+      - name: Verify the wheel exposes the tilth entry point
+        run: uv run --isolated --no-project --with dist/*.whl tilth --help
+      - name: Publish to PyPI
+        run: uv publish

tilth-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,47 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+dist/
+*.egg-info/
+.venv/
+venv/
+# Environments
+.env
+.env.local
+# Sessions (per-run state, large jsonl files, worktrees)
+sessions/
+!sessions/.gitkeep
+# `examples/seed-reference/` is tracked — frozen seeds captured as teaching
+# artifacts (see examples/seed-reference/todo-cli/README.md). Anything else
+# under examples/ is treated as a safety net for stray clones, since the
+# canonical demo path is {{your projects folder}}/tilth-demo (sibling of
+# Tilth). Demo source: https://github.com/AlteredCraft/tilth-demo-todo-cli
+examples/*
+!examples/seed-reference/
+# Editor
+.vscode/
+.idea/
+*.swp
+*.swo
+.DS_Store
+# Tooling
+.mypy_cache/
+.ruff_cache/
+.pytest_cache/
+.coverage
+htmlcov/
+# MkDocs build output
+site/
+# Claude Code (per-clone observability artefacts)
+instructions-loaded.log

tilth-0.1.0/CLAUDE.md ADDED Viewed

@@ -0,0 +1,218 @@
+# CLAUDE.md
+Guidance for Claude Code (claude.ai/code) when working in this repo.
+## What this is
+A minimal long-running agent harness against any OpenAI-compatible LLM endpoint. It implements the Brain / Hands / Session split, the Ralph loop, and the four memory channels from Addy Osmani's posts on long-running agents (plus a fifth Tilth adds — the per-task evaluator ledger). Built as both a working tool and the practical centerpiece of an Altered Craft article.
+The ultimate goal is a minimal, productive agent harness with **hyper-observability**: every prompt the harness sends is accessible and adaptable, and every run is fully inspectable after the fact. The [Visualizing a session](docs/getting-started/visualizing.md) page is an early example of that extended observability — every run browsable as a chat-style web app, tailing an active loop in near-realtime or replaying a finished one end-to-end from its `events.jsonl`.
+## Where to look first
+- **`mkdocs.yml`** — **the canonical map of the documentation set**, and your primary entry point when looking for docs by topic. The `nav:` block has a one- to four-line comment above each leaf entry summarising what the linked `.md` covers and when you'd reach for it; skim those comments first, then open the page that fits. Everything that matters for users and contributors lives under `docs/`; `README.md` is the GitHub landing page and points into `docs/` for anything beyond the elevator pitch.
+- **`README.md`** — terse GitHub landing page: product elevator pitch (with the Brain/Hands/Session image), a minimal quickstart, and the working-with-the-codebase commands (lint, tests, docs). **Not a mirror** of `docs/index.md` — for any product detail beyond the pitch, README points readers into `docs/`. Edit the two files independently.
+- **`docs/getting-started/your-own-project.md`** — the "honest version" of using Tilth on a non-demo codebase: what prep your repo actually needs (a clean git repo, optionally an `AGENTS.md`), authoring the feature as markdown in a named feature dir under `.tilth/<feature>/` (you write it by hand, or have a model fill the templates — there's no interview/seed step), picking an evaluator model, the caveats that aren't obvious from a demo run, and when it's the wrong tool. (Successor to the old root-level `USAGE.md`.)
+- **`docs/deep-dives/`** — code-level walk-throughs of the two loops, the worker↔evaluator dialogue (case / verdict / ledger), iteration accounting, token recording/enforcement, and the agent-visibility boundary. Read this before changing any of those mechanics. (Successor to the old root-level `deep-dives.md`.)
+- **The demo workspace** — lives in its own repo at [`AlteredCraft/tilth-demo-todo-cli`](https://github.com/AlteredCraft/tilth-demo-todo-cli). The docs use `~/projects/tilth-demo` as an illustrative path, but Tilth treats the path as just an argument so any layout works.
+- **`docs/assets/IMAGE_STYLE.md`** — the prompt scaffold for generating new docs *images*, anchored to the canonical `brain-hands-session.png`. Use this whenever you generate a new diagram or illustration so the visual voice stays consistent across pages. Not in the published nav (excluded via `not_in_nav` in `mkdocs.yml`).
+- **`docs/assets/SITE_STYLE.md`** — the visual identity for the rendered docs *site* (Material for MkDocs + custom CSS). Documents the provenance of the theme (Hex, from [refero.design](https://refero.design)), the load-bearing tokens, and the do's-and-don'ts to follow when editing `docs/stylesheets/extra.css` or `mkdocs.yml`. Companion to `IMAGE_STYLE.md`; also excluded from the published nav.
+## Don't confuse the three "agent instruction" files
+The repo has *three* files that look like agent instructions but speak to different audiences:
+| File | Audience | Purpose |
+|---|---|---|
+| `CLAUDE.md` (this file) | Claude Code working on the harness itself | Conventions for editing this codebase |
+| `tilth/prompts/system.md` | The worker agent inside the harness loop | Role, tool guidance, "done" criteria |
+| `<demo-workspace>/AGENTS.md` | The worker agent operating on the demo workspace | Project conventions for the toy todo-cli |
+When the user says "update the agent's instructions," ask which one — they're not the same thing.
+## Repo layout
+```
+tilth/
+├── README.md, CLAUDE.md, mkdocs.yml
+├── docs/                  # MkDocs source (annotated nav in mkdocs.yml is the topic index)
+├── pyproject.toml, .env.example, .gitignore
+├── tilth/
+│   ├── cli.py             # verb-routed entry: init / run / resume / reset / visualize
+│   ├── paths.py           # ~/.tilth resolution: sessions dir + .env search order
+│   ├── loop.py            # Ralph loop + inner tool-use loop + subcommand handlers
+│   ├── client.py          # OpenAI-compat wrapper, dual-client routing (worker / evaluator)
+│   ├── session.py         # events.jsonl + checkpoint.json + per-task ledger + wake()
+│   ├── summary.py         # roll events.jsonl into summary.json (denormalised view)
+│   ├── memory.py          # AGENTS.md / progress.txt / overview / full-plan injection
+│   ├── tasks.py           # load + validate <repo>/.tilth/<feature>/ (overview + T-NNN files)
+│   ├── workspace.py       # git worktree create / commit / diff
+│   ├── case.py            # worker submit_case schema / parse / render
+│   ├── verdict.py         # evaluator submit_verdict schema / parse / ledger format
+│   ├── tools/             # bash, files, search — registered in __init__.py (worker)
+│   ├── hooks/             # pre_tool
+│   ├── prompts/           # system.md, evaluator.md
+│   ├── data/              # env.example template (shipped in the wheel for `tilth init`)
+│   └── visualize/         # tilth visualize: live web viewer over ~/.tilth/sessions/ (stdlib http server)
+├── examples/seed-reference/  # frozen pre-prompt-driven example (historical teaching artifact)
+└── tests/                 # the harness's own pytest suite (run state now lives in ~/.tilth/)
+```
+The demo workspace is a separate repo (`AlteredCraft/tilth-demo-todo-cli`) — not part of the Tilth repo. Clone it wherever you keep code; the docs use `~/projects/tilth-demo` as an illustrative path, but the location is arbitrary.
+## Conventions
+- **Python 3.12.** `from __future__ import annotations` everywhere.
+- **`uv` for env management.** Run `uv sync` to create or update the environment.
+- **`ruff` for lint.** Config in `pyproject.toml`. Run `ruff check tilth/` before declaring work done.
+- **Type hints on public functions.** Internal helpers can skip them.
+- **No comments unless the WHY is non-obvious.** Don't narrate WHAT the code does.
+- **Standard library first.** Third-party deps live in `pyproject.toml`; resist adding more.
+- **External interfaces — verify, don't guess.** For provider APIs (OpenRouter, OpenAI SDK, Ollama, etc.), library specs, or any third-party wire format: consult the official docs first (use Context7, WebFetch, or the provider's sitemap to find them), and probe the live response shape with a tiny one-shot script before writing the fix. Don't infer field names from error messages — providers often surface their *upstream* internal field names in errors (e.g. SiliconFlow says `reasoning_content` but the OpenRouter wire field is `reasoning_details`). Don't infer from training data — these surfaces churn. A synthetic unit test built on a guessed shape gives false confidence; the test passes against your made-up contract while the real bug stays. Probe → write the test against the real shape → fix.
+## Architecture invariants worth preserving
+These are load-bearing. Read the relevant page under `docs/deep-dives/` before breaking any of them.
+1. **Brain / Hands / Session split.** Don't blur the three. New code goes in the module whose job it is — model calls in `client.py`, sandbox/tool ops in `workspace.py` and `tools/`, durable state in `session.py`.
+2. **The agent doesn't see harness mechanics.** No `task-status.json` or status fields, no `events.jsonl`, no `summary.json`, no token counts, no checkpoints, no cross-task evaluator. Hiding these prevents gaming, shortcutting, and self-managed state. (The visibility expansion deliberately softened this: the worker sees the feature overview and the whole task list *as prose context* — not the mutable status store — plus the evaluator's prior verdicts on its *current* task, so it can act on review feedback. The harness files, token counts, checkpoints, and the wider evaluation machinery stay hidden.) New features should preserve this boundary unless the user explicitly asks otherwise.
+    **Honest scope.** This is a *design goal*, not an enforcement guarantee in default mode. The worker has `bash` and the worktree is mounted at `~/.tilth/sessions/<id>/workspace/`, so a determined model can reach harness state via relative paths — `events.jsonl`, `summary.json`, `checkpoint.json`, `task-status.json` all live one directory up (`../`). The invariant's near-term purpose is to keep new code from making harness state *more* obviously surfaced to the worker; real enforcement is opt-in process isolation, planned in [#13](https://github.com/AlteredCraft/tilth/issues/13).
+3. **Tool registry is the canonical source for "what tools exist".** `tilth/tools/__init__.py` defines the registry; system.md should *not* enumerate tools (it gets stale).
+4. **Hook contract: "success silent, failures verbose" — to the *agent*.** Pass states inject nothing into the loop's message history; failures inject a feedback message that the next worker iteration sees. **Telemetry is separate.** Every hook invocation should emit a `hook_run` event regardless of outcome — observability is for the developer reading `events.jsonl`, not the agent. "Silent to the agent" must not mean "invisible in the log".
+5. **The worktree branch is never auto-merged.** `commit_task` commits to the session branch; humans review and merge. Don't add an "auto-merge on success" feature without an explicit ask.
+6. **Token cap enforcement is between tasks, not mid-task.** The "always finish the current task cleanly" property matters; preserve it.
+## Where to file new things
+| Adding... | Lives in... | Don't forget... |
+|---|---|---|
+| A tool | `tilth/tools/{name}.py` | Register in `tools/__init__.py:_registry()` |
+| A hook | `tilth/hooks/{name}.py` | Wire into `tools/__init__.py:dispatch()` |
+| A prompt | `tilth/prompts/{name}.md` | Add a loader in `loop.py` |
+| A session event type | Use it in `session.log("...", {...})` | Document the type in `session.py`'s module docstring |
+| A summary metric | `tilth/summary.py:build_from_events()` | Update the schema in the module docstring; bump `SUMMARY_VERSION` if shape breaks |
+## Common commands
+```bash
+# Setup (contributor flow: run from the clone with `uv run tilth …`, no install)
+uv sync
+uv run tilth init          # writes ~/.tilth/.env (or `cp .env.example .env` for a
+                           # clone-local one — Tilth's .env search falls back to CWD)
+# edit the .env: TILTH_BASE_URL, TILTH_API_KEY, TILTH_WORKER_MODEL
+# Lint
+.venv/bin/python -m ruff check tilth/
+# Docs — strict build (catches broken nav refs, missing files, dead relative
+# links). Run after editing mkdocs.yml or anything under docs/. This is the
+# command CI runs as its strict gate (.github/workflows/docs.yml); keep it green.
+uv run --extra docs mkdocs build --strict --site-dir /tmp/tilth-site
+# Docs — live preview at http://127.0.0.1:8000
+uv run --extra docs mkdocs serve
+# Demo (needs provider config from Setup, and a local clone of the demo repo
+# at AlteredCraft/tilth-demo-todo-cli — clone it wherever; path below is illustrative)
+git clone git@github.com:AlteredCraft/tilth-demo-todo-cli.git ~/projects/tilth-demo
+# author ~/projects/tilth-demo/.tilth/<feature>/  (overview.md + T-NNN-*.md; run prints
+# ready-to-fill templates if the dir is missing) — there is no prep/interview step
+uv run tilth run ~/projects/tilth-demo/.tilth/<feature>
+# Resume an interrupted session (latest in ~/.tilth/sessions/, or by id)
+uv run tilth resume
+uv run tilth resume <session_id>
+# Reset a session — removes the worktree, deletes the session/<id> branch, drops ~/.tilth/sessions/<id>/
+uv run tilth reset
+uv run tilth reset <session_id>
+uv run tilth reset --yes  # skip the confirmation prompt
+# Serve the live session viewer (read-only, 127.0.0.1:8765; --port to change).
+# Index of all sessions + per-session chat view that tails an active run.
+uv run tilth visualize
+uv run tilth visualize <session_id>   # deep-link this session on startup
+# Inspect a session log
+jq -c . ~/.tilth/sessions/<session_id>/events.jsonl | head -40
+```
+## Working with GitHub milestones
+There is no `gh milestone` command. Manage the milestone *object* through the REST API (`gh api`); assign issues to it with the built-in `gh issue` flags — which reference the milestone by **title**, so it must exist first.
+```bash
+# Manage the milestone object (use the milestone NUMBER, not an issue id, for edit/delete)
+gh api repos/{owner}/{repo}/milestones -f title="…" -f description="…" -f due_on="2026-07-01T00:00:00Z"
+gh api repos/{owner}/{repo}/milestones --jq '.[] | "#\(.number) \(.state) \(.title)"'
+gh api -X PATCH  repos/{owner}/{repo}/milestones/<n> -f state=closed
+gh api -X DELETE repos/{owner}/{repo}/milestones/<n>
+# Assign / filter by milestone title
+gh issue edit <n>... --milestone "…"   # accepts multiple issues; --remove-milestone detaches
+gh issue list --milestone "…"
+```
+Creating issues or milestones is network-side — per *Things not to do without asking*, only do it on an explicit request.
+## Working with the demo
+The demo lives in its own repo at [`AlteredCraft/tilth-demo-todo-cli`](https://github.com/AlteredCraft/tilth-demo-todo-cli). Clone it wherever you keep code before running it. The path is just an argument to `uv run tilth`, so any layout works; the docs use `~/projects/tilth-demo` as an illustrative example.
+The demo has to be a git repo because Tilth's worktree machinery requires it. To tear down a session's artifacts (worktree, `session/<id>` branch, `~/.tilth/sessions/<id>/`), use `tilth reset` rather than the manual recipe:
+```bash
+uv run tilth reset                # most recent session
+uv run tilth reset <session_id>   # explicit
+```
+`tilth reset` reads the session's checkpoint and `session_start` event to recover the source repo + worktree path + branch, runs `git worktree remove --force` and `git branch -D` against the source repo, and deletes `~/.tilth/sessions/<id>/`. It force-removes a dirty worktree by design — its whole purpose is to discard a session's work; the `[y/N]` prompt is the safety gate.
+If `tilth reset` itself can't run (e.g., session metadata missing), the manual fallback is:
+```bash
+cd <demo-clone-path>                  # e.g. ~/projects/tilth-demo
+git worktree prune
+git branch -D session/<id>            # if it still exists
+rm -rf ~/.tilth/sessions/<id>/        # or $TILTH_SESSIONS_DIR/<id>/ if overridden
+```
+Don't commit changes the agent made on `session/*` branches into the demo clone's `main`. Those are run artefacts; the demo's `main` should stay clean of them. The session worktree and all harness state live under `~/.tilth/`, never in the demo repo — the only thing you'd intentionally add to the demo is the `.tilth/<feature>/` directory you author.
+## Things not to do without asking
+- Commit changes (per the user's standing instruction — only commit when explicitly asked).
+- Push to a remote, create PRs, or do anything network-side beyond running the harness itself.
+- Change the architecture invariants above.
+- Add a new dependency to `pyproject.toml` for convenience — justify the addition.
+- Rewrite the system prompts to be more verbose. They are short on purpose; every character ships every turn.
+- Auto-fix the demo workspace to pass tests yourself if a demo run fails — that defeats the point of the demo. Investigate why the harness didn't get there on its own.
+## Article context
+This codebase is the practical centerpiece of an article in the user's PKM vault at:
+```
+~/_PRIMARY_VAULT/AlteredCraft/Altered Craft Publications/Notes/Long running agents/
+```
+That folder has `research-findings.md`, `research-links.md`, `mvp-spec.md`, and `draft.md`. When changes here are likely to be article-worthy (e.g. surprising findings from a demo run, new lessons from extending a slice), surface them so the user can update the draft. Don't edit those files unless asked.
+### Session-start sweep for article-worthy learnings
+A separate running notes file lives at:
+```
+~/_PRIMARY_VAULT/AlteredCraft/Altered Craft Publications/Notes/tilth-learnings/notes.md
+```
+It's a bulleted, themed corpus of transferable lessons from Tilth development (provider quirks, robustness patterns, multi-agent failure modes, observability wins) with commit-SHA links into [`AlteredCraft/tilth`](https://github.com/AlteredCraft/tilth). At the start of a new session, spawn a general-purpose subagent to:
+1. `git log --since="3 days ago"` — see what's landed.
+2. Read the notes file above.
+3. For any commit surfacing a non-obvious lesson that *isn't* already represented, append a bullet under the right themed section (or open a new section) with the commit SHA linked and the bullet's date tagged on the section's `*Observed:*` line.
+4. Skip docs-sync / lint / surface-polish commits — the file is for transferable lessons, not changelog.
+5. Report back a short summary of what was added (and what was already there).
+Match the existing voice: terse bullets, themed sections (not chronological), date range under each section header, lesson framed as a transferable principle plus a concrete commit anchor.

tilth-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Sam Keen / AlteredCraft
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

tilth-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,143 @@
+Metadata-Version: 2.4
+Name: tilth
+Version: 0.1.0
+Summary: Tilth — minimal long-running agent harness against any OpenAI-compatible endpoint. Brain/Hands/Session split, Ralph loop, four memory channels.
+Project-URL: Homepage, https://github.com/AlteredCraft/tilth
+Project-URL: Documentation, https://alteredcraft.github.io/tilth/
+Project-URL: Repository, https://github.com/AlteredCraft/tilth
+Project-URL: Issues, https://github.com/AlteredCraft/tilth/issues
+Author-email: Sam Keen <sam@alteredcraft.com>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: agent,agent-harness,autonomous-agents,llm,long-running-agents,openai-compatible,openrouter,ralph-loop
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Code Generators
+Requires-Python: >=3.12
+Requires-Dist: openai>=1.40
+Requires-Dist: pydantic>=2.6
+Requires-Dist: python-dotenv>=1.0
+Requires-Dist: rich>=13.7
+Provides-Extra: docs
+Requires-Dist: mkdocs-glightbox>=0.4; extra == 'docs'
+Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
+Requires-Dist: mkdocs>=1.6; extra == 'docs'
+Description-Content-Type: text/markdown
+# Tilth
+> *Prepare the ground, let the agent grow the work.*
+A minimal long-running agent harness against an **OpenAI-compatible** LLM endpoint. Tested today against [OpenRouter](https://openrouter.ai); the OpenAI SDK underneath means other OpenAI-flavour gateways should work, but support for them is on the roadmap rather than validated. Built to learn (and demonstrate) the Brain/Hands/Session split, the Ralph loop, and the file-backed memory channels described in Addy Osmani's [long-running agents](https://addyosmani.com/blog/long-running-agents/) and [agent harness engineering](https://addyosmani.com/blog/agent-harness-engineering/) posts.
+![Brain / Hands / Session split — three boxes connected by flow arrows, with the files that implement each piece](https://raw.githubusercontent.com/AlteredCraft/tilth/main/docs/assets/brain-hands-session.png)
+**Audience:** This is an active research project for my work in [Altered Craft](https://alteredcraft.com). I do actively use it for real work, so I'd suggest it for single-dev / few-dev teams who want to *understand* what a long-running agent harness actually does. That's today (June 2026); the future, we shall see.
+**Target run:** 10–60 minutes of autonomous work against an open model (default `deepseek/deepseek-v4-flash` on OpenRouter for the worker; the evaluator defaults to `deepseek/deepseek-v4-pro`), completing a short task list against a small project on a per-session git worktree.
+> **Status — prompt-driven core.** Tilth is deliberately small and currently being driven *down* to its essentials: a worker and an independent evaluator, the base file/search/bash tools, and full observability. There is **no codified test/lint gate** — the evaluator is the only gate — and **no interview step**: you author the work as markdown and run it. Capabilities get added back only as testing shows they're needed.
+## How Tilth differs
+Many minimal coding agents are *interactive* — a developer watches the output and course-corrects, kills a bad run, or re-prompts. Tilth runs *autonomously* for the length of a run, with no one watching mid-task. That single difference is why it carries machinery a pair-programming agent can skip: an **evaluator** — a second model that judges whether a change is a *proper* solution against the task's acceptance criteria, not just whether the code runs; **between-task caps** that stand in for the budget ceiling a human would otherwise impose; a per-task **evaluator ledger** so a retried task sees the reviewer's prior verdicts; **state kept out of the model's context**; and **offline-first observability** (detailed just below). None of this is a knock on interactive agents; it's a different shape for a different job.
+### Hyper-observability
+If no one is watching a run mid-flight, the recording *is* the supervision. Tilth's standing goal is **hyper-observability** — *every prompt the harness sends is accessible, and every run is fully inspectable after the fact.* Every assembled prompt, memory load, model call, and evaluator verdict lands in an append-only `events.jsonl`, and `tilth visualize` serves the whole thing as a local chat-style web app — tail an active run in near-realtime or replay a finished one end-to-end, with no state hidden out of reach.
+![A finished Tilth run rendered by `tilth visualize`: the dashboard band — a header with an `all_done` status chip plus token-cost and event-count chips, a "Limit utilization" row of cost-budget, wall-clock, and per-task iteration meters, a stat band (tokens, cost, model and tool calls, verdicts), a session timeline, and a context-pressure chart](https://raw.githubusercontent.com/AlteredCraft/tilth/main/docs/assets/session-render.png)
+*A finished run, rendered by `tilth visualize`.*
+It's an early example of the goal, not a finished product. For the full product story — the Brain/Hands/Session split in detail, the memory channels, the two loops, and the worker↔evaluator dialogue — see the **[docs site](https://alteredcraft.github.io/tilth/)**. (The docs are mid-revision for the prompt-driven core; the README is the current source of truth for the run flow.)
+## Quickstart
+```bash
+# Install the CLI from PyPI (puts `tilth` on your PATH, runnable from anywhere)
+uv tool install tilth
+# …or run it without installing, npx-style:   uvx tilth --help
+# …or with pipx:                               pipx install tilth
+tilth init                     # scaffolds ~/.tilth/.env
+# edit ~/.tilth/.env — TILTH_BASE_URL, TILTH_API_KEY, TILTH_WORKER_MODEL are all
+# required (Tilth refuses to start without them so a misconfigured run can't
+# silently fall back to a provider/model your account doesn't have)
+```
+> **Working *on* Tilth?** Install from a clone instead: `uv tool install --editable .` puts `tilth` on your PATH while tracking your working copy — see [Working with the codebase](#working-with-the-codebase).
+Tilth keeps all per-user state under `~/.tilth/` — the `.env` above and every run's `sessions/<id>/`. Relocate it with `$TILTH_HOME` (whole tree) or `$TILTH_SESSIONS_DIR` (just the runs).
+You author the feature as markdown in the target repo, then run it — there's no interview step. The work lives in a feature directory you name under `<repo>/.tilth/<feature>/` (one repo can hold several features):
+```
+.tilth/todo-cli/
+├── overview.md            # the feature's goal + scope boundaries (required)
+├── T-001-<slug>.md        # one file per task, ordered by id
+├── T-002-<slug>.md
+└── ...
+```
+Each task file is small frontmatter plus two sections:
+```markdown
+---
+id: T-001
+title: Add the `add` subcommand
+---
+## Description
+What to build, in the worker's voice. Real paths/symbols
+(todo_cli/__main__.py:main()), not "the entrypoint".
+## Acceptance criteria
+- An externally checkable behaviour
+- Another one
+```
+Then point Tilth at the feature directory:
+```bash
+git clone git@github.com:AlteredCraft/tilth-demo-todo-cli.git tilth-demo
+# author tilth-demo/.tilth/todo-cli/  (`tilth run` prints ready-to-fill templates if the directory is missing)
+tilth run ./tilth-demo/.tilth/todo-cli
+```
+For each pending task, Tilth resets context from disk, lets the worker work with the file/search/bash tools until it calls `submit_case`, hands the case + diff to the evaluator in a fresh context, and on `accept` commits the change to the `session/<id>` branch — one task = one commit (humans review and merge; Tilth never auto-merges). A run stops on all-tasks-done or a cap (iterations / wall-clock / dollar spend / evaluator calls). Interrupt with Ctrl-C; resume with `tilth resume`.
+```bash
+tilth resume                 # continue the latest session
+tilth reset                  # tear down a session's worktree + branch + dir
+tilth visualize              # serve the live session viewer (127.0.0.1:8765)
+tilth info                   # list sessions (status, progress, tokens); `tilth info <id>` for one session's detail
+tilth config                 # show resolved provider config + run caps (API keys masked)
+```
+The full set of `TILTH_*` env vars (caps, evaluator routing, context-file selection) is documented in the generated `~/.tilth/.env` (copied from `.env.example`).
+## Working with the codebase
+Working *on* Tilth itself rather than using it? `uv sync` for the dev env, then run the CLI straight from the clone with `uv run tilth …` (no install needed — sessions still land in `~/.tilth/` unless you set `$TILTH_HOME`).
+```bash
+# Lint
+.venv/bin/python -m ruff check tilth/
+# Tests
+.venv/bin/python -m pytest
+# Docs — live preview at http://127.0.0.1:8000
+uv run --extra docs mkdocs serve
+# Docs — strict build (the CI gate; catches broken nav refs, missing files, dead links)
+uv run --extra docs mkdocs build --strict --site-dir /tmp/tilth-site
+```
+See [`CLAUDE.md`](./CLAUDE.md) for repo conventions and the architecture invariants worth preserving when editing the harness itself.