tilth 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tilth/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Tilth — minimal long-running agent harness against any OpenAI-compatible endpoint."""
2
+
3
# Release version of the package.
__version__ = "0.1.0"

# The version string is the only name exported by a star-import.
__all__ = ["__version__"]
tilth/case.py ADDED
@@ -0,0 +1,271 @@
1
+ """Worker `submit_case` schema, parsing, and prompt rendering (v1 Phase 3).
2
+
3
+ The worker no longer signals "done" by ceasing to call tools. Instead, when
4
+ it believes the task is complete, it calls `submit_case` with a structured
5
+ argument: a summary, an explicit AC↔change mapping (`ac_coverage`), any
6
+ `work_arounds` it had to make, and `uncertainties` it wants flagged. The
7
+ evaluator reads this case alongside the diff and the ledger.
8
+
9
+ This module is the worker-side mirror of `tilth/verdict.py` (evaluator side):
10
+ same tool-call + defensive-parse + value-local-normalize + single-error
11
+ pattern. Bump `CASE_SCHEMA_VERSION` on shape changes; no migration.
12
+
13
+ `submit_case` is a *control-flow* tool — it ends the worker's turn — not a
14
+ worktree operation, so it is NOT in `tilth/tools` REGISTRY. Its schema is
15
+ offered to the worker via the `tools=` list and intercepted in
16
+ `loop._run_task`, parallel to how `submit_verdict` is intercepted on the
17
+ evaluator side.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import re
24
+ from typing import Any
25
+
26
# Schema bookkeeping: bump the version on any shape change (no migration).
CASE_SCHEMA_VERSION = 1
NAME_SUBMIT_CASE = "submit_case"

# OQ #2: force the worker to triage rather than list everything.
WORK_AROUNDS_CAP = 5

# Keys accepted at the top level of the payload / inside one ac_coverage entry.
_TOP_KEYS = frozenset(("summary", "ac_coverage", "work_arounds", "uncertainties"))
_AC_KEYS = frozenset(("criterion", "addressed_by", "evidence"))

# JSON schema for a single element of `ac_coverage`.
_AC_ENTRY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "additionalProperties": False,
    "required": ["criterion", "addressed_by"],
    "properties": {
        "criterion": {
            "type": "string",
            "description": "The AC text (or a clear paraphrase).",
        },
        "addressed_by": {
            "type": "string",
            "description": (
                "A file:symbol pointer with a brief "
                "annotation, e.g. "
                "'todo_cli/__main__.py:main() — argparse "
                "handles add'. A pointer, not prose."
            ),
        },
        "evidence": {
            "type": "string",
            "description": (
                "Optional: the test that proves it, e.g. "
                "'tests/test_t002.py::test_add'."
            ),
        },
    },
}

# Function-tool definition offered to the worker through the `tools=` list.
SUBMIT_CASE_TOOL: dict[str, Any] = {
    "type": "function",
    "function": {
        "name": NAME_SUBMIT_CASE,
        "description": (
            "Submit your case that the task is complete. Call this exactly "
            "once, when the work is done and verified — it ends your turn and "
            "hands the case to an independent reviewer. Present the case "
            "honestly: map each acceptance criterion to the change that "
            "satisfies it, name any work-arounds you had to make, and flag "
            "anything you're unsure about. This is not a place to argue past "
            "a failing test — the mechanical checks run regardless."
        ),
        "parameters": {
            "type": "object",
            "additionalProperties": False,
            "required": ["summary", "ac_coverage"],
            "properties": {
                "summary": {
                    "type": "string",
                    "description": "One to three sentences: what you did.",
                },
                "ac_coverage": {
                    "type": "array",
                    "description": (
                        "One entry per acceptance criterion you addressed."
                    ),
                    "items": _AC_ENTRY_SCHEMA,
                },
                "work_arounds": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": (
                        "Things you had to touch that the AC doesn't name "
                        "(e.g. side-effect files of an authorised command). "
                        f"Triage to the {WORK_AROUNDS_CAP} that matter most."
                    ),
                },
                "uncertainties": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": (
                        "Ambiguities you resolved by guessing, or anything "
                        "you want the reviewer to double-check."
                    ),
                },
            },
        },
    },
}
110
+
111
+
112
# Pointer heuristic. Only text that is clearly a prose sentence is rejected
# (the sketch's mitigation #1); a terse pointer with no classic path token,
# such as "main() in __main__", is still accepted.
_POINTER_RE = re.compile(r"[/]|::|\.[A-Za-z]{1,5}\b|\w+\.\w+|:\d+")


def _looks_like_pointer(s: str) -> bool:
    """Return True unless `s` reads as an obvious prose sentence.

    A string qualifies when it contains any token matched by `_POINTER_RE`
    (a slash, `::`, a dotted name or short extension, or `:<digits>`), or
    when it has fewer than eight whitespace-separated words.
    """
    has_path_token = _POINTER_RE.search(s) is not None
    return has_path_token or len(s.split()) < 8
122
+
123
+
124
def _normalize(args: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of `args` with the optional list fields tidied.

    For `work_arounds` and `uncertainties`: a missing or None value becomes
    `[]`, and a list has its empty / whitespace-only strings removed (an
    empty work-around is noise, not a claim). Non-string items and non-list
    values are left in place for validation to report. Each field is
    handled on its own; there are no cross-field heuristics.
    """
    cleaned = {**args}
    for field in ("work_arounds", "uncertainties"):
        items = cleaned.get(field)
        if items is None:
            cleaned[field] = []
            continue
        if isinstance(items, list):
            # Keep anything that is not a blank string.
            cleaned[field] = [
                item
                for item in items
                if not isinstance(item, str) or item.strip()
            ]
    return cleaned
139
+
140
+
141
def _ac_entry_violation(i: int, entry: Any) -> str | None:
    """Return the first problem with `ac_coverage[i]`, or None if it is fine."""
    if not isinstance(entry, dict):
        return f"ac_coverage[{i}] must be an object"
    unknown = set(entry) - _AC_KEYS
    if unknown:
        return f"ac_coverage[{i}] has unexpected keys: {sorted(unknown)}"

    # Both required fields must be non-blank strings; criterion is checked first.
    for field in ("criterion", "addressed_by"):
        value = entry.get(field)
        if not (isinstance(value, str) and value.strip()):
            return f"ac_coverage[{i}] missing non-empty '{field}'"

    if not _looks_like_pointer(entry.get("addressed_by")):
        return (
            f"ac_coverage[{i}] 'addressed_by' reads as prose, not a "
            "file:symbol pointer — cite where the work lives "
            "(e.g. 'todo_cli/__main__.py:main()'), don't describe it"
        )

    evidence = entry.get("evidence")
    if not (evidence is None or isinstance(evidence, str)):
        return f"ac_coverage[{i}] 'evidence' must be a string"
    return None


def _validate(args: dict[str, Any]) -> str | None:
    """Return the first schema violation, or None. Single-error by design.

    Checks run in a fixed order: unexpected top-level keys, `summary`,
    `ac_coverage` (each entry in turn), the two optional string lists, and
    finally the `WORK_AROUNDS_CAP` limit on `work_arounds`.
    """
    unknown = set(args) - _TOP_KEYS
    if unknown:
        return f"unexpected keys: {sorted(unknown)}"

    summary = args.get("summary")
    if summary is None:
        return "missing required field 'summary'"
    if not (isinstance(summary, str) and summary.strip()):
        return "'summary' must be a non-empty string"

    coverage = args.get("ac_coverage")
    if coverage is None:
        return "missing required field 'ac_coverage'"
    if not isinstance(coverage, list):
        return "'ac_coverage' must be a list"
    for i, entry in enumerate(coverage):
        problem = _ac_entry_violation(i, entry)
        if problem is not None:
            return problem

    for key in ("work_arounds", "uncertainties"):
        items = args.get(key, [])
        if not (isinstance(items, list) and all(isinstance(s, str) for s in items)):
            return f"'{key}' must be a list of strings"

    if len(args.get("work_arounds", [])) > WORK_AROUNDS_CAP:
        return (
            f"too many 'work_arounds' (max {WORK_AROUNDS_CAP}); triage to the "
            "ones that actually matter"
        )

    return None
191
+
192
+
193
def parse_case(
    msg: dict[str, Any],
) -> tuple[dict[str, Any] | None, str | None]:
    """Pick the first valid `submit_case` tool call from an assistant message.

    Args:
        msg: The assistant message. Only its `tool_calls` entry is read.

    Returns:
        `(case_dict, None)` for the first `submit_case` call whose arguments
        normalize and validate cleanly, otherwise `(None, error_for_model)`.
        The error is forwarded to the model as `tool_result` content so the
        next attempt can self-correct — the `verdict.parse_verdict` pattern.

    Malformed provider output never raises: a `tool_calls` value that is not
    a list, an entry that is not an object, or a `function` value that is
    not an object is treated as "not a `submit_case` call" and skipped.
    """
    tool_calls = msg.get("tool_calls") or []
    if not isinstance(tool_calls, (list, tuple)):
        # A scalar or mapping here cannot hold tool-call objects.
        tool_calls = []

    candidate_errors: list[str] = []
    saw = False
    for tc in tool_calls:
        if not isinstance(tc, dict):
            continue
        fn = tc.get("function")
        if not isinstance(fn, dict) or fn.get("name") != NAME_SUBMIT_CASE:
            continue
        saw = True

        # Arguments may arrive already decoded (dict) or as a JSON string.
        raw = fn.get("arguments")
        if isinstance(raw, dict):
            args = raw
        elif isinstance(raw, str):
            try:
                args = json.loads(raw)
            except json.JSONDecodeError as exc:
                candidate_errors.append(f"JSON parse: {exc}")
                continue
        else:
            candidate_errors.append(
                f"arguments was {type(raw).__name__}, expected str or dict"
            )
            continue
        if not isinstance(args, dict):
            candidate_errors.append("arguments did not parse to a JSON object")
            continue

        args = _normalize(args)
        err = _validate(args)
        if err is None:
            return args, None
        candidate_errors.append(err)

    if not saw:
        return None, (
            "No `submit_case` tool call in your response. When the task is "
            "complete and verified, call `submit_case` to present it."
        )
    # Every submit_case attempt failed; report all of their errors together.
    return None, (
        "Your `submit_case` call could not be accepted: "
        + " | ".join(candidate_errors)
        + ". Call `submit_case` again with a corrected payload."
    )
243
+
244
+
245
def format_case_section(case: dict[str, Any]) -> str:
    """Render the worker's case as a text section for the evaluator's prompt.

    The section always carries the heading and the summary line. The AC
    coverage, work-around, and uncertainty lists are each emitted only when
    they are non-empty.
    """
    summary = (case.get("summary") or "").strip()
    out = ["## Worker's case", "", f"Summary: {summary}"]

    coverage = case.get("ac_coverage") or []
    if coverage:
        out.extend(("", "AC coverage (worker's claim):"))
        for item in coverage:
            criterion = (item.get("criterion") or "").strip()
            pointer = (item.get("addressed_by") or "").strip()
            evidence = (item.get("evidence") or "").strip()
            suffix = f" [evidence: {evidence}]" if evidence else ""
            out.append(f"- {criterion} → {pointer}{suffix}")

    # The two plain string lists share a layout: blank line, heading, bullets.
    bullet_sections = (
        ("work_arounds", "Work-arounds the worker claims (treat skeptically):"),
        ("uncertainties", "Uncertainties the worker flagged:"),
    )
    for key, heading in bullet_sections:
        bullets = case.get(key) or []
        if bullets:
            out.extend(("", heading))
            out.extend(f"- {text}" for text in bullets)

    return "\n".join(out)
tilth/cli.py ADDED
@@ -0,0 +1,242 @@
1
+ """Verb-routed CLI entry point.
2
+
3
+ Subcommands:
4
+
5
+ tilth init
+ tilth run <feature-dir>
6
+ tilth resume [<session_id>]
7
+ tilth reset [<session_id>] [-y]
8
+ tilth visualize [<session_id>] [--port N]
9
+ tilth info [<session_id>]
10
+ tilth config
11
+
12
+ The feature is authored as markdown in a feature directory (conventionally
13
+ `<repo>/.tilth/<feature>/`): an `overview.md` plus one `T-NNN-*.md` per task —
14
+ see `tilth/tasks.py`. There is no separate prep step: `tilth run` is given that
15
+ directory's path, derives the enclosing git repo, creates a fresh session +
16
+ worktree, and runs the Ralph loop.
17
+
18
+ Dispatch:
19
+ 1. No args at all → print config locations + top-level help, exit 1.
20
+ 2. First arg is `-h` or `--help` → print config locations + help, exit 0.
21
+ 3. A known subcommand → parse with the subparser and dispatch.
22
+ 4. Anything else → argparse usage error.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import sys
28
+
29
+ from dotenv import load_dotenv
30
+ from rich.console import Console
31
+
32
+ from tilth import loop, paths
33
+
34
# Console shared by the printing helpers in this module.
console = Console()

# The verbs the CLI knows about; each has a matching subparser.
SUBCOMMANDS = frozenset(
    ("init", "run", "resume", "reset", "visualize", "info", "config")
)
37
+
38
+
39
def _load_env() -> None:
    """Load the resolved .env file, if one exists.

    The file is whichever `paths.resolve_env_file()` returns (first hit in
    the search order). Finding none is not an error — `tilth init` and
    `tilth visualize` don't need provider config. The file is loaded with
    `override=False`.
    """
    resolved = paths.resolve_env_file()
    if resolved is None:
        return
    load_dotenv(resolved, override=False)
45
+
46
+
47
def _print_config_locations() -> None:
    """Print the resolved Tilth home and .env paths (shown with top-level help).

    A home directory that does not exist, or a .env that could not be
    resolved, is printed with a dimmed hint to run `tilth init`. For a
    missing .env the path shown is the location one would be written to.
    """
    home = paths.tilth_home()
    env_file = paths.resolve_env_file()
    write_target = paths.env_file_write_target()
    missing_hint = "[dim](not found — run [bold]tilth init[/bold])[/dim]"

    console.print("[bold]Config locations[/bold]")

    home_line = f" Tilth home: {home}"
    if not home.is_dir():
        home_line = f" Tilth home: {home} " + missing_hint
    console.print(home_line, soft_wrap=True)

    if env_file is None:
        env_line = f" .env: {write_target} " + missing_hint
    else:
        env_line = f" .env: {env_file}"
    console.print(env_line, soft_wrap=True)

    # Blank line separating this banner from whatever is printed next.
    console.print()
72
+
73
+
74
def _print_help(parser) -> None:
    """Print the config-location banner, then `parser`'s own help text."""
    _print_config_locations()
    parser.print_help()
77
+
78
+
79
def _build_parser():
    """Build the top-level `tilth` argument parser with one subparser per verb.

    The chosen verb is stored on the parsed namespace as `command`; it is
    None when no verb was given. Verbs are registered in the order init,
    run, resume, reset, visualize, info, config.
    """
    import argparse
    from pathlib import Path

    root = argparse.ArgumentParser(
        prog="tilth",
        description="Tilth — a minimal long-running agent harness.",
    )
    verbs = root.add_subparsers(dest="command", metavar="<command>")

    def add_verb(name, summary, details):
        # One subparser per verb: short help for the listing, long description.
        return verbs.add_parser(name, help=summary, description=details)

    def add_session_id(verb_parser, help_text):
        # Optional positional shared by resume / reset / visualize / info.
        verb_parser.add_argument("session_id", nargs="?", help=help_text)

    add_verb(
        "init",
        "Scaffold ~/.tilth so the installed tool runs from anywhere.",
        "Create the Tilth home directory ($TILTH_HOME, default ~/.tilth) with "
        "a sessions/ dir and a .env from the template. Does not overwrite an "
        "existing .env. Run once after `uv tool install`.",
    )

    run_verb = add_verb(
        "run",
        "Run the worker loop against a feature directory.",
        "Read a feature from the given directory (overview.md + one T-NNN-*.md "
        "per task), derive its git repo, create a fresh session + worktree, and "
        "run the Ralph loop. Fails fast with the templates if the directory has "
        "no feature.",
    )
    run_verb.add_argument(
        "feature_dir",
        type=Path,
        help="Path to the feature directory (e.g. <repo>/.tilth/<feature>/) "
        "holding overview.md + T-NNN-*.md.",
    )

    resume_verb = add_verb(
        "resume",
        "Resume an interrupted session.",
        "Resume a session that stopped on wall-clock / token-cap / "
        "interrupt / error. Trailing failed tasks are flipped back to "
        "pending and their FAILED placeholder commit is unwound.",
    )
    add_session_id(
        resume_verb, "Session ID to resume; defaults to the latest session."
    )

    reset_verb = add_verb(
        "reset",
        "Tear down a session (worktree, branch, session dir).",
        "Remove a session's worktree (even if dirty), delete its "
        "session/<id> branch from the source repo, and drop sessions/<id>/.",
    )
    add_session_id(
        reset_verb, "Session ID to reset; defaults to the latest session."
    )
    reset_verb.add_argument(
        "-y", "--yes", action="store_true", help="Skip the confirmation prompt."
    )

    viz_verb = add_verb(
        "visualize",
        "Serve the live session viewer (reads sessions/ in near-realtime).",
        "Start a read-only local web app over the sessions/ directory: an "
        "index of every run, and a per-session chat view that tails "
        "events.jsonl while a run is active. Loopback-only.",
    )
    add_session_id(
        viz_verb,
        "Session ID to deep-link on startup; defaults to the latest session.",
    )
    viz_verb.add_argument(
        "--port",
        type=int,
        default=8765,
        help="Port to bind on 127.0.0.1 (default: 8765).",
    )

    info_verb = add_verb(
        "info",
        "Show sessions, or one session's full detail (incl. worktree location).",
        "Without an id: list every session newest-first with status, task "
        "progress, and tokens. With an id: the full dossier — source repo, "
        "feature, the worktree folder and its git admin dir (the `.git` "
        "mapping), branch, and registration health. Read-only.",
    )
    add_session_id(info_verb, "Session ID to detail; omit to list all sessions.")

    add_verb(
        "config",
        "Show resolved provider config and run caps (API keys masked).",
        "Print the configuration the harness would run with — worker and "
        "evaluator endpoints/models, the per-task and per-run caps, and "
        "context files — plus which .env it resolved. API keys are masked. "
        "Works with a partial config; flags what's missing.",
    )

    return root
197
+
198
+
199
def _dispatch(args) -> int:
    """Run the `loop` handler for `args.command` and return its exit code.

    Raises:
        AssertionError: if `args.command` is not one of the known verbs.
    """
    # Lambdas keep every `loop` attribute lookup lazy: only the handler that
    # is actually selected gets resolved and called.
    handlers = {
        "init": lambda: loop.do_init_cmd(),
        "run": lambda: loop.do_run_cmd(args.feature_dir),
        "resume": lambda: loop.do_resume_cmd(args.session_id),
        "reset": lambda: loop.do_reset_cmd(args.session_id, args.yes),
        "visualize": lambda: loop.do_visualize_cmd(args.session_id, port=args.port),
        "info": lambda: loop.do_info_cmd(args.session_id),
        "config": lambda: loop.do_config_cmd(),
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise AssertionError(f"unknown subcommand {args.command!r}")
    return handler()
215
+
216
+
217
def main() -> int:
    """CLI entry point: load the environment, parse `sys.argv`, dispatch.

    Returns the process exit code: 1 when no arguments or no subcommand were
    given (help is printed), 0 when the first argument is `-h`/`--help`,
    otherwise whatever the dispatched command returns.
    """
    _load_env()
    # Re-resolve after the .env is loaded so a .env-provided $TILTH_SESSIONS_DIR
    # (or $TILTH_HOME) takes effect; loop.SESSIONS_DIR was set at import time.
    loop.SESSIONS_DIR = paths.sessions_dir()
    cli_args = sys.argv[1:]

    parser = _build_parser()

    wants_help = bool(cli_args) and cli_args[0] in ("-h", "--help")
    if wants_help or not cli_args:
        _print_help(parser)
        return 0 if wants_help else 1

    parsed = parser.parse_args(cli_args)
    if parsed.command is None:
        _print_help(parser)
        return 1
    return _dispatch(parsed)


if __name__ == "__main__":
    sys.exit(main())