PyPI - tilth - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tilth 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

tilth/__init__.py +5 -0
tilth/case.py +271 -0
tilth/cli.py +242 -0
tilth/client.py +309 -0
tilth/data/env.example +43 -0
tilth/hooks/__init__.py +17 -0
tilth/hooks/pre_tool.py +39 -0
tilth/loop.py +1833 -0
tilth/memory.py +338 -0
tilth/paths.py +60 -0
tilth/prompts/evaluator.md +70 -0
tilth/prompts/system.md +33 -0
tilth/session.py +404 -0
tilth/summary.py +216 -0
tilth/tasks.py +291 -0
tilth/tools/__init__.py +78 -0
tilth/tools/bash.py +64 -0
tilth/tools/files.py +139 -0
tilth/tools/search.py +106 -0
tilth/usage.py +143 -0
tilth/verdict.py +311 -0
tilth/visualize/__init__.py +14 -0
tilth/visualize/app.js +736 -0
tilth/visualize/render.py +425 -0
tilth/visualize/server.py +332 -0
tilth/visualize/theme.css +807 -0
tilth/visualize/theme.py +179 -0
tilth/workspace.py +259 -0
tilth-0.1.0.dist-info/METADATA +143 -0
tilth-0.1.0.dist-info/RECORD +33 -0
tilth-0.1.0.dist-info/WHEEL +4 -0
tilth-0.1.0.dist-info/entry_points.txt +2 -0
tilth-0.1.0.dist-info/licenses/LICENSE +21 -0

tilth/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Tilth — minimal long-running agent harness against any OpenAI-compatible endpoint."""
# Package version string.
__version__ = "0.1.0"
# Names exported by `from tilth import *`: only the version.
__all__ = ["__version__"]

tilth/case.py ADDED Viewed

@@ -0,0 +1,271 @@
+"""Worker `submit_case` schema, parsing, and prompt rendering (v1 Phase 3).
+The worker no longer signals "done" by ceasing to call tools. Instead, when
+it believes the task is complete, it calls `submit_case` with a structured
+argument: a summary, an explicit AC↔change mapping (`ac_coverage`), any
+`work_arounds` it had to make, and `uncertainties` it wants flagged. The
+evaluator reads this case alongside the diff and the ledger.
+This module is the worker-side mirror of `tilth/verdict.py` (evaluator side):
+same tool-call + defensive-parse + value-local-normalize + single-error
+pattern. Bump `CASE_SCHEMA_VERSION` on shape changes; no migration.
+`submit_case` is a *control-flow* tool — it ends the worker's turn — not a
+worktree operation, so it is NOT in `tilth/tools` REGISTRY. Its schema is
+offered to the worker via the `tools=` list and intercepted in
+`loop._run_task`, parallel to how `submit_verdict` is intercepted on the
+evaluator side.
+"""
+from __future__ import annotations
+import json
+import re
+from typing import Any
# Version of the `submit_case` payload shape; bump on shape changes (no migration).
CASE_SCHEMA_VERSION = 1
NAME_SUBMIT_CASE = "submit_case"
WORK_AROUNDS_CAP = 5  # OQ #2: force the worker to triage rather than list everything
# Keys permitted at the top level of the payload and inside each `ac_coverage` entry.
_TOP_KEYS = frozenset({"summary", "ac_coverage", "work_arounds", "uncertainties"})
_AC_KEYS = frozenset({"criterion", "addressed_by", "evidence"})

# The tool schema is assembled from named pieces, innermost first, so each
# level of nesting can be read on its own.

# Shape of one `ac_coverage` entry.
_AC_ENTRY_SCHEMA: dict[str, Any] = {
    "type": "object",
    "additionalProperties": False,
    "required": ["criterion", "addressed_by"],
    "properties": {
        "criterion": {
            "type": "string",
            "description": "The AC text (or a clear paraphrase).",
        },
        "addressed_by": {
            "type": "string",
            "description": (
                "A file:symbol pointer with a brief "
                "annotation, e.g. "
                "'todo_cli/__main__.py:main() — argparse "
                "handles add'. A pointer, not prose."
            ),
        },
        "evidence": {
            "type": "string",
            "description": (
                "Optional: the test that proves it, e.g. "
                "'tests/test_t002.py::test_add'."
            ),
        },
    },
}

# Top-level payload fields, in the order they are offered to the model.
_CASE_PROPERTIES: dict[str, Any] = {
    "summary": {
        "type": "string",
        "description": "One to three sentences: what you did.",
    },
    "ac_coverage": {
        "type": "array",
        "description": (
            "One entry per acceptance criterion you addressed."
        ),
        "items": _AC_ENTRY_SCHEMA,
    },
    "work_arounds": {
        "type": "array",
        "items": {"type": "string"},
        "description": (
            "Things you had to touch that the AC doesn't name "
            "(e.g. side-effect files of an authorised command). "
            f"Triage to the {WORK_AROUNDS_CAP} that matter most."
        ),
    },
    "uncertainties": {
        "type": "array",
        "items": {"type": "string"},
        "description": (
            "Ambiguities you resolved by guessing, or anything "
            "you want the reviewer to double-check."
        ),
    },
}

# Instruction text shown to the worker alongside the tool.
_SUBMIT_CASE_DESCRIPTION = (
    "Submit your case that the task is complete. Call this exactly "
    "once, when the work is done and verified — it ends your turn and "
    "hands the case to an independent reviewer. Present the case "
    "honestly: map each acceptance criterion to the change that "
    "satisfies it, name any work-arounds you had to make, and flag "
    "anything you're unsure about. This is not a place to argue past "
    "a failing test — the mechanical checks run regardless."
)

# Function-tool definition for `submit_case`, in the `tools=` list format.
SUBMIT_CASE_TOOL: dict[str, Any] = {
    "type": "function",
    "function": {
        "name": NAME_SUBMIT_CASE,
        "description": _SUBMIT_CASE_DESCRIPTION,
        "parameters": {
            "type": "object",
            "additionalProperties": False,
            "required": ["summary", "ac_coverage"],
            "properties": _CASE_PROPERTIES,
        },
    },
}
# Heuristic: treat a string as a pointer unless it is plainly a prose sentence.
# Only the obviously-prose case is rejected (the sketch's mitigation #1), so a
# terse pointer lacking a classic path token (e.g. "main() in __main__") passes.
_POINTER_RE = re.compile(r"[/]|::|\.[A-Za-z]{1,5}\b|\w+\.\w+|:\d+")


def _looks_like_pointer(s: str) -> bool:
    """Return True when `s` plausibly cites a location rather than narrating.

    A string qualifies if it contains any token matched by `_POINTER_RE`
    (a slash, `::`, a short dotted suffix, a dotted name, or `:<digits>`),
    or, failing that, if it has fewer than eight whitespace-separated words.
    """
    has_pointer_token = _POINTER_RE.search(s) is not None
    return has_pointer_token or len(s.split()) < 8
def _normalize(args: dict[str, Any]) -> dict[str, Any]:
    """Tidy the optional list fields ahead of validation (mirrors verdict._normalize).

    Operates on a shallow copy, so the caller's mapping is not modified. For
    each of `work_arounds` and `uncertainties`:

    * absent or None becomes `[]`;
    * a list loses its empty / whitespace-only strings (an empty work-around
      is noise, not a claim), while every other element is kept as-is;
    * any other value is passed through untouched.

    No cross-field heuristics are applied.
    """
    cleaned = dict(args)
    for field in ("work_arounds", "uncertainties"):
        value = cleaned.get(field)
        if value is None:
            cleaned[field] = []
            continue
        if isinstance(value, list):
            # Keep non-strings and non-blank strings; only blank strings go.
            cleaned[field] = [
                item
                for item in value
                if not isinstance(item, str) or item.strip()
            ]
    return cleaned
def _validate_ac_entry(i: int, entry: Any) -> str | None:
    """Return the first problem found in `ac_coverage[i]`, or None if it is acceptable."""
    where = f"ac_coverage[{i}]"
    if not isinstance(entry, dict):
        return f"{where} must be an object"
    unknown = set(entry) - _AC_KEYS
    if unknown:
        return f"{where} has unexpected keys: {sorted(unknown)}"
    # Both required fields must be non-blank strings; checked in schema order.
    for field in ("criterion", "addressed_by"):
        text = entry.get(field)
        if not (isinstance(text, str) and text.strip()):
            return f"{where} missing non-empty '{field}'"
    if not _looks_like_pointer(entry.get("addressed_by")):
        return (
            f"{where} 'addressed_by' reads as prose, not a "
            "file:symbol pointer — cite where the work lives "
            "(e.g. 'todo_cli/__main__.py:main()'), don't describe it"
        )
    evidence = entry.get("evidence")
    if not (evidence is None or isinstance(evidence, str)):
        return f"{where} 'evidence' must be a string"
    return None


def _validate(args: dict[str, Any]) -> str | None:
    """Return the first schema violation, or None. Single-error by design.

    Checks run in a fixed order: unexpected top-level keys, `summary`,
    `ac_coverage` (each entry in turn), the two optional string lists, and
    finally the `work_arounds` cap.
    """
    unknown = set(args) - _TOP_KEYS
    if unknown:
        return f"unexpected keys: {sorted(unknown)}"

    summary = args.get("summary")
    if summary is None:
        return "missing required field 'summary'"
    if not (isinstance(summary, str) and summary.strip()):
        return "'summary' must be a non-empty string"

    coverage = args.get("ac_coverage")
    if coverage is None:
        return "missing required field 'ac_coverage'"
    if not isinstance(coverage, list):
        return "'ac_coverage' must be a list"
    for i, entry in enumerate(coverage):
        problem = _validate_ac_entry(i, entry)
        if problem is not None:
            return problem

    for key in ("work_arounds", "uncertainties"):
        values = args.get(key, [])
        if not isinstance(values, list) or not all(isinstance(s, str) for s in values):
            return f"'{key}' must be a list of strings"

    if len(args.get("work_arounds", [])) > WORK_AROUNDS_CAP:
        return (
            f"too many 'work_arounds' (max {WORK_AROUNDS_CAP}); triage to the "
            "ones that actually matter"
        )
    return None
def parse_case(
    msg: dict[str, Any],
) -> tuple[dict[str, Any] | None, str | None]:
    """Pick the first valid `submit_case` tool call from an assistant message.

    Every `submit_case` call in `msg["tool_calls"]` is tried in order: its
    arguments are decoded (a dict is used as-is, a string is parsed as JSON),
    normalized, and validated. The first call that validates wins.

    Malformed entries are skipped like any other non-`submit_case` call
    instead of raising: a tool call that is not a mapping, or whose
    `function` is not a mapping, cannot be a `submit_case` call.

    Returns:
        `(case_dict, None)` on success or `(None, error_for_model)` on
        failure. The error is forwarded to the model as `tool_result`
        content so the next attempt can self-correct — the
        `verdict.parse_verdict` pattern.
    """
    tool_calls = msg.get("tool_calls") or []
    candidate_errors: list[str] = []
    saw = False
    for tc in tool_calls:
        # Defensive: a non-dict entry would otherwise crash on `.get` and the
        # model would never receive a usable error message.
        if not isinstance(tc, dict):
            continue
        fn = tc.get("function")
        if not isinstance(fn, dict) or fn.get("name") != NAME_SUBMIT_CASE:
            continue
        saw = True
        raw = fn.get("arguments")
        if isinstance(raw, dict):
            args = raw
        elif isinstance(raw, str):
            try:
                args = json.loads(raw)
            except json.JSONDecodeError as exc:
                candidate_errors.append(f"JSON parse: {exc}")
                continue
        else:
            candidate_errors.append(
                f"arguments was {type(raw).__name__}, expected str or dict"
            )
            continue
        # Valid JSON that is not an object (e.g. a list or a number).
        if not isinstance(args, dict):
            candidate_errors.append("arguments did not parse to a JSON object")
            continue
        args = _normalize(args)
        err = _validate(args)
        if err is None:
            return args, None
        candidate_errors.append(err)
    if not saw:
        return None, (
            "No `submit_case` tool call in your response. When the task is "
            "complete and verified, call `submit_case` to present it."
        )
    # Every `submit_case` attempt failed: report all reasons at once.
    return None, (
        "Your `submit_case` call could not be accepted: "
        + " | ".join(candidate_errors)
        + ". Call `submit_case` again with a corrected payload."
    )
def format_case_section(case: dict[str, Any]) -> str:
    """Render the worker's case as a markdown section for the evaluator's prompt.

    The output always starts with the heading and a `Summary:` line. It is
    followed by up to three optional parts, each preceded by a blank line and
    omitted when its field is missing or empty: the AC coverage bullets, the
    claimed work-arounds, and the flagged uncertainties.
    """
    summary = (case.get("summary") or "").strip()
    out = ["## Worker's case", "", f"Summary: {summary}"]

    coverage = case.get("ac_coverage") or []
    if coverage:
        out.extend(["", "AC coverage (worker's claim):"])
        for item in coverage:
            criterion = (item.get("criterion") or "").strip()
            pointer = (item.get("addressed_by") or "").strip()
            evidence = (item.get("evidence") or "").strip()
            suffix = f" [evidence: {evidence}]" if evidence else ""
            out.append(f"- {criterion} → {pointer}{suffix}")

    # The two free-text lists share one layout: heading, then one bullet each.
    bullet_sections = (
        ("work_arounds", "Work-arounds the worker claims (treat skeptically):"),
        ("uncertainties", "Uncertainties the worker flagged:"),
    )
    for key, heading in bullet_sections:
        entries = case.get(key) or []
        if entries:
            out.extend(["", heading])
            out.extend(f"- {text}" for text in entries)

    return "\n".join(out)

tilth/cli.py ADDED Viewed

@@ -0,0 +1,242 @@
+"""Verb-routed CLI entry point.
+Subcommands:
+    tilth init
+    tilth run       <feature-dir>
+    tilth resume    [<session_id>]
+    tilth reset     [<session_id>] [-y]
+    tilth visualize [<session_id>] [--port N]
+    tilth info      [<session_id>]
+    tilth config
+The feature is authored as markdown in a feature directory (conventionally
+`<repo>/.tilth/<feature>/`): an `overview.md` plus one `T-NNN-*.md` per task —
+see `tilth/tasks.py`. There is no separate prep step: `tilth run` is given that
+directory's path, derives the enclosing git repo, creates a fresh session +
+worktree, and runs the Ralph loop.
+Dispatch:
+  1. No args at all     → print config locations + top-level help, exit 1.
+  2. First arg is `-h` or `--help` → print config locations + help, exit 0.
+  3. A known subcommand → parse with the subparser and dispatch.
+  4. Anything else      → argparse usage error.
+"""
+from __future__ import annotations
+import sys
+from dotenv import load_dotenv
+from rich.console import Console
+from tilth import loop, paths
# Module-level rich console used for this module's printed output.
console = Console()
# Names of the subcommands registered by `_build_parser`.
SUBCOMMANDS = frozenset({"init", "run", "resume", "reset", "visualize", "info", "config"})
def _load_env() -> None:
    """Load the resolved .env file (first hit in the search order), when one exists.

    The file is chosen by `paths.resolve_env_file()` and handed to
    `load_dotenv` with `override=False`. A missing file is deliberately not
    an error — `tilth init` and `tilth visualize` don't need provider config.
    """
    if (env_file := paths.resolve_env_file()) is not None:
        load_dotenv(env_file, override=False)
def _print_config_locations() -> None:
    """Print the resolved Tilth home and .env paths (shown with top-level help).

    A path that does not exist yet is still printed, followed by a dimmed
    hint to run `tilth init`. For a missing .env the path shown is the one
    from `paths.env_file_write_target()`.
    """
    # Hint appended to any location that is missing.
    not_found = "  [dim](not found — run [bold]tilth init[/bold])[/dim]"

    home = paths.tilth_home()
    env_file = paths.resolve_env_file()
    write_target = paths.env_file_write_target()

    console.print("[bold]Config locations[/bold]")

    home_line = f"  Tilth home:  {home}"
    if not home.is_dir():
        home_line += not_found
    console.print(home_line, soft_wrap=True)

    if env_file is None:
        env_line = f"  .env:        {write_target}" + not_found
    else:
        env_line = f"  .env:        {env_file}"
    console.print(env_line, soft_wrap=True)

    console.print()
def _print_help(parser) -> None:
    """Print the config-locations banner, then `parser`'s help text."""
    _print_config_locations()
    parser.print_help()
def _build_parser():
    """Construct the top-level `tilth` argument parser with every subcommand.

    Subcommands are registered in this order: init, run, resume, reset,
    visualize, info, config. The chosen subcommand name is stored in
    `args.command`, which stays None when no subcommand is given.
    """
    import argparse
    from pathlib import Path

    root = argparse.ArgumentParser(
        prog="tilth",
        description="Tilth — a minimal long-running agent harness.",
    )
    commands = root.add_subparsers(dest="command", metavar="<command>")

    def add_command(name, summary, details):
        # `summary` is the one-liner in the top-level help; `details` is the
        # description shown by `tilth <name> -h`.
        return commands.add_parser(name, help=summary, description=details)

    def add_optional_session_id(cmd, help_text):
        # resume / reset / visualize / info all take the same optional positional.
        cmd.add_argument("session_id", nargs="?", help=help_text)

    add_command(
        "init",
        "Scaffold ~/.tilth so the installed tool runs from anywhere.",
        (
            "Create the Tilth home directory ($TILTH_HOME, default ~/.tilth) with "
            "a sessions/ dir and a .env from the template. Does not overwrite an "
            "existing .env. Run once after `uv tool install`."
        ),
    )

    run_cmd = add_command(
        "run",
        "Run the worker loop against a feature directory.",
        (
            "Read a feature from the given directory (overview.md + one T-NNN-*.md "
            "per task), derive its git repo, create a fresh session + worktree, and "
            "run the Ralph loop. Fails fast with the templates if the directory has "
            "no feature."
        ),
    )
    run_cmd.add_argument(
        "feature_dir",
        type=Path,
        help="Path to the feature directory (e.g. <repo>/.tilth/<feature>/) "
        "holding overview.md + T-NNN-*.md.",
    )

    resume_cmd = add_command(
        "resume",
        "Resume an interrupted session.",
        (
            "Resume a session that stopped on wall-clock / token-cap / "
            "interrupt / error. Trailing failed tasks are flipped back to "
            "pending and their FAILED placeholder commit is unwound."
        ),
    )
    add_optional_session_id(
        resume_cmd, "Session ID to resume; defaults to the latest session."
    )

    reset_cmd = add_command(
        "reset",
        "Tear down a session (worktree, branch, session dir).",
        (
            "Remove a session's worktree (even if dirty), delete its "
            "session/<id> branch from the source repo, and drop sessions/<id>/."
        ),
    )
    add_optional_session_id(
        reset_cmd, "Session ID to reset; defaults to the latest session."
    )
    reset_cmd.add_argument(
        "-y", "--yes", action="store_true", help="Skip the confirmation prompt."
    )

    viz_cmd = add_command(
        "visualize",
        "Serve the live session viewer (reads sessions/ in near-realtime).",
        (
            "Start a read-only local web app over the sessions/ directory: an "
            "index of every run, and a per-session chat view that tails "
            "events.jsonl while a run is active. Loopback-only."
        ),
    )
    add_optional_session_id(
        viz_cmd,
        "Session ID to deep-link on startup; defaults to the latest session.",
    )
    viz_cmd.add_argument(
        "--port",
        type=int,
        default=8765,
        help="Port to bind on 127.0.0.1 (default: 8765).",
    )

    info_cmd = add_command(
        "info",
        "Show sessions, or one session's full detail (incl. worktree location).",
        (
            "Without an id: list every session newest-first with status, task "
            "progress, and tokens. With an id: the full dossier — source repo, "
            "feature, the worktree folder and its git admin dir (the `.git` "
            "mapping), branch, and registration health. Read-only."
        ),
    )
    add_optional_session_id(
        info_cmd, "Session ID to detail; omit to list all sessions."
    )

    add_command(
        "config",
        "Show resolved provider config and run caps (API keys masked).",
        (
            "Print the configuration the harness would run with — worker and "
            "evaluator endpoints/models, the per-task and per-run caps, and "
            "context files — plus which .env it resolved. API keys are masked. "
            "Works with a partial config; flags what's missing."
        ),
    )

    return root
def _dispatch(args) -> int:
    """Invoke the `loop` handler for the parsed subcommand and return its result.

    Each subcommand maps to one `loop.do_*_cmd` function, called with the
    arguments its subparser defines.

    Raises:
        AssertionError: if `args.command` is not a known subcommand.
    """
    # Handlers are wrapped in lambdas so nothing on `loop` or `args` is
    # touched until the matching entry is actually called.
    handlers = {
        "init": lambda: loop.do_init_cmd(),
        "run": lambda: loop.do_run_cmd(args.feature_dir),
        "resume": lambda: loop.do_resume_cmd(args.session_id),
        "reset": lambda: loop.do_reset_cmd(args.session_id, args.yes),
        "visualize": lambda: loop.do_visualize_cmd(args.session_id, port=args.port),
        "info": lambda: loop.do_info_cmd(args.session_id),
        "config": lambda: loop.do_config_cmd(),
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise AssertionError(f"unknown subcommand {args.command!r}")
    return handler()
def main() -> int:
    """Run the `tilth` CLI and return its process exit code.

    With no arguments, the config locations and top-level help are printed
    and 1 is returned. With `-h` / `--help` as the first argument, the same
    is printed and 0 is returned. Otherwise the arguments are parsed and the
    subcommand dispatched; if parsing yields no subcommand, help is printed
    and 1 is returned.
    """
    _load_env()
    # Re-resolve after the .env is loaded so a .env-provided $TILTH_SESSIONS_DIR
    # (or $TILTH_HOME) takes effect; loop.SESSIONS_DIR was set at import time.
    loop.SESSIONS_DIR = paths.sessions_dir()

    argv = sys.argv[1:]
    parser = _build_parser()

    wants_help = bool(argv) and argv[0] in {"-h", "--help"}
    if wants_help or not argv:
        _print_help(parser)
        return 0 if wants_help else 1

    args = parser.parse_args(argv)
    if args.command is not None:
        return _dispatch(args)
    _print_help(parser)
    return 1


if __name__ == "__main__":
    sys.exit(main())