PyPI - spec-runner - Versions diffs - 2.3.0__tar.gz → 2.4.0__tar.gz - Mend

spec-runner 2.3.0__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103) hide show

{spec_runner-2.3.0/src/spec_runner.egg-info → spec_runner-2.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spec-runner
-Version: 2.3.0
+Version: 2.4.0
 Summary: Task automation from markdown specs via Claude CLI
 Author: Andrei
 License-Expression: MIT
@@ -134,7 +134,8 @@ Tasks are defined in `spec/tasks.md`:
 # Execution
 spec-runner run                            # Execute next ready task
 spec-runner run --task=TASK-001            # Execute specific task
-spec-runner run --all                      # Execute all ready tasks
+spec-runner run --all                      # Execute all ready tasks (resets failed→pending by default)
+spec-runner run --all --no-reset-failed    # Keep failed tasks sticky (skip the default reset)
 spec-runner run --all --hitl-review        # Interactive HITL approval gate
 spec-runner run --force                    # Skip lock check (stale lock)
 spec-runner run --tui                      # Execute with live TUI dashboard
@@ -177,6 +178,14 @@ spec-runner report --json                  # JSON matrix output
 # Planning
 spec-runner plan "description"             # Interactive task planning
 spec-runner plan --full "description"      # Generate full spec (requirements + design + tasks)
+spec-runner plan --full --from-file spec.md  # Read the description from a file instead of an arg
+# Diagnostics
+spec-runner doctor                              # Probe the configured CLI/model (real mini-task)
+spec-runner doctor --cli=codex --model=gpt-5.4  # Probe an ad-hoc CLI+model
+spec-runner doctor --with-review                # Also probe the review stage
+spec-runner doctor --json --yes                 # Machine-readable, no confirmation (CI)
+spec-runner doctor --strict                     # Exit non-zero on DEGRADED too
 # Integration
 spec-runner mcp                            # Launch MCP server (stdio)
@@ -368,13 +377,34 @@ paths:
 | CLI | Auto-detected | Example template |
 |-----|--------------|------------------|
 | Claude | Yes | `{cmd} -p {prompt} --model {model}` |
-| Codex | Yes | `{cmd} -p {prompt} --model {model}` |
+| Codex | Yes | `{cmd} exec -m {model} {prompt}` (codex's `-p` is `--profile`, not the prompt) |
 | OpenCode ([sst/opencode](https://opencode.ai)) | Yes | `{cmd} run --model {model} {prompt}` |
 | Pi Agent ([pi.dev](https://pi.dev)) | Yes (basename match) | `{cmd} -p --model {model} {prompt}` |
 | Ollama | Yes | `{cmd} run {model} {prompt}` |
 | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
 | Custom | Use template | `{cmd} --prompt {prompt}` |
+> **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
+> skills, per-stage tool control and a read-only review gate) using only config and a small
+> script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
+> [examples/pi-loop/](examples/pi-loop/).
+### Checking CLI/model compatibility
+`spec-runner doctor` runs a real one-task probe through the actual execution
+path and reports, per capability, whether your CLI/model works:
+- **invocation** — the command runs and authenticates
+- **completion_marker** — the model prints `TASK_COMPLETE` (not all models do)
+- **task_action** — the model actually performs the work
+- **cost_tracking** — token/cost parsing works (needed for `costs`/`--budget`)
+- **error_classification** — failures are classified (diagnostic)
+- **review** *(with `--with-review`)* — the reviewer prints `REVIEW_PASSED`/`FAILED`
+Verdict: **READY** / **DEGRADED** (works, but something like cost tracking is
+unavailable) / **BROKEN**. It makes real, billable model calls (capped by
+`--budget`, default $0.50) and asks for confirmation unless `--yes`.
 ## Project Structure
 ```
@@ -391,6 +421,8 @@ project/
 │       ├── cli_info.py          # Status, costs, logs, validate, verify, report, TUI, MCP
 │       ├── cli_plan.py          # Interactive planning command
 │       ├── execution.py         # Task execution + retry logic
+│       ├── errors.py            # CLI stderr → human-readable failure reasons
+│       ├── stages.py            # Per-task sub-stage tracking (StageReporter)
 │       ├── config.py            # ExecutorConfig + YAML loading
 │       ├── state.py             # SQLite state persistence + degraded-mode fallback
 │       ├── prompt.py            # Prompt building + templates
@@ -407,7 +439,8 @@ project/
 │       ├── report.py            # Traceability matrix generation
 │       ├── validate.py          # Config + task validation
 │       ├── plugins.py           # Plugin discovery + hooks
-│       ├── logging.py           # Structured logging (structlog)
+│       ├── logging.py           # Structured logging (structlog back-compat shim)
+│       ├── obs.py               # OTel JSONL observability emitter (shared contract)
 │       ├── events.py            # EventBus for streaming to TUI
 │       ├── notifications.py     # Telegram + webhook notifications
 │       ├── tui.py               # Textual TUI dashboard

{spec_runner-2.3.0 → spec_runner-2.4.0}/README.md RENAMED Viewed

@@ -99,7 +99,8 @@ Tasks are defined in `spec/tasks.md`:
 # Execution
 spec-runner run                            # Execute next ready task
 spec-runner run --task=TASK-001            # Execute specific task
-spec-runner run --all                      # Execute all ready tasks
+spec-runner run --all                      # Execute all ready tasks (resets failed→pending by default)
+spec-runner run --all --no-reset-failed    # Keep failed tasks sticky (skip the default reset)
 spec-runner run --all --hitl-review        # Interactive HITL approval gate
 spec-runner run --force                    # Skip lock check (stale lock)
 spec-runner run --tui                      # Execute with live TUI dashboard
@@ -142,6 +143,14 @@ spec-runner report --json                  # JSON matrix output
 # Planning
 spec-runner plan "description"             # Interactive task planning
 spec-runner plan --full "description"      # Generate full spec (requirements + design + tasks)
+spec-runner plan --full --from-file spec.md  # Read the description from a file instead of an arg
+# Diagnostics
+spec-runner doctor                              # Probe the configured CLI/model (real mini-task)
+spec-runner doctor --cli=codex --model=gpt-5.4  # Probe an ad-hoc CLI+model
+spec-runner doctor --with-review                # Also probe the review stage
+spec-runner doctor --json --yes                 # Machine-readable, no confirmation (CI)
+spec-runner doctor --strict                     # Exit non-zero on DEGRADED too
 # Integration
 spec-runner mcp                            # Launch MCP server (stdio)
@@ -333,13 +342,34 @@ paths:
 | CLI | Auto-detected | Example template |
 |-----|--------------|------------------|
 | Claude | Yes | `{cmd} -p {prompt} --model {model}` |
-| Codex | Yes | `{cmd} -p {prompt} --model {model}` |
+| Codex | Yes | `{cmd} exec -m {model} {prompt}` (codex's `-p` is `--profile`, not the prompt) |
 | OpenCode ([sst/opencode](https://opencode.ai)) | Yes | `{cmd} run --model {model} {prompt}` |
 | Pi Agent ([pi.dev](https://pi.dev)) | Yes (basename match) | `{cmd} -p --model {model} {prompt}` |
 | Ollama | Yes | `{cmd} run {model} {prompt}` |
 | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
 | Custom | Use template | `{cmd} --prompt {prompt}` |
+> **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
+> skills, per-stage tool control and a read-only review gate) using only config and a small
+> script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
+> [examples/pi-loop/](examples/pi-loop/).
+### Checking CLI/model compatibility
+`spec-runner doctor` runs a real one-task probe through the actual execution
+path and reports, per capability, whether your CLI/model works:
+- **invocation** — the command runs and authenticates
+- **completion_marker** — the model prints `TASK_COMPLETE` (not all models do)
+- **task_action** — the model actually performs the work
+- **cost_tracking** — token/cost parsing works (needed for `costs`/`--budget`)
+- **error_classification** — failures are classified (diagnostic)
+- **review** *(with `--with-review`)* — the reviewer prints `REVIEW_PASSED`/`FAILED`
+Verdict: **READY** / **DEGRADED** (works, but something like cost tracking is
+unavailable) / **BROKEN**. It makes real, billable model calls (capped by
+`--budget`, default $0.50) and asks for confirmation unless `--yes`.
 ## Project Structure
 ```
@@ -356,6 +386,8 @@ project/
 │       ├── cli_info.py          # Status, costs, logs, validate, verify, report, TUI, MCP
 │       ├── cli_plan.py          # Interactive planning command
 │       ├── execution.py         # Task execution + retry logic
+│       ├── errors.py            # CLI stderr → human-readable failure reasons
+│       ├── stages.py            # Per-task sub-stage tracking (StageReporter)
 │       ├── config.py            # ExecutorConfig + YAML loading
 │       ├── state.py             # SQLite state persistence + degraded-mode fallback
 │       ├── prompt.py            # Prompt building + templates
@@ -372,7 +404,8 @@ project/
 │       ├── report.py            # Traceability matrix generation
 │       ├── validate.py          # Config + task validation
 │       ├── plugins.py           # Plugin discovery + hooks
-│       ├── logging.py           # Structured logging (structlog)
+│       ├── logging.py           # Structured logging (structlog back-compat shim)
+│       ├── obs.py               # OTel JSONL observability emitter (shared contract)
 │       ├── events.py            # EventBus for streaming to TUI
 │       ├── notifications.py     # Telegram + webhook notifications
 │       ├── tui.py               # Textual TUI dashboard

{spec_runner-2.3.0 → spec_runner-2.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spec-runner"
-version = "2.3.0"
+version = "2.4.0"
 description = "Task automation from markdown specs via Claude CLI"
 readme = "README.md"
 requires-python = ">=3.10"

{spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/cli.py RENAMED Viewed

@@ -202,9 +202,7 @@ def _run_tasks(args, config: ExecutorConfig):
             tasks = parse_tasks(config.tasks_file)
         # v2.3.0: reset failed-task state on `run --all` unless opted out.
-        reset_enabled = getattr(args, "all", False) and not getattr(
-            args, "no_reset_failed", False
-        )
+        reset_enabled = getattr(args, "all", False) and not getattr(args, "no_reset_failed", False)
         previously_failed: set[str] = set()  # used by T17 second-pass detection
         if reset_enabled:
             previously_failed = state.reset_failed_to_pending()
@@ -416,8 +414,7 @@ def _run_tasks(args, config: ExecutorConfig):
                     else:
                         stop_reason = "max_consecutive_failures"
                         stop_detail = (
-                            f"{state.consecutive_failures}/"
-                            f"{config.max_consecutive_failures}"
+                            f"{state.consecutive_failures}/{config.max_consecutive_failures}"
                         )
                     logger.warning("Stopping: too many consecutive failures")
                     break
@@ -460,8 +457,7 @@ def _run_tasks(args, config: ExecutorConfig):
                     else:
                         stop_reason = "max_consecutive_failures"
                         stop_detail = (
-                            f"{state.consecutive_failures}/"
-                            f"{config.max_consecutive_failures}"
+                            f"{state.consecutive_failures}/{config.max_consecutive_failures}"
                         )
                     logger.warning("Stopping: too many consecutive failures")
                     break
@@ -663,6 +659,25 @@ def cmd_watch(args: argparse.Namespace, config: ExecutorConfig) -> None:
         time.sleep(1)
+def cmd_doctor(args: argparse.Namespace, config: ExecutorConfig) -> None:
+    """Run the CLI/model compatibility probe and exit with its status code."""
+    from .doctor import run_doctor
+    code = run_doctor(
+        config,
+        cli=args.cli,
+        model=args.model,
+        with_review=args.with_review,
+        budget=args.budget,
+        timeout_min=getattr(args, "timeout", None),
+        assume_yes=args.yes,
+        strict=args.strict,
+        as_json=args.json,
+        keep=args.keep,
+    )
+    raise SystemExit(code)
 # === Main ===
@@ -853,7 +868,14 @@ def _build_parser() -> argparse.ArgumentParser:
     # plan
     plan_parser = subparsers.add_parser("plan", parents=[common], help="Interactive task planning")
-    plan_parser.add_argument("description", help="Feature description")
+    plan_parser.add_argument(
+        "description", nargs="?", default=None, help="Feature description (or use --from-file)"
+    )
+    plan_parser.add_argument(
+        "--from-file",
+        metavar="PATH",
+        help="Read the feature description from a file instead of the positional argument",
+    )
     plan_parser.add_argument(
         "--full",
         action="store_true",
@@ -943,6 +965,28 @@ def _build_parser() -> argparse.ArgumentParser:
     # mcp
     subparsers.add_parser("mcp", parents=[common], help="Launch read-only MCP server")
+    # doctor
+    doctor_parser = subparsers.add_parser(
+        "doctor", parents=[common], help="Probe CLI/model compatibility (real mini-task)"
+    )
+    doctor_parser.add_argument("--cli", help="Override the CLI command (claude/codex/pi/...)")
+    doctor_parser.add_argument("--model", help="Override the model (executor + review)")
+    doctor_parser.add_argument(
+        "--with-review",
+        action="store_true",
+        help="Also probe the review stage (2nd model call)",
+    )
+    doctor_parser.add_argument(
+        "--yes", "-y", action="store_true", help="Skip the cost-gate confirmation"
+    )
+    doctor_parser.add_argument(
+        "--strict", action="store_true", help="Exit non-zero on DEGRADED too"
+    )
+    doctor_parser.add_argument("--json", action="store_true", help="Machine-readable output")
+    doctor_parser.add_argument("--keep", action="store_true", help="Keep the scratch workspace")
+    # --budget is inherited from common (default None); override default to 0.50 for doctor
+    doctor_parser.set_defaults(budget=0.5)
     # task (unified: replaces spec-task binary)
     task_parser = subparsers.add_parser(
         "task", help="Task management (list, show, start, done, graph, sync)"
@@ -1038,6 +1082,7 @@ def main():
         "tui": cmd_tui,
         "watch": cmd_watch,
         "mcp": cmd_mcp,
+        "doctor": cmd_doctor,
     }
     # Handle unified task subcommand

{spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/cli_plan.py RENAMED Viewed

@@ -4,6 +4,7 @@ import re
 import subprocess
 import sys
 from datetime import datetime
+from pathlib import Path
 from .config import ExecutorConfig
 from .logging import get_logger
@@ -23,6 +24,32 @@ from .task import (
 logger = get_logger("cli")
+def resolve_plan_description(description: str | None, from_file: str | None) -> str:
+    """Resolve the plan description from --from-file (preferred) or the positional
+    argument. Exits with an error if neither is usable.
+    Args:
+        description: the positional description (may be None when --from-file is used).
+        from_file: path to a file whose contents are the description.
+    """
+    if from_file:
+        path = Path(from_file)
+        if not path.is_file():
+            raise SystemExit(f"plan --from-file: not a readable file: {from_file}")
+        try:
+            text = path.read_text(encoding="utf-8").strip()
+        except UnicodeDecodeError as e:
+            raise SystemExit(f"plan --from-file: not valid UTF-8 text: {from_file}") from e
+        except OSError as e:
+            raise SystemExit(f"plan --from-file: cannot read {from_file}: {e}") from e
+        if not text:
+            raise SystemExit(f"plan --from-file: file is empty: {from_file}")
+        return text
+    if description and description.strip():
+        return description
+    raise SystemExit("plan: provide a description argument or --from-file PATH")
 def cmd_plan(args, config: ExecutorConfig):
     """Interactive task planning via Claude.
@@ -30,7 +57,7 @@ def cmd_plan(args, config: ExecutorConfig):
     requirements, design, and tasks files from a description.
     """
-    description = args.description
+    description = resolve_plan_description(args.description, getattr(args, "from_file", None))
     if getattr(args, "full", False):
         from .prompt import build_generation_prompt, parse_spec_marker

{spec_runner-2.3.0 → spec_runner-2.4.0}/src/spec_runner/config.py RENAMED Viewed

@@ -153,6 +153,7 @@ class ExecutorConfig:
     create_git_branch: bool = True  # Create branch on start
     auto_commit: bool = True  # Auto-commit on success
     main_branch: str = ""  # Main branch name (empty = auto-detect: main/master)
+    sync_deps: bool = True  # Run `uv sync` in pre_start_hook (doctor disables this)
     # Code review
     run_review: bool = True  # Run code review after task completion
@@ -377,6 +378,7 @@ def load_config_from_yaml(config_path: Path | None = None) -> dict:
             "claude_model": executor_config.get("claude_model"),
             "skip_permissions": executor_config.get("skip_permissions"),
             "create_git_branch": pre_start.get("create_git_branch"),
+            "sync_deps": pre_start.get("sync_deps"),
             "main_branch": executor_config.get("main_branch"),
             "run_tests_on_done": post_done.get("run_tests"),
             "run_lint_on_done": post_done.get("run_lint"),

spec-runner 2.3.0__tar.gz → 2.4.0__tar.gz

spec-runner 2.3.0__tar.gz → 2.4.0__tar.gz