PyPI - spec-runner - Versions diffs - 2.2.2__tar.gz → 2.3.1__tar.gz - Mend

spec-runner 2.2.2__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

{spec_runner-2.2.2/src/spec_runner.egg-info → spec_runner-2.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spec-runner
-Version: 2.2.2
+Version: 2.3.1
 Summary: Task automation from markdown specs via Claude CLI
 Author: Andrei
 License-Expression: MIT
@@ -134,7 +134,8 @@ Tasks are defined in `spec/tasks.md`:
 # Execution
 spec-runner run                            # Execute next ready task
 spec-runner run --task=TASK-001            # Execute specific task
-spec-runner run --all                      # Execute all ready tasks
+spec-runner run --all                      # Execute all ready tasks (resets failed→pending by default)
+spec-runner run --all --no-reset-failed    # Keep failed tasks sticky (skip the default reset)
 spec-runner run --all --hitl-review        # Interactive HITL approval gate
 spec-runner run --force                    # Skip lock check (stale lock)
 spec-runner run --tui                      # Execute with live TUI dashboard
@@ -375,6 +376,11 @@ paths:
 | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
 | Custom | Use template | `{cmd} --prompt {prompt}` |
+> **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
+> skills, per-stage tool control and a read-only review gate) using only config and a small
+> script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
+> [examples/pi-loop/](examples/pi-loop/).
 ## Project Structure
 ```

{spec_runner-2.2.2 → spec_runner-2.3.1}/README.md RENAMED Viewed

@@ -99,7 +99,8 @@ Tasks are defined in `spec/tasks.md`:
 # Execution
 spec-runner run                            # Execute next ready task
 spec-runner run --task=TASK-001            # Execute specific task
-spec-runner run --all                      # Execute all ready tasks
+spec-runner run --all                      # Execute all ready tasks (resets failed→pending by default)
+spec-runner run --all --no-reset-failed    # Keep failed tasks sticky (skip the default reset)
 spec-runner run --all --hitl-review        # Interactive HITL approval gate
 spec-runner run --force                    # Skip lock check (stale lock)
 spec-runner run --tui                      # Execute with live TUI dashboard
@@ -340,6 +341,11 @@ paths:
 | llama-cli | Yes | `{cmd} -m {model} -p {prompt} --no-display-prompt` |
 | Custom | Use template | `{cmd} --prompt {prompt}` |
+> **Full pi-driven loop:** `pi` can run the entire dev → review → test cycle (with native
+> skills, per-stage tool control and a read-only review gate) using only config and a small
+> script — no core code. See [docs/pi-workflow.md](docs/pi-workflow.md) and the runnable
+> [examples/pi-loop/](examples/pi-loop/).
 ## Project Structure
 ```

{spec_runner-2.2.2 → spec_runner-2.3.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spec-runner"
-version = "2.2.2"
+version = "2.3.1"
 description = "Task automation from markdown specs via Claude CLI"
 readme = "README.md"
 requires-python = ">=3.10"

{spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/cli.py RENAMED Viewed

@@ -201,6 +201,19 @@ def _run_tasks(args, config: ExecutorConfig):
             logger.warning("Recovered stale tasks", task_ids=recovered)
             tasks = parse_tasks(config.tasks_file)
+        # v2.3.0: reset failed-task state on `run --all` unless opted out.
+        reset_enabled = getattr(args, "all", False) and not getattr(
+            args, "no_reset_failed", False
+        )
+        previously_failed: set[str] = set()  # used by T17 second-pass detection
+        if reset_enabled:
+            previously_failed = state.reset_failed_to_pending()
+            state.consecutive_failures = 0
+            state.clear_second_pass_fails()
+            state._save()
+        stop_reason: str = "completed"  # used by T18 stop-reason capture
+        stop_detail: str = ""  # used by T18 stop-reason capture
         # Pre-run validation
         from .validate import format_results, validate_all
@@ -255,6 +268,8 @@ def _run_tasks(args, config: ExecutorConfig):
             logger.info("No tasks ready to execute")
             if getattr(args, "json_result", False):
                 print(json.dumps({"tasks": [], "message": "No tasks ready to execute"}))
+            state.set_meta("last_run_stop_reason", stop_reason)
+            state.set_meta("last_run_stop_detail", stop_detail)
             return
         # --dry-run: show what would execute and exit
@@ -373,11 +388,37 @@ def _run_tasks(args, config: ExecutorConfig):
                 result = run_with_retries(task, config, state)
                 last_activity = time.monotonic()
+                # v2.3.0: detect tasks that fail again on a second pass.
+                # Use the persisted task status (set to "failed" when retries
+                # are exhausted) rather than `result is False`, because the
+                # default on_task_failure="skip" mode returns "SKIP" for a
+                # fully-failed task — so a result-based check would miss it.
+                # Must run BEFORE the SKIP `continue` below, which short-circuits.
+                if (
+                    task.id in previously_failed
+                    and state.get_task_state(task.id).status == "failed"
+                ):
+                    log_progress(
+                        f"💡 [{task.id}] repeated failure — review logs at "
+                        f"{config.logs_dir}/{task.id}-*.log"
+                    )
+                    state.add_second_pass_fail(task.id)
                 # "SKIP" means continue to next task
                 if result == "SKIP":
                     continue
                 if result is False and state.should_stop():
+                    last = state.most_recent_failed_attempt()
+                    if last and last.error_kind and last.error_kind != "unknown":
+                        stop_reason = f"error_{last.error_kind}"
+                        stop_detail = last.error or ""
+                    else:
+                        stop_reason = "max_consecutive_failures"
+                        stop_detail = (
+                            f"{state.consecutive_failures}/"
+                            f"{config.max_consecutive_failures}"
+                        )
                     logger.warning("Stopping: too many consecutive failures")
                     break
         else:
@@ -392,13 +433,43 @@ def _run_tasks(args, config: ExecutorConfig):
                 result = run_with_retries(task, config, state)
+                # v2.3.0: detect tasks that fail again on a second pass.
+                # Use the persisted task status (set to "failed" when retries
+                # are exhausted) rather than `result is False`, because the
+                # default on_task_failure="skip" mode returns "SKIP" for a
+                # fully-failed task — so a result-based check would miss it.
+                # Must run BEFORE the SKIP `continue` below, which short-circuits.
+                if (
+                    task.id in previously_failed
+                    and state.get_task_state(task.id).status == "failed"
+                ):
+                    log_progress(
+                        f"💡 [{task.id}] repeated failure — review logs at "
+                        f"{config.logs_dir}/{task.id}-*.log"
+                    )
+                    state.add_second_pass_fail(task.id)
                 if result == "SKIP":
                     continue
                 if result is False and state.should_stop():
+                    last = state.most_recent_failed_attempt()
+                    if last and last.error_kind and last.error_kind != "unknown":
+                        stop_reason = f"error_{last.error_kind}"
+                        stop_detail = last.error or ""
+                    else:
+                        stop_reason = "max_consecutive_failures"
+                        stop_detail = (
+                            f"{state.consecutive_failures}/"
+                            f"{config.max_consecutive_failures}"
+                        )
                     logger.warning("Stopping: too many consecutive failures")
                     break
+        # v2.3.0: persist stop-reason for this run
+        state.set_meta("last_run_stop_reason", stop_reason)
+        state.set_meta("last_run_stop_detail", stop_detail)
         # Summary
         # Re-read tasks to get updated statuses after execution
         tasks = parse_tasks(config.tasks_file)
@@ -647,7 +718,11 @@ def _dispatch_task_command(args: argparse.Namespace) -> None:
         read_commands[task_cmd](args, tasks)
-def main():
+def _build_parser() -> argparse.ArgumentParser:
+    """Build and return the top-level argument parser.
+    Extracted from main() to allow programmatic use and testing.
+    """
     # Shared options available to every subcommand
     common = argparse.ArgumentParser(add_help=False)
     common.add_argument(
@@ -743,6 +818,12 @@ def main():
         action="store_true",
         help="Output structured JSON result per task (for Maestro interop)",
     )
+    run_parser.add_argument(
+        "--no-reset-failed",
+        action="store_true",
+        help="Do not reset failed→pending or clear consecutive_failures "
+        "at the start of `run --all` (default: reset enabled).",
+    )
     # status
     status_parser = subparsers.add_parser("status", parents=[common], help="Show execution status")
@@ -910,6 +991,11 @@ def main():
         "sync-from-gh", parents=[task_common], help="Sync GitHub Issues to tasks.md"
     )
+    return parser
+def main():
+    parser = _build_parser()
     args = parser.parse_args()
     if not args.command:

{spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/cli_info.py RENAMED Viewed

@@ -23,8 +23,9 @@ from .task import (
 logger = get_logger("cli")
-def cmd_status(args, config: ExecutorConfig):
-    """Execution status"""
+def print_status(config: ExecutorConfig) -> None:
+    """Print human-readable status to stdout."""
+    from . import __version__
     with ExecutorState(config) as state:
         # Parse tasks from tasks.md to cross-reference
@@ -32,29 +33,6 @@ def cmd_status(args, config: ExecutorConfig):
         if config.tasks_file.exists():
             all_tasks = parse_tasks(config.tasks_file)
-        # --json: output matching MCP server format
-        if getattr(args, "json_output", False):
-            completed = sum(1 for ts in state.tasks.values() if ts.status == "success")
-            failed = sum(1 for ts in state.tasks.values() if ts.status == "failed")
-            running = sum(1 for ts in state.tasks.values() if ts.status == "running")
-            cost = state.total_cost()
-            inp, out = state.total_tokens()
-            print(
-                json.dumps(
-                    {
-                        "total_tasks": len(all_tasks),
-                        "completed": completed,
-                        "failed": failed,
-                        "running": running,
-                        "not_started": len(all_tasks) - completed - failed - running,
-                        "total_cost": round(cost, 2),
-                        "input_tokens": inp,
-                        "output_tokens": out,
-                        "budget_usd": config.budget_usd,
-                    }
-                )
-            )
-            return
         total_in_spec = len(all_tasks)
         # Calculate statistics from actual task state
@@ -69,7 +47,21 @@ def cmd_status(args, config: ExecutorConfig):
         state_ids = set(state.tasks.keys())
         not_started = [t for t in all_tasks if t.id not in state_ids]
-        print("\n📊 spec-runner Status")
+        print(f"\n📊 spec-runner v{__version__}")
+        # Stop-reason warning from executor_meta
+        reason = state.get_meta("last_run_stop_reason")
+        detail = state.get_meta("last_run_stop_detail") or ""
+        if reason and reason != "completed":
+            if reason == "max_consecutive_failures":
+                human = f"max_consecutive_failures reached ({detail})"
+            elif reason.startswith("error_"):
+                kind = reason.removeprefix("error_")
+                human = f"{kind} — {detail}" if detail else kind
+            else:
+                human = reason
+            print(f"⚠️ Last run stopped: {human}")
         print(f"{'=' * 50}")
         print(f"Tasks in spec:         {total_in_spec}")
         print(f"Tasks completed:       {completed_tasks}")
@@ -101,6 +93,7 @@ def cmd_status(args, config: ExecutorConfig):
         # Tasks with attempts
         attempted = [ts for ts in state.tasks.values() if ts.attempts]
+        second_pass = state.get_second_pass_fails()
         if attempted:
             print("\n📝 Task History:")
             for ts in attempted:
@@ -111,16 +104,27 @@ def cmd_status(args, config: ExecutorConfig):
                 task_cost = state.task_cost(ts.task_id)
                 if task_cost > 0:
                     attempts_info += f", ${task_cost:.2f}"
-                print(f"   {icon} {ts.task_id}: {ts.status} ({attempts_info})")
+                # Stage tag on the task header line
+                stage_tag = ""
+                if ts.status == "failed" and ts.attempts and ts.attempts[-1].error_stage:
+                    stage_tag = f" [at: {ts.attempts[-1].error_stage}]"
+                print(f"   {icon} {ts.task_id}: {ts.status} ({attempts_info}){stage_tag}")
                 # Show review verdict from last attempt
                 if ts.attempts:
                     last_attempt = ts.attempts[-1]
                     if last_attempt.review_status and last_attempt.review_status != "skipped":
                         print(f"      Review: {last_attempt.review_status}")
+                # Kind tag on the error line
                 if ts.status == "failed" and ts.last_error:
-                    print(f"      Last error: {ts.last_error[:50]}...")
+                    kind = ts.attempts[-1].error_kind if ts.attempts else None
+                    kind_tag = f"[{kind}] " if kind else ""
+                    print(f"      Last error: {kind_tag}{ts.last_error[:50]}...")
                 elif ts.status == "running" and ts.last_error:
                     print(f"      ⚠️  Last attempt failed: {ts.last_error[:50]}...")
+                # Second-pass hint
+                if ts.status == "failed" and ts.task_id in second_pass:
+                    print("      💡 Repeated failure across runs — review:")
+                    print(f"         {config.logs_dir}/{ts.task_id}-*.log")
         # Show tasks not yet in executor state
         if not_started:
@@ -129,6 +133,40 @@ def cmd_status(args, config: ExecutorConfig):
                 print(f"   ⬜ {t.id}: {t.name}")
+def cmd_status(args, config: ExecutorConfig):
+    """Execution status"""
+    if getattr(args, "json_output", False):
+        with ExecutorState(config) as state:
+            # Parse tasks from tasks.md to cross-reference
+            all_tasks: list[Task] = []
+            if config.tasks_file.exists():
+                all_tasks = parse_tasks(config.tasks_file)
+            completed = sum(1 for ts in state.tasks.values() if ts.status == "success")
+            failed = sum(1 for ts in state.tasks.values() if ts.status == "failed")
+            running = sum(1 for ts in state.tasks.values() if ts.status == "running")
+            cost = state.total_cost()
+            inp, out = state.total_tokens()
+            print(
+                json.dumps(
+                    {
+                        "total_tasks": len(all_tasks),
+                        "completed": completed,
+                        "failed": failed,
+                        "running": running,
+                        "not_started": len(all_tasks) - completed - failed - running,
+                        "total_cost": round(cost, 2),
+                        "input_tokens": inp,
+                        "output_tokens": out,
+                        "budget_usd": config.budget_usd,
+                    }
+                )
+            )
+        return
+    print_status(config)
 def cmd_costs(args: argparse.Namespace, config: ExecutorConfig) -> None:
     """Show cost breakdown per task with optional JSON output."""
     tasks = parse_tasks(config.tasks_file)

{spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/config.py RENAMED Viewed

@@ -8,6 +8,7 @@ import argparse
 import contextlib
 import fcntl
 import os
+import subprocess
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -137,7 +138,7 @@ class ExecutorConfig:
     # Command template for custom CLIs. Placeholders: {cmd}, {model}, {prompt}, {prompt_file}
     # Examples:
     #   claude: "{cmd} -p {prompt}" or "{cmd} -p {prompt} --model {model}"
-    #   codex: "{cmd} -p {prompt}"
+    #   codex: "{cmd} exec {prompt}"   # -p is --profile in codex, not the prompt
     #   opencode: "{cmd} run --model {model} {prompt}"
     #   pi: "{cmd} -p --model {model} {prompt}"
     #   ollama: "{cmd} run {model} {prompt}"
@@ -281,6 +282,32 @@ def _parse_personas(raw: dict) -> dict[str, Persona] | None:
     return personas if personas else None
+def _detect_subdir_repo(project_root: Path) -> Path | None:
+    """Return the git repo toplevel if `project_root` is a strict subdir of
+    a git repo. Return None when project_root IS the toplevel, when no git
+    repo wraps it, or when git is not installed.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "-C", str(project_root), "rev-parse", "--show-toplevel"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return None
+    toplevel = Path(result.stdout.strip()).resolve()
+    return toplevel if toplevel != project_root.resolve() else None
+def _user_set(yaml_config: dict, args: argparse.Namespace, key: str) -> bool:
+    """True if user explicitly set this key in YAML or CLI."""
+    if yaml_config.get(key) is not None:
+        return True
+    val = getattr(args, key, None)
+    return val not in (None, False)
 def _resolve_config_path() -> Path:
     """Find the config file, preferring new location over legacy.
@@ -448,4 +475,26 @@ def build_config(yaml_config: dict, args: argparse.Namespace) -> ExecutorConfig:
     if hasattr(args, "log_level") and getattr(args, "log_level", None):
         config_kwargs["log_level"] = args.log_level
-    return ExecutorConfig(**config_kwargs)
+    config = ExecutorConfig(**config_kwargs)
+    git_root = _detect_subdir_repo(config.project_root)
+    if git_root is not None:
+        flipped = []
+        if not _user_set(yaml_config, args, "create_git_branch"):
+            config.create_git_branch = False
+            flipped.append("create_git_branch")
+        if not _user_set(yaml_config, args, "auto_commit"):
+            config.auto_commit = False
+            flipped.append("auto_commit")
+        if flipped:
+            from .logging import get_logger
+            get_logger("config").warning(
+                "subdir_project_detected",
+                project_root=str(config.project_root),
+                git_root=str(git_root),
+                defaulted_off=flipped,
+                override_hint="set create_git_branch/auto_commit=true in YAML to opt-in",
+            )
+    return config

spec_runner-2.3.1/src/spec_runner/errors.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""Error classification for CLI agent stderr (v2.3.0).
+Adds short, human-readable reasons to failures (previously surfaced as
+"Unknown error"). Pattern library + last-N-lines stderr fallback.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+STDERR_TAIL_LINES = 5
+@dataclass(frozen=True)
+class ErrorPattern:
+    """One classification pattern.
+    `template` supports {0}, {1}, ... substitutions from regex groups;
+    if the template uses no groups, regex match-only is enough.
+    """
+    kind: str
+    regex: re.Pattern[str]
+    template: str
+PATTERNS: list[ErrorPattern] = [
+    # codex / OpenAI quota — captures the "try again at <time>" hint
+    ErrorPattern(
+        kind="rate_limit",
+        regex=re.compile(
+            r"hit your usage limit.*?try again at ([\d:]+\s*[AP]M)", re.S
+        ),
+        template="OpenAI usage limit — try again at {0}",
+    ),
+    # generic rate-limit (claude, generic providers)
+    ErrorPattern(
+        kind="rate_limit",
+        regex=re.compile(r"rate[_\s-]?limit", re.I),
+        template="Rate limit hit",
+    ),
+    # auth failures
+    ErrorPattern(
+        kind="auth",
+        regex=re.compile(r"unauthor|invalid api key|forbidden", re.I),
+        template="Authentication failed",
+    ),
+    # network failures
+    ErrorPattern(
+        kind="network",
+        regex=re.compile(
+            r"ECONNREFUSED|timed out|name or service not known|dns",
+            re.I,
+        ),
+        template="Network error",
+    ),
+    # generic CLI error line (last resort before unknown fallback)
+    ErrorPattern(
+        kind="cli_error",
+        regex=re.compile(r"^error:\s*(.+)$", re.M),
+        template="{0}",
+    ),
+]
+def classify(stderr: str, returncode: int) -> tuple[str, str]:
+    """Return (kind, human_message) for a failed CLI invocation.
+    - Tries each pattern in PATTERNS order; first match wins.
+    - Falls back to ("unknown", last N lines of stderr) when nothing matches.
+    - When stderr is empty, falls back to ("unknown", "CLI exited with code N").
+    """
+    for p in PATTERNS:
+        m = p.regex.search(stderr)
+        if m:
+            try:
+                return p.kind, p.template.format(*m.groups())
+            except IndexError:
+                return p.kind, p.template
+    tail = "\n".join(stderr.strip().splitlines()[-STDERR_TAIL_LINES:])
+    return "unknown", tail or f"CLI exited with code {returncode}"

{spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/execution.py RENAMED Viewed

@@ -6,6 +6,7 @@ import time
 from datetime import datetime
 from .config import ExecutorConfig
+from .errors import classify
 from .hooks import post_done_hook, pre_start_hook
 from .logging import get_logger
 from .prompt import build_task_prompt, extract_test_failures
@@ -16,6 +17,7 @@ from .runner import (
     parse_token_usage,
     send_callback,
 )
+from .stages import StageReporter
 from .state import (
     ErrorCode,
     ExecutorState,
@@ -42,11 +44,12 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
     """
     task_id = task.id
+    reporter = StageReporter(task.id, lambda line: log_progress(line))
     log_progress(f"\U0001f680 Starting: {task.name}", task_id)
     logger.info("Executing task", task_id=task_id, name=task.name)
     # Pre-start hook
-    if not pre_start_hook(task, config):
+    if not pre_start_hook(task, config, reporter=reporter):
         logger.error("Pre-start hook failed", task_id=task_id)
         state.record_attempt(
             task_id,
@@ -118,6 +121,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
             skip_permissions=config.skip_permissions,
         )
+        reporter.enter("codex")
         result = subprocess.run(
             cmd,
             capture_output=True,
@@ -181,6 +185,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
         success = (has_complete_marker and not has_failed_marker) or implicit_success
         if success:
+            reporter.enter("parse")
             if has_complete_marker:
                 logger.info("Task completed by Claude", task_id=task_id)
             else:
@@ -188,7 +193,7 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
             # Post-done hook (tests, lint, review)
             hook_success, hook_error, review_status, review_findings = post_done_hook(
-                task, config, True
+                task, config, True, reporter=reporter
             )
             if hook_success:
@@ -263,7 +268,11 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
         else:
             # Claude reported failure
             error_match = re.search(r"TASK_FAILED:\s*(.+)", output)
-            error = error_match.group(1) if error_match else "Unknown error"
+            if error_match:
+                error = error_match.group(1)
+                error_kind = "cli_error"
+            else:
+                error_kind, error = classify(result.stderr, result.returncode)
             state.record_attempt(
                 task_id,
                 False,
@@ -274,6 +283,8 @@ def execute_task(task: Task, config: ExecutorConfig, state: ExecutorState) -> bo
                 input_tokens=input_tokens,
                 output_tokens=output_tokens,
                 cost_usd=cost_usd,
+                error_kind=error_kind,
+                error_stage=reporter.current,
             )
             log_progress(f"\u274c Failed: {error[:50]}", task_id)
             send_callback(

{spec_runner-2.2.2 → spec_runner-2.3.1}/src/spec_runner/hooks.py RENAMED Viewed

@@ -23,6 +23,7 @@ from .review import (
     run_code_review,
     run_parallel_review,
 )
+from .stages import StageReporter
 from .state import ReviewVerdict
 from .task import Task
@@ -46,11 +47,13 @@ __all__ = [
 ]
-def pre_start_hook(task: Task, config: ExecutorConfig) -> bool:
+def pre_start_hook(task: Task, config: ExecutorConfig, *, reporter: StageReporter | None = None) -> bool:
     """Hook before starting task"""
     logger.info("Pre-start hook", task_id=task.id)
     # Sync dependencies
+    if reporter:
+        reporter.enter("sync_deps")
     logger.info("Syncing dependencies")
     result = subprocess.run(["uv", "sync"], capture_output=True, text=True, cwd=config.project_root)
     if result.returncode == 0:
@@ -60,6 +63,8 @@ def pre_start_hook(task: Task, config: ExecutorConfig) -> bool:
     # Create git branch
     if config.create_git_branch:
+        if reporter:
+            reporter.enter("branch")
         branch_name = get_task_branch_name(task)
         try:
             # Check if git exists
@@ -158,6 +163,8 @@ def post_done_hook(
     config: ExecutorConfig,
     success: bool,
     changed_since: float | None = None,
+    *,
+    reporter: StageReporter | None = None,
 ) -> tuple[bool, str | None, str, str]:
     """Hook after task completion.
@@ -175,6 +182,8 @@ def post_done_hook(
     # Run tests — capture output for review context
     test_output_str: str | None = None
     if config.run_tests_on_done:
+        if reporter:
+            reporter.enter("tests")
         test_cmd = config.test_command
         # Scope tests to changed files when running in parallel mode
@@ -220,6 +229,8 @@ def post_done_hook(
     # Run lint — capture output for review context
     lint_output_str: str | None = None
     if config.run_lint_on_done and config.lint_command:
+        if reporter:
+            reporter.enter("lint")
         logger.info("Running lint")
         result = subprocess.run(
             config.lint_command,
@@ -287,6 +298,8 @@ def post_done_hook(
     if config.hitl_review and not config.run_review:
         logger.warning("hitl_review enabled but run_review is False; HITL gate skipped")
     if config.run_review:
+        if reporter:
+            reporter.enter("review")
         review_fn = run_parallel_review if config.review_parallel else run_code_review
         logger.info(
             "Running code review",
@@ -332,6 +345,8 @@ def post_done_hook(
     # Auto-commit
     if config.auto_commit:
+        if reporter:
+            reporter.enter("commit")
         try:
             # Check if there are changes to commit
             status_result = subprocess.run(
@@ -366,6 +381,8 @@ def post_done_hook(
     # Merge branch to main
     if config.create_git_branch:
+        if reporter:
+            reporter.enter("merge")
         try:
             branch_name = get_task_branch_name(task)
             main_branch = get_main_branch(config)

spec-runner 2.2.2__tar.gz → 2.3.1__tar.gz

spec-runner 2.2.2__tar.gz → 2.3.1__tar.gz