steerdev 1.0.39__tar.gz → 1.0.52__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {steerdev-1.0.39 → steerdev-1.0.52}/PKG-INFO +1 -1
  2. {steerdev-1.0.39 → steerdev-1.0.52}/pyproject.toml +1 -1
  3. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/agent_loop.py +33 -13
  4. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/__init__.py +2 -0
  5. steerdev-1.0.52/src/steerdev_agent/api/reports.py +82 -0
  6. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/cli.py +82 -8
  7. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/config/models.py +54 -4
  8. steerdev-1.0.52/src/steerdev_agent/evidence.py +156 -0
  9. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/executor/__init__.py +4 -4
  10. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/executor/claude.py +9 -7
  11. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/runner.py +265 -23
  12. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/claude_setup.py +114 -0
  13. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/repo_setup.py +20 -10
  14. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/settings.json +6 -1
  15. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-single-task-merge-skill/SKILL.md +7 -3
  16. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-wave-tasks-merge-skill/SKILL.md +7 -3
  17. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/steerdev.yaml +16 -0
  18. steerdev-1.0.52/src/steerdev_agent/setup/templates/worktrunk.config.toml +40 -0
  19. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workflow/executor.py +21 -3
  20. steerdev-1.0.52/src/steerdev_agent/worktree.py +264 -0
  21. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_agent_loop.py +8 -2
  22. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_agent_loop_extended.py +13 -6
  23. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_claude_executor.py +13 -6
  24. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_claude_setup.py +44 -2
  25. steerdev-1.0.52/tests/test_reports_client.py +115 -0
  26. steerdev-1.0.52/tests/test_runner_worktrees.py +92 -0
  27. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_tasks.py +6 -6
  28. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_workspace_extended.py +2 -5
  29. steerdev-1.0.52/tests/test_worktree.py +153 -0
  30. {steerdev-1.0.39 → steerdev-1.0.52}/.github/workflows/pre-commit.yml +0 -0
  31. {steerdev-1.0.39 → steerdev-1.0.52}/.github/workflows/publish.yml +0 -0
  32. {steerdev-1.0.39 → steerdev-1.0.52}/.gitignore +0 -0
  33. {steerdev-1.0.39 → steerdev-1.0.52}/.pre-commit-config.yaml +0 -0
  34. {steerdev-1.0.39 → steerdev-1.0.52}/AGENTS.md +0 -0
  35. {steerdev-1.0.39 → steerdev-1.0.52}/CLAUDE.md +0 -0
  36. {steerdev-1.0.39 → steerdev-1.0.52}/README.md +0 -0
  37. {steerdev-1.0.39 → steerdev-1.0.52}/scripts/pre-commit-version-bump.sh +0 -0
  38. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/__init__.py +0 -0
  39. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/activity.py +0 -0
  40. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/agents.py +0 -0
  41. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/canals.py +0 -0
  42. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/client.py +0 -0
  43. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/commands.py +0 -0
  44. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/configs.py +0 -0
  45. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/context.py +0 -0
  46. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/events.py +0 -0
  47. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/hooks.py +0 -0
  48. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/implementation_plan.py +0 -0
  49. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/messages.py +0 -0
  50. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/prd.py +0 -0
  51. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/runs.py +0 -0
  52. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/sessions.py +0 -0
  53. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/specs.py +0 -0
  54. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/tasks.py +0 -0
  55. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/workflow_runs.py +0 -0
  56. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/api/workflows.py +0 -0
  57. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/config/__init__.py +0 -0
  58. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/config/platform.py +0 -0
  59. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/config/settings.py +0 -0
  60. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/executor/base.py +0 -0
  61. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/executor/stream.py +0 -0
  62. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/handlers/__init__.py +0 -0
  63. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/handlers/prd.py +0 -0
  64. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/integration.py +0 -0
  65. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/prompt/__init__.py +0 -0
  66. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/prompt/builder.py +0 -0
  67. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/prompt/templates.py +0 -0
  68. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/prompt/workflow_template.py +0 -0
  69. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/py.typed +0 -0
  70. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/retry.py +0 -0
  71. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/__init__.py +0 -0
  72. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/ci/canal-integration.yml +0 -0
  73. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/claude_md_section.md +0 -0
  74. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-activity-skill/SKILL.md +0 -0
  75. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-canal-workflow-skill/SKILL.md +0 -0
  76. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-context-skill/SKILL.md +0 -0
  77. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-git-workflow-skill/SKILL.md +0 -0
  78. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-merge-into-canal-skill/SKILL.md +0 -0
  79. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-progress-logging-skill/SKILL.md +0 -0
  80. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-specs-management-skill/SKILL.md +0 -0
  81. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/setup/templates/skills/steerdev-task-management-skill/SKILL.md +0 -0
  82. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/update_check.py +0 -0
  83. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/version.py +0 -0
  84. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workflow/__init__.py +0 -0
  85. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workflow/context.py +0 -0
  86. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workflow/memory.py +0 -0
  87. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workspace/__init__.py +0 -0
  88. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workspace/project_manager.py +0 -0
  89. {steerdev-1.0.39 → steerdev-1.0.52}/src/steerdev_agent/workspace/tool_detection.py +0 -0
  90. {steerdev-1.0.39 → steerdev-1.0.52}/tests/__init__.py +0 -0
  91. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_agents_api.py +0 -0
  92. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_api_client.py +0 -0
  93. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_client_methods.py +0 -0
  94. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_commands_api.py +0 -0
  95. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_config.py +0 -0
  96. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_config_extended.py +0 -0
  97. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_context_search.py +0 -0
  98. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_executor.py +0 -0
  99. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_platform_config.py +0 -0
  100. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_prompt.py +0 -0
  101. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_retry.py +0 -0
  102. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_runner_merge_modes.py +0 -0
  103. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_stream_parser.py +0 -0
  104. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_version.py +0 -0
  105. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_workflow_context.py +0 -0
  106. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_workflow_memory.py +0 -0
  107. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_workflow_prompt_template.py +0 -0
  108. {steerdev-1.0.39 → steerdev-1.0.52}/tests/test_workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: steerdev
3
- Version: 1.0.39
3
+ Version: 1.0.52
4
4
  Summary: Backend task runner for steerdev.com - orchestrates CLI coding agents with activity reporting
5
5
  Project-URL: Homepage, https://github.com/pentoai/steerdev-agent
6
6
  Project-URL: Repository, https://github.com/pentoai/steerdev-agent
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "steerdev"
3
- version = "1.0.39"
3
+ version = "1.0.52"
4
4
  description = "Backend task runner for steerdev.com - orchestrates CLI coding agents with activity reporting"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -36,9 +36,11 @@ from steerdev_agent.api.sessions import SessionCreateRequest, SessionsClient
36
36
  from steerdev_agent.api.tasks import TasksClient
37
37
  from steerdev_agent.config.models import (
38
38
  AgentLoopConfig,
39
+ EvidenceConfig,
39
40
  ExecutorConfig,
40
41
  RetryConfig,
41
42
  WorkspaceConfig,
43
+ WorktreeConfig,
42
44
  )
43
45
  from steerdev_agent.executor import ExecutorFactory
44
46
  from steerdev_agent.executor.base import EventType
@@ -60,7 +62,7 @@ class CommandExecutor:
60
62
  """Shared command execution dispatch for both project and workspace agents.
61
63
 
62
64
  Subclasses provide: _api_key, agent_type, agent_name, model, max_turns,
63
- _executor_config, _workflow_id, _enable_worktrees, _agent_loop_config,
65
+ _executor_config, _workflow_id, _worktree_config, _agent_loop_config,
64
66
  _commands_client, _sessions_client, _shutdown_event, and the stats counters.
65
67
  """
66
68
 
@@ -75,7 +77,8 @@ class CommandExecutor:
75
77
  _workflow_id: str | None
76
78
  _enable_waves: bool
77
79
  _enable_canals: bool
78
- _enable_worktrees: bool
80
+ _worktree_config: WorktreeConfig
81
+ _evidence_config: EvidenceConfig
79
82
  _agent_loop_config: AgentLoopConfig
80
83
  _commands_client: CommandsClient | None
81
84
  _sessions_client: SessionsClient | None
@@ -171,7 +174,8 @@ class CommandExecutor:
171
174
  max_turns=self.max_turns,
172
175
  enable_waves=self._enable_waves,
173
176
  enable_canals=self._enable_canals,
174
- enable_worktrees=self._enable_worktrees,
177
+ worktree_config=self._worktree_config,
178
+ evidence_config=self._evidence_config,
175
179
  executor_config=self._executor_config,
176
180
  force_workflow_id=None,
177
181
  shutdown_event=self._shutdown_event,
@@ -434,7 +438,8 @@ class AgentLoop(CommandExecutor):
434
438
  force_workflow_id: str | None = None,
435
439
  enable_waves: bool = True,
436
440
  enable_canals: bool = False,
437
- enable_worktrees: bool = False,
441
+ worktree_config: WorktreeConfig | None = None,
442
+ evidence_config: EvidenceConfig | None = None,
438
443
  retry_config: RetryConfig | None = None,
439
444
  ) -> None:
440
445
  self.project_id = project_id
@@ -450,7 +455,8 @@ class AgentLoop(CommandExecutor):
450
455
  self._workflow_id = force_workflow_id
451
456
  self._enable_waves = enable_waves
452
457
  self._enable_canals = enable_canals
453
- self._enable_worktrees = enable_worktrees
458
+ self._worktree_config = worktree_config or WorktreeConfig()
459
+ self._evidence_config = evidence_config or EvidenceConfig()
454
460
 
455
461
  # State
456
462
  self._agent_id: str | None = None
@@ -475,7 +481,11 @@ class AgentLoop(CommandExecutor):
475
481
  workflow_status = self._workflow_id or "single-phase"
476
482
  waves_status = "enabled" if self._enable_waves else "disabled"
477
483
  canals_status = "enabled" if self._enable_canals else "disabled"
478
- worktree_status = "enabled" if self._enable_worktrees else "disabled"
484
+ worktree_status = (
485
+ f"enabled ({self._worktree_config.provider})"
486
+ if self._worktree_config.enabled
487
+ else "disabled"
488
+ )
479
489
 
480
490
  console.print(
481
491
  Panel(
@@ -795,7 +805,8 @@ class WorkspaceAgentLoop(CommandExecutor):
795
805
  force_workflow_id: str | None = None,
796
806
  enable_waves: bool = True,
797
807
  enable_canals: bool = False,
798
- enable_worktrees: bool = False,
808
+ worktree_config: WorktreeConfig | None = None,
809
+ evidence_config: EvidenceConfig | None = None,
799
810
  retry_config: RetryConfig | None = None,
800
811
  ) -> None:
801
812
  self.workspace_path = Path(workspace_path)
@@ -811,7 +822,8 @@ class WorkspaceAgentLoop(CommandExecutor):
811
822
  self._workflow_id = force_workflow_id
812
823
  self._enable_waves = enable_waves
813
824
  self._enable_canals = enable_canals
814
- self._enable_worktrees = enable_worktrees
825
+ self._worktree_config = worktree_config or WorktreeConfig()
826
+ self._evidence_config = evidence_config or EvidenceConfig()
815
827
 
816
828
  # State
817
829
  self._agent_id: str | None = None
@@ -841,7 +853,11 @@ class WorkspaceAgentLoop(CommandExecutor):
841
853
 
842
854
  waves_status = "enabled" if self._enable_waves else "disabled"
843
855
  canals_status = "enabled" if self._enable_canals else "disabled"
844
- worktree_status = "enabled" if self._enable_worktrees else "disabled"
856
+ worktree_status = (
857
+ f"enabled ({self._worktree_config.provider})"
858
+ if self._worktree_config.enabled
859
+ else "disabled"
860
+ )
845
861
 
846
862
  console.print(
847
863
  Panel(
@@ -1211,7 +1227,8 @@ async def run_agent_loop(
1211
1227
  force_workflow_id: str | None = None,
1212
1228
  enable_waves: bool = True,
1213
1229
  enable_canals: bool = False,
1214
- enable_worktrees: bool = False,
1230
+ worktree_config: WorktreeConfig | None = None,
1231
+ evidence_config: EvidenceConfig | None = None,
1215
1232
  retry_config: RetryConfig | None = None,
1216
1233
  ) -> None:
1217
1234
  """Run the project-scoped agent loop.
@@ -1231,7 +1248,8 @@ async def run_agent_loop(
1231
1248
  force_workflow_id=force_workflow_id,
1232
1249
  enable_waves=enable_waves,
1233
1250
  enable_canals=enable_canals,
1234
- enable_worktrees=enable_worktrees,
1251
+ worktree_config=worktree_config,
1252
+ evidence_config=evidence_config,
1235
1253
  retry_config=retry_config,
1236
1254
  )
1237
1255
  await agent.start()
@@ -1250,7 +1268,8 @@ async def run_workspace_agent_loop(
1250
1268
  force_workflow_id: str | None = None,
1251
1269
  enable_waves: bool = True,
1252
1270
  enable_canals: bool = False,
1253
- enable_worktrees: bool = False,
1271
+ worktree_config: WorktreeConfig | None = None,
1272
+ evidence_config: EvidenceConfig | None = None,
1254
1273
  retry_config: RetryConfig | None = None,
1255
1274
  ) -> None:
1256
1275
  """Run the workspace (multi-project) agent loop.
@@ -1270,7 +1289,8 @@ async def run_workspace_agent_loop(
1270
1289
  force_workflow_id=force_workflow_id,
1271
1290
  enable_waves=enable_waves,
1272
1291
  enable_canals=enable_canals,
1273
- enable_worktrees=enable_worktrees,
1292
+ worktree_config=worktree_config,
1293
+ evidence_config=evidence_config,
1274
1294
  retry_config=retry_config,
1275
1295
  )
1276
1296
  await agent.start()
@@ -4,6 +4,7 @@ from steerdev_agent.api.client import SteerDevClient, get_api_key, get_project_i
4
4
  from steerdev_agent.api.configs import ConfigsClient
5
5
  from steerdev_agent.api.events import EventData, EventsClient
6
6
  from steerdev_agent.api.hooks import HooksClient
7
+ from steerdev_agent.api.reports import ReportsClient
7
8
  from steerdev_agent.api.runs import RunCreateRequest, RunResponse, RunsClient
8
9
  from steerdev_agent.api.sessions import (
9
10
  SessionCreateRequest,
@@ -18,6 +19,7 @@ __all__ = [
18
19
  "EventData",
19
20
  "EventsClient",
20
21
  "HooksClient",
22
+ "ReportsClient",
21
23
  "RunCreateRequest",
22
24
  "RunResponse",
23
25
  "RunsClient",
@@ -0,0 +1,82 @@
1
+ """Evidence reports API client for SteerDev Agent.
2
+
3
+ Submits evidence reports to the platform after task/workflow completion,
4
+ making work visible on the project review dashboard.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from loguru import logger
12
+ from rich.console import Console
13
+
14
+ from steerdev_agent.api.client import SteerDevClient
15
+
16
+ console = Console()
17
+
18
+
19
class ReportsClient(SteerDevClient):
    """Client for submitting evidence reports to the platform.

    Evidence reports capture task completion summaries, making agent work
    visible on the project review page (/projects/[id]/review).
    """

    def submit(
        self,
        *,
        project_id: str,
        summary: str,
        blocks: list[dict[str, Any]],
        task_id: str | None = None,
        wave_id: str | None = None,
        agent_id: str | None = None,
        evaluation_steps: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any] | None:
        """Submit an evidence report for a completed task or workflow.

        Submission is best-effort: every failure (non-2xx status, transport
        error, unparsable or unexpected response body) is logged and reported
        to the caller as ``None`` instead of raising.

        Args:
            project_id: SteerDev project ID.
            summary: Report title/summary (required for reviewability).
            blocks: List of report blocks (type, content, order).
            task_id: Associated task ID.
            wave_id: Associated wave ID.
            agent_id: Associated agent ID.
            evaluation_steps: Optional evaluation steps (title, result, order).

        Returns:
            Response dict with report_id and is_reviewable, or None on failure.
        """
        payload: dict[str, Any] = {
            "project_id": project_id,
            "summary": summary,
            "blocks": blocks,
        }
        # Optional fields are sent only when truthy, so None, "" and [] are
        # all left out of the request body.
        optional_fields: dict[str, Any] = {
            "task_id": task_id,
            "wave_id": wave_id,
            "agent_id": agent_id,
            "evaluation_steps": evaluation_steps,
        }
        payload.update({key: value for key, value in optional_fields.items() if value})

        try:
            # NOTE(review): `post` is presumably inherited from SteerDevClient.
            response = self.post("/reports", json=payload)
            if response.status_code not in (200, 201):
                logger.warning(
                    f"Evidence report submission failed: {response.status_code} - {response.text}"
                )
                return None

            data = response.json()
            report_id = data.get("report_id", "unknown")
            is_reviewable = data.get("is_reviewable", False)
            console.print(
                f"[dim]Evidence report submitted: {report_id} "
                f"(reviewable: {is_reviewable})[/dim]"
            )
            return data
        except Exception:
            # loguru does not interpret the stdlib `exc_info=True` keyword;
            # `opt(exception=True)` is what attaches the active traceback.
            logger.opt(exception=True).debug("Evidence report submission error")
            return None
@@ -1509,7 +1509,10 @@ def _resolve_run_config(
1509
1509
  resolved_model = model if model is not None else config.agent.model
1510
1510
  resolved_max_turns = max_turns if max_turns is not None else config.agent.max_turns
1511
1511
  resolved_timeout = timeout if timeout is not None else config.agent.timeout_seconds
1512
- resolved_worktrees = worktrees if worktrees is not None else config.worktrees.enabled
1512
+ # Resolve worktree config: CLI flag overrides config file enabled state
1513
+ resolved_worktree_config = config.worktrees.model_copy()
1514
+ if worktrees is not None:
1515
+ resolved_worktree_config.enabled = worktrees
1513
1516
 
1514
1517
  resolved_api_key = api_key
1515
1518
  if not resolved_api_key:
@@ -1528,7 +1531,7 @@ def _resolve_run_config(
1528
1531
  "timeout_seconds": resolved_timeout,
1529
1532
  "enable_waves": waves,
1530
1533
  "enable_canals": canals,
1531
- "enable_worktrees": resolved_worktrees,
1534
+ "worktree_config": resolved_worktree_config,
1532
1535
  "force_workflow_id": resolved_workflow_id,
1533
1536
  "dry_run": dry_run,
1534
1537
  }
@@ -1612,6 +1615,13 @@ def run(
1612
1615
  help="Enable git worktree isolation (default: from config or disabled)",
1613
1616
  ),
1614
1617
  ] = None,
1618
+ evidence: Annotated[
1619
+ bool | None,
1620
+ typer.Option(
1621
+ "--evidence/--no-evidence",
1622
+ help="Submit evidence reports after task completion (default: from config or disabled)",
1623
+ ),
1624
+ ] = None,
1615
1625
  dry_run: Annotated[
1616
1626
  bool,
1617
1627
  typer.Option(
@@ -1700,7 +1710,14 @@ def run(
1700
1710
  resolved_model = model if model is not None else config.agent.model
1701
1711
  resolved_max_turns = max_turns if max_turns is not None else config.agent.max_turns
1702
1712
  resolved_timeout = timeout if timeout is not None else config.agent.timeout_seconds
1703
- resolved_worktrees = worktrees if worktrees is not None else config.worktrees.enabled
1713
+ # Resolve worktree config: CLI flag overrides config file enabled state
1714
+ resolved_worktree_config = config.worktrees.model_copy()
1715
+ if worktrees is not None:
1716
+ resolved_worktree_config.enabled = worktrees
1717
+ # Resolve evidence config: CLI flag overrides config file enabled state
1718
+ resolved_evidence_config = config.evidence.model_copy()
1719
+ if evidence is not None:
1720
+ resolved_evidence_config.enabled = evidence
1704
1721
 
1705
1722
  # API key: CLI > env (via envvar) > config env var
1706
1723
  resolved_api_key = api_key
@@ -1711,7 +1728,12 @@ def run(
1711
1728
  if not resolved_workflow_id:
1712
1729
  resolved_workflow_id = config.agent.workflow_id
1713
1730
 
1714
- worktree_status = "enabled" if resolved_worktrees else "disabled"
1731
+ worktree_status = (
1732
+ f"enabled ({resolved_worktree_config.provider})"
1733
+ if resolved_worktree_config.enabled
1734
+ else "disabled"
1735
+ )
1736
+ evidence_status = "enabled" if resolved_evidence_config.enabled else "disabled"
1715
1737
  dry_run_status = "enabled" if dry_run else "disabled"
1716
1738
  waves_status = "enabled" if waves else "disabled"
1717
1739
  canals_status = "enabled" if canals else "disabled"
@@ -1732,6 +1754,7 @@ def run(
1732
1754
  f"Waves: {waves_status}\n"
1733
1755
  f"Canals: {canals_status}\n"
1734
1756
  f"Worktrees: {worktree_status}\n"
1757
+ f"Evidence: {evidence_status}\n"
1735
1758
  f"Dry Run: {dry_run_status}",
1736
1759
  title="Starting",
1737
1760
  )
@@ -1762,7 +1785,8 @@ def run(
1762
1785
  timeout_seconds=resolved_timeout,
1763
1786
  enable_waves=waves,
1764
1787
  enable_canals=canals,
1765
- enable_worktrees=resolved_worktrees,
1788
+ worktree_config=resolved_worktree_config,
1789
+ evidence_config=resolved_evidence_config,
1766
1790
  force_workflow_id=resolved_workflow_id,
1767
1791
  dry_run=dry_run,
1768
1792
  retry_config=retry_config,
@@ -1911,6 +1935,13 @@ def agent(
1911
1935
  help="Enable canal merge flow selection (default: disabled)",
1912
1936
  ),
1913
1937
  ] = False,
1938
+ evidence: Annotated[
1939
+ bool | None,
1940
+ typer.Option(
1941
+ "--evidence/--no-evidence",
1942
+ help="Submit evidence reports after task completion (default: from config or disabled)",
1943
+ ),
1944
+ ] = None,
1914
1945
  ) -> None:
1915
1946
  """Run the agent in persistent mode.
1916
1947
 
@@ -1970,7 +2001,10 @@ def agent(
1970
2001
  if gap_seconds is not None:
1971
2002
  agent_loop_config.gap_seconds = gap_seconds
1972
2003
 
1973
- resolved_worktrees = config.worktrees.enabled
2004
+ resolved_worktree_config = config.worktrees.model_copy()
2005
+ resolved_evidence_config = config.evidence.model_copy()
2006
+ if evidence is not None:
2007
+ resolved_evidence_config.enabled = evidence
1974
2008
 
1975
2009
  # Resolve retry config: CLI > config > default
1976
2010
  retry_config = config.retry.model_copy()
@@ -2002,7 +2036,8 @@ def agent(
2002
2036
  force_workflow_id=workflow_id,
2003
2037
  enable_waves=waves,
2004
2038
  enable_canals=canals,
2005
- enable_worktrees=resolved_worktrees,
2039
+ worktree_config=resolved_worktree_config,
2040
+ evidence_config=resolved_evidence_config,
2006
2041
  retry_config=retry_config,
2007
2042
  )
2008
2043
  )
@@ -2042,7 +2077,8 @@ def agent(
2042
2077
  force_workflow_id=resolved_workflow_id,
2043
2078
  enable_waves=waves,
2044
2079
  enable_canals=canals,
2045
- enable_worktrees=resolved_worktrees,
2080
+ worktree_config=resolved_worktree_config,
2081
+ evidence_config=resolved_evidence_config,
2046
2082
  retry_config=retry_config,
2047
2083
  )
2048
2084
  )
@@ -2207,6 +2243,32 @@ def _setup_repos(
2207
2243
  )
2208
2244
 
2209
2245
 
2246
def _display_dependency_check(deps: list) -> None:
    """Render the CLI dependency check results as a Rich table.

    Each entry in *deps* is read for ``name``, ``found``, ``required``,
    ``version``, ``path`` and ``install_hint``. The table is printed to the
    module console with a blank line before and after it.
    """
    from rich.table import Table

    def _status_and_details(entry) -> tuple:
        """Return the (status markup, details text) pair for one dependency."""
        if entry.found:
            # Prefer the version string, then the resolved path, then nothing.
            return "[green]Found[/green]", entry.version or entry.path or ""
        if entry.required:
            markup = "[red]Missing (required)[/red]"
        else:
            markup = "[yellow]Missing (optional)[/yellow]"
        return markup, f"Install: {entry.install_hint}"

    report = Table(title="CLI Dependencies", show_header=True, header_style="bold")
    report.add_column("Tool", style="cyan")
    report.add_column("Status")
    report.add_column("Details", style="dim")

    for entry in deps:
        status_markup, details_text = _status_and_details(entry)
        report.add_row(entry.name, status_markup, details_text)

    # Blank lines keep the table visually separated from surrounding output.
    console.print()
    console.print(report)
    console.print()
2270
+
2271
+
2210
2272
  def _prompt_install_target() -> str:
2211
2273
  """Prompt the user to choose where to install Claude configs."""
2212
2274
  choices = {
@@ -2324,9 +2386,21 @@ def setup(
2324
2386
  - user: ~/.claude/ in your home directory (shared across all projects)
2325
2387
  """
2326
2388
  from steerdev_agent.setup import ClaudeSetup
2389
+ from steerdev_agent.setup.claude_setup import check_cli_dependencies
2327
2390
 
2328
2391
  target_dir = project_dir or Path.cwd()
2329
2392
 
2393
+ # ── Check CLI dependencies ──
2394
+ deps = check_cli_dependencies()
2395
+ _display_dependency_check(deps)
2396
+
2397
+ missing_required = [d for d in deps if d.required and not d.found]
2398
+ if missing_required:
2399
+ console.print(
2400
+ "\n[red]Missing required dependencies. Install them before running setup.[/red]"
2401
+ )
2402
+ raise typer.Exit(1)
2403
+
2330
2404
  # Prompt for install target if not provided
2331
2405
  if install_target is None:
2332
2406
  install_target = _prompt_install_target()
@@ -10,15 +10,45 @@ from steerdev_agent.api.client import DEFAULT_API_ENDPOINT
10
10
 
11
11
 
12
12
  class WorktreeConfig(BaseModel):
13
- """Git worktree isolation configuration.
13
+ """Worktree isolation configuration using worktrunk.dev (wt CLI).
14
14
 
15
- When enabled, the Claude CLI --worktree flag is used to run each task
16
- in an isolated git worktree. Worktree lifecycle is managed by Claude CLI.
15
+ When enabled with provider "worktrunk", `wt switch --create <branch>`
16
+ provisions isolated worktrees with lifecycle hooks (env setup, pre-merge checks).
17
+ Legacy provider "claude" uses Claude CLI's --worktree flag directly.
17
18
  """
18
19
 
19
20
  enabled: Annotated[
20
21
  bool,
21
- Field(default=False, description="Enable Claude CLI --worktree isolation per task"),
22
+ Field(default=False, description="Enable worktree isolation per task/wave"),
23
+ ]
24
+ provider: Annotated[
25
+ str,
26
+ Field(
27
+ default="worktrunk",
28
+ description='Worktree provider: "worktrunk" (recommended) or "claude" (legacy --worktree flag)',
29
+ ),
30
+ ]
31
+ cleanup_on_complete: Annotated[
32
+ bool,
33
+ Field(default=True, description="Remove worktree after successful task completion"),
34
+ ]
35
+ cleanup_on_failure: Annotated[
36
+ bool,
37
+ Field(default=False, description="Remove worktree after task failure"),
38
+ ]
39
+ copy_gitignored: Annotated[
40
+ list[str],
41
+ Field(
42
+ default_factory=lambda: [".env", ".env.local"],
43
+ description="Gitignored files to copy into new worktrees (via wt step)",
44
+ ),
45
+ ]
46
+ pre_merge_checks: Annotated[
47
+ list[str],
48
+ Field(
49
+ default_factory=list,
50
+ description='Commands to run in pre-merge hook (e.g., ["pnpm check", "pnpm typecheck"])',
51
+ ),
22
52
  ]
23
53
 
24
54
 
@@ -216,6 +246,19 @@ class CanalConfig(BaseModel):
216
246
  ]
217
247
 
218
248
 
249
class EvidenceConfig(BaseModel):
    """Settings that control evidence report submission.

    With ``enabled`` set, the agent submits evidence reports to the platform
    after completing tasks or workflow phases; the reports appear on the
    project review page. Submission is off by default.
    """

    # Master switch for evidence reporting; defaults to off.
    enabled: Annotated[
        bool,
        Field(
            default=False,
            description="Submit evidence reports after task completion",
        ),
    ]
+ ]
260
+
261
+
219
262
  class RetryConfig(BaseModel):
220
263
  """Retry configuration for failed task/session execution."""
221
264
 
@@ -295,6 +338,13 @@ class SteerDevConfig(BaseModel):
295
338
  WorkspaceConfig,
296
339
  Field(default_factory=WorkspaceConfig, description="Workspace agent configuration"),
297
340
  ]
341
+ evidence: Annotated[
342
+ EvidenceConfig,
343
+ Field(
344
+ default_factory=EvidenceConfig,
345
+ description="Evidence report configuration for project review",
346
+ ),
347
+ ]
298
348
  retry: Annotated[
299
349
  RetryConfig,
300
350
  Field(default_factory=RetryConfig, description="Retry configuration for failed tasks"),
@@ -0,0 +1,156 @@
1
+ """Evidence collection: run a verification session after task completion.
2
+
3
+ After a task or workflow finishes, this module spawns a short agent session
4
+ that reviews what was done — checking git changes, using agent-browser for
5
+ visual verification, and writing a structured review.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import contextlib
12
+ from typing import Any
13
+
14
+ from loguru import logger
15
+ from rich.console import Console
16
+
17
+ from steerdev_agent.executor import ExecutorFactory
18
+ from steerdev_agent.executor.base import EventType
19
+
20
+ console = Console()
21
+
22
+ # Maximum turns for the evidence-collection session. Enough for:
23
+ # git diff/log (2), optional file reads (3), agent-browser (4), summary (1)
24
+ EVIDENCE_MAX_TURNS = 25
25
+
26
+
27
def build_evidence_prompt(
    task: dict[str, Any],
    success: bool,
    phase_summaries: list[dict[str, Any]] | None = None,
    evidence_messages: list[str] | None = None,
) -> str:
    """Build the prompt sent to the agent for evidence collection.

    Optional text fields (task title/prompt, phase name/summary, the last
    agent message) are coerced to ``str`` before being truncated, so a
    ``None`` or non-string value in the incoming dicts cannot raise while the
    prompt is being assembled.

    Args:
        task: The completed task dict. Reads the ``title`` and ``prompt`` keys.
        success: Whether the task succeeded.
        phase_summaries: Per-phase summaries (workflow tasks only). Each entry
            is read for ``phase_name``, ``success`` and ``summary``.
        evidence_messages: Assistant messages collected during execution
            (non-workflow tasks only). Only the last message is included, and
            only when ``phase_summaries`` is empty.

    Returns:
        The prompt text, with sections joined by newlines.
    """

    def _text(value: object, fallback: str = "") -> str:
        """Return *value* as a string, substituting *fallback* for ``None``."""
        return fallback if value is None else str(value)

    task_title = _text(task.get("title"), "Unknown Task")
    task_prompt = _text(task.get("prompt"))
    status = "Completed successfully" if success else "Failed"

    sections: list[str] = [
        "# Evidence Collection\n",
        "You have just completed a task. Your job now is to **review and document "
        "what was accomplished** so a human reviewer can verify the work.\n",
        f"## Task\n- **Title:** {task_title}\n- **Status:** {status}\n",
    ]

    if task_prompt:
        # Cap the requirement text so a very long task prompt cannot dominate.
        sections.append(f"## Original Requirement\n{task_prompt[:2000]}\n")

    # Include work output from the execution. Phase summaries take
    # precedence; the last assistant message is the fallback.
    if phase_summaries:
        lines = ["## Workflow Phase Results\n"]
        for ps in phase_summaries:
            name = _text(ps.get("phase_name"), "Phase")
            ok = "Pass" if ps.get("success") else "Fail"
            text = _text(ps.get("summary"))
            lines.append(f"### {name} ({ok})\n{text[:1500]}\n")
        sections.append("\n".join(lines))
    elif evidence_messages:
        last = _text(evidence_messages[-1])[:3000]
        sections.append(f"## Agent Output (last message)\n{last}\n")

    sections.append(
        "## Your Instructions\n\n"
        "Collect evidence that the work is correct. Follow these steps:\n\n"
        "### 1. File Changes\n"
        "Run `git diff --stat` and `git log --oneline -5` to see what changed.\n"
        "Summarize the key modifications.\n\n"
        "### 2. Visual Verification\n"
        "If the task involved **UI or frontend changes**:\n"
        "- Use `agent-browser open <url>` to navigate to the relevant page\n"
        "- Use `agent-browser screenshot` to capture it\n"
        "- Describe what you see and whether it matches the acceptance criteria\n\n"
        "If the task was backend-only, verify via tests or logs instead.\n\n"
        "### 3. Tests\n"
        "If there are relevant tests, run them and report the results.\n\n"
        "### 4. Review Summary\n"
        "Write a structured review covering:\n"
        "- What was implemented or changed\n"
        "- Key files modified and why\n"
        "- Visual verification or test results\n"
        "- Any concerns, edge cases, or issues found\n\n"
        "Be specific and factual. Include code snippets or file paths where relevant.\n"
    )

    return "\n".join(sections)
93
+
94
+
95
async def collect_evidence(
    *,
    task: dict[str, Any],
    success: bool,
    working_directory: str,
    executor_config: Any,
    model: str | None,
    api_key: str | None,
    dry_run: bool = False,
    phase_summaries: list[dict[str, Any]] | None = None,
    evidence_messages: list[str] | None = None,
) -> list[str]:
    """Run a short agent session to collect evidence of completed work.

    Best-effort: prompt construction, executor creation and the session
    itself all run under one exception guard, so a failure in any of them is
    logged and never propagates to the caller.

    Args:
        task: The completed task dict, forwarded to ``build_evidence_prompt``.
        success: Whether the task succeeded.
        working_directory: Directory the evidence session runs in.
        executor_config: Executor configuration handed to ``ExecutorFactory``.
        model: Model override for the session, or ``None``.
        api_key: Accepted for interface compatibility; not used here.
        dry_run: When true, print a notice and return without running anything.
        phase_summaries: Per-phase summaries (workflow tasks only).
        evidence_messages: Assistant messages from the original execution.

    Returns:
        The non-empty string contents of the assistant messages produced
        during the evidence session. Empty for a dry run; on failure, contains
        whatever was collected before the error (possibly nothing).
    """
    if dry_run:
        console.print("[dim]Evidence collection (dry run): would run evidence prompt[/dim]")
        return []

    collected: list[str] = []
    # Stays None if creation fails, so the cleanup below can tell.
    executor = None

    try:
        prompt = build_evidence_prompt(
            task,
            success=success,
            phase_summaries=phase_summaries,
            evidence_messages=evidence_messages,
        )
        executor = ExecutorFactory.create(
            config=executor_config,
            working_directory=working_directory,
            model=model,
            max_turns=EVIDENCE_MAX_TURNS,
            dry_run=False,
        )

        await executor.start(prompt)
        console.print("[dim]Evidence collection started...[/dim]")

        async for event in executor.stream_events():
            if event.event_type != EventType.ASSISTANT:
                continue
            message = event.data.get("message", {})
            # A dict message carries its text under "content"; anything else
            # is stringified as-is.
            content = message.get("content", "") if isinstance(message, dict) else str(message)
            # Only plain, non-blank string content is kept.
            if isinstance(content, str) and content.strip():
                collected.append(content)

        await executor.wait()
        console.print(f"[dim]Evidence collection done ({len(collected)} messages)[/dim]")

    except Exception:
        # loguru does not interpret the stdlib `exc_info=True` keyword;
        # `opt(exception=True)` is what attaches the active traceback.
        logger.opt(exception=True).debug("Evidence collection session failed")
        console.print("[dim]Evidence collection failed, using fallback[/dim]")

    finally:
        if executor is not None and executor.is_running:
            # Bounded, best-effort shutdown: a slow or failing stop() must not
            # mask the result of the evidence session.
            with contextlib.suppress(asyncio.CancelledError, TimeoutError, Exception):
                await asyncio.wait_for(executor.stop(), timeout=3.0)

    return collected