tsugite-cli 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. tsugite/__init__.py +6 -0
  2. tsugite/agent_composition.py +163 -0
  3. tsugite/agent_inheritance.py +479 -0
  4. tsugite/agent_preparation.py +236 -0
  5. tsugite/agent_runner/__init__.py +45 -0
  6. tsugite/agent_runner/helpers.py +106 -0
  7. tsugite/agent_runner/history_integration.py +248 -0
  8. tsugite/agent_runner/metrics.py +100 -0
  9. tsugite/agent_runner/runner.py +1879 -0
  10. tsugite/agent_runner/validation.py +70 -0
  11. tsugite/agent_utils.py +167 -0
  12. tsugite/attachments/__init__.py +65 -0
  13. tsugite/attachments/auto_context.py +199 -0
  14. tsugite/attachments/base.py +34 -0
  15. tsugite/attachments/file.py +51 -0
  16. tsugite/attachments/inline.py +31 -0
  17. tsugite/attachments/storage.py +178 -0
  18. tsugite/attachments/url.py +59 -0
  19. tsugite/attachments/youtube.py +101 -0
  20. tsugite/benchmark/__init__.py +62 -0
  21. tsugite/benchmark/config.py +183 -0
  22. tsugite/benchmark/core.py +292 -0
  23. tsugite/benchmark/discovery.py +377 -0
  24. tsugite/benchmark/evaluators.py +671 -0
  25. tsugite/benchmark/execution.py +657 -0
  26. tsugite/benchmark/metrics.py +204 -0
  27. tsugite/benchmark/reports.py +420 -0
  28. tsugite/benchmark/utils.py +288 -0
  29. tsugite/builtin_agents/chat-assistant.md +53 -0
  30. tsugite/builtin_agents/default.md +140 -0
  31. tsugite/builtin_agents.py +5 -0
  32. tsugite/cache.py +195 -0
  33. tsugite/cli/__init__.py +1042 -0
  34. tsugite/cli/agents.py +148 -0
  35. tsugite/cli/attachments.py +193 -0
  36. tsugite/cli/benchmark.py +663 -0
  37. tsugite/cli/cache.py +113 -0
  38. tsugite/cli/config.py +272 -0
  39. tsugite/cli/helpers.py +534 -0
  40. tsugite/cli/history.py +193 -0
  41. tsugite/cli/init.py +387 -0
  42. tsugite/cli/mcp.py +193 -0
  43. tsugite/cli/tools.py +419 -0
  44. tsugite/config.py +204 -0
  45. tsugite/console.py +48 -0
  46. tsugite/constants.py +21 -0
  47. tsugite/core/__init__.py +19 -0
  48. tsugite/core/agent.py +774 -0
  49. tsugite/core/executor.py +300 -0
  50. tsugite/core/memory.py +67 -0
  51. tsugite/core/tools.py +271 -0
  52. tsugite/docker_cli.py +270 -0
  53. tsugite/events/__init__.py +55 -0
  54. tsugite/events/base.py +46 -0
  55. tsugite/events/bus.py +62 -0
  56. tsugite/events/events.py +224 -0
  57. tsugite/exceptions.py +40 -0
  58. tsugite/history/__init__.py +29 -0
  59. tsugite/history/index.py +210 -0
  60. tsugite/history/models.py +106 -0
  61. tsugite/history/storage.py +157 -0
  62. tsugite/mcp_client.py +219 -0
  63. tsugite/mcp_config.py +174 -0
  64. tsugite/md_agents.py +751 -0
  65. tsugite/models.py +257 -0
  66. tsugite/renderer.py +151 -0
  67. tsugite/shell_tool_config.py +265 -0
  68. tsugite/templates/assistant.md +14 -0
  69. tsugite/tools/__init__.py +265 -0
  70. tsugite/tools/agents.py +312 -0
  71. tsugite/tools/edit_strategies.py +393 -0
  72. tsugite/tools/fs.py +329 -0
  73. tsugite/tools/http.py +239 -0
  74. tsugite/tools/interactive.py +430 -0
  75. tsugite/tools/shell.py +129 -0
  76. tsugite/tools/shell_tools.py +214 -0
  77. tsugite/tools/tasks.py +339 -0
  78. tsugite/tsugite.py +7 -0
  79. tsugite/ui/__init__.py +46 -0
  80. tsugite/ui/base.py +638 -0
  81. tsugite/ui/chat.py +265 -0
  82. tsugite/ui/chat.tcss +92 -0
  83. tsugite/ui/chat_history.py +286 -0
  84. tsugite/ui/helpers.py +102 -0
  85. tsugite/ui/jsonl.py +125 -0
  86. tsugite/ui/live_template.py +529 -0
  87. tsugite/ui/plain.py +419 -0
  88. tsugite/ui/textual_chat.py +642 -0
  89. tsugite/ui/textual_handler.py +225 -0
  90. tsugite/ui/widgets/__init__.py +6 -0
  91. tsugite/ui/widgets/base_scroll_log.py +27 -0
  92. tsugite/ui/widgets/message_list.py +121 -0
  93. tsugite/ui/widgets/thought_log.py +80 -0
  94. tsugite/ui_context.py +90 -0
  95. tsugite/utils.py +367 -0
  96. tsugite/xdg.py +104 -0
  97. tsugite_cli-0.3.3.dist-info/METADATA +325 -0
  98. tsugite_cli-0.3.3.dist-info/RECORD +101 -0
  99. tsugite_cli-0.3.3.dist-info/WHEEL +4 -0
  100. tsugite_cli-0.3.3.dist-info/entry_points.txt +5 -0
  101. tsugite_cli-0.3.3.dist-info/licenses/LICENSE +235 -0
@@ -0,0 +1,1879 @@
1
+ """Agent execution engine using TsugiteAgent."""
2
+
3
+ import asyncio
4
+ import time
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
7
+
8
+ from tsugite.core.agent import TsugiteAgent
9
+ from tsugite.core.executor import LocalExecutor
10
+ from tsugite.exceptions import AgentExecutionError
11
+ from tsugite.md_agents import AgentConfig, parse_agent_file
12
+ from tsugite.renderer import AgentRenderer
13
+ from tsugite.tools import call_tool
14
+ from tsugite.tools.tasks import TaskStatus, get_task_manager, reset_task_manager
15
+ from tsugite.utils import is_interactive
16
+
17
+ from .helpers import (
18
+ _stderr_console,
19
+ clear_current_agent,
20
+ clear_multistep_ui_context,
21
+ get_display_console,
22
+ get_ui_handler,
23
+ print_step_progress,
24
+ set_current_agent,
25
+ set_multistep_ui_context,
26
+ )
27
+ from .metrics import StepMetrics, display_step_metrics
28
+
29
# Display constants for truncating long output.
# Both limits are character counts; text longer than the limit is cut and suffixed with "...".
MAX_VARIABLE_PREVIEW_LENGTH = 100  # Max characters of each injected variable's str() shown in the instructions
MAX_CONTENT_PREVIEW_LENGTH = 200  # Max characters of each attachment shown in the debug prompt dump
32
+
33
+ if TYPE_CHECKING:
34
+ from tsugite.agent_preparation import PreparedAgent
35
+ from tsugite.events import EventBus
36
+
37
+
38
def _get_model_string(model_override: Optional[str], agent_config: AgentConfig) -> str:
    """Resolve the model identifier an agent run should use.

    Sources are consulted in priority order: the explicit override, the
    model declared on the agent, and finally ``default_model`` from the
    loaded Tsugite configuration.

    Args:
        model_override: Model requested by the caller (e.g. from the CLI); may be None
        agent_config: Configuration of the agent being run

    Returns:
        The first non-empty model string found

    Raises:
        RuntimeError: If none of the three sources provides a model
    """
    if model_override:
        return model_override

    if agent_config.model:
        return agent_config.model

    # The global configuration is only loaded when neither explicit source is set.
    from tsugite.config import load_config

    fallback_model = load_config().default_model
    if fallback_model:
        return fallback_model

    raise RuntimeError(
        "No model specified. Set a model in agent frontmatter, use --model flag, "
        "or set a default with 'tsugite config set-default <model>'"
    )
65
+
66
+
67
def _populate_initial_tasks(agent_config: AgentConfig) -> None:
    """Seed the task manager with the tasks declared on the agent.

    Entries without a title are skipped, and an unrecognised status value
    falls back to ``TaskStatus.PENDING``.

    Args:
        agent_config: Agent configuration whose ``initial_tasks`` entries are
            read with ``dict.get`` (keys: ``title``, ``status``, ``optional``)
    """
    declared_tasks = agent_config.initial_tasks
    if not declared_tasks:
        return

    manager = get_task_manager()

    for entry in declared_tasks:
        title = entry.get("title", "")
        raw_status = entry.get("status", "pending")
        is_optional = entry.get("optional", False)

        if not title:
            continue

        try:
            status = TaskStatus(raw_status)
        except ValueError:
            # Unknown status strings are treated as pending rather than rejected.
            status = TaskStatus.PENDING

        manager.add_task(title, status, parent_id=None, optional=is_optional)
94
+
95
+
96
+ def _build_step_error_message(
97
+ error_type: str,
98
+ step_name: str,
99
+ step_number: int,
100
+ total_steps: int,
101
+ errors: List[str],
102
+ available_vars: List[str],
103
+ previous_step: str,
104
+ max_attempts: int,
105
+ debug_tips: List[str],
106
+ ) -> str:
107
+ """Build detailed error message for step failures.
108
+
109
+ Args:
110
+ error_type: Type of error (e.g., "Template Rendering Failed", "Step Execution Failed")
111
+ step_name: Name of the failed step
112
+ step_number: Current step number (1-indexed)
113
+ total_steps: Total number of steps
114
+ errors: List of error messages from all attempts
115
+ available_vars: List of available variable names
116
+ previous_step: Name of the previous step
117
+ max_attempts: Maximum number of retry attempts
118
+ debug_tips: List of debugging suggestions
119
+
120
+ Returns:
121
+ Formatted error message string
122
+ """
123
+ error_lines = [
124
+ "",
125
+ f"Step {error_type}",
126
+ "━" * 60,
127
+ f"Step: {step_name} ({step_number}/{total_steps})",
128
+ f"Previous Step: {previous_step}",
129
+ f"Attempts: {max_attempts}",
130
+ "",
131
+ ]
132
+
133
+ # Add variables section (format depends on whether we have any)
134
+ if available_vars:
135
+ var_label = "Context Variables" if "Template" in error_type else "Available Variables"
136
+ error_lines.append(f"{var_label}: {', '.join(available_vars)}")
137
+ else:
138
+ error_lines.append("Available Variables: None")
139
+
140
+ error_lines.extend(["", "Errors:"])
141
+
142
+ # Add all error attempts
143
+ for idx, err in enumerate(errors, 1):
144
+ error_lines.append(f" Attempt {idx}: {err}")
145
+
146
+ # Add debugging tips
147
+ error_lines.extend(["━" * 60, "", "To debug:"])
148
+ for tip in debug_tips:
149
+ error_lines.append(f" {tip}")
150
+ error_lines.append("")
151
+
152
+ return "\n".join(error_lines)
153
+
154
+
155
+ def _combine_instructions(*segments: str) -> str:
156
+ """Join instruction segments, skipping empties.
157
+
158
+ Args:
159
+ *segments: Variable number of instruction strings
160
+
161
+ Returns:
162
+ Combined instructions with segments separated by double newlines
163
+ """
164
+ parts = [segment.strip() for segment in segments if segment and segment.strip()]
165
+ return "\n\n".join(parts)
166
+
167
+
168
def get_default_instructions(text_mode: bool = False) -> str:
    """Return the built-in instruction text for an agent.

    The text is made of three parts: a runtime preamble, a task-completion
    paragraph that depends on ``text_mode``, and a note about interactive mode.

    Args:
        text_mode: Select the conversational ("Thought: ...") completion paragraph
            instead of the code-only one

    Returns:
        The concatenated default instructions
    """
    runtime_preamble = (
        "You are operating inside the Tsugite micro-agent runtime. Follow the rendered task faithfully, use the available "
        "tools when they meaningfully advance the work, and maintain a living plan via the task_* tools. Create or update "
        "tasks whenever you define new sub-work, mark progress as you go, and rely on the task summary to decide the next "
        "action. Provide a clear, actionable final response without unnecessary filler.\n\n"
    )
    text_completion = (
        "Task Completion: For conversational responses, use the format 'Thought: [your response]'. "
        "When using tools or code, write Python code blocks and call final_answer(result) when complete.\n\n"
    )
    code_completion = (
        "Task Completion: Write Python code to accomplish your task. "
        "When you have completed your task, call final_answer(result) to signal completion and return the result.\n\n"
    )
    interactive_note = (
        "Interactive Mode: The `is_interactive` variable indicates whether you're running in an interactive terminal. "
        "Interactive-only tools (like ask_user) are automatically available only when is_interactive is True."
    )

    completion_note = text_completion if text_mode else code_completion
    return "".join((runtime_preamble, completion_note, interactive_note))
201
+
202
+
203
def execute_prefetch(prefetch_config: List[Dict[str, Any]], event_bus: Optional["EventBus"] = None) -> Dict[str, Any]:
    """Run the configured prefetch tools and collect their results.

    Each entry is a dict read with the keys ``tool`` (tool name), ``args``
    (keyword arguments, default ``{}``) and ``assign`` (result name).
    Entries missing ``tool`` or ``assign`` are ignored.

    Args:
        prefetch_config: Prefetch entries to execute, in order
        event_bus: Optional event bus; receives a WarningEvent when a tool fails

    Returns:
        Mapping of each ``assign`` name to the tool result, or to None when
        the tool call raised
    """
    results = {}

    for entry in prefetch_config:
        tool_name = entry.get("tool")
        tool_args = entry.get("args", {})
        target = entry.get("assign")

        if not (tool_name and target):
            continue

        try:
            results[target] = call_tool(tool_name, **tool_args)
        except Exception as exc:
            # Best effort: a failing prefetch is reported (if a bus was given) and recorded as None.
            if event_bus:
                from tsugite.events import WarningEvent

                event_bus.emit(WarningEvent(message=f"Prefetch tool '{tool_name}' failed: {exc}"))
            results[target] = None

    return results
223
+
224
+
225
def execute_tool_directives(
    content: str, existing_context: Optional[Dict[str, Any]] = None, event_bus: Optional["EventBus"] = None
) -> tuple[str, Dict[str, Any]]:
    """Execute the inline tool directives found in content.

    Directives are located with ``extract_tool_directives``. Each one is run
    through ``call_tool`` and its raw text in the content is replaced by an
    HTML comment noting success or failure.

    Args:
        content: Markdown content that may contain tool directives
        existing_context: Accepted for interface compatibility; this function
            never reads it and does not merge it into the returned mapping
        event_bus: Optional event bus; receives a WarningEvent when parsing
            or a tool call fails

    Returns:
        Tuple of (modified_content, new_context)
        - modified_content: Content with each directive replaced by a note
        - new_context: Results produced here, keyed by the directive's assign
          variable (None for a failed tool). Empty when parsing fails or no
          directives are present, in which case content is returned unchanged.
    """
    from tsugite.md_agents import extract_tool_directives

    def _warn(text: str) -> None:
        # Warnings are only reported when the caller supplied an event bus.
        if event_bus:
            from tsugite.events import WarningEvent

            event_bus.emit(WarningEvent(message=text))

    try:
        found = extract_tool_directives(content)
    except ValueError as parse_error:
        _warn(f"Failed to parse tool directives: {parse_error}")
        return content, {}

    if not found:
        return content, {}

    results = {}
    rendered = content

    for directive in found:
        try:
            outcome = call_tool(directive.name, **directive.args)
            results[directive.assign_var] = outcome

            note = f"<!-- Tool '{directive.name}' executed, result in {directive.assign_var} -->"
            rendered = rendered.replace(directive.raw_match, note)
        except Exception as failure:
            _warn(f"Tool directive '{directive.name}' failed: {failure}")
            results[directive.assign_var] = None

            note = f"<!-- Tool '{directive.name}' failed: {failure} -->"
            rendered = rendered.replace(directive.raw_match, note)

    return rendered, results
295
+
296
+
297
def _extract_reasoning_content(agent: TsugiteAgent, custom_logger: Optional[Any] = None) -> None:
    """Forward the reasoning entries stored in the agent's memory to the UI handler.

    Each non-empty entry of ``agent.memory.reasoning_history`` is emitted as a
    ReasoningContentEvent on a fresh EventBus subscribed to the UI handler
    obtained from ``custom_logger``. Nothing is emitted when there is no
    logger or no handler.

    Args:
        agent: The TsugiteAgent instance that just completed execution
        custom_logger: Logger from which the UI handler is looked up
    """
    if not hasattr(agent, "memory"):
        return

    entries = agent.memory.reasoning_history
    if not entries:
        return

    for entry in entries:
        if not (entry and custom_logger):
            continue

        handler = get_ui_handler(custom_logger)
        if not handler:
            continue

        from tsugite.events import EventBus, ReasoningContentEvent

        bus = EventBus()
        bus.subscribe(handler.handle_event)
        bus.emit(ReasoningContentEvent(content=entry, step=None))
320
+
321
+
322
async def _execute_agent_with_prompt(
    prepared: "PreparedAgent",
    model_override: Optional[str] = None,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    skip_task_reset: bool = False,
    model_kwargs: Optional[Dict[str, Any]] = None,
    injectable_vars: Optional[Dict[str, Any]] = None,
    return_token_usage: bool = False,
    stream: bool = False,
    previous_messages: Optional[List[Dict]] = None,
) -> str | tuple[str, Optional[int], Optional[float], int, list, Optional[str], list]:
    """Execute an already-prepared agent and return its result.

    Low-level execution helper shared by the higher-level run functions
    (e.g. run_agent).

    Args:
        prepared: Prepared agent with all context, tools, and instructions
        model_override: Override agent's model
        custom_logger: Custom logger; its UI handler (if any) receives events
        trust_mcp_code: Accepted for interface compatibility; not consulted in this function
        delegation_agents: List of (name, path) tuples turned into delegation tools
        skip_task_reset: Skip resetting/populating the task manager (for multi-step agents)
        model_kwargs: Additional model parameters (response_format, temperature, etc.)
        injectable_vars: Variables to inject into the Python execution namespace;
            a truncated preview of each is appended to the instructions
        return_token_usage: Whether to return the extended result tuple
        stream: Whether to stream responses in real-time
        previous_messages: Previous conversation messages for continuation

    Returns:
        When return_token_usage is False: the agent output as a string.
        When return_token_usage is True: a 7-tuple
        (output, token_usage, cost, step_count, steps, system_message, attachments).
        If the agent returned something other than an AgentResult, the tuple is
        (str(result), None, None, 0, [], None, []).

    Raises:
        AgentExecutionError: If the AgentResult reports an error (only checked when
            return_token_usage=True), or if a raised exception carries ``execution_steps``
        RuntimeError: If no model can be resolved, or for any other failure while
            creating or running the agent
    """
    import os
    import threading

    from tsugite.core.agent import AgentResult
    from tsugite.events import EventBus, InfoEvent, WarningEvent
    from tsugite.utils import cleanup_pending_tasks

    agent_config = prepared.agent_config

    # Initialize task manager for this execution (unless skipped for multi-step)
    if not skip_task_reset:
        reset_task_manager()
        _populate_initial_tasks(agent_config)

    # Add variable documentation to instructions if variables are available
    combined_instructions = prepared.combined_instructions
    if injectable_vars:
        doc_parts = ["\n\nAVAILABLE PYTHON VARIABLES:\n"]
        for var_name, var_value in injectable_vars.items():
            value_text = str(var_value)
            preview = value_text[:MAX_VARIABLE_PREVIEW_LENGTH]
            if len(value_text) > MAX_VARIABLE_PREVIEW_LENGTH:
                preview += "..."
            doc_parts.append(f"- {var_name}: {preview}\n")
        # Instructions may be empty/None (see the `or ""` below), so guard the concatenation.
        combined_instructions = (prepared.combined_instructions or "") + "".join(doc_parts)

    # Create the EventBus early so that setup warnings can be reported through it
    ui_handler = get_ui_handler(custom_logger)
    event_bus = EventBus()
    if ui_handler:
        event_bus.subscribe(ui_handler.handle_event)

    # Start with a copy of the tools from the prepared agent
    tools = list(prepared.tools)

    # Filter out interactive tools in subagent mode
    if os.environ.get("TSUGITE_SUBAGENT_MODE") == "1":
        tools = [t for t in tools if t.name not in ("ask_user", "ask_user_batch")]

    # Register per-agent custom shell tools (if any)
    if agent_config.custom_tools:
        from tsugite.shell_tool_config import parse_tool_definition_from_dict
        from tsugite.tools.shell_tools import register_shell_tools

        try:
            custom_tool_definitions = [
                parse_tool_definition_from_dict(tool_dict) for tool_dict in agent_config.custom_tools
            ]
            register_shell_tools(custom_tool_definitions)

            from tsugite.core.tools import create_tool_from_tsugite

            for tool_def in custom_tool_definitions:
                tools.append(create_tool_from_tsugite(tool_def.name))
        except Exception as e:
            event_bus.emit(WarningEvent(message=f"Failed to register custom tools: {e}"))

    # Add delegation tools if provided
    if delegation_agents:
        from tsugite.agent_composition import create_delegation_tools

        tools.extend(create_delegation_tools(delegation_agents))

    mcp_clients = []  # Track clients for cleanup

    # Everything from here on runs under one try/finally so that MCP clients are
    # disconnected even if model resolution or executor setup fails.
    try:
        # Load MCP tools if configured
        if agent_config.mcp_servers:
            try:
                from tsugite.mcp_client import load_mcp_tools
                from tsugite.mcp_config import load_mcp_config

                global_mcp_config = load_mcp_config()

                for server_name, allowed_tools in agent_config.mcp_servers.items():
                    if server_name not in global_mcp_config:
                        event_bus.emit(
                            WarningEvent(message=f"MCP server '{server_name}' not found in config. Skipping.")
                        )
                        continue

                    server_config = global_mcp_config[server_name]
                    try:
                        mcp_client, mcp_tools = await load_mcp_tools(server_config, allowed_tools)
                        mcp_clients.append(mcp_client)  # Keep client alive for tools to work
                        tools.extend(mcp_tools)

                        event_bus.emit(
                            InfoEvent(message=f"Loaded {len(mcp_tools)} tools from MCP server '{server_name}'")
                        )
                    except Exception as e:
                        event_bus.emit(WarningEvent(message=f"Failed to load MCP tools from '{server_name}': {e}"))
            except Exception as e:
                event_bus.emit(WarningEvent(message=f"Failed to load MCP tools: {e}"))
                event_bus.emit(WarningEvent(message="Continuing without MCP tools."))

        model_string = _get_model_string(model_override, agent_config)

        # Merge reasoning_effort from agent config unless the caller already set it
        final_model_kwargs = dict(model_kwargs or {})
        reasoning_effort = getattr(agent_config, "reasoning_effort", None)
        if reasoning_effort:
            final_model_kwargs.setdefault("reasoning_effort", reasoning_effort)

        executor = LocalExecutor()

        # Inject variables into executor (for multi-step agents)
        if injectable_vars:
            await executor.send_variables(injectable_vars)

        try:
            agent = TsugiteAgent(
                model_string=model_string,
                tools=tools,
                instructions=combined_instructions or "",
                max_turns=agent_config.max_turns,
                executor=executor,
                model_kwargs=final_model_kwargs,
                event_bus=event_bus,
                model_name=model_string,
                text_mode=agent_config.text_mode,
                attachments=prepared.attachments,
                previous_messages=previous_messages,
            )

            result = await agent.run(prepared.rendered_prompt, return_full_result=return_token_usage, stream=stream)

            # Extract and display reasoning content if present
            _extract_reasoning_content(agent, custom_logger)

            if not return_token_usage:
                return str(result.output) if isinstance(result, AgentResult) else str(result)

            if not isinstance(result, AgentResult):
                return str(result), None, None, 0, [], None, []

            steps_list = result.steps if result.steps else []
            step_count = len(steps_list)

            # Raise only after the steps were extracted so the exception can carry
            # the execution details along.
            if result.error:
                raise AgentExecutionError(
                    f"Agent execution failed: {result.error}",
                    execution_steps=steps_list,
                    token_usage=result.token_usage,
                    cost=result.cost,
                    step_count=step_count,
                )

            return (
                str(result.output),
                result.token_usage,
                result.cost,
                step_count,
                steps_list,
                prepared.system_message,
                prepared.attachments,
            )
        except AgentExecutionError:
            # Already carries execution details, just re-raise
            raise
        except Exception as e:
            if hasattr(e, "execution_steps"):
                # Some other exception with attached details: convert, keeping the cause
                raise AgentExecutionError(
                    f"Agent execution failed: {e}",
                    execution_steps=e.execution_steps,
                    token_usage=getattr(e, "token_usage", None),
                    cost=getattr(e, "cost", None),
                    step_count=getattr(e, "step_count", 0),
                ) from e
            raise RuntimeError(f"Agent execution failed: {e}") from e
    finally:
        # Clean up MCP client connections
        for client in mcp_clients:
            try:
                await client.disconnect()
            except Exception:
                pass  # Best effort cleanup; a failed disconnect must not mask the real outcome

        # Clean up pending asyncio tasks, but only when running on the main thread
        # (the original notes that spawned agents run in worker threads whose
        # event loops are cleaned up by asyncio.run()).
        if threading.current_thread() == threading.main_thread():
            await cleanup_pending_tasks()
567
+
568
+
569
def _format_debug_context(prepared: "PreparedAgent") -> str:
    """Render the system prompt, attachments and user prompt of a prepared agent as debug text."""
    thin_rule = "-" * 80
    thick_rule = "=" * 80

    parts = [
        "\nDEBUG: Complete Prompt Context",
        thick_rule,
        "",
        "SYSTEM PROMPT:",
        thin_rule,
        prepared.system_message,
        "",
    ]

    if prepared.attachments:
        parts.append(f"ATTACHMENTS ({len(prepared.attachments)}):")
        parts.append(thin_rule)
        for name, content in prepared.attachments:
            # Long attachment bodies are truncated to keep the dump readable.
            preview = (
                content[:MAX_CONTENT_PREVIEW_LENGTH] + "..."
                if len(content) > MAX_CONTENT_PREVIEW_LENGTH
                else content
            )
            parts.append(f"• {name}")
            parts.append(f" {preview}")
            parts.append("")
    else:
        parts.append("NO ATTACHMENTS")
        parts.append("")

    parts.extend(["USER PROMPT:", thin_rule, prepared.rendered_prompt, "", thick_rule])
    return "\n".join(parts)


def run_agent(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    model_override: Optional[str] = None,
    debug: bool = False,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    return_token_usage: bool = False,
    stream: bool = False,
    force_text_mode: bool = False,
    continue_conversation_id: Optional[str] = None,
    attachments: Optional[List[tuple[str, str]]] = None,
) -> str | tuple[str, Optional[int], Optional[float], int, list, Optional[str], list]:
    """Run a Tsugite agent.

    When the environment variable TSUGITE_SUBAGENT_MODE is "1", ``prompt`` and
    ``context`` are replaced by the JSON object read from stdin and events are
    reported through a JSONL UI handler; a stdin parse failure prints a JSON
    error event and exits the process with status 1.

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent
        context: Additional context variables
        model_override: Override agent's default model
        debug: Print the system prompt, attachments and rendered prompt to stderr
        custom_logger: Custom logger for agent output
        trust_mcp_code: Whether to trust remote code from MCP servers
        delegation_agents: List of (name, path) tuples for agents to make available for delegation
        return_token_usage: Whether to return the extended result tuple
        stream: Whether to stream responses in real-time
        force_text_mode: Force text_mode=True regardless of agent config (useful for chat UI)
        continue_conversation_id: Optional conversation ID to continue; also forces text_mode
        attachments: Optional list of (name, content) tuples passed to agent preparation

    Returns:
        Agent execution result as string, or, if return_token_usage=True, the
        7-tuple produced by the low-level executor:
        (result, token_usage, cost, step_count, execution_steps, system_message, attachments)

    Raises:
        ValueError: If agent file is invalid
        RuntimeError: If agent execution fails
    """
    import json
    import os
    import sys

    # Subagent mode: subprocess-based execution driven through stdin/stdout
    if os.environ.get("TSUGITE_SUBAGENT_MODE") == "1":
        try:
            stdin_data = json.loads(sys.stdin.read())
            prompt = stdin_data["prompt"]
            context = stdin_data.get("context", {})
        except Exception as e:
            error_event = {"type": "error", "error": f"Failed to parse stdin JSON: {e}"}
            print(json.dumps(error_event), flush=True)
            sys.exit(1)

        # Set up JSONL UI handler
        from tsugite.ui.jsonl import JSONLUIHandler

        custom_logger = type("CustomLogger", (), {"ui_handler": JSONLUIHandler()})()

    if context is None:
        context = {}

    # Load conversation history if continuing
    previous_messages = []
    if continue_conversation_id:
        from tsugite.agent_runner.history_integration import load_and_apply_history

        previous_messages = load_and_apply_history(continue_conversation_id)

    # Initialize task manager for this agent session
    reset_task_manager()
    task_manager = get_task_manager()

    # Parse agent configuration
    try:
        agent = parse_agent_file(agent_path)
        agent_config = agent.config
    except Exception as e:
        # Chain the original error so the parse failure's traceback is preserved.
        raise ValueError(f"Failed to parse agent file: {e}") from e

    # Populate initial tasks from agent config
    _populate_initial_tasks(agent_config)

    # Record the current agent; it is cleared again in the finally block below
    set_current_agent(agent_config.name)

    try:
        # Override text_mode if force_text_mode is True (for chat UI) or continuing conversation
        if force_text_mode or continue_conversation_id:
            agent_config.text_mode = True

        from tsugite.agent_preparation import AgentPreparer

        prepared = AgentPreparer().prepare(
            agent=agent,
            prompt=prompt,
            context=context,
            delegation_agents=delegation_agents,
            task_summary=task_manager.get_task_summary(),
            tasks=task_manager.get_tasks_for_template(),
            attachments=attachments,
        )

        if debug:
            # Debug output goes to stderr so it does not mix with the agent result
            print(_format_debug_context(prepared), file=sys.stderr)

        # Execute with the low-level helper (wrapping async call)
        return asyncio.run(
            _execute_agent_with_prompt(
                prepared=prepared,
                model_override=model_override,
                custom_logger=custom_logger,
                trust_mcp_code=trust_mcp_code,
                delegation_agents=delegation_agents,
                return_token_usage=return_token_usage,
                stream=stream,
                previous_messages=previous_messages,
            )
        )
    finally:
        # Always clear the current agent context when done
        clear_current_agent()
733
+
734
+
735
async def run_agent_async(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    model_override: Optional[str] = None,
    debug: bool = False,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    return_token_usage: bool = False,
    stream: bool = False,
    force_text_mode: bool = False,
    continue_conversation_id: Optional[str] = None,
    attachments: Optional[List[tuple[str, str]]] = None,
) -> str | tuple[str, Optional[int], Optional[float], int, list]:
    """Run a Tsugite agent (async version for tests and async contexts).

    This is the awaitable counterpart of run_agent(): it awaits the low-level
    executor directly instead of wrapping it in asyncio.run(), so it can be
    used where an event loop is already running (e.g. pytest-asyncio tests).

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent
        context: Additional context variables
        model_override: Override agent's default model
        debug: Enable debug output (system prompt, attachments and rendered
            prompt are printed to stderr)
        custom_logger: Custom logger for agent output
        trust_mcp_code: Whether to trust remote code from MCP servers
        delegation_agents: List of (name, path) tuples for agents to make available for delegation
        return_token_usage: Whether to return token usage and cost from LiteLLM
        stream: Whether to stream responses in real-time
        force_text_mode: Force text_mode=True regardless of agent config (useful for chat UI)
        continue_conversation_id: Optional conversation ID to continue (makes run mode multi-turn)
        attachments: Optional list of (name, content) tuples for prompt caching

    Returns:
        Agent execution result as string, or tuple of
        (result, token_count, cost, step_count, execution_steps) if return_token_usage=True

    Raises:
        ValueError: If agent file is invalid
        RuntimeError: If agent execution fails
    """
    if context is None:
        context = {}

    # Load conversation history if continuing
    previous_messages = []
    if continue_conversation_id:
        from tsugite.agent_runner.history_integration import load_and_apply_history

        previous_messages = load_and_apply_history(continue_conversation_id)

    # Initialize task manager for this agent session
    reset_task_manager()
    task_manager = get_task_manager()

    # Parse agent configuration (with inheritance resolution)
    try:
        agent = parse_agent_file(agent_path)
        agent_config = agent.config
    except Exception as e:
        # Chain explicitly so the original parse failure stays attached as __cause__.
        raise ValueError(f"Failed to parse agent file: {e}") from e

    # Populate initial tasks from agent config
    _populate_initial_tasks(agent_config)

    # Set current agent in thread-local storage for spawn_agent tracking
    set_current_agent(agent_config.name)

    try:
        # Override text_mode if force_text_mode is True (for chat UI) or continuing conversation
        if force_text_mode or continue_conversation_id:
            agent_config.text_mode = True

        # Prepare agent using unified preparation pipeline
        from tsugite.agent_preparation import AgentPreparer

        preparer = AgentPreparer()
        prepared = preparer.prepare(
            agent=agent,
            prompt=prompt,
            context=context,
            delegation_agents=delegation_agents,
            task_summary=task_manager.get_task_summary(),
            tasks=task_manager.get_tasks_for_template(),
            attachments=attachments,
        )

        # Debug output if requested: print directly to stderr
        if debug:
            import sys

            print(_format_prompt_debug_context(prepared), file=sys.stderr)

        # Execute with the low-level helper (async - no asyncio.run wrapper)
        return await _execute_agent_with_prompt(
            prepared=prepared,
            model_override=model_override,
            custom_logger=custom_logger,
            trust_mcp_code=trust_mcp_code,
            delegation_agents=delegation_agents,
            return_token_usage=return_token_usage,
            stream=stream,
            previous_messages=previous_messages,
        )
    finally:
        # Always clear the current agent context when done
        clear_current_agent()


def _format_prompt_debug_context(prepared: Any) -> str:
    """Build the debug dump of system prompt, attachments and user prompt.

    Args:
        prepared: Prepared agent exposing ``system_message``, ``attachments``
            (iterable of (name, content) pairs) and ``rendered_prompt``.

    Returns:
        The newline-joined debug text.
    """
    debug_parts = ["\nDEBUG: Complete Prompt Context", "=" * 80, ""]

    # Show system prompt
    debug_parts.append("SYSTEM PROMPT:")
    debug_parts.append("-" * 80)
    debug_parts.append(prepared.system_message)
    debug_parts.append("")

    # Show attachments if any
    if prepared.attachments:
        debug_parts.append(f"ATTACHMENTS ({len(prepared.attachments)}):")
        debug_parts.append("-" * 80)
        for name, content in prepared.attachments:
            # Long attachment bodies are cut to MAX_CONTENT_PREVIEW_LENGTH characters.
            preview = (
                content[:MAX_CONTENT_PREVIEW_LENGTH] + "..."
                if len(content) > MAX_CONTENT_PREVIEW_LENGTH
                else content
            )
            debug_parts.append(f"• {name}")
            debug_parts.append(f" {preview}")
            debug_parts.append("")
    else:
        debug_parts.append("NO ATTACHMENTS")
        debug_parts.append("")

    # Show user prompt
    debug_parts.append("USER PROMPT:")
    debug_parts.append("-" * 80)
    debug_parts.append(prepared.rendered_prompt)
    debug_parts.append("")
    debug_parts.append("=" * 80)

    return "\n".join(debug_parts)
877
+
878
+
879
# Predefined loop condition helpers
# These are plain Jinja2 expressions (no {{ }}) that can be used in {% if %} blocks.
# evaluate_loop_condition() swaps a condition for the matching expression when the
# condition string is exactly one of these keys. Every expression reads the
# ``tasks`` template variable.
LOOP_HELPERS = {
    # At least one task has status "pending".
    "has_pending_tasks": "tasks | selectattr('status', 'equalto', 'pending') | list | length > 0",
    # At least one task is "pending" and has ``optional`` equal to false.
    "has_pending_required_tasks": (
        "tasks | selectattr('status', 'equalto', 'pending') | "
        "selectattr('optional', 'equalto', false) | list | length > 0"
    ),
    # Number of "completed" tasks equals the total number of tasks
    # (this also holds when the task list is empty: 0 == 0).
    "all_tasks_complete": "(tasks | selectattr('status', 'equalto', 'completed') | list | length) == (tasks | length)",
    # At least one task has a status other than "completed".
    "has_incomplete_tasks": "tasks | rejectattr('status', 'equalto', 'completed') | list | length > 0",
    # At least one task has status "in_progress".
    "has_in_progress_tasks": "tasks | selectattr('status', 'equalto', 'in_progress') | list | length > 0",
    # At least one task has status "blocked".
    "has_blocked_tasks": "tasks | selectattr('status', 'equalto', 'blocked') | list | length > 0",
}
892
+
893
+
894
+ def _filter_injectable_vars(step_context: Dict[str, Any]) -> Dict[str, Any]:
895
+ """Filter step context to only include variables suitable for Python injection.
896
+
897
+ Removes metadata variables that are used for template rendering but shouldn't
898
+ be injected into the Python execution namespace.
899
+
900
+ Args:
901
+ step_context: Full step context dictionary
902
+
903
+ Returns:
904
+ Filtered dictionary containing only injectable variables
905
+ """
906
+ metadata_vars = {
907
+ "user_prompt",
908
+ "task_summary",
909
+ "step_number",
910
+ "step_name",
911
+ "total_steps",
912
+ "is_retry",
913
+ "retry_count",
914
+ "max_retries",
915
+ "last_error",
916
+ "all_errors",
917
+ "tasks",
918
+ "is_interactive",
919
+ "text_mode",
920
+ "tools",
921
+ "is_subagent",
922
+ "parent_agent",
923
+ "iteration",
924
+ "max_iterations",
925
+ "is_looping_step",
926
+ }
927
+ return {k: v for k, v in step_context.items() if k not in metadata_vars}
928
+
929
+
930
def _build_prepared_agent_for_step(
    agent: Any,
    rendered_step_prompt: str,
    step_context: Dict[str, Any],
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    attachments: Optional[List[tuple[str, str]]] = None,
) -> "PreparedAgent":
    """Assemble a PreparedAgent for one step of a multi-step agent.

    Multi-step agents render each step themselves with the accumulated
    context, so the PreparedAgent is put together by hand here: instructions
    are combined, tool objects are created, and the system prompt is built
    from both.

    Args:
        agent: Parsed agent with config
        rendered_step_prompt: Already-rendered step prompt (used as both the
            user message and the rendered prompt)
        step_context: Step execution context
        delegation_agents: Optional list of delegation agents; when non-empty
            the "spawn_agent" tool is added
        attachments: List of (name, content) tuples for prompt caching

    Returns:
        PreparedAgent ready for execution
    """
    from tsugite.agent_preparation import PreparedAgent
    from tsugite.core.agent import build_system_prompt
    from tsugite.core.tools import create_tool_from_tsugite
    from tsugite.tools import expand_tool_specs

    # Default instructions for the current mode, followed by the agent's own.
    instructions = _combine_instructions(
        get_default_instructions(text_mode=agent.config.text_mode),
        getattr(agent.config, "instructions", ""),
    )

    # Tool names: the agent's expanded specs, then the task tools, then delegation.
    if agent.config.tools:
        tool_names = expand_tool_specs(agent.config.tools)
    else:
        tool_names = []
    tool_names = tool_names + ["task_add", "task_update", "task_complete", "task_list", "task_get"]
    if delegation_agents:
        tool_names.append("spawn_agent")
    tool_objects = [create_tool_from_tsugite(tool_name) for tool_name in tool_names]

    system_prompt = build_system_prompt(tool_objects, instructions, agent.config.text_mode)

    return PreparedAgent(
        agent=agent,
        agent_config=agent.config,
        system_message=system_prompt,
        user_message=rendered_step_prompt,
        rendered_prompt=rendered_step_prompt,
        tools=tool_objects,
        context=step_context,
        combined_instructions=instructions,
        prefetch_results={},  # Already executed in preamble
        attachments=attachments or [],
    )
986
+
987
+
988
def evaluate_loop_condition(expression: str, context: Dict[str, Any]) -> bool:
    """Evaluate a loop condition and report whether it holds.

    The condition is either the name of an entry in LOOP_HELPERS (replaced by
    that entry's expression) or a raw Jinja2 expression. It is wrapped in an
    ``{% if %}`` block that renders the text "true" when the condition is
    truthy, and the rendered output is compared against that marker.

    Args:
        expression: Jinja2 template expression or predefined helper name
        context: Template context with tasks, variables, etc.

    Returns:
        Boolean result of condition evaluation

    Raises:
        ValueError: If expression is invalid or evaluation fails
    """
    from jinja2 import Template, TemplateSyntaxError

    # Helper names expand to their stored expression; anything else is used verbatim.
    resolved = LOOP_HELPERS.get(expression, expression)

    try:
        source = "{{% if {} %}}true{{% endif %}}".format(resolved)
        rendered = Template(source).render(**context)
        return rendered.strip() == "true"
    except TemplateSyntaxError as exc:
        raise ValueError(f"Invalid loop condition expression '{resolved}': {exc}") from exc
    except Exception as exc:
        raise ValueError(f"Error evaluating loop condition '{resolved}': {exc}") from exc
1017
+
1018
+
1019
+ def _prepare_retry_context(step_context: Dict[str, Any], step: Any, attempt: int, errors: List[str]) -> None:
1020
+ """Add retry-specific variables to step context.
1021
+
1022
+ Args:
1023
+ step_context: Step context to update
1024
+ step: Step configuration
1025
+ attempt: Current attempt number (0-indexed)
1026
+ errors: List of previous errors
1027
+ """
1028
+ step_context["is_retry"] = attempt > 0
1029
+ step_context["retry_count"] = attempt
1030
+ step_context["max_retries"] = step.max_retries
1031
+ step_context["last_error"] = errors[-1] if errors else ""
1032
+ step_context["all_errors"] = errors
1033
+
1034
+
1035
+ def _show_step_progress_message(
1036
+ custom_logger: Any,
1037
+ step_header: str,
1038
+ attempt: int,
1039
+ max_retries: int,
1040
+ i: int,
1041
+ step_name: str,
1042
+ total_steps: int,
1043
+ max_attempts: int,
1044
+ debug: bool,
1045
+ event_bus: Optional["EventBus"],
1046
+ ) -> None:
1047
+ """Display step progress message in UI.
1048
+
1049
+ Args:
1050
+ custom_logger: Logger for UI updates
1051
+ step_header: Formatted step header
1052
+ attempt: Current attempt number (0-indexed)
1053
+ max_retries: Maximum retries allowed
1054
+ i: Step number (1-indexed)
1055
+ step_name: Name of the step
1056
+ total_steps: Total number of steps
1057
+ max_attempts: Total attempts (retries + 1)
1058
+ debug: Debug mode flag
1059
+ event_bus: Event bus for debug messages
1060
+ """
1061
+ if not debug:
1062
+ set_multistep_ui_context(custom_logger, i, step_name, total_steps)
1063
+ if attempt > 0:
1064
+ print_step_progress(custom_logger, step_header, f"Retry {attempt}/{max_retries}...", debug, "yellow")
1065
+ else:
1066
+ print_step_progress(custom_logger, step_header, "Starting...", debug, "cyan")
1067
+
1068
+ if debug and event_bus:
1069
+ from tsugite.events import DebugMessageEvent
1070
+
1071
+ if attempt > 0:
1072
+ event_bus.emit(
1073
+ DebugMessageEvent(
1074
+ message=f"DEBUG: Retrying Step {i}/{total_steps}: {step_name} (Attempt {attempt + 1}/{max_attempts})"
1075
+ )
1076
+ )
1077
+ else:
1078
+ event_bus.emit(DebugMessageEvent(message=f"DEBUG: Executing Step {i}/{total_steps}: {step_name}"))
1079
+
1080
+
1081
def _render_step_template(
    step: Any,
    step_context: Dict[str, Any],
    debug: bool,
    event_bus: Optional["EventBus"],
) -> str:
    """Produce the final prompt text for a step.

    Tool directives found in the step content are executed first and their
    resulting variables are merged into ``step_context`` (mutating it); the
    remaining content is then rendered against that context.

    Args:
        step: Step configuration
        step_context: Current step context (updated in place with tool results)
        debug: Debug mode flag
        event_bus: Event bus for debug output

    Returns:
        Rendered step prompt

    Raises:
        Exception: If template rendering fails
    """
    # Run the tool directives embedded in the step and expose their results.
    content_after_tools, tool_vars = execute_tool_directives(step.content, step_context, event_bus)
    step_context.update(tool_vars)

    prompt_text = AgentRenderer().render(content_after_tools, step_context)

    if debug and event_bus:
        from tsugite.events import DebugMessageEvent

        event_bus.emit(DebugMessageEvent(message="\n[bold]Rendered Prompt:[/bold]\n" + prompt_text))

    return prompt_text
1115
+
1116
+
1117
async def _execute_step_with_retries(
    step: Any,
    step_context: Dict[str, Any],
    agent: Any,
    i: int,
    total_steps: int,
    steps: List[Any],
    step_header: str,
    model_override: Optional[str],
    custom_logger: Optional[Any],
    trust_mcp_code: bool,
    delegation_agents: Optional[List[tuple[str, Path]]],
    stream: bool,
    debug: bool,
    task_manager: Any,
    event_bus: Optional["EventBus"] = None,
) -> tuple[str, float]:
    """Execute a step with automatic retries on failure.

    Handles template rendering, step execution, error handling, and timing.
    Up to ``step.max_retries + 1`` attempts are made; both a template
    rendering failure and an execution failure consume an attempt. Retry
    delays are awaited with ``asyncio.sleep`` so the event loop keeps running
    while the step waits.

    Args:
        step: Step configuration
        step_context: Current step context with variables (mutated: retry
            variables, assigned result, refreshed task data)
        agent: Parsed agent configuration
        i: Current step number (1-indexed)
        total_steps: Total number of steps
        steps: List of all steps (for error messages)
        step_header: Formatted step header for UI
        model_override: Optional model override
        custom_logger: Custom logger instance
        trust_mcp_code: Trust MCP code flag
        delegation_agents: Delegation agents list
        stream: Stream responses flag
        debug: Debug mode flag
        task_manager: Task manager instance
        event_bus: Optional event bus for debug/warning events

    Returns:
        Tuple of (step_result, step_duration); the duration is measured from
        the first attempt and therefore includes failed attempts and delays

    Raises:
        RuntimeError: If all retry attempts fail
    """
    max_attempts = step.max_retries + 1
    errors = []
    step_start_time = time.time()

    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1

        # Add retry context variables
        _prepare_retry_context(step_context, step, attempt, errors)

        # Show progress message
        _show_step_progress_message(
            custom_logger,
            step_header,
            attempt,
            step.max_retries,
            i,
            step.name,
            total_steps,
            max_attempts,
            debug,
            event_bus,
        )

        # Render step template
        try:
            rendered_step_prompt = _render_step_template(step, step_context, debug, event_bus)
        except Exception as e:
            error_msg = f"Template rendering failed: {e}"
            errors.append(error_msg)

            if is_last_attempt:
                clear_multistep_ui_context(custom_logger)
                error_msg = _build_step_error_message(
                    error_type="Template Rendering Failed",
                    step_name=step.name,
                    step_number=i,
                    total_steps=total_steps,
                    errors=errors,
                    available_vars=list(step_context.keys()),
                    previous_step=steps[i - 2].name if i > 1 else "None",
                    max_attempts=max_attempts,
                    debug_tips=[
                        "1. Check for undefined variables in step template",
                        "2. Verify previous steps assigned expected variables",
                        "3. Run with --debug to see full context",
                    ],
                )
                raise RuntimeError(error_msg) from e

            # Non-blocking delay: time.sleep() here would stall the whole event loop.
            if step.retry_delay > 0:
                await asyncio.sleep(step.retry_delay)
            continue

        # Prepare variables and build PreparedAgent
        injectable_vars = _filter_injectable_vars(step_context)
        prepared = _build_prepared_agent_for_step(
            agent=agent,
            rendered_step_prompt=rendered_step_prompt,
            step_context=step_context,
            delegation_agents=delegation_agents,
        )

        # Execute this step as a full agent run
        try:
            coro = _execute_agent_with_prompt(
                prepared=prepared,
                model_override=model_override,
                custom_logger=custom_logger,
                trust_mcp_code=trust_mcp_code,
                delegation_agents=delegation_agents,
                skip_task_reset=True,
                model_kwargs=step.model_kwargs,
                injectable_vars=injectable_vars,
                stream=stream,
            )
            # Only bound the run when the step declares a timeout.
            if step.timeout:
                step_result = await asyncio.wait_for(coro, timeout=step.timeout)
            else:
                step_result = await coro

            # Store result in context if assign variable specified
            if step.assign_var:
                step_context[step.assign_var] = step_result
                if debug and event_bus:
                    from tsugite.events import DebugMessageEvent

                    event_bus.emit(DebugMessageEvent(message=f"Assigned result to variable: {step.assign_var}"))

            # Update task summary and tasks list for next step
            step_context["task_summary"] = task_manager.get_task_summary()
            step_context["tasks"] = task_manager.get_tasks_for_template()

            # Show step completion
            if not debug:
                clear_multistep_ui_context(custom_logger)
                print_step_progress(custom_logger, step_header, "Complete", debug, "green")

            # Calculate duration and return
            step_duration = time.time() - step_start_time
            return step_result, step_duration

        except asyncio.TimeoutError:
            error_msg = f"Step timed out after {step.timeout} seconds"
            errors.append(error_msg)
        except Exception as e:
            error_msg = str(e)
            errors.append(error_msg)

        # If not last attempt, handle retry delay and continue
        if not is_last_attempt:
            if step.retry_delay > 0:
                await asyncio.sleep(step.retry_delay)
            if not debug and event_bus:
                from tsugite.events import WarningEvent

                event_bus.emit(WarningEvent(message=f"Step '{step.name}' failed: {error_msg}"))
        else:
            # Last attempt failed
            clear_multistep_ui_context(custom_logger)
            error_msg = _build_step_error_message(
                error_type="Execution Failed",
                step_name=step.name,
                step_number=i,
                total_steps=total_steps,
                errors=errors,
                available_vars=list(_filter_injectable_vars(step_context).keys()),
                previous_step=steps[i - 2].name if i > 1 else "None",
                max_attempts=max_attempts,
                debug_tips=[
                    "1. Run with --debug to see rendered prompts",
                    "2. Check variable values in previous steps",
                    "3. Verify step dependencies are correct",
                ],
            )
            raise RuntimeError(error_msg)

    # Should never reach here, but for type safety
    raise RuntimeError("Unexpected: Retry loop completed without success or raising")
1302
+
1303
+
1304
+ def _should_repeat_step(
1305
+ step: Any, step_context: Dict[str, Any], iteration: int, debug: bool, event_bus: Optional["EventBus"] = None
1306
+ ) -> bool:
1307
+ """Determine if a step should repeat based on loop conditions.
1308
+
1309
+ Evaluates repeat_while, repeat_until, and max_iterations to decide
1310
+ whether the step should execute again.
1311
+
1312
+ Args:
1313
+ step: Step configuration with repeat conditions
1314
+ step_context: Current step context for condition evaluation
1315
+ iteration: Current iteration count (1-indexed)
1316
+ debug: Whether debug mode is active
1317
+ event_bus: Optional event bus for emitting debug/warning messages
1318
+
1319
+ Returns:
1320
+ True if step should repeat, False otherwise
1321
+ """
1322
+ should_repeat = False
1323
+
1324
+ # Evaluate repeat conditions
1325
+ if step.repeat_while:
1326
+ try:
1327
+ should_repeat = evaluate_loop_condition(step.repeat_while, step_context)
1328
+ if debug and event_bus:
1329
+ from tsugite.events import DebugMessageEvent
1330
+
1331
+ event_bus.emit(DebugMessageEvent(message=f"Loop condition (while): {should_repeat}"))
1332
+ except Exception as e:
1333
+ if event_bus:
1334
+ from tsugite.events import WarningEvent
1335
+
1336
+ event_bus.emit(WarningEvent(message=f"Loop condition evaluation failed: {e}"))
1337
+ should_repeat = False
1338
+
1339
+ elif step.repeat_until:
1340
+ try:
1341
+ condition_met = evaluate_loop_condition(step.repeat_until, step_context)
1342
+ should_repeat = not condition_met # Repeat UNTIL condition is true
1343
+ if debug and event_bus:
1344
+ from tsugite.events import DebugMessageEvent
1345
+
1346
+ event_bus.emit(
1347
+ DebugMessageEvent(
1348
+ message=f"Loop condition (until): condition_met={condition_met}, repeat={should_repeat}"
1349
+ )
1350
+ )
1351
+ except Exception as e:
1352
+ if event_bus:
1353
+ from tsugite.events import WarningEvent
1354
+
1355
+ event_bus.emit(WarningEvent(message=f"Loop condition evaluation failed: {e}"))
1356
+ should_repeat = False
1357
+
1358
+ # Safety check: max iterations
1359
+ if should_repeat and iteration >= step.max_iterations:
1360
+ if event_bus:
1361
+ from tsugite.events import WarningEvent
1362
+
1363
+ event_bus.emit(
1364
+ WarningEvent(
1365
+ message=f"⚠️ Step '{step.name}' reached max_iterations ({step.max_iterations}). "
1366
+ f'Use max_iterations="N" to increase limit.'
1367
+ )
1368
+ )
1369
+ should_repeat = False
1370
+
1371
+ return should_repeat
1372
+
1373
+
1374
async def _run_multistep_agent_impl(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    model_override: Optional[str] = None,
    debug: bool = False,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    stream: bool = False,
) -> str:
    """Async implementation of multi-step agent execution.

    Core logic extracted from run_multistep_agent() and run_multistep_agent_async().
    This is the single source of truth for multi-step execution.

    Multi-step agents use <!-- tsu:step --> directives to execute sequentially,
    with each step being a full agent run. Results from earlier steps can be
    assigned to variables and used in later steps. A step with repeat_while /
    repeat_until is re-run until its loop condition (or max_iterations) stops it.

    The current-agent marker set at the start is cleared on every exit path,
    including a failure while parsing the step directives.

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent
        context: Additional context variables
        model_override: Override agent's default model
        debug: Enable debug output (rendered prompts for each step)
        custom_logger: Custom logger for agent output
        trust_mcp_code: Whether to trust remote code from MCP servers
        delegation_agents: List of (name, path) tuples for agents to make available
        stream: Whether to stream responses in real-time

    Returns:
        Result from the final successful step ("" when there is none)

    Raises:
        ValueError: If agent file is invalid or step parsing fails
        RuntimeError: If any step execution fails
    """
    from tsugite.md_agents import extract_step_directives, has_step_directives

    if context is None:
        context = {}

    # Parse agent (with inheritance resolution)
    try:
        agent = parse_agent_file(agent_path)
    except Exception as e:
        raise ValueError(f"Failed to parse agent file: {e}") from e

    # Set current agent in thread-local storage for spawn_agent tracking
    set_current_agent(agent.config.name)

    try:
        # Extract steps from raw markdown (before any rendering). This lives inside
        # the outer try so the finally below also runs when step parsing fails.
        try:
            if not has_step_directives(agent.content):
                raise ValueError(f"Agent {agent_path} does not contain step directives. Use run_agent() instead.")

            _preamble, steps = extract_step_directives(agent.content)
        except Exception as e:
            raise ValueError(f"Failed to parse step directives: {e}") from e

        if not steps:
            raise ValueError("No valid step directives found in agent")

        # Validate unique step names (duplicates reported in first-appearance order)
        step_names = [s.name for s in steps]
        duplicates = [name for name in dict.fromkeys(step_names) if step_names.count(name) > 1]
        if duplicates:
            raise ValueError(f"Duplicate step names found: {', '.join(duplicates)}")

        # Initialize task manager ONCE for entire multi-step execution
        # This allows tasks to persist and accumulate across steps
        reset_task_manager()
        task_manager = get_task_manager()

        # Populate initial tasks from agent config
        _populate_initial_tasks(agent.config)

        # Create event_bus for emitting events throughout multi-step execution
        from tsugite.events import DebugMessageEvent, EventBus, InfoEvent, WarningEvent

        event_bus = EventBus()
        ui_handler = get_ui_handler(custom_logger)
        if ui_handler:
            event_bus.subscribe(ui_handler.handle_event)

        # Check if running in interactive mode
        interactive_mode = is_interactive()

        # Initialize context with user prompt
        step_context = {
            **context,
            "user_prompt": prompt,
            "task_summary": task_manager.get_task_summary(),
            "tasks": task_manager.get_tasks_for_template(),
            "is_interactive": interactive_mode,
            "text_mode": agent.config.text_mode,
            "tools": agent.config.tools,  # Make tools list available to templates
            # Subagent context (set by spawn_agent if this is a spawned agent)
            "is_subagent": context.get("is_subagent", False),
            "parent_agent": context.get("parent_agent", None),
        }

        # Execute prefetch once (before any steps); a failure is reported but not fatal
        if agent.config.prefetch:
            try:
                prefetch_context = execute_prefetch(agent.config.prefetch, event_bus)
                step_context.update(prefetch_context)
            except Exception as e:
                event_bus.emit(WarningEvent(message=f"Prefetch execution failed: {e}"))

        # Execute each step sequentially
        final_result = None
        step_metrics: List[StepMetrics] = []

        for i, step in enumerate(steps, 1):
            # Add step information to context for this step
            step_context["step_number"] = i
            step_context["step_name"] = step.name
            step_context["total_steps"] = len(steps)

            # Loop control: iterate if step has repeat_while or repeat_until
            iteration = 0
            step_is_looping = bool(step.repeat_while or step.repeat_until)

            while True:
                iteration += 1

                # Add iteration context
                step_context["iteration"] = iteration
                step_context["max_iterations"] = step.max_iterations
                step_context["is_looping_step"] = step_is_looping

                # Header shown in progress output; looping steps include the iteration
                if step_is_looping:
                    step_header = f"[Step {i}/{len(steps)}: {step.name} (Iteration {iteration})]"
                else:
                    step_header = f"[Step {i}/{len(steps)}: {step.name}]"

                # Execute step with automatic retries
                step_start_time = time.time()
                try:
                    step_result, step_duration = await _execute_step_with_retries(
                        step=step,
                        step_context=step_context,
                        agent=agent,
                        i=i,
                        total_steps=len(steps),
                        steps=steps,
                        step_header=step_header,
                        model_override=model_override,
                        custom_logger=custom_logger,
                        trust_mcp_code=trust_mcp_code,
                        delegation_agents=delegation_agents,
                        stream=stream,
                        debug=debug,
                        task_manager=task_manager,
                        event_bus=event_bus,
                    )

                    # Success - store result and record metrics
                    final_result = step_result
                    step_metrics.append(
                        StepMetrics(
                            step_name=step.name,
                            step_number=i,
                            duration=step_duration,
                            status="success",
                        )
                    )

                except RuntimeError as e:
                    # Step execution failed after all retries
                    if not step.continue_on_error:
                        # Re-raise if not continuing on error
                        raise

                    # Log warning but continue execution
                    clear_multistep_ui_context(custom_logger)

                    warning_msg = f"⚠ Step '{step.name}' failed but continuing (continue_on_error=true)"
                    event_bus.emit(WarningEvent(message=warning_msg))
                    event_bus.emit(InfoEvent(message=f"Error: {str(e)}"))

                    # Assign None to the variable if specified
                    if step.assign_var:
                        step_context[step.assign_var] = None
                        if debug:
                            event_bus.emit(
                                DebugMessageEvent(message=f"Assigned None to variable: {step.assign_var}")
                            )

                    # Record metrics for skipped step
                    step_duration = time.time() - step_start_time
                    step_metrics.append(
                        StepMetrics(
                            step_name=step.name,
                            step_number=i,
                            duration=step_duration,
                            status="skipped",
                            error=str(e),
                        )
                    )

                # Check if we should repeat this step (loop control)
                should_repeat = _should_repeat_step(step, step_context, iteration, debug, event_bus)

                # Exit while loop if we shouldn't repeat
                if not should_repeat:
                    if step_is_looping and iteration > 1 and not debug:
                        event_bus.emit(InfoEvent(message=f"Step '{step.name}' completed after {iteration} iterations"))
                    break

                # Update task context for next iteration
                step_context["task_summary"] = task_manager.get_task_summary()
                step_context["tasks"] = task_manager.get_tasks_for_template()

                if not debug:
                    event_bus.emit(InfoEvent(message=f"🔁 Repeating step '{step.name}' (iteration {iteration + 1})"))

        # Display metrics summary
        if step_metrics:
            display_step_metrics(step_metrics, custom_logger if custom_logger else None)

        return final_result or ""
    finally:
        # Always clear the current agent context when done
        clear_current_agent()

        # Clean up any pending asyncio tasks (e.g., LiteLLM logging tasks)
        # to prevent RuntimeWarning about tasks being destroyed while pending
        # ONLY run cleanup for top-level agents, not spawned agents
        # Spawned agents run in ThreadPoolExecutor threads and their event loops
        # are cleaned up automatically by asyncio.run()
        import threading

        from tsugite.utils import cleanup_pending_tasks

        if threading.current_thread() == threading.main_thread():
            await cleanup_pending_tasks()
1619
+
1620
+
1621
def run_multistep_agent(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    model_override: Optional[str] = None,
    debug: bool = False,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    stream: bool = False,
) -> str:
    """Run a multi-step agent from synchronous code.

    Blocking entry point: every argument is forwarded unchanged to
    _run_multistep_agent_impl(), which is driven to completion with
    asyncio.run().

    Multi-step agents use <!-- tsu:step --> directives to execute sequentially,
    with each step being a full agent run. Results from earlier steps can be
    assigned to variables and used in later steps.

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent
        context: Additional context variables
        model_override: Override agent's default model
        debug: Enable debug output (rendered prompts for each step)
        custom_logger: Custom logger for agent output
        trust_mcp_code: Whether to trust remote code from MCP servers
        delegation_agents: List of (name, path) tuples for agents to make available
        stream: Whether to stream responses in real-time

    Returns:
        Result from the final step

    Raises:
        ValueError: If agent file is invalid or step parsing fails
        RuntimeError: If any step execution fails
    """
    forwarded = dict(
        agent_path=agent_path,
        prompt=prompt,
        context=context,
        model_override=model_override,
        debug=debug,
        custom_logger=custom_logger,
        trust_mcp_code=trust_mcp_code,
        delegation_agents=delegation_agents,
        stream=stream,
    )
    return asyncio.run(_run_multistep_agent_impl(**forwarded))
1671
+
1672
+
1673
async def run_multistep_agent_async(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    model_override: Optional[str] = None,
    debug: bool = False,
    custom_logger: Optional[Any] = None,
    trust_mcp_code: bool = False,
    delegation_agents: Optional[List[tuple[str, Path]]] = None,
    stream: bool = False,
) -> str:
    """Run a multi-step agent to completion from asynchronous code.

    Awaitable counterpart of run_multistep_agent(): every argument is forwarded
    unchanged to _run_multistep_agent_impl(), which is awaited on the caller's
    event loop.

    Multi-step agents use <!-- tsu:step --> directives to execute sequentially,
    with each step being a full agent run. Results from earlier steps can be
    assigned to variables and used in later steps.

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent
        context: Additional context variables
        model_override: Override agent's default model
        debug: Enable debug output (rendered prompts for each step)
        custom_logger: Custom logger for agent output
        trust_mcp_code: Whether to trust remote code from MCP servers
        delegation_agents: List of (name, path) tuples for agents to make available
        stream: Whether to stream responses in real-time

    Returns:
        Result from the final step

    Raises:
        ValueError: If agent file is invalid or step parsing fails
        RuntimeError: If any step execution fails
    """
    # Gather the pass-through arguments once so the forwarding call stays short.
    impl_kwargs: Dict[str, Any] = {
        "agent_path": agent_path,
        "prompt": prompt,
        "context": context,
        "model_override": model_override,
        "debug": debug,
        "custom_logger": custom_logger,
        "trust_mcp_code": trust_mcp_code,
        "delegation_agents": delegation_agents,
        "stream": stream,
    }
    final_output = await _run_multistep_agent_impl(**impl_kwargs)
    return final_output
1721
+
1722
+
1723
def preview_multistep_agent(
    agent_path: Path,
    prompt: str,
    context: Optional[Dict[str, Any]] = None,
    console: Optional[Any] = None,
    custom_logger: Optional[Any] = None,
):
    """Preview multi-step agent execution without running it.

    Parses the agent file, extracts its step directives and reports a header,
    an "Execution Plan" table (step name, attributes, referenced variables) and
    a list of warnings. Nothing is executed. Parse or extraction failures are
    reported through the same output channel and the function returns early.

    Output goes through an EventBus when ``custom_logger`` provides a UI
    handler; otherwise it is printed to ``console`` (or the module's stderr
    console when ``console`` is None).

    Args:
        agent_path: Path to agent markdown file
        prompt: User prompt/task for the agent (only echoed in the header)
        context: Additional context variables (accepted for interface parity;
            not used by the preview)
        console: Rich Console instance (defaults to stderr console)
        custom_logger: Custom logger with ui_handler for event emission

    Returns:
        None
    """
    import re

    from rich.markup import escape
    from rich.table import Table

    from tsugite.events import EventBus, InfoEvent, WarningEvent

    # Check if we should use event system
    ui_handler = get_ui_handler(custom_logger)
    event_bus = None
    if ui_handler:
        event_bus = EventBus()
        event_bus.subscribe(ui_handler.handle_event)

    # Use provided console or default to stderr (for non-event output)
    if console is None and not event_bus:
        console = _stderr_console

    # Helper to output messages (via events or console)
    def output(msg: str, is_warning: bool = False):
        if event_bus:
            if is_warning:
                event_bus.emit(WarningEvent(message=msg))
            else:
                event_bus.emit(InfoEvent(message=msg))
        elif console:
            console.print(msg)  # noqa: T201 - Intentional fallback when no event system available

    def plain(value: Any) -> str:
        """Return ``value`` as text with Rich markup tags escaped.

        Messages here are Rich markup strings, so unescaped brackets in dynamic
        text (a user prompt, an exception message, a step name) would be read
        as style tags: silently dropped, or a MarkupError for a stray closing
        tag. NOTE(review): assumes event handlers render messages as Rich
        markup too, as the literal "[red]"/"[dim]" tags below imply.
        """
        return escape(str(value))

    # Parse agent (with inheritance resolution)
    try:
        agent = parse_agent_file(agent_path)
    except Exception as e:
        output(f"[red]Error parsing agent: {plain(e)}[/red]")
        return

    # Extract steps
    from tsugite.md_agents import extract_step_directives, has_step_directives

    if not has_step_directives(agent.content):
        output("[yellow]This is a single-step agent (no step directives).[/yellow]", is_warning=True)
        output("[dim]Dry-run preview is for multi-step agents only.[/dim]")
        return

    try:
        # The preamble is not needed for the preview; only the steps are shown.
        _preamble, steps = extract_step_directives(agent.content)
    except Exception as e:
        output(f"[red]Error extracting steps: {plain(e)}[/red]")
        return

    # Display header
    output("")
    output("[bold]Dry-Run Preview: Multi-Step Agent[/bold]")
    output("═" * 60)
    output(f"Agent: {plain(agent.config.name)}")
    output(f"File: {plain(agent_path.name)}")
    output(f"Prompt: {plain(prompt)}")
    output(f"Steps: {len(steps)}")
    output(f"Model: {plain(agent.config.model or 'default')}")
    output(f"Tools: {plain(', '.join(agent.config.tools)) if agent.config.tools else 'None'}")
    output("")

    # Show steps in table format
    table = Table(title="Execution Plan", show_header=True)
    table.add_column("#", style="cyan", width=3)
    table.add_column("Step Name", style="green")
    table.add_column("Attributes", style="yellow")
    table.add_column("Dependencies", style="dim")

    # Loop invariants: the first identifier of each "{{ ... }}" expression, and
    # the template helpers/metadata names that are not real step dependencies.
    variable_pattern = re.compile(r"\{\{\s*(\w+)")
    metadata_vars = frozenset(
        {
            "user_prompt",
            "task_summary",
            "step_number",
            "step_name",
            "total_steps",
            "now",
            "today",
            "is_interactive",
            "is_retry",
            "retry_count",
        }
    )

    for i, step in enumerate(steps, 1):
        # Collect attributes
        attrs = []
        if step.assign_var:
            attrs.append(f"→ {step.assign_var}")
        if step.max_retries > 0:
            attrs.append(f"retries:{step.max_retries}")
        if step.timeout:
            attrs.append(f"timeout:{step.timeout}s")
        if step.continue_on_error:
            attrs.append("continue_on_error")
        if step.retry_delay > 0:
            attrs.append(f"delay:{step.retry_delay}s")

        attr_str = ", ".join(attrs) if attrs else "—"

        # Find dependencies (variables referenced in step content)
        variables_used = set(variable_pattern.findall(step.content))
        real_deps = variables_used - metadata_vars

        deps_str = ", ".join(sorted(real_deps)) if real_deps else "—"

        table.add_row(str(i), plain(step.name), attr_str, deps_str)

    # Output table (via console fallback since tables need special rendering)
    if event_bus:
        # For events, render table to string
        from io import StringIO

        buffer = StringIO()
        temp_console = get_display_console(custom_logger)
        # The console may be shared with the rest of the UI, so redirect it to
        # the buffer only for this one print and always put the stream back.
        # Otherwise later output would be written into the discarded buffer.
        previous_file = temp_console.file
        temp_console.file = buffer
        try:
            temp_console.print(table)  # noqa: T201 - Rendering to buffer, not user console
        finally:
            temp_console.file = previous_file
        event_bus.emit(InfoEvent(message=buffer.getvalue()))
    elif console:
        console.print(table)  # noqa: T201 - Intentional fallback when no event system available

    output("")

    # Warnings
    warning_messages = []
    for step in steps:
        if step.timeout and step.timeout < 30:
            warning_messages.append(f"⚠ Step '{plain(step.name)}' has short timeout ({step.timeout}s)")
        if step.continue_on_error and not step.assign_var:
            warning_messages.append(f"⚠ Step '{plain(step.name)}' has continue_on_error but no assign variable")

    if warning_messages:
        output("[bold]Warnings:[/bold]")
        output("─" * 60)
        for warning in warning_messages:
            output(f" [yellow]{warning}[/yellow]", is_warning=True)
        output("")

    output("━" * 60)
    output("[dim]Note: This is a preview only. No tools will be executed.[/dim]")
    output("[dim]Remove --dry-run to execute the agent.[/dim]")
    output("")