tsugite-cli 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. tsugite/__init__.py +6 -0
  2. tsugite/agent_composition.py +163 -0
  3. tsugite/agent_inheritance.py +479 -0
  4. tsugite/agent_preparation.py +236 -0
  5. tsugite/agent_runner/__init__.py +45 -0
  6. tsugite/agent_runner/helpers.py +106 -0
  7. tsugite/agent_runner/history_integration.py +248 -0
  8. tsugite/agent_runner/metrics.py +100 -0
  9. tsugite/agent_runner/runner.py +1879 -0
  10. tsugite/agent_runner/validation.py +70 -0
  11. tsugite/agent_utils.py +167 -0
  12. tsugite/attachments/__init__.py +65 -0
  13. tsugite/attachments/auto_context.py +199 -0
  14. tsugite/attachments/base.py +34 -0
  15. tsugite/attachments/file.py +51 -0
  16. tsugite/attachments/inline.py +31 -0
  17. tsugite/attachments/storage.py +178 -0
  18. tsugite/attachments/url.py +59 -0
  19. tsugite/attachments/youtube.py +101 -0
  20. tsugite/benchmark/__init__.py +62 -0
  21. tsugite/benchmark/config.py +183 -0
  22. tsugite/benchmark/core.py +292 -0
  23. tsugite/benchmark/discovery.py +377 -0
  24. tsugite/benchmark/evaluators.py +671 -0
  25. tsugite/benchmark/execution.py +657 -0
  26. tsugite/benchmark/metrics.py +204 -0
  27. tsugite/benchmark/reports.py +420 -0
  28. tsugite/benchmark/utils.py +288 -0
  29. tsugite/builtin_agents/chat-assistant.md +53 -0
  30. tsugite/builtin_agents/default.md +140 -0
  31. tsugite/builtin_agents.py +5 -0
  32. tsugite/cache.py +195 -0
  33. tsugite/cli/__init__.py +1042 -0
  34. tsugite/cli/agents.py +148 -0
  35. tsugite/cli/attachments.py +193 -0
  36. tsugite/cli/benchmark.py +663 -0
  37. tsugite/cli/cache.py +113 -0
  38. tsugite/cli/config.py +272 -0
  39. tsugite/cli/helpers.py +534 -0
  40. tsugite/cli/history.py +193 -0
  41. tsugite/cli/init.py +387 -0
  42. tsugite/cli/mcp.py +193 -0
  43. tsugite/cli/tools.py +419 -0
  44. tsugite/config.py +204 -0
  45. tsugite/console.py +48 -0
  46. tsugite/constants.py +21 -0
  47. tsugite/core/__init__.py +19 -0
  48. tsugite/core/agent.py +774 -0
  49. tsugite/core/executor.py +300 -0
  50. tsugite/core/memory.py +67 -0
  51. tsugite/core/tools.py +271 -0
  52. tsugite/docker_cli.py +270 -0
  53. tsugite/events/__init__.py +55 -0
  54. tsugite/events/base.py +46 -0
  55. tsugite/events/bus.py +62 -0
  56. tsugite/events/events.py +224 -0
  57. tsugite/exceptions.py +40 -0
  58. tsugite/history/__init__.py +29 -0
  59. tsugite/history/index.py +210 -0
  60. tsugite/history/models.py +106 -0
  61. tsugite/history/storage.py +157 -0
  62. tsugite/mcp_client.py +219 -0
  63. tsugite/mcp_config.py +174 -0
  64. tsugite/md_agents.py +751 -0
  65. tsugite/models.py +257 -0
  66. tsugite/renderer.py +151 -0
  67. tsugite/shell_tool_config.py +265 -0
  68. tsugite/templates/assistant.md +14 -0
  69. tsugite/tools/__init__.py +265 -0
  70. tsugite/tools/agents.py +312 -0
  71. tsugite/tools/edit_strategies.py +393 -0
  72. tsugite/tools/fs.py +329 -0
  73. tsugite/tools/http.py +239 -0
  74. tsugite/tools/interactive.py +430 -0
  75. tsugite/tools/shell.py +129 -0
  76. tsugite/tools/shell_tools.py +214 -0
  77. tsugite/tools/tasks.py +339 -0
  78. tsugite/tsugite.py +7 -0
  79. tsugite/ui/__init__.py +46 -0
  80. tsugite/ui/base.py +638 -0
  81. tsugite/ui/chat.py +265 -0
  82. tsugite/ui/chat.tcss +92 -0
  83. tsugite/ui/chat_history.py +286 -0
  84. tsugite/ui/helpers.py +102 -0
  85. tsugite/ui/jsonl.py +125 -0
  86. tsugite/ui/live_template.py +529 -0
  87. tsugite/ui/plain.py +419 -0
  88. tsugite/ui/textual_chat.py +642 -0
  89. tsugite/ui/textual_handler.py +225 -0
  90. tsugite/ui/widgets/__init__.py +6 -0
  91. tsugite/ui/widgets/base_scroll_log.py +27 -0
  92. tsugite/ui/widgets/message_list.py +121 -0
  93. tsugite/ui/widgets/thought_log.py +80 -0
  94. tsugite/ui_context.py +90 -0
  95. tsugite/utils.py +367 -0
  96. tsugite/xdg.py +104 -0
  97. tsugite_cli-0.3.3.dist-info/METADATA +325 -0
  98. tsugite_cli-0.3.3.dist-info/RECORD +101 -0
  99. tsugite_cli-0.3.3.dist-info/WHEEL +4 -0
  100. tsugite_cli-0.3.3.dist-info/entry_points.txt +5 -0
  101. tsugite_cli-0.3.3.dist-info/licenses/LICENSE +235 -0
tsugite/core/agent.py ADDED
@@ -0,0 +1,774 @@
1
+ """Core agent implementation using LiteLLM directly.
2
+
3
+ A simpler, more direct implementation that gives us full control over
4
+ model parameters and reasoning model support.
5
+ """
6
+
7
+ import asyncio
8
+ import time
9
+ from dataclasses import dataclass
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ import litellm
13
+
14
+ from tsugite.events import (
15
+ CodeExecutionEvent,
16
+ CostSummaryEvent,
17
+ ErrorEvent,
18
+ EventBus,
19
+ FinalAnswerEvent,
20
+ LLMMessageEvent,
21
+ ObservationEvent,
22
+ ReasoningContentEvent,
23
+ ReasoningTokensEvent,
24
+ StepStartEvent,
25
+ StreamChunkEvent,
26
+ StreamCompleteEvent,
27
+ TaskStartEvent,
28
+ )
29
+
30
+ from .executor import CodeExecutor, LocalExecutor
31
+ from .memory import AgentMemory, StepResult
32
+ from .tools import Tool
33
+
34
+ # Agent execution constants
35
+ DEFAULT_MAX_TURNS = 10 # Default maximum reasoning iterations before timeout
36
+
37
+
38
def build_system_prompt(tools: List[Tool], instructions: str = "", text_mode: bool = False) -> str:
    """Assemble the system prompt sent to the LLM.

    TsugiteAgent and the render command both go through this function so the
    prompt that is displayed is the same one that is sent to the model.

    Args:
        tools: Tool objects available to the agent
        instructions: Additional instructions from agent config
        text_mode: If True, build the text-mode prompt (code blocks optional);
            otherwise build the standard prompt (code blocks required)

    Returns:
        Complete system prompt string
    """
    # Pick the mode-specific builder, then hand it the rendered tools section
    # and a flag telling it whether any tools exist at all.
    prompt_builder = build_text_mode_prompt if text_mode else build_standard_mode_prompt
    return prompt_builder(build_tools_section(tools), instructions, bool(tools))
59
+
60
+
61
@dataclass
class AgentResult:
    """Result from agent execution.

    Returned by TsugiteAgent.run() when it is called with
    return_full_result=True, both when a final answer was produced and when
    the agent ran out of turns.
    """

    # Final answer produced by the agent; None when max_turns was reached.
    output: Any
    # total_tokens reported by the last LLM response of the run, if available.
    token_usage: Optional[int] = None
    # Cost accumulated by the agent across its LLM calls, if known.
    cost: Optional[float] = None
    # Steps recorded in the agent's memory during the run.
    steps: Optional[List[StepResult]] = None
    # Error message when the run did not complete (e.g. max_turns reached).
    error: Optional[str] = None
70
+
71
+
72
+ class TsugiteAgent:
73
+ """Custom agent that uses Thought/Code/Observation loop.
74
+
75
+ Provides direct access to LiteLLM features including reasoning models,
76
+ custom parameters, and full control over the execution loop.
77
+
78
+ Example:
79
+ agent = TsugiteAgent(
80
+ model_string="openai:gpt-4o-mini",
81
+ tools=[tool1, tool2],
82
+ instructions="You are a helpful assistant",
83
+ max_turns=10
84
+ )
85
+
86
+ result = await agent.run("Calculate 5 + 3")
87
+ print(result) # "8"
88
+ """
89
+
90
def __init__(
    self,
    model_string: str,
    tools: List[Tool],
    instructions: str = "",
    max_turns: int = DEFAULT_MAX_TURNS,
    executor: CodeExecutor = None,
    model_kwargs: dict = None,
    event_bus: EventBus = None,
    model_name: str = None,
    text_mode: bool = False,
    attachments: List[tuple[str, str]] = None,
    previous_messages: List[Dict] = None,
):
    """Set up the agent's configuration, memory, executor and LLM parameters.

    Args:
        model_string: Model identifier like "openai:gpt-4o-mini"
        tools: List of Tool objects the agent can use
        instructions: Additional instructions to append to system prompt
        max_turns: Maximum number of reasoning turns (think-act cycles) before giving up
        executor: Code executor. If not provided, a LocalExecutor is created
        model_kwargs: Extra parameters for LiteLLM (reasoning_effort, response_format, etc.)
        event_bus: Optional EventBus for broadcasting events
        model_name: Optional display name for the model (for UI); defaults to model_string
        text_mode: Allow text-only responses (code blocks optional)
        attachments: List of (name, content) tuples for prompt caching
        previous_messages: List of previous conversation messages (user/assistant pairs)
    """
    from tsugite.models import get_model_params

    # Identity and prompt configuration
    self.model_string = model_string
    self.model_name = model_name if model_name else model_string
    self.instructions = instructions
    self.text_mode = text_mode
    self.max_turns = max_turns

    # Collaborators
    self.tools = tools
    self.executor = executor if executor else LocalExecutor()
    self.memory = AgentMemory()
    self.event_bus = event_bus

    # Conversation context supplied by the caller
    self.attachments = attachments if attachments else []
    self.previous_messages = previous_messages if previous_messages else []

    # Running total of the cost reported by every LLM call
    self.total_cost = 0.0

    # Name -> Tool lookup table
    self.tool_map = {registered.name: registered for registered in tools}

    # Expose the tools as callables inside the executor namespace
    self._inject_tools_into_executor()

    # Resolve the LiteLLM call parameters once, up front
    extra_params = model_kwargs if model_kwargs else {}
    self.litellm_params = get_model_params(model_string, **extra_params)
141
+
142
+ def _inject_tools_into_executor(self):
143
+ """Inject tools into executor namespace so they can be called from Python code.
144
+
145
+ Creates wrapper functions for each tool that call the tool's execute() method.
146
+ The LLM sees tools as Python functions and calls them directly in generated code.
147
+ """
148
+
149
+ tool_functions = {}
150
+
151
+ for tool in self.tools:
152
+
153
+ def make_tool_wrapper(tool_obj):
154
+ """Create a wrapper for this specific tool."""
155
+
156
+ def tool_wrapper(*args, **kwargs):
157
+ """Synchronous wrapper that calls async tool.execute().
158
+
159
+ Accepts both positional and keyword arguments for flexibility,
160
+ but tool.execute() expects keyword arguments only.
161
+ """
162
+ if hasattr(self.executor, "_tools_called"):
163
+ self.executor._tools_called.append(tool_obj.name)
164
+
165
+ if args:
166
+ import inspect
167
+
168
+ try:
169
+ sig = inspect.signature(tool_obj.function)
170
+ param_names = list(sig.parameters.keys())
171
+
172
+ for i, arg in enumerate(args):
173
+ if i < len(param_names):
174
+ param_name = param_names[i]
175
+ if param_name not in kwargs:
176
+ kwargs[param_name] = arg
177
+ else:
178
+ raise TypeError(
179
+ f"Tool '{tool_obj.name}' takes at most {len(param_names)} "
180
+ f"positional arguments but {len(args)} were given"
181
+ )
182
+ except Exception:
183
+ # If signature inspection fails, fall back to error
184
+ raise TypeError(
185
+ f"Tool '{tool_obj.name}' must be called with keyword arguments, "
186
+ f"not positional arguments. "
187
+ f"Example: {tool_obj.name}(param1=value1, param2=value2)"
188
+ )
189
+
190
+ try:
191
+ loop = asyncio.get_running_loop()
192
+ import concurrent.futures
193
+ import contextvars
194
+
195
+ ctx = contextvars.copy_context()
196
+
197
+ with concurrent.futures.ThreadPoolExecutor() as executor:
198
+
199
+ def run_async():
200
+ new_loop = asyncio.new_event_loop()
201
+ asyncio.set_event_loop(new_loop)
202
+ try:
203
+ return new_loop.run_until_complete(tool_obj.execute(**kwargs))
204
+ finally:
205
+ pending = asyncio.all_tasks(new_loop)
206
+ for task in pending:
207
+ task.cancel()
208
+ if pending:
209
+ new_loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
210
+ new_loop.close()
211
+
212
+ result = executor.submit(ctx.run, run_async).result()
213
+ except RuntimeError:
214
+ loop = asyncio.new_event_loop()
215
+ asyncio.set_event_loop(loop)
216
+ try:
217
+ result = loop.run_until_complete(tool_obj.execute(**kwargs))
218
+ finally:
219
+ pending = asyncio.all_tasks(loop)
220
+ for task in pending:
221
+ task.cancel()
222
+ if pending:
223
+ loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
224
+ loop.close()
225
+
226
+ return result
227
+
228
+ tool_wrapper.__name__ = tool_obj.name
229
+ tool_wrapper.__doc__ = tool_obj.description
230
+ if hasattr(tool_obj.function, "__signature__"):
231
+ tool_wrapper.__signature__ = tool_obj.function.__signature__
232
+ if hasattr(tool_obj.function, "__annotations__"):
233
+ tool_wrapper.__annotations__ = tool_obj.function.__annotations__
234
+
235
+ return tool_wrapper
236
+
237
+ tool_functions[tool.name] = make_tool_wrapper(tool)
238
+
239
+ if hasattr(self.executor, "namespace"):
240
+ self.executor.namespace.update(tool_functions)
241
+
242
async def run(self, task: str, return_full_result: bool = False, stream: bool = False):
    """Run the agent on a task.

    Repeats a think/act cycle for at most ``self.max_turns`` turns: build the
    message list from memory, call the LLM, parse the reply into a thought and
    a code block, execute the code, and record the step in memory. The loop
    stops as soon as an execution result carries a final answer. Events are
    emitted on ``self.event_bus`` along the way when a bus is configured.

    Args:
        task: The task to solve
        return_full_result: If True, return AgentResult with metadata
        stream: If True, stream the response chunks in real-time

    Returns:
        The final answer from the agent, or an AgentResult (output, token
        usage, cost, steps) when return_full_result is True. When max_turns is
        reached and return_full_result is True, an AgentResult with ``error``
        set and ``output=None`` is returned instead of raising.

    Raises:
        RuntimeError: If agent reaches max_turns without finishing and
            return_full_result is False
    """
    start_time = time.time()

    self.memory.add_task(task)

    if self.event_bus:
        self.event_bus.emit(TaskStartEvent(task=task, model=self.model_name))

    # Main agent loop
    for turn_num in range(self.max_turns):
        step_number = turn_num + 1

        if self.event_bus:
            self.event_bus.emit(StepStartEvent(step=step_number, max_turns=self.max_turns))

        messages = self._build_messages()

        # Call LiteLLM directly with the parameters pre-computed in __init__
        if stream:
            # Streaming mode: accumulate chunk text and emit an event per chunk
            text_parts = []
            response = None

            stream_params = {**self.litellm_params, "stream": True}
            stream_response = await litellm.acompletion(messages=messages, **stream_params)

            async for chunk in stream_response:
                if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                    delta = chunk.choices[0].delta
                    if hasattr(delta, "content") and delta.content:
                        chunk_text = delta.content
                        text_parts.append(chunk_text)

                        if self.event_bus:
                            self.event_bus.emit(StreamChunkEvent(chunk=chunk_text))

                # Keep the last chunk as the response for usage/cost tracking
                response = chunk

            if self.event_bus:
                self.event_bus.emit(StreamCompleteEvent())

            thought, code, _ = self._parse_response_from_text("".join(text_parts))
        else:
            # Non-streaming mode: get the complete response in one call
            response = await litellm.acompletion(messages=messages, **self.litellm_params)
            thought, code, _ = self._parse_response(response)

        # In streaming mode ``response`` is the last chunk, or None when the stream
        # yielded nothing, so it may not have a ``usage`` attribute at all.
        # Read it once, defensively, and use this local everywhere below.
        # NOTE(review): this is the usage of the latest LLM call only; token counts
        # reported at the end are therefore not cumulative across turns, unlike cost.
        usage = getattr(response, "usage", None)

        # Track cost from this response
        step_cost = 0.0
        if hasattr(response, "_hidden_params") and "response_cost" in response._hidden_params:
            step_cost = response._hidden_params["response_cost"]
        if step_cost is not None:
            self.total_cost += step_cost

        # Record and broadcast reasoning content when the response carries any
        reasoning_content = self._extract_reasoning_content(response)
        if reasoning_content:
            self.memory.add_reasoning(reasoning_content)
            if self.event_bus:
                self.event_bus.emit(ReasoningContentEvent(content=reasoning_content, step=step_number))

        # Broadcast the reasoning token count when the usage details report one
        if usage and hasattr(usage, "completion_tokens_details"):
            details = usage.completion_tokens_details
            if hasattr(details, "reasoning_tokens") and details.reasoning_tokens:
                if self.event_bus:
                    self.event_bus.emit(ReasoningTokensEvent(tokens=details.reasoning_tokens, step=step_number))

        has_code = bool(code and code.strip())

        # Show what the LLM said. Skipped when streaming, because the text was
        # already delivered through StreamChunkEvent.
        if self.event_bus and not stream:
            # Fall back to the raw response when no thought could be parsed; this
            # helps debugging when the LLM does not follow the expected format.
            display_content = thought if thought else response.choices[0].message.content

            # In text mode a thought without code becomes the final answer, so it
            # is not shown here as well (avoids duplication).
            skip_llm_message = self.text_mode and thought and not has_code

            if display_content and display_content.strip() and not skip_llm_message:
                self.event_bus.emit(
                    LLMMessageEvent(
                        content=display_content, title=f"Turn {step_number} Reasoning", step=step_number
                    )
                )

        if has_code:
            if self.event_bus:
                self.event_bus.emit(CodeExecutionEvent(code=code))

            exec_result = await self.executor.execute(code)

            if self.event_bus:
                if exec_result.error:
                    self.event_bus.emit(
                        ErrorEvent(error=exec_result.error, error_type="Execution Error", step=step_number)
                    )
                else:
                    self.event_bus.emit(ObservationEvent(observation=exec_result.output))
        else:
            # No code to execute - create an empty placeholder result
            from .executor import ExecutionResult

            exec_result = ExecutionResult(output="", error=None, stdout="", stderr="")

            if self.text_mode:
                # In text mode, code blocks are optional: a thought on its own is
                # treated as the final answer.
                if thought and thought.strip():
                    exec_result.final_answer = thought
                elif self.event_bus:
                    # Neither thought nor code - an error even in text mode
                    self.event_bus.emit(
                        ErrorEvent(
                            error="No response generated. Expected at least a Thought.",
                            error_type="Format Error",
                            step=step_number,
                        )
                    )
            else:
                # Standard mode: code is required
                if self.event_bus:
                    self.event_bus.emit(
                        ErrorEvent(
                            error="LLM did not generate code. Expected format:\n\nThought: <explanation>\n```python\n<code>\n```",
                            error_type="Format Error",
                            step=step_number,
                        )
                    )

                # Record the correction as this step's observation so the LLM sees
                # what it did wrong on the next turn.
                correction_msg = (
                    "Format Error: You must provide your response in a Python code block.\n\n"
                    "Use this format:\n\n"
                    "Thought: <your explanation>\n"
                    "```python\n"
                    "# Your code here\n"
                    'final_answer("your answer")\n'
                    "```\n\n"
                    "Remember to call final_answer() with your result."
                )

                self.memory.add_step(
                    thought=thought if thought else "(No thought provided)",
                    code="",
                    output=correction_msg,
                    error=None,
                    tools_called=[],
                )

                # Next turn - the correction will be in the observation
                continue

        # Record this step (executed code, or a text-mode turn without code)
        self.memory.add_step(
            thought=thought,
            code=code,
            output=exec_result.output,
            error=exec_result.error,
            tools_called=exec_result.tools_called,
        )

        if exec_result.final_answer is None:
            # Not finished: the LLM will see the observation in the next iteration
            continue

        # Agent is done
        self.memory.add_final_answer(exec_result.final_answer)

        total_tokens = usage.total_tokens if usage else None
        reported_cost = self.total_cost if self.total_cost > 0 else None

        if self.event_bus:
            self.event_bus.emit(
                FinalAnswerEvent(
                    answer=str(exec_result.final_answer),
                    turns=step_number,
                    tokens=total_tokens,
                    cost=reported_cost,
                )
            )

            duration = time.time() - start_time

            # Cache-related usage fields; left as None when the usage object
            # does not expose them.
            cached_tokens = None
            cache_creation_tokens = None
            cache_read_tokens = None
            if usage:
                cached_tokens = getattr(usage, "cached_tokens", None)
                cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", None)
                cache_read_tokens = getattr(usage, "cache_read_input_tokens", None)

            self.event_bus.emit(
                CostSummaryEvent(
                    tokens=total_tokens,
                    cost=reported_cost,
                    model=self.model_name,
                    duration_seconds=duration,
                    cached_tokens=cached_tokens,
                    cache_creation_input_tokens=cache_creation_tokens,
                    cache_read_input_tokens=cache_read_tokens,
                )
            )

        if return_full_result:
            return AgentResult(
                output=exec_result.final_answer,
                token_usage=total_tokens,
                cost=reported_cost,
                steps=self.memory.steps,
            )
        return exec_result.final_answer

    # If we get here, we hit max_turns
    error_msg = f"Agent reached max_turns ({self.max_turns}) without completing task"
    if self.event_bus:
        self.event_bus.emit(ErrorEvent(error=error_msg, error_type="RuntimeError"))

    # Callers that asked for the full result (e.g. benchmarks) get the execution
    # trace back with the error field set instead of an exception.
    if return_full_result:
        return AgentResult(
            output=None,
            token_usage=None,
            cost=self.total_cost,
            steps=self.memory.steps,
            error=error_msg,
        )

    # Backward compatibility: raise for plain-answer callers
    raise RuntimeError(error_msg)
518
+
519
+ def _build_messages(self) -> List[Dict]:
520
+ """Build message list for LLM from memory.
521
+
522
+ Uses system blocks with cache control when attachments are present
523
+ for better prompt caching support.
524
+
525
+ Format with attachments (system blocks):
526
+ [
527
+ {"role": "system", "content": [
528
+ {"type": "text", "text": system_prompt},
529
+ {"type": "text", "text": attachment1, "cache_control": {"type": "ephemeral"}},
530
+ {"type": "text", "text": attachment2, "cache_control": {"type": "ephemeral"}},
531
+ ]},
532
+ {"role": "user", "content": "previous turn 1"},
533
+ {"role": "assistant", "content": "previous response 1"},
534
+ {"role": "user", "content": "previous turn 2"},
535
+ {"role": "assistant", "content": "previous response 2"},
536
+ {"role": "user", "content": task},
537
+ ...
538
+ ]
539
+
540
+ Format without attachments (legacy):
541
+ [
542
+ {"role": "system", "content": system_prompt},
543
+ {"role": "user", "content": "previous turn 1"},
544
+ {"role": "assistant", "content": "previous response 1"},
545
+ {"role": "user", "content": task},
546
+ ...
547
+ ]
548
+ """
549
+ messages = []
550
+
551
+ # Build system message with or without attachments
552
+ if self.attachments:
553
+ # Use system blocks with cache control for better caching
554
+ system_blocks = [{"type": "text", "text": self._build_system_prompt()}]
555
+
556
+ # Add each attachment as a separate cacheable block
557
+ for name, content in self.attachments:
558
+ system_blocks.append(
559
+ {
560
+ "type": "text",
561
+ "text": f"<Attachment: {name}>\n{content}\n</Attachment: {name}>",
562
+ "cache_control": {"type": "ephemeral"},
563
+ }
564
+ )
565
+
566
+ messages.append({"role": "system", "content": system_blocks})
567
+ else:
568
+ # Legacy format: simple string
569
+ messages.append({"role": "system", "content": self._build_system_prompt()})
570
+
571
+ # Previous conversation messages (if continuing a conversation)
572
+ if self.previous_messages:
573
+ messages.extend(self.previous_messages)
574
+
575
+ # Task
576
+ messages.append({"role": "user", "content": self.memory.task})
577
+
578
+ # Previous steps (Thought/Code → Observation pairs)
579
+ for step in self.memory.steps:
580
+ # Assistant's thought + code
581
+ assistant_msg = f"Thought: {step.thought}\n\n```python\n{step.code}\n```"
582
+ messages.append({"role": "assistant", "content": assistant_msg})
583
+
584
+ # Observation (code execution result)
585
+ observation = f"Observation: {step.output}"
586
+ if step.error:
587
+ observation += f"\nError: {step.error}"
588
+
589
+ messages.append({"role": "user", "content": observation})
590
+
591
+ return messages
592
+
593
def _build_system_prompt(self) -> str:
    """Return the system prompt for this agent's tools, instructions and mode.

    Delegates to the module-level build_system_prompt().
    """
    return build_system_prompt(
        tools=self.tools,
        instructions=self.instructions,
        text_mode=self.text_mode,
    )
596
+
597
+ def _parse_response(self, response) -> tuple[str, str, Optional[str]]:
598
+ """Parse LLM response into thought, code, and final_answer.
599
+
600
+ Returns:
601
+ (thought, code, final_answer)
602
+ """
603
+ content = response.choices[0].message.content
604
+ return self._parse_response_from_text(content)
605
+
606
+ def _parse_response_from_text(self, content: str) -> tuple[str, str, Optional[str]]:
607
+ """Parse text content into thought, code, and final_answer.
608
+
609
+ Args:
610
+ content: The text content to parse
611
+
612
+ Returns:
613
+ (thought, code, final_answer)
614
+ """
615
+ thought = ""
616
+ code = ""
617
+
618
+ # Extract thought (everything before code block)
619
+ thought_start = content.find("Thought:")
620
+ if thought_start != -1:
621
+ thought_start += len("Thought:")
622
+ code_block_start = content.find("```python", thought_start)
623
+ if code_block_start != -1:
624
+ thought = content[thought_start:code_block_start].strip()
625
+ else:
626
+ thought = content[thought_start:].strip()
627
+
628
+ # Extract code block
629
+ code_block_start = content.find("```python")
630
+ if code_block_start != -1:
631
+ code_start = code_block_start + len("```python")
632
+ code_end = content.find("```", code_start)
633
+ if code_end != -1:
634
+ code = content[code_start:code_end].strip()
635
+
636
+ return thought, code, None
637
+
638
+ def _extract_reasoning_content(self, response) -> Optional[str]:
639
+ """Extract reasoning content from response (for o1/o3/Claude thinking).
640
+
641
+ Returns:
642
+ str: Reasoning content if present, None otherwise
643
+ """
644
+ try:
645
+ if hasattr(response, "choices") and len(response.choices) > 0:
646
+ choice = response.choices[0]
647
+ if hasattr(choice.message, "reasoning_content"):
648
+ return choice.message.reasoning_content
649
+ except (AttributeError, IndexError):
650
+ pass
651
+
652
+ return None
653
+
654
+
655
def build_tools_section(tools: List[Tool]) -> str:
    """Render the "Available tools" part of the system prompt.

    Each tool contributes the text returned by its to_code_prompt() method;
    the pieces are separated by a blank line and wrapped in one python code
    fence.

    Args:
        tools: List of Tool objects available to the agent

    Returns:
        Formatted tools section or empty string if no tools
    """
    if not tools:
        return ""

    rendered_tools = [tool.to_code_prompt() for tool in tools]
    tool_definitions = "\n\n".join(rendered_tools)

    heading = "\n## Available tools:\n\nYou have access to these Python functions:\n\n"
    return f"{heading}```python\n{tool_definitions}\n```\n"
677
+
678
+
679
def build_text_mode_prompt(tools_section: str, instructions: str, has_tools: bool) -> str:
    """Build system prompt for text mode (code blocks optional).

    The prompt is made of a fixed preamble describing the two response
    styles, the tools section, a numbered rule list whose fourth rule depends
    on whether tools exist, and finally the caller's instructions.

    Args:
        tools_section: Formatted tools section
        instructions: Additional instructions from agent config
        has_tools: Whether tools are available

    Returns:
        Complete system prompt for text mode
    """
    if has_tools:
        tool_rule = "4. When using code, call tools with keyword arguments: result = tool_name(arg1=value1, arg2=value2)"
    else:
        tool_rule = "4. Use Python when you need to perform actions"

    preamble = (
        "You are an expert assistant who helps with tasks.\n"
        "\n"
        "You can respond in two ways:\n"
        "\n"
        "**For conversational questions or simple responses:**\n"
        "Just provide your Thought with the answer directly:\n"
        "\n"
        "Thought: [Your response here]\n"
        "\n"
        "**When you need to use tools or perform actions:**\n"
        "Provide a Thought and write Python code:\n"
        "\n"
        "Thought: [What you'll do and why]\n"
        "```python\n"
        "# Your code here\n"
        "final_answer(result)\n"
        "```\n"
    )

    rules = (
        "## Rules:\n"
        "\n"
        '1. Start with "Thought:" to explain your reasoning\n'
        "2. Code blocks are OPTIONAL - only use them when you need tools or complex logic\n"
        "3. For direct answers, just provide the Thought without code\n"
        f"{tool_rule}\n"
        "5. When using code blocks, call final_answer() with the result\n"
        "6. Variables persist across code blocks\n"
        "\n"
        f"{instructions}\n"
        "\n"
        "Now begin!"
    )

    # The tools section sits directly between the example block and the rules
    return f"{preamble}{tools_section}\n{rules}"
726
+
727
+
728
def build_standard_mode_prompt(tools_section: str, instructions: str, has_tools: bool) -> str:
    """Build system prompt for standard mode (code blocks required).

    The prompt is made of a fixed preamble explaining the
    Thought/Code/Observation cycle, the tools section, a numbered rule list
    whose third rule depends on whether tools exist, and finally the caller's
    instructions.

    Args:
        tools_section: Formatted tools section
        instructions: Additional instructions from agent config
        has_tools: Whether tools are available

    Returns:
        Complete system prompt for standard mode
    """
    if has_tools:
        tool_rule = "3. Call tools with keyword arguments: result = tool_name(arg1=value1, arg2=value2)"
    else:
        tool_rule = "3. Use standard Python to solve the task"

    preamble = (
        "You are an expert assistant who solves tasks using Python code.\n"
        "\n"
        "To solve a task, you proceed in steps using this pattern:\n"
        "\n"
        "1. **Thought:** Explain your reasoning (what you'll do and why)\n"
        "2. **Code:** Write Python code in a code block\n"
        "3. **Observation:** You'll see the code execution result\n"
        "\n"
        "You repeat this Thought → Code → Observation cycle until you have the final answer.\n"
        "\n"
        "## How to write code:\n"
        "\n"
        "- Always start with a Thought explaining your approach\n"
        "- Write code in triple-backtick code blocks: ```python\n"
        "- Use print() to output important information\n"
        "- Variables persist between code blocks\n"
        "- When you have the final answer, call: final_answer(your_answer)\n"
    )

    rules = (
        "## Rules:\n"
        "\n"
        "1. Always provide Thought before code\n"
        "2. Only use variables you've defined\n"
        f"{tool_rule}\n"
        "4. Call final_answer() when you have the answer\n"
        "5. If you get an error, try a different approach\n"
        "6. State persists - variables remain available across code blocks\n"
        "\n"
        f"{instructions}\n"
        "\n"
        "Now begin!"
    )

    # The tools section sits directly between the how-to list and the rules
    return f"{preamble}{tools_section}\n{rules}"