tsugite-cli 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsugite/__init__.py +6 -0
- tsugite/agent_composition.py +163 -0
- tsugite/agent_inheritance.py +479 -0
- tsugite/agent_preparation.py +236 -0
- tsugite/agent_runner/__init__.py +45 -0
- tsugite/agent_runner/helpers.py +106 -0
- tsugite/agent_runner/history_integration.py +248 -0
- tsugite/agent_runner/metrics.py +100 -0
- tsugite/agent_runner/runner.py +1879 -0
- tsugite/agent_runner/validation.py +70 -0
- tsugite/agent_utils.py +167 -0
- tsugite/attachments/__init__.py +65 -0
- tsugite/attachments/auto_context.py +199 -0
- tsugite/attachments/base.py +34 -0
- tsugite/attachments/file.py +51 -0
- tsugite/attachments/inline.py +31 -0
- tsugite/attachments/storage.py +178 -0
- tsugite/attachments/url.py +59 -0
- tsugite/attachments/youtube.py +101 -0
- tsugite/benchmark/__init__.py +62 -0
- tsugite/benchmark/config.py +183 -0
- tsugite/benchmark/core.py +292 -0
- tsugite/benchmark/discovery.py +377 -0
- tsugite/benchmark/evaluators.py +671 -0
- tsugite/benchmark/execution.py +657 -0
- tsugite/benchmark/metrics.py +204 -0
- tsugite/benchmark/reports.py +420 -0
- tsugite/benchmark/utils.py +288 -0
- tsugite/builtin_agents/chat-assistant.md +53 -0
- tsugite/builtin_agents/default.md +140 -0
- tsugite/builtin_agents.py +5 -0
- tsugite/cache.py +195 -0
- tsugite/cli/__init__.py +1042 -0
- tsugite/cli/agents.py +148 -0
- tsugite/cli/attachments.py +193 -0
- tsugite/cli/benchmark.py +663 -0
- tsugite/cli/cache.py +113 -0
- tsugite/cli/config.py +272 -0
- tsugite/cli/helpers.py +534 -0
- tsugite/cli/history.py +193 -0
- tsugite/cli/init.py +387 -0
- tsugite/cli/mcp.py +193 -0
- tsugite/cli/tools.py +419 -0
- tsugite/config.py +204 -0
- tsugite/console.py +48 -0
- tsugite/constants.py +21 -0
- tsugite/core/__init__.py +19 -0
- tsugite/core/agent.py +774 -0
- tsugite/core/executor.py +300 -0
- tsugite/core/memory.py +67 -0
- tsugite/core/tools.py +271 -0
- tsugite/docker_cli.py +270 -0
- tsugite/events/__init__.py +55 -0
- tsugite/events/base.py +46 -0
- tsugite/events/bus.py +62 -0
- tsugite/events/events.py +224 -0
- tsugite/exceptions.py +40 -0
- tsugite/history/__init__.py +29 -0
- tsugite/history/index.py +210 -0
- tsugite/history/models.py +106 -0
- tsugite/history/storage.py +157 -0
- tsugite/mcp_client.py +219 -0
- tsugite/mcp_config.py +174 -0
- tsugite/md_agents.py +751 -0
- tsugite/models.py +257 -0
- tsugite/renderer.py +151 -0
- tsugite/shell_tool_config.py +265 -0
- tsugite/templates/assistant.md +14 -0
- tsugite/tools/__init__.py +265 -0
- tsugite/tools/agents.py +312 -0
- tsugite/tools/edit_strategies.py +393 -0
- tsugite/tools/fs.py +329 -0
- tsugite/tools/http.py +239 -0
- tsugite/tools/interactive.py +430 -0
- tsugite/tools/shell.py +129 -0
- tsugite/tools/shell_tools.py +214 -0
- tsugite/tools/tasks.py +339 -0
- tsugite/tsugite.py +7 -0
- tsugite/ui/__init__.py +46 -0
- tsugite/ui/base.py +638 -0
- tsugite/ui/chat.py +265 -0
- tsugite/ui/chat.tcss +92 -0
- tsugite/ui/chat_history.py +286 -0
- tsugite/ui/helpers.py +102 -0
- tsugite/ui/jsonl.py +125 -0
- tsugite/ui/live_template.py +529 -0
- tsugite/ui/plain.py +419 -0
- tsugite/ui/textual_chat.py +642 -0
- tsugite/ui/textual_handler.py +225 -0
- tsugite/ui/widgets/__init__.py +6 -0
- tsugite/ui/widgets/base_scroll_log.py +27 -0
- tsugite/ui/widgets/message_list.py +121 -0
- tsugite/ui/widgets/thought_log.py +80 -0
- tsugite/ui_context.py +90 -0
- tsugite/utils.py +367 -0
- tsugite/xdg.py +104 -0
- tsugite_cli-0.3.3.dist-info/METADATA +325 -0
- tsugite_cli-0.3.3.dist-info/RECORD +101 -0
- tsugite_cli-0.3.3.dist-info/WHEEL +4 -0
- tsugite_cli-0.3.3.dist-info/entry_points.txt +5 -0
- tsugite_cli-0.3.3.dist-info/licenses/LICENSE +235 -0
tsugite/core/agent.py
ADDED
|
@@ -0,0 +1,774 @@
|
|
|
1
|
+
"""Core agent implementation using LiteLLM directly.
|
|
2
|
+
|
|
3
|
+
A simpler, more direct implementation that gives us full control over
|
|
4
|
+
model parameters and reasoning model support.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
import litellm
|
|
13
|
+
|
|
14
|
+
from tsugite.events import (
|
|
15
|
+
CodeExecutionEvent,
|
|
16
|
+
CostSummaryEvent,
|
|
17
|
+
ErrorEvent,
|
|
18
|
+
EventBus,
|
|
19
|
+
FinalAnswerEvent,
|
|
20
|
+
LLMMessageEvent,
|
|
21
|
+
ObservationEvent,
|
|
22
|
+
ReasoningContentEvent,
|
|
23
|
+
ReasoningTokensEvent,
|
|
24
|
+
StepStartEvent,
|
|
25
|
+
StreamChunkEvent,
|
|
26
|
+
StreamCompleteEvent,
|
|
27
|
+
TaskStartEvent,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from .executor import CodeExecutor, LocalExecutor
|
|
31
|
+
from .memory import AgentMemory, StepResult
|
|
32
|
+
from .tools import Tool
|
|
33
|
+
|
|
34
|
+
# Agent execution constants.
# Upper bound on Thought/Code/Observation iterations; TsugiteAgent uses this
# as the default value of its ``max_turns`` argument.
DEFAULT_MAX_TURNS = 10
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_system_prompt(tools: List[Tool], instructions: str = "", text_mode: bool = False) -> str:
    """Assemble the complete system prompt from the tools and instructions.

    TsugiteAgent and the render command both go through this function, so
    the prompt that is displayed always matches the prompt sent to the LLM.

    Args:
        tools: Tool objects available to the agent.
        instructions: Additional instructions from the agent config.
        text_mode: When True, build the text-mode prompt (code blocks
            optional); otherwise build the standard prompt (code required).

    Returns:
        The complete system prompt string.
    """
    # Pick the mode-specific builder, then hand it the shared arguments.
    prompt_builder = build_text_mode_prompt if text_mode else build_standard_mode_prompt
    return prompt_builder(build_tools_section(tools), instructions, bool(tools))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class AgentResult:
    """Outcome of an agent run, including optional usage metadata."""

    # Final answer produced by the agent (None when the run failed).
    output: Any
    # Token count taken from the LLM response usage, when available.
    token_usage: Optional[int] = None
    # Cost accumulated over the run, when available.
    cost: Optional[float] = None
    # Steps recorded in the agent's memory during the run.
    steps: Optional[List[StepResult]] = None
    # Error description when the run did not complete.
    error: Optional[str] = None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TsugiteAgent:
|
|
73
|
+
"""Custom agent that uses Thought/Code/Observation loop.
|
|
74
|
+
|
|
75
|
+
Provides direct access to LiteLLM features including reasoning models,
|
|
76
|
+
custom parameters, and full control over the execution loop.
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
agent = TsugiteAgent(
|
|
80
|
+
model_string="openai:gpt-4o-mini",
|
|
81
|
+
tools=[tool1, tool2],
|
|
82
|
+
instructions="You are a helpful assistant",
|
|
83
|
+
max_turns=10
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
result = await agent.run("Calculate 5 + 3")
|
|
87
|
+
print(result) # "8"
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
def __init__(
    self,
    model_string: str,
    tools: List[Tool],
    instructions: str = "",
    max_turns: int = DEFAULT_MAX_TURNS,
    executor: Optional[CodeExecutor] = None,
    model_kwargs: Optional[dict] = None,
    event_bus: Optional[EventBus] = None,
    model_name: Optional[str] = None,
    text_mode: bool = False,
    attachments: Optional[List[tuple[str, str]]] = None,
    previous_messages: Optional[List[Dict]] = None,
):
    """Initialize the agent.

    Args:
        model_string: Model identifier like "openai:gpt-4o-mini".
        tools: List of Tool objects the agent can use.
        instructions: Additional instructions to append to the system prompt.
        max_turns: Maximum number of reasoning turns (think-act cycles)
            before giving up.
        executor: Code executor. If None, a LocalExecutor is created.
        model_kwargs: Extra parameters for LiteLLM (reasoning_effort,
            response_format, etc.).
        event_bus: Optional EventBus for broadcasting events.
        model_name: Optional display name for the model (for UI); defaults
            to ``model_string``.
        text_mode: Allow text-only responses (code blocks optional).
        attachments: List of (name, content) tuples for prompt caching.
        previous_messages: Previous conversation messages (user/assistant
            pairs) to replay before the task.
    """
    # Imported here rather than at module level, as in the original code
    # (presumably to avoid an import cycle - TODO confirm).
    from tsugite.models import get_model_params

    self.model_string = model_string
    self.tools = tools
    self.instructions = instructions
    self.max_turns = max_turns
    # Compare against None explicitly so a caller-supplied executor is never
    # replaced just because it happens to evaluate as falsy.
    self.executor = executor if executor is not None else LocalExecutor()
    self.memory = AgentMemory()
    self.event_bus = event_bus
    self.model_name = model_name or model_string
    self.text_mode = text_mode
    self.attachments = attachments or []
    self.previous_messages = previous_messages or []

    # Cumulative cost across all steps of a run.
    self.total_cost = 0.0

    # Name -> Tool lookup table.
    self.tool_map = {tool.name: tool for tool in tools}

    # Make the tools callable from code run by the executor.
    self._inject_tools_into_executor()

    # Pre-compute the LiteLLM parameters once; run() reuses them every turn.
    self.litellm_params = get_model_params(model_string, **(model_kwargs or {}))
|
|
141
|
+
|
|
142
|
+
def _inject_tools_into_executor(self):
|
|
143
|
+
"""Inject tools into executor namespace so they can be called from Python code.
|
|
144
|
+
|
|
145
|
+
Creates wrapper functions for each tool that call the tool's execute() method.
|
|
146
|
+
The LLM sees tools as Python functions and calls them directly in generated code.
|
|
147
|
+
"""
|
|
148
|
+
|
|
149
|
+
tool_functions = {}
|
|
150
|
+
|
|
151
|
+
for tool in self.tools:
|
|
152
|
+
|
|
153
|
+
def make_tool_wrapper(tool_obj):
|
|
154
|
+
"""Create a wrapper for this specific tool."""
|
|
155
|
+
|
|
156
|
+
def tool_wrapper(*args, **kwargs):
|
|
157
|
+
"""Synchronous wrapper that calls async tool.execute().
|
|
158
|
+
|
|
159
|
+
Accepts both positional and keyword arguments for flexibility,
|
|
160
|
+
but tool.execute() expects keyword arguments only.
|
|
161
|
+
"""
|
|
162
|
+
if hasattr(self.executor, "_tools_called"):
|
|
163
|
+
self.executor._tools_called.append(tool_obj.name)
|
|
164
|
+
|
|
165
|
+
if args:
|
|
166
|
+
import inspect
|
|
167
|
+
|
|
168
|
+
try:
|
|
169
|
+
sig = inspect.signature(tool_obj.function)
|
|
170
|
+
param_names = list(sig.parameters.keys())
|
|
171
|
+
|
|
172
|
+
for i, arg in enumerate(args):
|
|
173
|
+
if i < len(param_names):
|
|
174
|
+
param_name = param_names[i]
|
|
175
|
+
if param_name not in kwargs:
|
|
176
|
+
kwargs[param_name] = arg
|
|
177
|
+
else:
|
|
178
|
+
raise TypeError(
|
|
179
|
+
f"Tool '{tool_obj.name}' takes at most {len(param_names)} "
|
|
180
|
+
f"positional arguments but {len(args)} were given"
|
|
181
|
+
)
|
|
182
|
+
except Exception:
|
|
183
|
+
# If signature inspection fails, fall back to error
|
|
184
|
+
raise TypeError(
|
|
185
|
+
f"Tool '{tool_obj.name}' must be called with keyword arguments, "
|
|
186
|
+
f"not positional arguments. "
|
|
187
|
+
f"Example: {tool_obj.name}(param1=value1, param2=value2)"
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
try:
|
|
191
|
+
loop = asyncio.get_running_loop()
|
|
192
|
+
import concurrent.futures
|
|
193
|
+
import contextvars
|
|
194
|
+
|
|
195
|
+
ctx = contextvars.copy_context()
|
|
196
|
+
|
|
197
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
198
|
+
|
|
199
|
+
def run_async():
|
|
200
|
+
new_loop = asyncio.new_event_loop()
|
|
201
|
+
asyncio.set_event_loop(new_loop)
|
|
202
|
+
try:
|
|
203
|
+
return new_loop.run_until_complete(tool_obj.execute(**kwargs))
|
|
204
|
+
finally:
|
|
205
|
+
pending = asyncio.all_tasks(new_loop)
|
|
206
|
+
for task in pending:
|
|
207
|
+
task.cancel()
|
|
208
|
+
if pending:
|
|
209
|
+
new_loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
|
210
|
+
new_loop.close()
|
|
211
|
+
|
|
212
|
+
result = executor.submit(ctx.run, run_async).result()
|
|
213
|
+
except RuntimeError:
|
|
214
|
+
loop = asyncio.new_event_loop()
|
|
215
|
+
asyncio.set_event_loop(loop)
|
|
216
|
+
try:
|
|
217
|
+
result = loop.run_until_complete(tool_obj.execute(**kwargs))
|
|
218
|
+
finally:
|
|
219
|
+
pending = asyncio.all_tasks(loop)
|
|
220
|
+
for task in pending:
|
|
221
|
+
task.cancel()
|
|
222
|
+
if pending:
|
|
223
|
+
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
|
224
|
+
loop.close()
|
|
225
|
+
|
|
226
|
+
return result
|
|
227
|
+
|
|
228
|
+
tool_wrapper.__name__ = tool_obj.name
|
|
229
|
+
tool_wrapper.__doc__ = tool_obj.description
|
|
230
|
+
if hasattr(tool_obj.function, "__signature__"):
|
|
231
|
+
tool_wrapper.__signature__ = tool_obj.function.__signature__
|
|
232
|
+
if hasattr(tool_obj.function, "__annotations__"):
|
|
233
|
+
tool_wrapper.__annotations__ = tool_obj.function.__annotations__
|
|
234
|
+
|
|
235
|
+
return tool_wrapper
|
|
236
|
+
|
|
237
|
+
tool_functions[tool.name] = make_tool_wrapper(tool)
|
|
238
|
+
|
|
239
|
+
if hasattr(self.executor, "namespace"):
|
|
240
|
+
self.executor.namespace.update(tool_functions)
|
|
241
|
+
|
|
242
|
+
async def run(self, task: str, return_full_result: bool = False, stream: bool = False):
    """Run the agent on a task using the Thought/Code/Observation loop.

    Each turn sends the conversation built from memory to the LLM, parses
    the reply into a thought and a code block, executes the code and
    records the step. The loop stops as soon as a step yields a final
    answer.

    Args:
        task: The task to solve.
        return_full_result: If True, return an AgentResult with metadata
            instead of the bare answer.
        stream: If True, request a streaming completion and emit a
            StreamChunkEvent for every content chunk.

    Returns:
        The final answer, or an AgentResult (output, token usage, cost,
        steps) when ``return_full_result`` is True. If max_turns is reached
        with ``return_full_result`` set, the AgentResult has ``output=None``
        and ``error`` filled in.

    Raises:
        RuntimeError: If the agent reaches max_turns without finishing and
            ``return_full_result`` is False.
    """
    start_time = time.time()

    self.memory.add_task(task)

    if self.event_bus:
        self.event_bus.emit(TaskStartEvent(task=task, model=self.model_name))

    for turn_num in range(self.max_turns):
        step_number = turn_num + 1

        if self.event_bus:
            self.event_bus.emit(StepStartEvent(step=step_number, max_turns=self.max_turns))

        messages = self._build_messages()

        # Call LiteLLM with the parameters pre-computed in __init__.
        if stream:
            # Streaming mode: collect chunks and emit an event for each one.
            content_parts = []
            response = None

            stream_params = {**self.litellm_params, "stream": True}
            stream_response = await litellm.acompletion(messages=messages, **stream_params)

            async for chunk in stream_response:
                if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                    delta = chunk.choices[0].delta
                    if hasattr(delta, "content") and delta.content:
                        chunk_text = delta.content
                        content_parts.append(chunk_text)
                        if self.event_bus:
                            self.event_bus.emit(StreamChunkEvent(chunk=chunk_text))

                # Keep the last chunk as the response for usage/cost tracking.
                response = chunk

            if self.event_bus:
                self.event_bus.emit(StreamCompleteEvent())

            thought, code, _ = self._parse_response_from_text("".join(content_parts))
        else:
            # Non-streaming mode: the reply arrives as one complete response.
            response = await litellm.acompletion(messages=messages, **self.litellm_params)
            thought, code, _ = self._parse_response(response)

        # In streaming mode `response` is the last chunk, or None if the
        # stream yielded nothing, so usage and cost metadata are read
        # defensively instead of assuming the attributes exist.
        usage = getattr(response, "usage", None)
        total_tokens = getattr(usage, "total_tokens", None) if usage else None

        hidden_params = getattr(response, "_hidden_params", None)
        if hidden_params and "response_cost" in hidden_params:
            step_cost = hidden_params["response_cost"]
            if step_cost is not None:
                self.total_cost += step_cost

        # Reasoning text, when the response carries any.
        reasoning_content = self._extract_reasoning_content(response)
        if reasoning_content:
            self.memory.add_reasoning(reasoning_content)
            if self.event_bus:
                self.event_bus.emit(ReasoningContentEvent(content=reasoning_content, step=step_number))

        # Reasoning token count, when the usage details report one.
        if usage and hasattr(usage, "completion_tokens_details"):
            details = usage.completion_tokens_details
            if hasattr(details, "reasoning_tokens") and details.reasoning_tokens:
                if self.event_bus:
                    self.event_bus.emit(ReasoningTokensEvent(tokens=details.reasoning_tokens, step=step_number))

        has_code = bool(code and code.strip())

        # Show what the LLM said. Skipped when streaming, because the text
        # was already delivered through StreamChunkEvent.
        if self.event_bus and not stream:
            # Fall back to the raw reply when no thought was parsed; this
            # helps debugging when the LLM ignores the expected format.
            display_content = thought if thought else response.choices[0].message.content

            # In text mode a thought without code becomes the final answer,
            # so showing it here as well would duplicate it.
            skip_llm_message = self.text_mode and thought and not has_code

            if display_content and display_content.strip() and not skip_llm_message:
                self.event_bus.emit(
                    LLMMessageEvent(
                        content=display_content, title=f"Turn {step_number} Reasoning", step=step_number
                    )
                )

        if has_code:
            if self.event_bus:
                self.event_bus.emit(CodeExecutionEvent(code=code))

            exec_result = await self.executor.execute(code)

            if self.event_bus:
                if exec_result.error:
                    self.event_bus.emit(
                        ErrorEvent(error=exec_result.error, error_type="Execution Error", step=step_number)
                    )
                else:
                    self.event_bus.emit(ObservationEvent(observation=exec_result.output))
        else:
            # No code to execute: use an empty placeholder result.
            from .executor import ExecutionResult

            exec_result = ExecutionResult(output="", error=None, stdout="", stderr="")

            if self.text_mode:
                if thought and thought.strip():
                    # Text mode: a thought without code is the final answer.
                    exec_result.final_answer = thought
                elif self.event_bus:
                    # Neither thought nor code is an error even in text mode.
                    self.event_bus.emit(
                        ErrorEvent(
                            error="No response generated. Expected at least a Thought.",
                            error_type="Format Error",
                            step=step_number,
                        )
                    )
            else:
                # Standard mode: code is required.
                if self.event_bus:
                    self.event_bus.emit(
                        ErrorEvent(
                            error="LLM did not generate code. Expected format:\n\nThought: <explanation>\n```python\n<code>\n```",
                            error_type="Format Error",
                            step=step_number,
                        )
                    )

                # Record the correction as this step's observation so the LLM
                # sees what it did wrong on the next turn.
                correction_msg = (
                    "Format Error: You must provide your response in a Python code block.\n\n"
                    "Use this format:\n\n"
                    "Thought: <your explanation>\n"
                    "```python\n"
                    "# Your code here\n"
                    'final_answer("your answer")\n'
                    "```\n\n"
                    "Remember to call final_answer() with your result."
                )
                self.memory.add_step(
                    thought=thought if thought else "(No thought provided)",
                    code="",
                    output=correction_msg,
                    error=None,
                    tools_called=[],
                )
                continue

        # Record the step (executed code, or a text-mode reply).
        self.memory.add_step(
            thought=thought,
            code=code,
            output=exec_result.output,
            error=exec_result.error,
            tools_called=exec_result.tools_called,
        )

        if exec_result.final_answer is not None:
            # The agent is done.
            self.memory.add_final_answer(exec_result.final_answer)
            reported_cost = self.total_cost if self.total_cost > 0 else None

            if self.event_bus:
                self.event_bus.emit(
                    FinalAnswerEvent(
                        answer=str(exec_result.final_answer),
                        turns=step_number,
                        tokens=total_tokens,
                        cost=reported_cost,
                    )
                )

                duration = time.time() - start_time

                # Cache-related usage fields; missing ones are reported as None.
                cached_tokens = None
                cache_creation_tokens = None
                cache_read_tokens = None
                if usage:
                    cached_tokens = getattr(usage, "cached_tokens", None)
                    cache_creation_tokens = getattr(usage, "cache_creation_input_tokens", None)
                    cache_read_tokens = getattr(usage, "cache_read_input_tokens", None)

                self.event_bus.emit(
                    CostSummaryEvent(
                        tokens=total_tokens,
                        cost=reported_cost,
                        model=self.model_name,
                        duration_seconds=duration,
                        cached_tokens=cached_tokens,
                        cache_creation_input_tokens=cache_creation_tokens,
                        cache_read_input_tokens=cache_read_tokens,
                    )
                )

            if return_full_result:
                return AgentResult(
                    output=exec_result.final_answer,
                    token_usage=total_tokens,
                    cost=reported_cost,
                    steps=self.memory.steps,
                )
            return exec_result.final_answer

        # Otherwise loop again; the LLM sees the observation on the next turn.

    # Reaching this point means max_turns was exhausted.
    error_msg = f"Agent reached max_turns ({self.max_turns}) without completing task"
    if self.event_bus:
        self.event_bus.emit(ErrorEvent(error=error_msg, error_type="RuntimeError"))

    # Callers asking for the full result (e.g. benchmarks) get the execution
    # trace with the error set instead of an exception.
    if return_full_result:
        return AgentResult(
            output=None,
            token_usage=None,
            cost=self.total_cost,
            steps=self.memory.steps,
            error=error_msg,
        )

    # Backward compatibility: raise for callers that expect a plain answer.
    raise RuntimeError(error_msg)
|
|
518
|
+
|
|
519
|
+
def _build_messages(self) -> List[Dict]:
|
|
520
|
+
"""Build message list for LLM from memory.
|
|
521
|
+
|
|
522
|
+
Uses system blocks with cache control when attachments are present
|
|
523
|
+
for better prompt caching support.
|
|
524
|
+
|
|
525
|
+
Format with attachments (system blocks):
|
|
526
|
+
[
|
|
527
|
+
{"role": "system", "content": [
|
|
528
|
+
{"type": "text", "text": system_prompt},
|
|
529
|
+
{"type": "text", "text": attachment1, "cache_control": {"type": "ephemeral"}},
|
|
530
|
+
{"type": "text", "text": attachment2, "cache_control": {"type": "ephemeral"}},
|
|
531
|
+
]},
|
|
532
|
+
{"role": "user", "content": "previous turn 1"},
|
|
533
|
+
{"role": "assistant", "content": "previous response 1"},
|
|
534
|
+
{"role": "user", "content": "previous turn 2"},
|
|
535
|
+
{"role": "assistant", "content": "previous response 2"},
|
|
536
|
+
{"role": "user", "content": task},
|
|
537
|
+
...
|
|
538
|
+
]
|
|
539
|
+
|
|
540
|
+
Format without attachments (legacy):
|
|
541
|
+
[
|
|
542
|
+
{"role": "system", "content": system_prompt},
|
|
543
|
+
{"role": "user", "content": "previous turn 1"},
|
|
544
|
+
{"role": "assistant", "content": "previous response 1"},
|
|
545
|
+
{"role": "user", "content": task},
|
|
546
|
+
...
|
|
547
|
+
]
|
|
548
|
+
"""
|
|
549
|
+
messages = []
|
|
550
|
+
|
|
551
|
+
# Build system message with or without attachments
|
|
552
|
+
if self.attachments:
|
|
553
|
+
# Use system blocks with cache control for better caching
|
|
554
|
+
system_blocks = [{"type": "text", "text": self._build_system_prompt()}]
|
|
555
|
+
|
|
556
|
+
# Add each attachment as a separate cacheable block
|
|
557
|
+
for name, content in self.attachments:
|
|
558
|
+
system_blocks.append(
|
|
559
|
+
{
|
|
560
|
+
"type": "text",
|
|
561
|
+
"text": f"<Attachment: {name}>\n{content}\n</Attachment: {name}>",
|
|
562
|
+
"cache_control": {"type": "ephemeral"},
|
|
563
|
+
}
|
|
564
|
+
)
|
|
565
|
+
|
|
566
|
+
messages.append({"role": "system", "content": system_blocks})
|
|
567
|
+
else:
|
|
568
|
+
# Legacy format: simple string
|
|
569
|
+
messages.append({"role": "system", "content": self._build_system_prompt()})
|
|
570
|
+
|
|
571
|
+
# Previous conversation messages (if continuing a conversation)
|
|
572
|
+
if self.previous_messages:
|
|
573
|
+
messages.extend(self.previous_messages)
|
|
574
|
+
|
|
575
|
+
# Task
|
|
576
|
+
messages.append({"role": "user", "content": self.memory.task})
|
|
577
|
+
|
|
578
|
+
# Previous steps (Thought/Code → Observation pairs)
|
|
579
|
+
for step in self.memory.steps:
|
|
580
|
+
# Assistant's thought + code
|
|
581
|
+
assistant_msg = f"Thought: {step.thought}\n\n```python\n{step.code}\n```"
|
|
582
|
+
messages.append({"role": "assistant", "content": assistant_msg})
|
|
583
|
+
|
|
584
|
+
# Observation (code execution result)
|
|
585
|
+
observation = f"Observation: {step.output}"
|
|
586
|
+
if step.error:
|
|
587
|
+
observation += f"\nError: {step.error}"
|
|
588
|
+
|
|
589
|
+
messages.append({"role": "user", "content": observation})
|
|
590
|
+
|
|
591
|
+
return messages
|
|
592
|
+
|
|
593
|
+
def _build_system_prompt(self) -> str:
    """Return the system prompt for this agent's tools, instructions and mode."""
    return build_system_prompt(
        tools=self.tools,
        instructions=self.instructions,
        text_mode=self.text_mode,
    )
|
|
596
|
+
|
|
597
|
+
def _parse_response(self, response) -> tuple[str, str, Optional[str]]:
|
|
598
|
+
"""Parse LLM response into thought, code, and final_answer.
|
|
599
|
+
|
|
600
|
+
Returns:
|
|
601
|
+
(thought, code, final_answer)
|
|
602
|
+
"""
|
|
603
|
+
content = response.choices[0].message.content
|
|
604
|
+
return self._parse_response_from_text(content)
|
|
605
|
+
|
|
606
|
+
def _parse_response_from_text(self, content: str) -> tuple[str, str, Optional[str]]:
|
|
607
|
+
"""Parse text content into thought, code, and final_answer.
|
|
608
|
+
|
|
609
|
+
Args:
|
|
610
|
+
content: The text content to parse
|
|
611
|
+
|
|
612
|
+
Returns:
|
|
613
|
+
(thought, code, final_answer)
|
|
614
|
+
"""
|
|
615
|
+
thought = ""
|
|
616
|
+
code = ""
|
|
617
|
+
|
|
618
|
+
# Extract thought (everything before code block)
|
|
619
|
+
thought_start = content.find("Thought:")
|
|
620
|
+
if thought_start != -1:
|
|
621
|
+
thought_start += len("Thought:")
|
|
622
|
+
code_block_start = content.find("```python", thought_start)
|
|
623
|
+
if code_block_start != -1:
|
|
624
|
+
thought = content[thought_start:code_block_start].strip()
|
|
625
|
+
else:
|
|
626
|
+
thought = content[thought_start:].strip()
|
|
627
|
+
|
|
628
|
+
# Extract code block
|
|
629
|
+
code_block_start = content.find("```python")
|
|
630
|
+
if code_block_start != -1:
|
|
631
|
+
code_start = code_block_start + len("```python")
|
|
632
|
+
code_end = content.find("```", code_start)
|
|
633
|
+
if code_end != -1:
|
|
634
|
+
code = content[code_start:code_end].strip()
|
|
635
|
+
|
|
636
|
+
return thought, code, None
|
|
637
|
+
|
|
638
|
+
def _extract_reasoning_content(self, response) -> Optional[str]:
|
|
639
|
+
"""Extract reasoning content from response (for o1/o3/Claude thinking).
|
|
640
|
+
|
|
641
|
+
Returns:
|
|
642
|
+
str: Reasoning content if present, None otherwise
|
|
643
|
+
"""
|
|
644
|
+
try:
|
|
645
|
+
if hasattr(response, "choices") and len(response.choices) > 0:
|
|
646
|
+
choice = response.choices[0]
|
|
647
|
+
if hasattr(choice.message, "reasoning_content"):
|
|
648
|
+
return choice.message.reasoning_content
|
|
649
|
+
except (AttributeError, IndexError):
|
|
650
|
+
pass
|
|
651
|
+
|
|
652
|
+
return None
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def build_tools_section(tools: List[Tool]) -> str:
    """Render the "Available tools" section of the system prompt.

    Each tool contributes the text returned by its ``to_code_prompt()``;
    the pieces are separated by a blank line and wrapped in one Python
    code fence.

    Args:
        tools: Tool objects available to the agent.

    Returns:
        The formatted tools section, or an empty string when there are no
        tools.
    """
    if not tools:
        return ""

    rendered_tools = "\n\n".join(tool.to_code_prompt() for tool in tools)
    return f"""
## Available tools:

You have access to these Python functions:

```python
{rendered_tools}
```
"""
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
def build_text_mode_prompt(tools_section: str, instructions: str, has_tools: bool) -> str:
    """Build the system prompt for text mode, where code blocks are optional.

    Args:
        tools_section: Formatted tools section, inserted before the rules.
        instructions: Additional instructions from the agent config,
            inserted after the rules.
        has_tools: Whether tools are available; selects the wording of
            rule 4.

    Returns:
        The complete system prompt for text mode.
    """
    # Rule 4 is the only part of the template that depends on tools.
    if has_tools:
        rule_four = "4. When using code, call tools with keyword arguments: result = tool_name(arg1=value1, arg2=value2)"
    else:
        rule_four = "4. Use Python when you need to perform actions"

    return f"""You are an expert assistant who helps with tasks.

You can respond in two ways:

**For conversational questions or simple responses:**
Just provide your Thought with the answer directly:

Thought: [Your response here]

**When you need to use tools or perform actions:**
Provide a Thought and write Python code:

Thought: [What you'll do and why]
```python
# Your code here
final_answer(result)
```
{tools_section}
## Rules:

1. Start with "Thought:" to explain your reasoning
2. Code blocks are OPTIONAL - only use them when you need tools or complex logic
3. For direct answers, just provide the Thought without code
{rule_four}
5. When using code blocks, call final_answer() with the result
6. Variables persist across code blocks

{instructions}

Now begin!"""
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def build_standard_mode_prompt(tools_section: str, instructions: str, has_tools: bool) -> str:
    """Build the system prompt for standard mode, where code is required.

    Args:
        tools_section: Formatted tools section, inserted before the rules.
        instructions: Additional instructions from the agent config,
            inserted after the rules.
        has_tools: Whether tools are available; selects the wording of
            rule 3.

    Returns:
        The complete system prompt for standard mode.
    """
    # Rule 3 is the only part of the template that depends on tools.
    if has_tools:
        rule_three = "3. Call tools with keyword arguments: result = tool_name(arg1=value1, arg2=value2)"
    else:
        rule_three = "3. Use standard Python to solve the task"

    return f"""You are an expert assistant who solves tasks using Python code.

To solve a task, you proceed in steps using this pattern:

1. **Thought:** Explain your reasoning (what you'll do and why)
2. **Code:** Write Python code in a code block
3. **Observation:** You'll see the code execution result

You repeat this Thought → Code → Observation cycle until you have the final answer.

## How to write code:

- Always start with a Thought explaining your approach
- Write code in triple-backtick code blocks: ```python
- Use print() to output important information
- Variables persist between code blocks
- When you have the final answer, call: final_answer(your_answer)
{tools_section}
## Rules:

1. Always provide Thought before code
2. Only use variables you've defined
{rule_three}
4. Call final_answer() when you have the answer
5. If you get an error, try a different approach
6. State persists - variables remain available across code blocks

{instructions}

Now begin!"""
|