tinyagent-py 0.0.13__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinyagent/code_agent/helper.py +2 -2
- tinyagent/code_agent/modal_sandbox.py +1 -1
- tinyagent/code_agent/providers/__init__.py +14 -1
- tinyagent/code_agent/providers/base.py +181 -7
- tinyagent/code_agent/providers/modal_provider.py +150 -27
- tinyagent/code_agent/providers/seatbelt_provider.py +1065 -0
- tinyagent/code_agent/safety.py +6 -2
- tinyagent/code_agent/tiny_code_agent.py +973 -12
- tinyagent/code_agent/utils.py +263 -2
- tinyagent/hooks/__init__.py +3 -1
- tinyagent/hooks/jupyter_notebook_callback.py +1464 -0
- tinyagent/hooks/token_tracker.py +564 -0
- tinyagent/prompts/summarize.yaml +96 -0
- tinyagent/prompts/truncation.yaml +13 -0
- tinyagent/tiny_agent.py +811 -49
- {tinyagent_py-0.0.13.dist-info → tinyagent_py-0.0.16.dist-info}/METADATA +25 -1
- tinyagent_py-0.0.16.dist-info/RECORD +38 -0
- tinyagent_py-0.0.13.dist-info/RECORD +0 -33
- {tinyagent_py-0.0.13.dist-info → tinyagent_py-0.0.16.dist-info}/WHEEL +0 -0
- {tinyagent_py-0.0.13.dist-info → tinyagent_py-0.0.16.dist-info}/licenses/LICENSE +0 -0
- {tinyagent_py-0.0.13.dist-info → tinyagent_py-0.0.16.dist-info}/top_level.txt +0 -0
tinyagent/tiny_agent.py
CHANGED
@@ -12,10 +12,46 @@ import uuid
 from .storage import Storage  # ← your abstract base
 import traceback
 import time  # Add time import for Unix timestamps
+from pathlib import Path
+import random  # Add random for jitter in retry backoff
+
 # Module-level logger; configuration is handled externally.
 logger = logging.getLogger(__name__)
 #litellm.callbacks = ["arize_phoenix"]

+# Set global LiteLLM configuration
+litellm.drop_params = True  # Enable dropping unsupported parameters globally
+
+# Define default retry configuration
+DEFAULT_RETRY_CONFIG = {
+    "max_retries": 5,
+    "min_backoff": 1,  # Start with 1 second
+    "max_backoff": 60,  # Max 60 seconds between retries
+    "jitter": True,  # Add randomness to backoff
+    "backoff_multiplier": 2,  # Exponential backoff factor
+    "retry_status_codes": [429, 500, 502, 503, 504],  # Common server errors
+    "retry_exceptions": [
+        "litellm.InternalServerError",
+        "litellm.APIError",
+        "litellm.APIConnectionError",
+        "litellm.RateLimitError",
+        "litellm.ServiceUnavailableError",
+        "litellm.APITimeoutError"
+    ],
+    # Rate limit specific configuration
+    "rate_limit_backoff_min": 60,  # Minimum wait time for rate limit errors (60 seconds)
+    "rate_limit_backoff_max": 90,  # Maximum wait time for rate limit errors (90 seconds)
+}
+
+def load_template(path: str, key: str = "system_prompt") -> str:
+    """
+    Load the YAML file and extract its 'system_prompt' field.
+    """
+    import yaml
+    with open(path, "r") as f:
+        data = yaml.safe_load(f)
+    return data[key]
+
 def tool(name: Optional[str] = None, description: Optional[str] = None,
          schema: Optional[Dict[str, Any]] = None):
     """
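Note: to make the retry settings above concrete, here is an illustrative sketch (not part of the diff) of the exponential-backoff-with-jitter schedule that DEFAULT_RETRY_CONFIG describes: the delay grows by backoff_multiplier per attempt, is capped at max_backoff, and gets roughly ±20% jitter when jitter is enabled.

    import random

    def backoff_delay(attempt: int, cfg: dict) -> float:
        # Hypothetical helper: delay in seconds before retry number `attempt` (1-based).
        delay = min(cfg["max_backoff"], cfg["min_backoff"] * cfg["backoff_multiplier"] ** (attempt - 1))
        if cfg["jitter"]:
            delay *= 0.8 + 0.4 * random.random()  # +/-20% randomness
        return delay

    # With the defaults (min_backoff=1, multiplier=2, max_backoff=60), attempts 1..5
    # wait roughly 1, 2, 4, 8 and 16 seconds before jitter is applied.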
@@ -39,6 +75,11 @@ def tool(name: Optional[str] = None, description: Optional[str] = None,
         # Get the description (use provided description or docstring)
         tool_description = description or inspect.getdoc(func_or_class) or f"Tool based on {tool_name}"

+        # Temporarily attach the description to the function/class
+        # This allows _generate_schema_from_function to access it for param extraction
+        if description:
+            func_or_class._temp_tool_description = description
+
         # Generate schema if not provided
         tool_schema = schema or {}
         if not tool_schema:
@@ -50,6 +91,10 @@ def tool(name: Optional[str] = None, description: Optional[str] = None,
                 # For functions, use the function itself
                 tool_schema = _generate_schema_from_function(func_or_class)

+        # Clean up temporary attribute
+        if hasattr(func_or_class, '_temp_tool_description'):
+            delattr(func_or_class, '_temp_tool_description')
+
         # Attach metadata to the function or class
         func_or_class._tool_metadata = {
             "name": tool_name,
@@ -76,6 +121,65 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
     sig = inspect.signature(func)
     type_hints = get_type_hints(func)

+    # Extract parameter descriptions from docstring
+    param_descriptions = {}
+
+    # First check if we have a tool decorator description (has higher priority)
+    decorator_description = None
+    if hasattr(func, '_temp_tool_description'):
+        decorator_description = func._temp_tool_description
+
+    # Get function docstring
+    docstring = inspect.getdoc(func) or ""
+
+    # Combine sources to check for parameter descriptions
+    sources_to_check = []
+    if decorator_description:
+        sources_to_check.append(decorator_description)
+    if docstring:
+        sources_to_check.append(docstring)
+
+    # Parse parameter descriptions from all sources
+    for source in sources_to_check:
+        lines = source.split('\n')
+        in_args_section = False
+        current_param = None
+
+        for line in lines:
+            line = line.strip()
+
+            # Check for Args/Parameters section markers
+            if line.lower() in ('args:', 'arguments:', 'parameters:'):
+                in_args_section = True
+                continue
+
+            # Check for other section markers that would end the args section
+            if line.lower() in ('returns:', 'raises:', 'yields:', 'examples:') and in_args_section:
+                in_args_section = False
+
+            # Look for :param or :arg style parameter descriptions
+            if line.startswith((":param", ":arg")):
+                try:
+                    # e.g., ":param user_id: The ID of the user."
+                    parts = line.split(" ", 2)
+                    if len(parts) >= 3:
+                        param_name = parts[1].strip().split(" ")[0]
+                        param_descriptions[param_name] = parts[2].strip()
+                except (ValueError, IndexError):
+                    continue
+
+            # Look for indented parameter descriptions in Args section
+            elif in_args_section and line.strip():
+                # Check for param: description pattern
+                param_match = line.lstrip().split(":", 1)
+                if len(param_match) == 2:
+                    param_name = param_match[0].strip()
+                    description = param_match[1].strip()
+                    param_descriptions[param_name] = description
+                    current_param = param_name
+                # Check for continued description from previous param
+                elif current_param and line.startswith((' ', '\t')):
+                    param_descriptions[current_param] += " " + line.strip()
     # Skip 'self' parameter for methods
     params = {
         name: param for name, param in sig.parameters.items()
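For illustration only (this example is not part of the diff, and the import path is an assumption), a tool whose docstring carries a Google-style Args section would have its parameter descriptions picked up by the parsing above:

    from tinyagent.tiny_agent import tool  # assumed import path

    @tool(name="get_weather")
    def get_weather(city: str, units: str = "metric") -> str:
        """
        Look up the current weather.

        Args:
            city: Name of the city to query
            units: Either "metric" or "imperial"
        """
        ...

Here the "Name of the city to query" and "Either \"metric\" or \"imperial\"" lines would be expected to become the description fields of the corresponding schema properties.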
@@ -91,9 +195,12 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
         param_type = type_hints.get(name, Any)

         # Create property schema
-        prop_schema = {
+        prop_schema = {}
+        description = param_descriptions.get(name)
+        if description:
+            prop_schema["description"] = description

-        #
+        # Handle different types of type annotations
         if param_type == str:
             prop_schema["type"] = "string"
         elif param_type == int:
@@ -107,7 +214,113 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
         elif param_type == dict or param_type == Dict:
             prop_schema["type"] = "object"
         else:
-
+            # Handle generic types
+            origin = getattr(param_type, "__origin__", None)
+            args = getattr(param_type, "__args__", None)
+
+            if origin is not None and args is not None:
+                # Handle List[X], Sequence[X], etc.
+                if origin in (list, List) or (hasattr(origin, "__name__") and "List" in origin.__name__):
+                    prop_schema["type"] = "array"
+                    # Add items type if we can determine it
+                    if args and len(args) == 1:
+                        item_type = args[0]
+                        if item_type == str:
+                            prop_schema["items"] = {"type": "string"}
+                        elif item_type == int:
+                            prop_schema["items"] = {"type": "integer"}
+                        elif item_type == float:
+                            prop_schema["items"] = {"type": "number"}
+                        elif item_type == bool:
+                            prop_schema["items"] = {"type": "boolean"}
+                        else:
+                            prop_schema["items"] = {"type": "string"}
+
+                # Handle Dict[K, V], Mapping[K, V], etc.
+                elif origin in (dict, Dict) or (hasattr(origin, "__name__") and "Dict" in origin.__name__):
+                    prop_schema["type"] = "object"
+                    # We could add additionalProperties for value type, but it's not always needed
+                    if args and len(args) == 2:
+                        value_type = args[1]
+                        if value_type == str:
+                            prop_schema["additionalProperties"] = {"type": "string"}
+                        elif value_type == int:
+                            prop_schema["additionalProperties"] = {"type": "integer"}
+                        elif value_type == float:
+                            prop_schema["additionalProperties"] = {"type": "number"}
+                        elif value_type == bool:
+                            prop_schema["additionalProperties"] = {"type": "boolean"}
+                        else:
+                            prop_schema["additionalProperties"] = {"type": "string"}
+
+                # Handle Union types (Optional is Union[T, None])
+                elif origin is Union:
+                    # Check if this is Optional[X] (Union[X, None])
+                    if type(None) in args:
+                        # Get the non-None type
+                        non_none_types = [arg for arg in args if arg is not type(None)]
+                        if non_none_types:
+                            # Use the first non-None type
+                            main_type = non_none_types[0]
+                            # Recursively process this type
+                            if main_type == str:
+                                prop_schema["type"] = "string"
+                            elif main_type == int:
+                                prop_schema["type"] = "integer"
+                            elif main_type == float:
+                                prop_schema["type"] = "number"
+                            elif main_type == bool:
+                                prop_schema["type"] = "boolean"
+                            elif main_type == list or main_type == List:
+                                prop_schema["type"] = "array"
+                            elif main_type == dict or main_type == Dict:
+                                prop_schema["type"] = "object"
+                            else:
+                                # Try to handle generic types like List[str]
+                                inner_origin = getattr(main_type, "__origin__", None)
+                                inner_args = getattr(main_type, "__args__", None)
+
+                                if inner_origin is not None and inner_args is not None:
+                                    if inner_origin in (list, List) or (hasattr(inner_origin, "__name__") and "List" in inner_origin.__name__):
+                                        prop_schema["type"] = "array"
+                                        if inner_args and len(inner_args) == 1:
+                                            inner_item_type = inner_args[0]
+                                            if inner_item_type == str:
+                                                prop_schema["items"] = {"type": "string"}
+                                            elif inner_item_type == int:
+                                                prop_schema["items"] = {"type": "integer"}
+                                            elif inner_item_type == float:
+                                                prop_schema["items"] = {"type": "number"}
+                                            elif inner_item_type == bool:
+                                                prop_schema["items"] = {"type": "boolean"}
+                                            else:
+                                                prop_schema["items"] = {"type": "string"}
+                                    elif inner_origin in (dict, Dict) or (hasattr(inner_origin, "__name__") and "Dict" in inner_origin.__name__):
+                                        prop_schema["type"] = "object"
+                                        # Add additionalProperties for value type
+                                        if inner_args and len(inner_args) == 2:
+                                            value_type = inner_args[1]
+                                            if value_type == str:
+                                                prop_schema["additionalProperties"] = {"type": "string"}
+                                            elif value_type == int:
+                                                prop_schema["additionalProperties"] = {"type": "integer"}
+                                            elif value_type == float:
+                                                prop_schema["additionalProperties"] = {"type": "number"}
+                                            elif value_type == bool:
+                                                prop_schema["additionalProperties"] = {"type": "boolean"}
+                                            else:
+                                                prop_schema["additionalProperties"] = {"type": "string"}
+                                    else:
+                                        prop_schema["type"] = "string"  # Default for complex types
+                                else:
+                                    prop_schema["type"] = "string"  # Default for complex types
+                    else:
+                        # For non-Optional Union types, default to string
+                        prop_schema["type"] = "string"
+                else:
+                    prop_schema["type"] = "string"  # Default for other complex types
+            else:
+                prop_schema["type"] = "string"  # Default to string for complex types

         properties[name] = prop_schema

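To make the type mapping concrete, the following is an illustrative sketch (not taken from the diff) of the properties fragment that _generate_schema_from_function would be expected to produce for common annotations:

    from typing import List, Optional

    def search(query: str, tags: Optional[List[str]] = None, limit: int = 10) -> list:
        """
        Args:
            query: Full-text search string
            tags: Optional list of tag filters
            limit: Maximum number of results
        """
        return []

    # Approximate expected "properties" fragment:
    # {
    #     "query": {"type": "string", "description": "Full-text search string"},
    #     "tags":  {"type": "array", "items": {"type": "string"}, "description": "Optional list of tag filters"},
    #     "limit": {"type": "integer", "description": "Maximum number of results"}
    # }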
@@ -132,10 +345,23 @@ DEFAULT_SYSTEM_PROMPT = (
     "If a tool you need isn't available, just say so."
 )

+DEFAULT_SUMMARY_SYSTEM_PROMPT = (
+    "You are an expert assistant. Your goal is to generate a concise, structured summary "
+    "of the conversation below that captures all essential information needed to continue "
+    "development after context replacement. Include tasks performed, code areas modified or "
+    "reviewed, key decisions or assumptions, test results or errors, and outstanding tasks or next steps."
+)
+
 class TinyAgent:
     """
     A minimal implementation of an agent powered by MCP and LiteLLM,
-    now with session/state persistence.
+    now with session/state persistence and robust error handling.
+
+    Features:
+    - Automatic retry mechanism for LLM API calls with exponential backoff
+    - Configurable retry parameters (max retries, backoff times, etc.)
+    - Session persistence
+    - Tool integration via MCP protocol
     """
     session_state: Dict[str, Any] = {}
     user_id: Optional[str] = None
@@ -154,7 +380,10 @@ class TinyAgent:
         session_id: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
-        persist_tool_configs: bool = False
+        persist_tool_configs: bool = False,
+        summary_config: Optional[Dict[str, Any]] = None,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ):
         """
         Initialize the Tiny Agent.
@@ -163,12 +392,29 @@ class TinyAgent:
             model: The model to use with LiteLLM
             api_key: The API key for the model provider
             system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
             logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
             session_id: Optional session ID (if provided with storage, will attempt to load existing session)
             metadata: Optional metadata for the session
             storage: Optional storage backend for persistence
             persist_tool_configs: Whether to persist tool configurations
-
+            summary_config: Optional model to use for generating conversation summaries
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is True.
+        """
         # Set up logger
         self.logger = logger or logging.getLogger(__name__)

@@ -180,6 +426,12 @@ class TinyAgent:
         # Simplified hook system - single list of callbacks
         self.callbacks: List[callable] = []

+        # Configure LiteLLM to drop unsupported parameters
+        # This is also set globally at the module level, but we set it again here to be sure
+        import litellm
+        litellm.drop_params = True
+        self.logger.info("LiteLLM drop_params feature is enabled")
+
         # LiteLLM configuration
         self.model = model
         self.api_key = api_key
@@ -190,6 +442,14 @@ class TinyAgent:

         self.model_kwargs = model_kwargs
         self.encoder = tiktoken.get_encoding("o200k_base")
+
+        # Set up retry configuration
+        self.retry_config = DEFAULT_RETRY_CONFIG.copy()
+        if retry_config:
+            self.retry_config.update(retry_config)
+
+        # Set parallel tool calls preference
+        self.parallel_tool_calls = parallel_tool_calls

         # Conversation state
         self.messages = [{
@@ -197,11 +457,16 @@ class TinyAgent:
             "content": system_prompt or DEFAULT_SYSTEM_PROMPT
         }]

+        self.summary_config = summary_config or {}
+
         # This list now accumulates tools from *all* connected MCP servers:
         self.available_tools: List[Dict[str, Any]] = []

-        #
-
+        # Default built-in tools:
+        # - final_answer: Exit tool that completes the task and returns the final answer
+        # - ask_question: Exit tool that asks the user a question and waits for a response
+        # - notify_user: Non-exit tool that shares progress with the user without stopping the agent loop
+        self.default_tools = [
             {
                 "type": "function",
                 "function": {
@@ -231,6 +496,23 @@ class TinyAgent:
                         "required": ["question"]
                     }
                 }
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "notify_user",
+                    "description": "Share progress or status updates with the user without stopping the agent loop. Use this to keep the user informed during long-running tasks. Unlike final_answer and ask_question, this tool allows the agent to continue processing after sending the notification.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "message": {
+                                "type": "string",
+                                "description": "The progress update or status message to share with the user"
+                            }
+                        },
+                        "required": ["message"]
+                    }
+                }
             }
         ]

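For illustration (hypothetical, not from the diff), a model response that uses the new non-exit tool would carry a tool call shaped like the dictionary below; the agent loop records a result ("OK") for it and keeps running instead of returning:

    notify_call = {
        "id": "call_123",  # hypothetical id
        "type": "function",
        "function": {
            "name": "notify_user",
            "arguments": '{"message": "Fetched 3 of 5 sources, still working..."}',
        },
    }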
@@ -376,7 +658,8 @@ class TinyAgent:
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=False  # default off
+            persist_tool_configs=False,  # default off
+            retry_config=None  # Use default retry configuration
         )

         # Apply the session data directly instead of loading from storage
@@ -586,6 +869,42 @@ class TinyAgent:
         self.messages.append(user_message)
         await self._run_callbacks("message_add", message=self.messages[-1])

+        return await self._run_agent_loop(max_turns)
+
+    async def resume(self, max_turns: int = 10) -> str:
+        """
+        Resume the conversation without adding a new user message.
+
+        This method continues the conversation from the current state,
+        allowing the agent to process the existing conversation history
+        and potentially take additional actions.
+
+        Args:
+            max_turns: Maximum number of conversation turns
+
+        Returns:
+            The agent's response
+        """
+        # Ensure any deferred session-load happens exactly once
+        if self._needs_session_load:
+            self.logger.debug(f"Deferred session load detected for {self.session_id}; loading now")
+            await self.init_async()
+
+        # Notify start with resume flag
+        await self._run_callbacks("agent_start", resume=True)
+
+        return await self._run_agent_loop(max_turns)
+
+    async def _run_agent_loop(self, max_turns: int = 10) -> str:
+        """
+        Internal method that runs the agent's main loop.
+
+        Args:
+            max_turns: Maximum number of conversation turns
+
+        Returns:
+            The agent's response
+        """
         # Initialize loop control variables
         num_turns = 0
         next_turn_should_call_tools = True
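A minimal usage sketch (assumes an already-constructed agent; not part of the diff): run() starts from a new user message, while the new resume() continues from whatever is already in agent.messages, so both end up in the same _run_agent_loop.

    async def demo(agent):
        first = await agent.run("Summarize the open issues", max_turns=10)
        # ...later, e.g. after more messages were appended to agent.messages:
        followup = await agent.resume(max_turns=5)
        return first, followup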
@@ -593,21 +912,41 @@ class TinyAgent:
         # The main agent loop
         while True:
             # Get all available tools including exit loop tools
-            all_tools = self.available_tools + self.
+            all_tools = self.available_tools + self.default_tools

             # Call LLM with messages and tools
             try:
                 self.logger.info(f"Calling LLM with {len(self.messages)} messages and {len(all_tools)} tools")

+                # Verify LiteLLM drop_params setting
+                import litellm
+                self.logger.info(f"LiteLLM drop_params is currently set to: {litellm.drop_params}")
+
                 # Notify LLM start
                 await self._run_callbacks("llm_start", messages=self.messages, tools=all_tools)

-
+                # Use parallel_tool_calls based on user preference, default to False if not specified
+                use_parallel_tool_calls = self.parallel_tool_calls if self.parallel_tool_calls is not None else False
+
+                # Disable parallel_tool_calls for models known not to support it
+                unsupported_models = ["o1-mini", "o1-preview", "o3", "o4-mini"]
+                for unsupported_model in unsupported_models:
+                    if unsupported_model in self.model:
+                        old_value = use_parallel_tool_calls
+                        use_parallel_tool_calls = False
+                        if old_value:
+                            self.logger.warning(f"Disabling parallel_tool_calls for model {self.model} as it's known not to support it")
+
+                self.logger.info(f"Using parallel tool calls: {use_parallel_tool_calls}")
+
+                # Use our retry wrapper instead of direct litellm call
+                response = await self._litellm_with_retry(
                     model=self.model,
                     api_key=self.api_key,
                     messages=self.messages,
                     tools=all_tools,
                     tool_choice="auto",
+                    parallel_tool_calls=use_parallel_tool_calls,
                     temperature=self.temperature,
                     **self.model_kwargs
                 )
@@ -645,12 +984,19 @@ class TinyAgent:
                 if has_tool_calls:
                     self.logger.info(f"Tool calls detected: {len(tool_calls)}")

-                    #
-
+                    # Create a list to hold all the tool execution tasks
+                    tool_tasks = []
+
+                    # Create a function to process a single tool call
+                    async def process_tool_call(tool_call):
                         tool_call_id = tool_call.id
                         function_info = tool_call.function
                         tool_name = function_info.name

+                        await self._run_callbacks("tool_start", tool_call=tool_call)
+
+                        tool_result_content = ""
+
                         # Create a tool message
                         tool_message = {
                             "role": "tool",
@@ -671,28 +1017,25 @@ class TinyAgent:
                             # Handle control flow tools
                             if tool_name == "final_answer":
                                 # Add a response for this tool call before returning
-
-                                self.messages.append(tool_message)
-                                await self._run_callbacks("message_add", message=tool_message)
-                                await self._run_callbacks("agent_end", result="Task completed.")
-                                return tool_message["content"]
+                                tool_result_content = tool_args.get("content", "Task completed without final answer.!!!")
                             elif tool_name == "ask_question":
                                 question = tool_args.get("question", "Could you provide more details?")
                                 # Add a response for this tool call before returning
-
-
-
-
-
+                                tool_result_content = f"Question asked: {question}"
+                            elif tool_name == "notify_user":
+                                message = tool_args.get("message", "No message provided.")
+                                self.logger.info(f"Received notify_user tool call with message: {message}")
+                                # Set the tool result content
+                                tool_result_content = "OK"
                             else:
                                 # Check if it's a custom tool first
                                 if tool_name in self.custom_tool_handlers:
-
+                                    tool_result_content = await self._execute_custom_tool(tool_name, tool_args)
                                 else:
                                     # Dispatch to the proper MCPClient
                                     client = self.tool_to_client.get(tool_name)
                                     if not client:
-
+                                        tool_result_content = f"No MCP server registered for tool '{tool_name}'"
                                     else:
                                         try:
                                             self.logger.debug(f"Calling tool {tool_name} with args: {tool_args}")
@@ -703,24 +1046,50 @@ class TinyAgent:
                                             if content_list:
                                                 # Try different ways to extract the content
                                                 if hasattr(content_list[0], 'text'):
-
+                                                    tool_result_content = content_list[0].text
                                                 elif isinstance(content_list[0], dict) and 'text' in content_list[0]:
-
+                                                    tool_result_content = content_list[0]['text']
                                                 else:
-
+                                                    tool_result_content = str(content_list)
                                             else:
-
+                                                tool_result_content = "Tool returned no content"
                                         except Exception as e:
                                             self.logger.error(f"Error calling tool {tool_name}: {str(e)}")
-
+                                            tool_result_content = f"Error executing tool {tool_name}: {str(e)}"
                         except Exception as e:
                             # If any error occurs during tool call processing, make sure we still have a tool response
                             self.logger.error(f"Unexpected error processing tool call {tool_call_id}: {str(e)}")
-
-
-
+                            tool_result_content = f"Error processing tool call: {str(e)}"
+                        finally:
+                            # Always add the tool message to ensure each tool call has a response
+                            tool_message["content"] = tool_result_content
+                            await self._run_callbacks("tool_end", tool_call=tool_call, result=tool_result_content)
+                            return tool_message
+
+                    # Create tasks for all tool calls
+                    for tool_call in tool_calls:
+                        tool_tasks.append(process_tool_call(tool_call))
+
+                    # Execute all tool calls concurrently
+                    tool_messages = await asyncio.gather(*tool_tasks)
+
+                    # Process results of tool calls
+                    for tool_message in tool_messages:
                         self.messages.append(tool_message)
                         await self._run_callbacks("message_add", message=tool_message)
+
+                        # Handle special exit tools
+                        if tool_message["name"] == "final_answer":
+                            await self._run_callbacks("agent_end", result="Task completed.")
+                            return tool_message["content"]
+                        elif tool_message["name"] == "ask_question":
+                            # Extract the question from the original tool call
+                            for tc in tool_calls:
+                                if tc.id == tool_message["tool_call_id"]:
+                                    args = json.loads(tc.function.arguments)
+                                    question = args.get("question", "")
+                                    await self._run_callbacks("agent_end", result=f"I need more information: {question}")
+                                    return f"I need more information: {question}"

                     next_turn_should_call_tools = False
                 else:
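The fan-out above is plain asyncio: each tool call becomes a coroutine and asyncio.gather awaits them together, so independent tool calls no longer run one after another. A self-contained sketch of the same pattern (illustrative only, not from the diff):

    import asyncio

    async def process(call_id: int) -> str:
        await asyncio.sleep(0.1)  # stands in for one tool round-trip
        return f"result for call {call_id}"

    async def main() -> None:
        results = await asyncio.gather(*(process(i) for i in range(3)))
        print(results)  # results come back in submission order

    asyncio.run(main())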
@@ -866,6 +1235,154 @@ class TinyAgent:
             self._needs_session_load = False

         return self
+
+    def _is_rate_limit_error(self, exception: Exception) -> bool:
+        """
+        Check if an exception is a rate limit error that should be handled with longer backoff.
+
+        Args:
+            exception: The exception to check
+
+        Returns:
+            True if this is a rate limit error, False otherwise
+        """
+        if not exception:
+            return False
+
+        # Check for LiteLLM RateLimitError
+        error_name = exception.__class__.__name__
+        if "RateLimitError" in error_name:
+            return True
+
+        # Check for rate limit in the error message
+        error_message = str(exception).lower()
+        rate_limit_indicators = [
+            "rate limit",
+            "rate_limit_error",
+            "rate-limit",
+            "too many requests",
+            "quota exceeded",
+            "requests per minute",
+            "requests per hour",
+            "requests per day",
+            "rate limiting",
+            "throttled"
+        ]
+
+        for indicator in rate_limit_indicators:
+            if indicator in error_message:
+                return True
+
+        # Check for specific HTTP status codes (429 = Too Many Requests)
+        status_code = getattr(exception, "status_code", None)
+        if status_code == 429:
+            return True
+
+        return False
+
+    async def _litellm_with_retry(self, **kwargs) -> Any:
+        """
+        Execute litellm.acompletion with retry logic for handling transient errors.
+
+        Args:
+            **kwargs: Arguments to pass to litellm.acompletion
+
+        Returns:
+            The response from litellm.acompletion
+
+        Raises:
+            Exception: If all retries fail
+        """
+        max_retries = self.retry_config["max_retries"]
+        min_backoff = self.retry_config["min_backoff"]
+        max_backoff = self.retry_config["max_backoff"]
+        backoff_multiplier = self.retry_config["backoff_multiplier"]
+        jitter = self.retry_config["jitter"]
+        retry_status_codes = self.retry_config["retry_status_codes"]
+        retry_exceptions = self.retry_config["retry_exceptions"]
+
+        # Rate limit specific configuration
+        rate_limit_backoff_min = self.retry_config.get("rate_limit_backoff_min", 60)  # 60 seconds
+        rate_limit_backoff_max = self.retry_config.get("rate_limit_backoff_max", 90)  # 90 seconds
+
+        attempt = 0
+        last_exception = None
+
+        # Log the model and key parameters being used
+        model_name = kwargs.get('model', 'unknown')
+        self.logger.debug(f"Calling LiteLLM with model: {model_name}")
+        if 'parallel_tool_calls' in kwargs:
+            self.logger.debug(f"Using parallel_tool_calls={kwargs['parallel_tool_calls']}")
+
+        while attempt <= max_retries:
+            try:
+                # First attempt or retry
+                if attempt > 0:
+                    # Check if this is a rate limit error and handle it specially
+                    is_rate_limit_error = self._is_rate_limit_error(last_exception)
+
+                    if is_rate_limit_error:
+                        # Use longer backoff for rate limit errors (60-90 seconds)
+                        backoff = rate_limit_backoff_min + (rate_limit_backoff_max - rate_limit_backoff_min) * random.random()
+                        self.logger.warning(
+                            f"Rate limit error detected. Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+                    else:
+                        # Use normal exponential backoff for other errors
+                        backoff = min(max_backoff, min_backoff * (backoff_multiplier ** (attempt - 1)))
+
+                        # Add jitter if enabled (±20% randomness)
+                        if jitter:
+                            backoff = backoff * (0.8 + 0.4 * random.random())
+
+                        self.logger.warning(
+                            f"Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+
+                    # Wait before retry
+                    await asyncio.sleep(backoff)
+
+                # Make the actual API call
+                return await litellm.acompletion(**kwargs)
+
+            except Exception as e:
+                last_exception = e
+                error_name = e.__class__.__name__
+                full_error_path = f"{e.__class__.__module__}.{error_name}" if hasattr(e, "__module__") else error_name
+
+                # Check if this exception should trigger a retry
+                should_retry = False
+
+                # Check for status code in exception (if available)
+                status_code = getattr(e, "status_code", None)
+                if status_code and status_code in retry_status_codes:
+                    should_retry = True
+
+                # Check exception type against retry list
+                for exception_path in retry_exceptions:
+                    if exception_path in full_error_path:
+                        should_retry = True
+                        break
+
+                if not should_retry or attempt >= max_retries:
+                    # Either not a retryable error or we've exhausted retries
+                    self.logger.error(
+                        f"LLM call failed after {attempt} attempt(s). Error: {str(e)}"
+                    )
+                    raise
+
+                # Log the error and continue to next retry attempt
+                error_type = "rate limit" if self._is_rate_limit_error(e) else "general"
+                self.logger.warning(
+                    f"LLM call failed (attempt {attempt+1}/{max_retries+1}) - {error_type} error: {str(e)}. Will retry."
+                )
+
+                attempt += 1
+
+        # This should not be reached due to the raise in the loop, but just in case:
+        raise last_exception

     @classmethod
     async def create(
@@ -881,11 +1398,39 @@ class TinyAgent:
         session_id: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
-        persist_tool_configs: bool = False
+        persist_tool_configs: bool = False,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ) -> "TinyAgent":
         """
         Async factory: constructs the agent, then loads an existing session
         if (storage and session_id) were provided.
+
+        Args:
+            model: The model to use with LiteLLM
+            api_key: The API key for the model provider
+            system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
+            logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
+            session_id: Optional session ID (if provided with storage, will attempt to load existing session)
+            metadata: Optional metadata for the session
+            storage: Optional storage backend for persistence
+            persist_tool_configs: Whether to persist tool configurations
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is None (disabled).
         """
         agent = cls(
             model=model,
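Putting the new factory parameters together, a call could look like the sketch below (hedged: the model name, the api_key variable and the specific overrides are illustrative, not taken from the diff); it must run inside an async function. Keys passed in retry_config are merged over DEFAULT_RETRY_CONFIG rather than replacing it.

    agent = await TinyAgent.create(
        model="gpt-4",  # any LiteLLM-supported model
        api_key=api_key,
        retry_config={"max_retries": 3, "min_backoff": 2},  # partial override of the defaults
        parallel_tool_calls=True,
    )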
@@ -898,7 +1443,9 @@ class TinyAgent:
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=persist_tool_configs
+            persist_tool_configs=persist_tool_configs,
+            retry_config=retry_config,
+            parallel_tool_calls=parallel_tool_calls
         )
         if agent._needs_session_load:
             await agent.init_async()
@@ -934,6 +1481,162 @@ class TinyAgent:

         # Tool configs would be handled separately if needed

+    async def summarize(self) -> str:
+        """
+        Generate a summary of the current conversation history.
+
+        Args:
+            custom_model: Optional model to use for summary generation (overrides self.summary_model)
+            custom_system_prompt: Optional system prompt for summary generation (overrides self.summary_system_prompt)
+
+        Returns:
+            A string containing the conversation summary
+        """
+        # Skip if there are no messages or just the system message
+        if len(self.messages) <= 1:
+            return "No conversation to summarize."
+
+        # Use provided parameters or defaults
+        system_prompt = self.summary_config.get("system_prompt",DEFAULT_SUMMARY_SYSTEM_PROMPT)
+
+        # Format the conversation into a single string
+        conversation_text = self._format_conversation_for_summary()
+
+        task_prompt = load_template(str(Path(__file__).parent / "prompts" / "summarize.yaml"),"user_prompt")
+
+        # Build the prompt for the summary model
+        summary_messages = [
+            {
+                "role": "system",
+                "content": system_prompt
+            },
+            {
+                "role": "user",
+                #"content": f"Here is the conversation so far:\n{conversation_text}\n\nPlease summarize this conversation, covering:\n0. What is the task its requirments, goals and constraints\n1. Tasks performed and outcomes\n2. Code files, modules, or functions modified or examined\n3. Important decisions or assumptions made\n4. Errors encountered and test or build results\n5. Remaining tasks, open questions, or next steps\nProvide the summary in a clear, concise format."
+                "content":conversation_text
+            },
+            {
+                "role": "user",
+                "content": task_prompt
+            }
+        ]
+
+        try:
+            # Log that we're generating a summary
+            self.logger.info(f"Generating conversation summary using model {self.summary_config.get('model',self.model)}")
+
+            # Call the LLM to generate the summary using our retry wrapper
+            response = await self._litellm_with_retry(
+                model=self.summary_config.get("model",self.model),
+                api_key=self.summary_config.get("api_key",self.api_key),
+                messages=summary_messages,
+                temperature=self.summary_config.get("temperature",self.temperature),
+                max_tokens=self.summary_config.get("max_tokens",8000)
+            )
+
+            # Extract the summary from the response
+            summary = response.choices[0].message.content
+            return summary
+
+        except Exception as e:
+            self.logger.error(f"Error generating conversation summary: {str(e)}")
+            return f"Failed to generate summary: {str(e)}"
+
+    async def compact(self) -> bool:
+        """
+        Compact the conversation history by replacing it with a summary.
+
+        This method:
+        1. Generates a summary of the current conversation
+        2. If successful, replaces the conversation with just [system, user] messages
+           where the user message contains the summary
+        3. Returns True if compaction was successful, False otherwise
+
+        Returns:
+            Boolean indicating whether the compaction was successful
+        """
+        # Skip if there are no messages or just the system message
+        if len(self.messages) <= 1:
+            self.logger.info("No conversation to compact.")
+            return False
+
+        # Generate the summary
+        summary = await self.summarize()
+
+        # Check if the summary generation was successful
+        if summary.startswith("Failed to generate summary:") or summary == "No conversation to summarize.":
+            self.logger.error(f"Compaction failed: {summary}")
+            return False
+
+        # Save the system message
+        system_message = self.messages[0]
+
+        # Create a new user message with the summary
+        summary_message = {
+            "role": "user",
+            "content": f"This session is being continued from a previous conversation that ran out of context. The conversation is summarized below:\n{summary}",
+            "created_at": int(time.time())
+        }
+
+        # Replace the conversation with just [system, user] messages
+        self.messages = [system_message, summary_message]
+
+        # Notify about the compaction
+        self.logger.info("🤐Conversation successfully compacted.")
+        await self._run_callbacks("message_add", message=summary_message)
+
+        return True
+
+    def _format_conversation_for_summary(self) -> str:
+        """
+        Format the conversation history into a string for summarization.
+
+        Returns:
+            A string representing the conversation in the format:
+            user: content
+            assistant: content
+            tool_call: tool name and args
+            tool_response: response content
+            ...
+        """
+        formatted_lines = []
+
+        # Skip the system message (index 0)
+        for message in self.messages[1:]:
+            role = message.get("role", "unknown")
+
+            if role == "user":
+                formatted_lines.append(f"user: {message.get('content', '')}")
+
+            elif role == "assistant":
+                content = message.get("content", "")
+                tool_calls = message.get("tool_calls", [])
+
+                # Add assistant message content if present
+                if content:
+                    formatted_lines.append(f"assistant: {content}")
+
+                # Add tool calls if present
+                for tool_call in tool_calls:
+                    function_info = tool_call.get("function", {})
+                    tool_name = function_info.get("name", "unknown_tool")
+                    arguments = function_info.get("arguments", "{}")
+
+                    formatted_lines.append(f"tool_call: {tool_name} with args {arguments}")
+
+            elif role == "tool":
+                tool_name = message.get("name", "unknown_tool")
+                content = message.get("content", "")
+                formatted_lines.append(f"tool_response: {content}")
+
+            else:
+                # Handle any other message types
+                formatted_lines.append(f"{role}: {message.get('content', '')}")
+
+        return [{'type': 'text', 'text': f"{x}"} for x in formatted_lines]
+        #return "\n".join(formatted_lines)
+
 async def run_example():
     """Example usage of TinyAgent with proper logging."""
     import os
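A hedged usage sketch for the new summarization API (not part of the diff; assumes an existing agent with some conversation history): summarize() returns the summary text, while compact() replaces the whole history with [system, summary] and reports whether it succeeded.

    async def shrink_history(agent) -> None:
        summary = await agent.summarize()
        print("Summary:", summary[:200])
        if await agent.compact():
            print("History compacted; message count is now", len(agent.messages))  # expected: 2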
@@ -970,16 +1673,39 @@ async def run_example():
         agent_logger.error("Please set the OPENAI_API_KEY environment variable")
         return

-    #
-
-
+    # Custom retry configuration - more aggressive than default
+    custom_retry_config = {
+        "max_retries": 3,  # Fewer retries for the example
+        "min_backoff": 2,  # Start with 2 seconds
+        "max_backoff": 30,  # Max 30 seconds between retries
+        "retry_exceptions": [
+            "litellm.InternalServerError",
+            "litellm.APIError",
+            "litellm.APIConnectionError",
+            "litellm.RateLimitError",
+            "litellm.ServiceUnavailableError",
+            "litellm.APITimeoutError",
+            "TimeoutError",  # Add any additional exceptions
+            "ConnectionError"
+        ],
+        # Rate limit specific configuration
+        "rate_limit_backoff_min": 60,  # Wait 60-90 seconds for rate limit errors
+        "rate_limit_backoff_max": 90,  # This is the recommended range for most APIs
+    }
+
+    # Example 1: Using a model that supports parallel function calling (GPT-4)
+    agent_logger.info("Example 1: Using a model that supports parallel function calling (GPT-4)")
+    agent1 = await TinyAgent.create(
+        model="gpt-4",  # A model that supports parallel function calling
         api_key=api_key,
         logger=agent_logger,
-        session_id="
-
+        session_id="parallel-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True,  # Explicitly enable parallel function calling
+        drop_unsupported_params=True  # Enable dropping unsupported parameters
     )

-    # Add the Rich UI callback
+    # Add the Rich UI callback
     rich_ui = RichUICallback(
         markdown=True,
         show_message=True,
@@ -987,15 +1713,51 @@ async def run_example():
         show_tool_calls=True,
         logger=ui_logger
     )
-
+    agent1.add_callback(rich_ui)
+
+    # Connect to MCP servers for additional tools
+    try:
+        await agent1.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")
+
+    # Run the agent with a task that would benefit from parallel function calling
+    user_input1 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input1}")
+    result1 = await agent1.run(user_input1, max_turns=10)
+    agent_logger.info(f"Final result from example 1: {result1}")
+
+    # Clean up
+    await agent1.close()
+
+    # Example 2: Using a model that doesn't support parallel function calling (o4-mini)
+    agent_logger.info("\nExample 2: Using a model that doesn't support parallel function calling (o4-mini)")
+    agent2 = await TinyAgent.create(
+        model="o4-mini",  # A model that doesn't support parallel function calling
+        api_key=api_key,
+        logger=agent_logger,
+        session_id="o4-mini-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True,  # We still set this to True, but it will be automatically disabled
+        drop_unsupported_params=True  # Enable dropping unsupported parameters
+    )
+
+    # Add the Rich UI callback
+    agent2.add_callback(rich_ui)

-    #
-
-
-
+    # Connect to the same MCP server
+    try:
+        await agent2.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")

-
+    # Run the agent with the same task
+    user_input2 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input2}")
+    result2 = await agent2.run(user_input2, max_turns=10)
+    agent_logger.info(f"Final result from example 2: {result2}")

     # Clean up
-    await
-
+    await agent2.close()
+
+    agent_logger.debug("Examples completed")