tinyagent-py 0.0.15__py3-none-any.whl → 0.0.16rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinyagent/code_agent/providers/__init__.py +14 -1
- tinyagent/code_agent/providers/base.py +29 -1
- tinyagent/code_agent/providers/modal_provider.py +9 -0
- tinyagent/code_agent/providers/seatbelt_provider.py +1065 -0
- tinyagent/code_agent/tiny_code_agent.py +692 -8
- tinyagent/code_agent/utils.py +187 -22
- tinyagent/prompts/truncation.yaml +13 -0
- tinyagent/tiny_agent.py +402 -49
- {tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/METADATA +25 -1
- {tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/RECORD +13 -11
- {tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/WHEEL +0 -0
- {tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/licenses/LICENSE +0 -0
- {tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/top_level.txt +0 -0
tinyagent/tiny_agent.py
CHANGED
@@ -13,11 +13,36 @@ from .storage import Storage # ← your abstract base
 import traceback
 import time  # Add time import for Unix timestamps
 from pathlib import Path
+import random  # Add random for jitter in retry backoff
 
 # Module-level logger; configuration is handled externally.
 logger = logging.getLogger(__name__)
 #litellm.callbacks = ["arize_phoenix"]
 
+# Set global LiteLLM configuration
+litellm.drop_params = True  # Enable dropping unsupported parameters globally
+
+# Define default retry configuration
+DEFAULT_RETRY_CONFIG = {
+    "max_retries": 5,
+    "min_backoff": 1,  # Start with 1 second
+    "max_backoff": 60,  # Max 60 seconds between retries
+    "jitter": True,  # Add randomness to backoff
+    "backoff_multiplier": 2,  # Exponential backoff factor
+    "retry_status_codes": [429, 500, 502, 503, 504],  # Common server errors
+    "retry_exceptions": [
+        "litellm.InternalServerError",
+        "litellm.APIError",
+        "litellm.APIConnectionError",
+        "litellm.RateLimitError",
+        "litellm.ServiceUnavailableError",
+        "litellm.APITimeoutError"
+    ],
+    # Rate limit specific configuration
+    "rate_limit_backoff_min": 60,  # Minimum wait time for rate limit errors (60 seconds)
+    "rate_limit_backoff_max": 90,  # Maximum wait time for rate limit errors (90 seconds)
+}
+
 def load_template(path: str,key:str="system_prompt") -> str:
     """
     Load the YAML file and extract its 'system_prompt' field.
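Editor's note (illustration only, not part of the package): the following standalone sketch shows the delay schedule these defaults produce, using the same exponential-backoff formula that the new `_litellm_with_retry` method applies later in this diff.

```python
import random

cfg = {"max_retries": 5, "min_backoff": 1, "max_backoff": 60,
       "backoff_multiplier": 2, "jitter": True}

for attempt in range(1, cfg["max_retries"] + 1):
    # Exponential backoff capped at max_backoff: roughly 1, 2, 4, 8, 16 seconds
    backoff = min(cfg["max_backoff"],
                  cfg["min_backoff"] * cfg["backoff_multiplier"] ** (attempt - 1))
    if cfg["jitter"]:
        backoff *= 0.8 + 0.4 * random.random()  # ±20% jitter
    print(f"attempt {attempt}: wait ~{backoff:.1f}s")

# Rate-limit errors bypass this schedule and wait a uniform 60-90 seconds instead.
```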
@@ -330,7 +355,13 @@ DEFAULT_SUMMARY_SYSTEM_PROMPT = (
 class TinyAgent:
     """
     A minimal implementation of an agent powered by MCP and LiteLLM,
-    now with session/state persistence.
+    now with session/state persistence and robust error handling.
+
+    Features:
+    - Automatic retry mechanism for LLM API calls with exponential backoff
+    - Configurable retry parameters (max retries, backoff times, etc.)
+    - Session persistence
+    - Tool integration via MCP protocol
     """
     session_state: Dict[str, Any] = {}
     user_id: Optional[str] = None
@@ -350,7 +381,9 @@
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
         persist_tool_configs: bool = False,
-        summary_config: Optional[Dict[str, Any]] = None
+        summary_config: Optional[Dict[str, Any]] = None,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ):
         """
         Initialize the Tiny Agent.
@@ -359,14 +392,29 @@
             model: The model to use with LiteLLM
             api_key: The API key for the model provider
             system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
             logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
             session_id: Optional session ID (if provided with storage, will attempt to load existing session)
             metadata: Optional metadata for the session
             storage: Optional storage backend for persistence
             persist_tool_configs: Whether to persist tool configurations
-
-
-
+            summary_config: Optional model to use for generating conversation summaries
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is True.
         """
         # Set up logger
         self.logger = logger or logging.getLogger(__name__)
 
@@ -378,6 +426,12 @@
         # Simplified hook system - single list of callbacks
         self.callbacks: List[callable] = []
 
+        # Configure LiteLLM to drop unsupported parameters
+        # This is also set globally at the module level, but we set it again here to be sure
+        import litellm
+        litellm.drop_params = True
+        self.logger.info("LiteLLM drop_params feature is enabled")
+
         # LiteLLM configuration
         self.model = model
         self.api_key = api_key
@@ -388,6 +442,14 @@
 
         self.model_kwargs = model_kwargs
         self.encoder = tiktoken.get_encoding("o200k_base")
+
+        # Set up retry configuration
+        self.retry_config = DEFAULT_RETRY_CONFIG.copy()
+        if retry_config:
+            self.retry_config.update(retry_config)
+
+        # Set parallel tool calls preference
+        self.parallel_tool_calls = parallel_tool_calls
 
         # Conversation state
         self.messages = [{
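Editor's note (usage sketch, not from the package; the `from tinyagent import TinyAgent` import path and the placeholder API key are assumptions): because the constructor copies `DEFAULT_RETRY_CONFIG` and then `update()`s it with the caller's `retry_config`, a partial override keeps every other default.

```python
from tinyagent import TinyAgent  # import path assumed

agent = TinyAgent(
    model="gpt-4.1-mini",
    api_key="sk-placeholder",              # placeholder, not a real key
    retry_config={"max_retries": 3,        # only these two keys are overridden;
                  "max_backoff": 30},      # everything else keeps its DEFAULT_RETRY_CONFIG value
    parallel_tool_calls=True,
)
print(agent.retry_config["max_retries"])   # 3  (overridden)
print(agent.retry_config["min_backoff"])   # 1  (default preserved)
```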
@@ -400,8 +462,11 @@
         # This list now accumulates tools from *all* connected MCP servers:
         self.available_tools: List[Dict[str, Any]] = []
 
-        #
-
+        # Default built-in tools:
+        # - final_answer: Exit tool that completes the task and returns the final answer
+        # - ask_question: Exit tool that asks the user a question and waits for a response
+        # - notify_user: Non-exit tool that shares progress with the user without stopping the agent loop
         self.default_tools = [
             {
                 "type": "function",
                 "function": {
@@ -431,6 +496,23 @@
                         "required": ["question"]
                     }
                 }
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "notify_user",
+                    "description": "Share progress or status updates with the user without stopping the agent loop. Use this to keep the user informed during long-running tasks. Unlike final_answer and ask_question, this tool allows the agent to continue processing after sending the notification.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "message": {
+                                "type": "string",
+                                "description": "The progress update or status message to share with the user"
+                            }
+                        },
+                        "required": ["message"]
+                    }
+                }
             }
         ]
 
@@ -576,7 +658,8 @@
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=False  # default off
+            persist_tool_configs=False,  # default off
+            retry_config=None  # Use default retry configuration
         )
 
         # Apply the session data directly instead of loading from storage
@@ -829,21 +912,41 @@
         # The main agent loop
         while True:
             # Get all available tools including exit loop tools
-            all_tools = self.available_tools + self.
+            all_tools = self.available_tools + self.default_tools
 
             # Call LLM with messages and tools
             try:
                 self.logger.info(f"Calling LLM with {len(self.messages)} messages and {len(all_tools)} tools")
 
+                # Verify LiteLLM drop_params setting
+                import litellm
+                self.logger.info(f"LiteLLM drop_params is currently set to: {litellm.drop_params}")
+
                 # Notify LLM start
                 await self._run_callbacks("llm_start", messages=self.messages, tools=all_tools)
 
-
+                # Use parallel_tool_calls based on user preference, default to False if not specified
+                use_parallel_tool_calls = self.parallel_tool_calls if self.parallel_tool_calls is not None else False
+
+                # Disable parallel_tool_calls for models known not to support it
+                unsupported_models = ["o1-mini", "o1-preview", "o3", "o4-mini"]
+                for unsupported_model in unsupported_models:
+                    if unsupported_model in self.model:
+                        old_value = use_parallel_tool_calls
+                        use_parallel_tool_calls = False
+                        if old_value:
+                            self.logger.warning(f"Disabling parallel_tool_calls for model {self.model} as it's known not to support it")
+
+                self.logger.info(f"Using parallel tool calls: {use_parallel_tool_calls}")
+
+                # Use our retry wrapper instead of direct litellm call
+                response = await self._litellm_with_retry(
                     model=self.model,
                     api_key=self.api_key,
                     messages=self.messages,
                     tools=all_tools,
                     tool_choice="auto",
+                    parallel_tool_calls=use_parallel_tool_calls,
                     temperature=self.temperature,
                     **self.model_kwargs
                 )
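Editor's note (standalone sketch, names are illustrative rather than the agent's internals): the model gate above is a plain substring check over the model name, equivalent to the following.

```python
from typing import Optional

UNSUPPORTED_MODELS = ["o1-mini", "o1-preview", "o3", "o4-mini"]

def effective_parallel_tool_calls(model: str, requested: Optional[bool]) -> bool:
    # Mirrors the gate above: an unset preference falls back to False, and any model
    # whose name contains a known-unsupported identifier is forced to False.
    use_parallel = requested if requested is not None else False
    if any(name in model for name in UNSUPPORTED_MODELS):
        return False
    return use_parallel

print(effective_parallel_tool_calls("gpt-4", True))    # True
print(effective_parallel_tool_calls("o4-mini", True))  # False
```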
@@ -881,8 +984,11 @@
                 if has_tool_calls:
                     self.logger.info(f"Tool calls detected: {len(tool_calls)}")
 
-                    #
-
+                    # Create a list to hold all the tool execution tasks
+                    tool_tasks = []
+
+                    # Create a function to process a single tool call
+                    async def process_tool_call(tool_call):
                         tool_call_id = tool_call.id
                         function_info = tool_call.function
                         tool_name = function_info.name
@@ -912,22 +1018,15 @@
                         if tool_name == "final_answer":
                             # Add a response for this tool call before returning
                             tool_result_content = tool_args.get("content", "Task completed without final answer.!!!")
-                            tool_message["content"] = tool_result_content
-                            self.messages.append(tool_message)
-                            await self._run_callbacks("message_add", message=tool_message)
-                            await self._run_callbacks("agent_end", result="Task completed.")
-                            await self._run_callbacks("tool_end", tool_call=tool_call, result=tool_result_content)
-                            return tool_message["content"]
                         elif tool_name == "ask_question":
                             question = tool_args.get("question", "Could you provide more details?")
                             # Add a response for this tool call before returning
                             tool_result_content = f"Question asked: {question}"
-
-
-
-
-
-                            return f"I need more information: {question}"
+                        elif tool_name == "notify_user":
+                            message = tool_args.get("message", "No message provided.")
+                            self.logger.info(f"Received notify_user tool call with message: {message}")
+                            # Set the tool result content
+                            tool_result_content = "OK"
                         else:
                             # Check if it's a custom tool first
                             if tool_name in self.custom_tool_handlers:
@@ -965,9 +1064,32 @@
                         # Always add the tool message to ensure each tool call has a response
                         tool_message["content"] = tool_result_content
                         await self._run_callbacks("tool_end", tool_call=tool_call, result=tool_result_content)
-
+                        return tool_message
+
+                    # Create tasks for all tool calls
+                    for tool_call in tool_calls:
+                        tool_tasks.append(process_tool_call(tool_call))
+
+                    # Execute all tool calls concurrently
+                    tool_messages = await asyncio.gather(*tool_tasks)
+
+                    # Process results of tool calls
+                    for tool_message in tool_messages:
                         self.messages.append(tool_message)
                         await self._run_callbacks("message_add", message=tool_message)
+
+                        # Handle special exit tools
+                        if tool_message["name"] == "final_answer":
+                            await self._run_callbacks("agent_end", result="Task completed.")
+                            return tool_message["content"]
+                        elif tool_message["name"] == "ask_question":
+                            # Extract the question from the original tool call
+                            for tc in tool_calls:
+                                if tc.id == tool_message["tool_call_id"]:
+                                    args = json.loads(tc.function.arguments)
+                                    question = args.get("question", "")
+                                    await self._run_callbacks("agent_end", result=f"I need more information: {question}")
+                                    return f"I need more information: {question}"
 
                     next_turn_should_call_tools = False
                 else:
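Editor's note (minimal standalone sketch of the pattern adopted above; names are illustrative, not the agent's internals): the refactor turns the per-tool-call loop into concurrent tasks executed with `asyncio.gather`, which preserves input order so results can be appended to the conversation deterministically.

```python
import asyncio

async def process_tool_call(call_id: str) -> dict:
    # Stand-in for running one tool; each call awaits independently.
    await asyncio.sleep(0.1)
    return {"role": "tool", "tool_call_id": call_id, "content": "OK"}

async def main():
    tool_calls = ["call_1", "call_2", "call_3"]
    # gather() runs all coroutines concurrently and returns results in input order.
    tool_messages = await asyncio.gather(*(process_tool_call(c) for c in tool_calls))
    for message in tool_messages:
        print(message["tool_call_id"], message["content"])

asyncio.run(main())
```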
@@ -1113,6 +1235,154 @@
         self._needs_session_load = False
 
         return self
+
+    def _is_rate_limit_error(self, exception: Exception) -> bool:
+        """
+        Check if an exception is a rate limit error that should be handled with longer backoff.
+
+        Args:
+            exception: The exception to check
+
+        Returns:
+            True if this is a rate limit error, False otherwise
+        """
+        if not exception:
+            return False
+
+        # Check for LiteLLM RateLimitError
+        error_name = exception.__class__.__name__
+        if "RateLimitError" in error_name:
+            return True
+
+        # Check for rate limit in the error message
+        error_message = str(exception).lower()
+        rate_limit_indicators = [
+            "rate limit",
+            "rate_limit_error",
+            "rate-limit",
+            "too many requests",
+            "quota exceeded",
+            "requests per minute",
+            "requests per hour",
+            "requests per day",
+            "rate limiting",
+            "throttled"
+        ]
+
+        for indicator in rate_limit_indicators:
+            if indicator in error_message:
+                return True
+
+        # Check for specific HTTP status codes (429 = Too Many Requests)
+        status_code = getattr(exception, "status_code", None)
+        if status_code == 429:
+            return True
+
+        return False
+
+    async def _litellm_with_retry(self, **kwargs) -> Any:
+        """
+        Execute litellm.acompletion with retry logic for handling transient errors.
+
+        Args:
+            **kwargs: Arguments to pass to litellm.acompletion
+
+        Returns:
+            The response from litellm.acompletion
+
+        Raises:
+            Exception: If all retries fail
+        """
+        max_retries = self.retry_config["max_retries"]
+        min_backoff = self.retry_config["min_backoff"]
+        max_backoff = self.retry_config["max_backoff"]
+        backoff_multiplier = self.retry_config["backoff_multiplier"]
+        jitter = self.retry_config["jitter"]
+        retry_status_codes = self.retry_config["retry_status_codes"]
+        retry_exceptions = self.retry_config["retry_exceptions"]
+
+        # Rate limit specific configuration
+        rate_limit_backoff_min = self.retry_config.get("rate_limit_backoff_min", 60)  # 60 seconds
+        rate_limit_backoff_max = self.retry_config.get("rate_limit_backoff_max", 90)  # 90 seconds
+
+        attempt = 0
+        last_exception = None
+
+        # Log the model and key parameters being used
+        model_name = kwargs.get('model', 'unknown')
+        self.logger.debug(f"Calling LiteLLM with model: {model_name}")
+        if 'parallel_tool_calls' in kwargs:
+            self.logger.debug(f"Using parallel_tool_calls={kwargs['parallel_tool_calls']}")
+
+        while attempt <= max_retries:
+            try:
+                # First attempt or retry
+                if attempt > 0:
+                    # Check if this is a rate limit error and handle it specially
+                    is_rate_limit_error = self._is_rate_limit_error(last_exception)
+
+                    if is_rate_limit_error:
+                        # Use longer backoff for rate limit errors (60-90 seconds)
+                        backoff = rate_limit_backoff_min + (rate_limit_backoff_max - rate_limit_backoff_min) * random.random()
+                        self.logger.warning(
+                            f"Rate limit error detected. Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+                    else:
+                        # Use normal exponential backoff for other errors
+                        backoff = min(max_backoff, min_backoff * (backoff_multiplier ** (attempt - 1)))
+
+                        # Add jitter if enabled (±20% randomness)
+                        if jitter:
+                            backoff = backoff * (0.8 + 0.4 * random.random())
+
+                        self.logger.warning(
+                            f"Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+
+                    # Wait before retry
+                    await asyncio.sleep(backoff)
+
+                # Make the actual API call
+                return await litellm.acompletion(**kwargs)
+
+            except Exception as e:
+                last_exception = e
+                error_name = e.__class__.__name__
+                full_error_path = f"{e.__class__.__module__}.{error_name}" if hasattr(e, "__module__") else error_name
+
+                # Check if this exception should trigger a retry
+                should_retry = False
+
+                # Check for status code in exception (if available)
+                status_code = getattr(e, "status_code", None)
+                if status_code and status_code in retry_status_codes:
+                    should_retry = True
+
+                # Check exception type against retry list
+                for exception_path in retry_exceptions:
+                    if exception_path in full_error_path:
+                        should_retry = True
+                        break
+
+                if not should_retry or attempt >= max_retries:
+                    # Either not a retryable error or we've exhausted retries
+                    self.logger.error(
+                        f"LLM call failed after {attempt} attempt(s). Error: {str(e)}"
+                    )
+                    raise
+
+                # Log the error and continue to next retry attempt
+                error_type = "rate limit" if self._is_rate_limit_error(e) else "general"
+                self.logger.warning(
+                    f"LLM call failed (attempt {attempt+1}/{max_retries+1}) - {error_type} error: {str(e)}. Will retry."
+                )
+
+                attempt += 1
+
+        # This should not be reached due to the raise in the loop, but just in case:
+        raise last_exception
 
     @classmethod
     async def create(
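Editor's note (standalone sketch of just the retryability check): because `retry_exceptions` holds strings rather than exception classes, the wrapper decides whether to retry by substring-matching the module-qualified exception name; the example below uses the `TimeoutError`/`ConnectionError` entries that appear in the example configuration later in this diff.

```python
RETRY_EXCEPTIONS = ["TimeoutError", "ConnectionError"]

def should_retry(exc: Exception) -> bool:
    # Same test as the wrapper above: build "module.ClassName" and look for any
    # configured pattern inside it.
    name = exc.__class__.__name__
    path = f"{exc.__class__.__module__}.{name}" if hasattr(exc, "__module__") else name
    return any(pattern in path for pattern in RETRY_EXCEPTIONS)

print(should_retry(TimeoutError()))     # True  -> "builtins.TimeoutError" contains "TimeoutError"
print(should_retry(ConnectionError()))  # True
print(should_retry(ValueError()))       # False -> not retried, raised immediately
```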
@@ -1128,11 +1398,39 @@
         session_id: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
-        persist_tool_configs: bool = False
+        persist_tool_configs: bool = False,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ) -> "TinyAgent":
         """
         Async factory: constructs the agent, then loads an existing session
         if (storage and session_id) were provided.
+
+        Args:
+            model: The model to use with LiteLLM
+            api_key: The API key for the model provider
+            system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
+            logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
+            session_id: Optional session ID (if provided with storage, will attempt to load existing session)
+            metadata: Optional metadata for the session
+            storage: Optional storage backend for persistence
+            persist_tool_configs: Whether to persist tool configurations
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is None (disabled).
         """
         agent = cls(
             model=model,
@@ -1145,7 +1443,9 @@
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=persist_tool_configs
+            persist_tool_configs=persist_tool_configs,
+            retry_config=retry_config,
+            parallel_tool_calls=parallel_tool_calls
         )
         if agent._needs_session_load:
             await agent.init_async()
@@ -1225,13 +1525,13 @@
         # Log that we're generating a summary
         self.logger.info(f"Generating conversation summary using model {self.summary_config.get('model',self.model)}")
 
-        # Call the LLM to generate the summary
-        response = await
+        # Call the LLM to generate the summary using our retry wrapper
+        response = await self._litellm_with_retry(
             model=self.summary_config.get("model",self.model),
             api_key=self.summary_config.get("api_key",self.api_key),
             messages=summary_messages,
-            temperature=self.summary_config.get("temperature",self.temperature),
-            max_tokens=self.summary_config.get("max_tokens",8000)
+            temperature=self.summary_config.get("temperature",self.temperature),
+            max_tokens=self.summary_config.get("max_tokens",8000)
         )
 
         # Extract the summary from the response
@@ -1373,16 +1673,39 @@ async def run_example():
         agent_logger.error("Please set the OPENAI_API_KEY environment variable")
         return
 
-    #
-
-
+    # Custom retry configuration - more aggressive than default
+    custom_retry_config = {
+        "max_retries": 3,  # Fewer retries for the example
+        "min_backoff": 2,  # Start with 2 seconds
+        "max_backoff": 30,  # Max 30 seconds between retries
+        "retry_exceptions": [
+            "litellm.InternalServerError",
+            "litellm.APIError",
+            "litellm.APIConnectionError",
+            "litellm.RateLimitError",
+            "litellm.ServiceUnavailableError",
+            "litellm.APITimeoutError",
+            "TimeoutError",  # Add any additional exceptions
+            "ConnectionError"
+        ],
+        # Rate limit specific configuration
+        "rate_limit_backoff_min": 60,  # Wait 60-90 seconds for rate limit errors
+        "rate_limit_backoff_max": 90,  # This is the recommended range for most APIs
+    }
+
+    # Example 1: Using a model that supports parallel function calling (GPT-4)
+    agent_logger.info("Example 1: Using a model that supports parallel function calling (GPT-4)")
+    agent1 = await TinyAgent.create(
+        model="gpt-4",  # A model that supports parallel function calling
         api_key=api_key,
         logger=agent_logger,
-        session_id="
-
+        session_id="parallel-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True,  # Explicitly enable parallel function calling
+        drop_unsupported_params=True  # Enable dropping unsupported parameters
    )
 
-    # Add the Rich UI callback
+    # Add the Rich UI callback
     rich_ui = RichUICallback(
         markdown=True,
         show_message=True,
@@ -1390,21 +1713,51 @@
         show_tool_calls=True,
         logger=ui_logger
     )
-
+    agent1.add_callback(rich_ui)
 
-    #
-
-
-
+    # Connect to MCP servers for additional tools
+    try:
+        await agent1.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")
 
-
+    # Run the agent with a task that would benefit from parallel function calling
+    user_input1 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input1}")
+    result1 = await agent1.run(user_input1, max_turns=10)
+    agent_logger.info(f"Final result from example 1: {result1}")
 
-    #
-
-    resume_result = await agent.resume(max_turns=3)
+    # Clean up
+    await agent1.close()
 
-
+    # Example 2: Using a model that doesn't support parallel function calling (o4-mini)
+    agent_logger.info("\nExample 2: Using a model that doesn't support parallel function calling (o4-mini)")
+    agent2 = await TinyAgent.create(
+        model="o4-mini",  # A model that doesn't support parallel function calling
+        api_key=api_key,
+        logger=agent_logger,
+        session_id="o4-mini-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True,  # We still set this to True, but it will be automatically disabled
+        drop_unsupported_params=True  # Enable dropping unsupported parameters
+    )
+
+    # Add the Rich UI callback
+    agent2.add_callback(rich_ui)
+
+    # Connect to the same MCP server
+    try:
+        await agent2.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")
+
+    # Run the agent with the same task
+    user_input2 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input2}")
+    result2 = await agent2.run(user_input2, max_turns=10)
+    agent_logger.info(f"Final result from example 2: {result2}")
 
     # Clean up
-    await
-
+    await agent2.close()
+
+    agent_logger.debug("Examples completed")
{tinyagent_py-0.0.15.dist-info → tinyagent_py-0.0.16rc0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tinyagent-py
-Version: 0.0.15
+Version: 0.0.16rc0
 Summary: TinyAgent with MCP Client, Code Agent (Thinking, Planning, and Executing in Python), and Extendable Hooks, Tiny but powerful
 Author-email: Mahdi Golchin <golchin@askdev.ai>
 Project-URL: Homepage, https://github.com/askbudi/tinyagent
@@ -274,6 +274,30 @@ agent = TinyCodeAgent(
 )
 ```
 
+### Automatic Git Checkpoints
+
+TinyCodeAgent can automatically create Git checkpoints after each successful shell command execution. This helps track changes made by the agent and provides a safety net for reverting changes if needed.
+
+```python
+# Enable automatic Git checkpoints during initialization
+agent = TinyCodeAgent(
+    model="gpt-4.1-mini",
+    auto_git_checkpoint=True  # Enable automatic Git checkpoints
+)
+
+# Or enable/disable it later
+agent.enable_auto_git_checkpoint(True)   # Enable
+agent.enable_auto_git_checkpoint(False)  # Disable
+
+# Check current status
+is_enabled = agent.get_auto_git_checkpoint_status()
+```
+
+Each checkpoint includes:
+- Descriptive commit message with the command description
+- Timestamp of when the command was executed
+- The actual command that was run
+
 For detailed documentation, see the [TinyCodeAgent README](tinyagent/code_agent/README.md).
 
 ## How the TinyAgent Hook System Works
|