tinyagent-py 0.0.13__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tinyagent/tiny_agent.py CHANGED
@@ -12,10 +12,46 @@ import uuid
 from .storage import Storage # ← your abstract base
 import traceback
 import time # Add time import for Unix timestamps
+from pathlib import Path
+import random # Add random for jitter in retry backoff
+
 # Module-level logger; configuration is handled externally.
 logger = logging.getLogger(__name__)
 #litellm.callbacks = ["arize_phoenix"]

+# Set global LiteLLM configuration
+litellm.drop_params = True # Enable dropping unsupported parameters globally
+
+# Define default retry configuration
+DEFAULT_RETRY_CONFIG = {
+    "max_retries": 5,
+    "min_backoff": 1, # Start with 1 second
+    "max_backoff": 60, # Max 60 seconds between retries
+    "jitter": True, # Add randomness to backoff
+    "backoff_multiplier": 2, # Exponential backoff factor
+    "retry_status_codes": [429, 500, 502, 503, 504], # Common server errors
+    "retry_exceptions": [
+        "litellm.InternalServerError",
+        "litellm.APIError",
+        "litellm.APIConnectionError",
+        "litellm.RateLimitError",
+        "litellm.ServiceUnavailableError",
+        "litellm.APITimeoutError"
+    ],
+    # Rate limit specific configuration
+    "rate_limit_backoff_min": 60, # Minimum wait time for rate limit errors (60 seconds)
+    "rate_limit_backoff_max": 90, # Maximum wait time for rate limit errors (90 seconds)
+}
+
+def load_template(path: str, key: str = "system_prompt") -> str:
+    """
+    Load the YAML file and extract its 'system_prompt' field.
+    """
+    import yaml
+    with open(path, "r") as f:
+        data = yaml.safe_load(f)
+    return data[key]
+
 def tool(name: Optional[str] = None, description: Optional[str] = None,
          schema: Optional[Dict[str, Any]] = None):
     """
@@ -39,6 +75,11 @@ def tool(name: Optional[str] = None, description: Optional[str] = None,
         # Get the description (use provided description or docstring)
         tool_description = description or inspect.getdoc(func_or_class) or f"Tool based on {tool_name}"

+        # Temporarily attach the description to the function/class
+        # This allows _generate_schema_from_function to access it for param extraction
+        if description:
+            func_or_class._temp_tool_description = description
+
         # Generate schema if not provided
         tool_schema = schema or {}
         if not tool_schema:
@@ -50,6 +91,10 @@ def tool(name: Optional[str] = None, description: Optional[str] = None,
                 # For functions, use the function itself
                 tool_schema = _generate_schema_from_function(func_or_class)

+        # Clean up temporary attribute
+        if hasattr(func_or_class, '_temp_tool_description'):
+            delattr(func_or_class, '_temp_tool_description')
+
         # Attach metadata to the function or class
         func_or_class._tool_metadata = {
             "name": tool_name,
@@ -76,6 +121,65 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
     sig = inspect.signature(func)
     type_hints = get_type_hints(func)

+    # Extract parameter descriptions from docstring
+    param_descriptions = {}
+
+    # First check if we have a tool decorator description (has higher priority)
+    decorator_description = None
+    if hasattr(func, '_temp_tool_description'):
+        decorator_description = func._temp_tool_description
+
+    # Get function docstring
+    docstring = inspect.getdoc(func) or ""
+
+    # Combine sources to check for parameter descriptions
+    sources_to_check = []
+    if decorator_description:
+        sources_to_check.append(decorator_description)
+    if docstring:
+        sources_to_check.append(docstring)
+
+    # Parse parameter descriptions from all sources
+    for source in sources_to_check:
+        lines = source.split('\n')
+        in_args_section = False
+        current_param = None
+
+        for line in lines:
+            line = line.strip()
+
+            # Check for Args/Parameters section markers
+            if line.lower() in ('args:', 'arguments:', 'parameters:'):
+                in_args_section = True
+                continue
+
+            # Check for other section markers that would end the args section
+            if line.lower() in ('returns:', 'raises:', 'yields:', 'examples:') and in_args_section:
+                in_args_section = False
+
+            # Look for :param or :arg style parameter descriptions
+            if line.startswith((":param", ":arg")):
+                try:
+                    # e.g., ":param user_id: The ID of the user."
+                    parts = line.split(" ", 2)
+                    if len(parts) >= 3:
+                        param_name = parts[1].strip().split(" ")[0]
+                        param_descriptions[param_name] = parts[2].strip()
+                except (ValueError, IndexError):
+                    continue
+
+            # Look for indented parameter descriptions in Args section
+            elif in_args_section and line.strip():
+                # Check for param: description pattern
+                param_match = line.lstrip().split(":", 1)
+                if len(param_match) == 2:
+                    param_name = param_match[0].strip()
+                    description = param_match[1].strip()
+                    param_descriptions[param_name] = description
+                    current_param = param_name
+                # Check for continued description from previous param
+                elif current_param and line.startswith((' ', '\t')):
+                    param_descriptions[current_param] += " " + line.strip()
     # Skip 'self' parameter for methods
     params = {
         name: param for name, param in sig.parameters.items()
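To make the docstring parsing above concrete, here is a hedged sketch of the kind of tool definition it is written for; the weather tool and its field names are invented for illustration.

```python
from tinyagent.tiny_agent import tool

@tool(name="get_weather", description="Look up the current weather for a city.")
def get_weather(city: str, units: str = "metric") -> str:
    """
    Look up the current weather for a city.

    Args:
        city: Name of the city to look up.
        units: Either "metric" or "imperial".
    """
    return f"Sunny in {city} ({units})"

# After decoration, the "city" and "units" descriptions parsed from the Args: section
# should end up on the matching properties of the generated JSON schema; the decorator
# stores its result on the function object (see func_or_class._tool_metadata above).
```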
@@ -91,9 +195,12 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
         param_type = type_hints.get(name, Any)

         # Create property schema
-        prop_schema = {"description": ""}
+        prop_schema = {}
+        description = param_descriptions.get(name)
+        if description:
+            prop_schema["description"] = description

-        # Map Python types to JSON schema types
+        # Handle different types of type annotations
         if param_type == str:
             prop_schema["type"] = "string"
         elif param_type == int:
@@ -107,7 +214,113 @@ def _generate_schema_from_function(func: Callable) -> Dict[str, Any]:
         elif param_type == dict or param_type == Dict:
             prop_schema["type"] = "object"
         else:
-            prop_schema["type"] = "string" # Default to string for complex types
+            # Handle generic types
+            origin = getattr(param_type, "__origin__", None)
+            args = getattr(param_type, "__args__", None)
+
+            if origin is not None and args is not None:
+                # Handle List[X], Sequence[X], etc.
+                if origin in (list, List) or (hasattr(origin, "__name__") and "List" in origin.__name__):
+                    prop_schema["type"] = "array"
+                    # Add items type if we can determine it
+                    if args and len(args) == 1:
+                        item_type = args[0]
+                        if item_type == str:
+                            prop_schema["items"] = {"type": "string"}
+                        elif item_type == int:
+                            prop_schema["items"] = {"type": "integer"}
+                        elif item_type == float:
+                            prop_schema["items"] = {"type": "number"}
+                        elif item_type == bool:
+                            prop_schema["items"] = {"type": "boolean"}
+                        else:
+                            prop_schema["items"] = {"type": "string"}
+
+                # Handle Dict[K, V], Mapping[K, V], etc.
+                elif origin in (dict, Dict) or (hasattr(origin, "__name__") and "Dict" in origin.__name__):
+                    prop_schema["type"] = "object"
+                    # We could add additionalProperties for value type, but it's not always needed
+                    if args and len(args) == 2:
+                        value_type = args[1]
+                        if value_type == str:
+                            prop_schema["additionalProperties"] = {"type": "string"}
+                        elif value_type == int:
+                            prop_schema["additionalProperties"] = {"type": "integer"}
+                        elif value_type == float:
+                            prop_schema["additionalProperties"] = {"type": "number"}
+                        elif value_type == bool:
+                            prop_schema["additionalProperties"] = {"type": "boolean"}
+                        else:
+                            prop_schema["additionalProperties"] = {"type": "string"}
+
+                # Handle Union types (Optional is Union[T, None])
+                elif origin is Union:
+                    # Check if this is Optional[X] (Union[X, None])
+                    if type(None) in args:
+                        # Get the non-None type
+                        non_none_types = [arg for arg in args if arg is not type(None)]
+                        if non_none_types:
+                            # Use the first non-None type
+                            main_type = non_none_types[0]
+                            # Recursively process this type
+                            if main_type == str:
+                                prop_schema["type"] = "string"
+                            elif main_type == int:
+                                prop_schema["type"] = "integer"
+                            elif main_type == float:
+                                prop_schema["type"] = "number"
+                            elif main_type == bool:
+                                prop_schema["type"] = "boolean"
+                            elif main_type == list or main_type == List:
+                                prop_schema["type"] = "array"
+                            elif main_type == dict or main_type == Dict:
+                                prop_schema["type"] = "object"
+                            else:
+                                # Try to handle generic types like List[str]
+                                inner_origin = getattr(main_type, "__origin__", None)
+                                inner_args = getattr(main_type, "__args__", None)
+
+                                if inner_origin is not None and inner_args is not None:
+                                    if inner_origin in (list, List) or (hasattr(inner_origin, "__name__") and "List" in inner_origin.__name__):
+                                        prop_schema["type"] = "array"
+                                        if inner_args and len(inner_args) == 1:
+                                            inner_item_type = inner_args[0]
+                                            if inner_item_type == str:
+                                                prop_schema["items"] = {"type": "string"}
+                                            elif inner_item_type == int:
+                                                prop_schema["items"] = {"type": "integer"}
+                                            elif inner_item_type == float:
+                                                prop_schema["items"] = {"type": "number"}
+                                            elif inner_item_type == bool:
+                                                prop_schema["items"] = {"type": "boolean"}
+                                            else:
+                                                prop_schema["items"] = {"type": "string"}
+                                    elif inner_origin in (dict, Dict) or (hasattr(inner_origin, "__name__") and "Dict" in inner_origin.__name__):
+                                        prop_schema["type"] = "object"
+                                        # Add additionalProperties for value type
+                                        if inner_args and len(inner_args) == 2:
+                                            value_type = inner_args[1]
+                                            if value_type == str:
+                                                prop_schema["additionalProperties"] = {"type": "string"}
+                                            elif value_type == int:
+                                                prop_schema["additionalProperties"] = {"type": "integer"}
+                                            elif value_type == float:
+                                                prop_schema["additionalProperties"] = {"type": "number"}
+                                            elif value_type == bool:
+                                                prop_schema["additionalProperties"] = {"type": "boolean"}
+                                            else:
+                                                prop_schema["additionalProperties"] = {"type": "string"}
+                                    else:
+                                        prop_schema["type"] = "string" # Default for complex types
+                                else:
+                                    prop_schema["type"] = "string" # Default for complex types
+                    else:
+                        # For non-Optional Union types, default to string
+                        prop_schema["type"] = "string"
+                else:
+                    prop_schema["type"] = "string" # Default for other complex types
+            else:
+                prop_schema["type"] = "string" # Default to string for complex types

         properties[name] = prop_schema

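A quick sketch of the mapping this block implements, calling the module's `_generate_schema_from_function` directly; the sample function is hypothetical and exists only to show the expected property shapes.

```python
from typing import Dict, List, Optional

from tinyagent.tiny_agent import _generate_schema_from_function

def sample(tags: List[str], scores: Dict[str, float], limit: Optional[int] = None):
    """Example signature only."""

schema = _generate_schema_from_function(sample)
# Expected shape (roughly):
#   tags   -> {"type": "array", "items": {"type": "string"}}
#   scores -> {"type": "object", "additionalProperties": {"type": "number"}}
#   limit  -> {"type": "integer"}  (Optional[int] unwraps to the non-None type)
print(schema["properties"])
```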
@@ -132,10 +345,23 @@ DEFAULT_SYSTEM_PROMPT = (
     "If a tool you need isn't available, just say so."
 )

+DEFAULT_SUMMARY_SYSTEM_PROMPT = (
+    "You are an expert assistant. Your goal is to generate a concise, structured summary "
+    "of the conversation below that captures all essential information needed to continue "
+    "development after context replacement. Include tasks performed, code areas modified or "
+    "reviewed, key decisions or assumptions, test results or errors, and outstanding tasks or next steps."
+)
+
 class TinyAgent:
     """
     A minimal implementation of an agent powered by MCP and LiteLLM,
-    now with session/state persistence.
+    now with session/state persistence and robust error handling.
+
+    Features:
+    - Automatic retry mechanism for LLM API calls with exponential backoff
+    - Configurable retry parameters (max retries, backoff times, etc.)
+    - Session persistence
+    - Tool integration via MCP protocol
     """
     session_state: Dict[str, Any] = {}
     user_id: Optional[str] = None
@@ -154,7 +380,10 @@ class TinyAgent:
         session_id: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
-        persist_tool_configs: bool = False
+        persist_tool_configs: bool = False,
+        summary_config: Optional[Dict[str, Any]] = None,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ):
         """
         Initialize the Tiny Agent.
@@ -163,12 +392,29 @@ class TinyAgent:
             model: The model to use with LiteLLM
             api_key: The API key for the model provider
             system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
             logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
             session_id: Optional session ID (if provided with storage, will attempt to load existing session)
             metadata: Optional metadata for the session
             storage: Optional storage backend for persistence
             persist_tool_configs: Whether to persist tool configurations
-        """
+            summary_config: Optional model to use for generating conversation summaries
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is True.
+        """
         # Set up logger
         self.logger = logger or logging.getLogger(__name__)

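A hedged usage sketch of the constructor options documented above; the model name, key, and override values are placeholders rather than recommended settings.

```python
import logging

from tinyagent.tiny_agent import TinyAgent

agent = TinyAgent(
    model="gpt-4.1-mini",                                # placeholder model name
    api_key="sk-...",                                    # placeholder key
    logger=logging.getLogger("demo"),
    retry_config={"max_retries": 3, "min_backoff": 2},   # partial override; other keys keep their defaults
    parallel_tool_calls=True,                            # ask for parallel tool calls when the model supports them
    summary_config={"max_tokens": 2000},                 # later used by summarize()/compact()
)
```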
@@ -180,6 +426,12 @@ class TinyAgent:
         # Simplified hook system - single list of callbacks
         self.callbacks: List[callable] = []

+        # Configure LiteLLM to drop unsupported parameters
+        # This is also set globally at the module level, but we set it again here to be sure
+        import litellm
+        litellm.drop_params = True
+        self.logger.info("LiteLLM drop_params feature is enabled")
+
         # LiteLLM configuration
         self.model = model
         self.api_key = api_key
@@ -190,6 +442,14 @@ class TinyAgent:

         self.model_kwargs = model_kwargs
         self.encoder = tiktoken.get_encoding("o200k_base")
+
+        # Set up retry configuration
+        self.retry_config = DEFAULT_RETRY_CONFIG.copy()
+        if retry_config:
+            self.retry_config.update(retry_config)
+
+        # Set parallel tool calls preference
+        self.parallel_tool_calls = parallel_tool_calls

         # Conversation state
         self.messages = [{
@@ -197,11 +457,16 @@ class TinyAgent:
             "content": system_prompt or DEFAULT_SYSTEM_PROMPT
         }]

+        self.summary_config = summary_config or {}
+
         # This list now accumulates tools from *all* connected MCP servers:
         self.available_tools: List[Dict[str, Any]] = []

-        # Control flow tools
-        self.exit_loop_tools = [
+        # Default built-in tools:
+        # - final_answer: Exit tool that completes the task and returns the final answer
+        # - ask_question: Exit tool that asks the user a question and waits for a response
+        # - notify_user: Non-exit tool that shares progress with the user without stopping the agent loop
+        self.default_tools = [
             {
                 "type": "function",
                 "function": {
@@ -231,6 +496,23 @@ class TinyAgent:
                         "required": ["question"]
                     }
                 }
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "notify_user",
+                    "description": "Share progress or status updates with the user without stopping the agent loop. Use this to keep the user informed during long-running tasks. Unlike final_answer and ask_question, this tool allows the agent to continue processing after sending the notification.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "message": {
+                                "type": "string",
+                                "description": "The progress update or status message to share with the user"
+                            }
+                        },
+                        "required": ["message"]
+                    }
+                }
             }
         ]

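For orientation, this is roughly what a notify_user round trip looks like in the message list; the dictionaries below are hand-written illustrations (ids and text invented), not captured output.

```python
# Sketch: an assistant message that calls notify_user, and the tool response the agent
# appends before continuing the loop (notify_user always resolves to "OK" in this release).
assistant_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [{
        "id": "call_123",                     # hypothetical tool-call id
        "type": "function",
        "function": {
            "name": "notify_user",
            "arguments": '{"message": "Still searching listings, 2 of 5 cities done."}',
        },
    }],
}

tool_response = {
    "role": "tool",
    "tool_call_id": "call_123",
    "name": "notify_user",
    "content": "OK",                          # non-exit tool: the agent loop keeps going
}
```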
@@ -376,7 +658,8 @@ class TinyAgent:
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=False # default off
+            persist_tool_configs=False, # default off
+            retry_config=None # Use default retry configuration
         )

         # Apply the session data directly instead of loading from storage
@@ -586,6 +869,42 @@ class TinyAgent:
         self.messages.append(user_message)
         await self._run_callbacks("message_add", message=self.messages[-1])

+        return await self._run_agent_loop(max_turns)
+
+    async def resume(self, max_turns: int = 10) -> str:
+        """
+        Resume the conversation without adding a new user message.
+
+        This method continues the conversation from the current state,
+        allowing the agent to process the existing conversation history
+        and potentially take additional actions.
+
+        Args:
+            max_turns: Maximum number of conversation turns
+
+        Returns:
+            The agent's response
+        """
+        # Ensure any deferred session-load happens exactly once
+        if self._needs_session_load:
+            self.logger.debug(f"Deferred session load detected for {self.session_id}; loading now")
+            await self.init_async()
+
+        # Notify start with resume flag
+        await self._run_callbacks("agent_start", resume=True)
+
+        return await self._run_agent_loop(max_turns)
+
+    async def _run_agent_loop(self, max_turns: int = 10) -> str:
+        """
+        Internal method that runs the agent's main loop.
+
+        Args:
+            max_turns: Maximum number of conversation turns
+
+        Returns:
+            The agent's response
+        """
         # Initialize loop control variables
         num_turns = 0
         next_turn_should_call_tools = True
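A short sketch of how run() and the new resume() relate; the prompts and the follow-up answer are illustrative only, and the model/key are placeholders.

```python
import asyncio

from tinyagent.tiny_agent import TinyAgent

async def demo():
    agent = TinyAgent(model="gpt-4.1-mini", api_key="sk-...")   # placeholders
    reply = await agent.run("Plan a weekend in Lisbon", max_turns=10)

    if reply.startswith("I need more information:"):
        # Answer the agent's question as a normal user message, then let it continue
        # from the existing history instead of starting a fresh run().
        agent.messages.append({"role": "user", "content": "Budget is 500 EUR."})
        reply = await agent.resume(max_turns=10)

    print(reply)

asyncio.run(demo())
```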
@@ -593,21 +912,41 @@ class TinyAgent:
         # The main agent loop
         while True:
             # Get all available tools including exit loop tools
-            all_tools = self.available_tools + self.exit_loop_tools
+            all_tools = self.available_tools + self.default_tools

             # Call LLM with messages and tools
             try:
                 self.logger.info(f"Calling LLM with {len(self.messages)} messages and {len(all_tools)} tools")

+                # Verify LiteLLM drop_params setting
+                import litellm
+                self.logger.info(f"LiteLLM drop_params is currently set to: {litellm.drop_params}")
+
                 # Notify LLM start
                 await self._run_callbacks("llm_start", messages=self.messages, tools=all_tools)

-                response = await litellm.acompletion(
+                # Use parallel_tool_calls based on user preference, default to False if not specified
+                use_parallel_tool_calls = self.parallel_tool_calls if self.parallel_tool_calls is not None else False
+
+                # Disable parallel_tool_calls for models known not to support it
+                unsupported_models = ["o1-mini", "o1-preview", "o3", "o4-mini"]
+                for unsupported_model in unsupported_models:
+                    if unsupported_model in self.model:
+                        old_value = use_parallel_tool_calls
+                        use_parallel_tool_calls = False
+                        if old_value:
+                            self.logger.warning(f"Disabling parallel_tool_calls for model {self.model} as it's known not to support it")
+
+                self.logger.info(f"Using parallel tool calls: {use_parallel_tool_calls}")
+
+                # Use our retry wrapper instead of direct litellm call
+                response = await self._litellm_with_retry(
                     model=self.model,
                     api_key=self.api_key,
                     messages=self.messages,
                     tools=all_tools,
                     tool_choice="auto",
+                    parallel_tool_calls=use_parallel_tool_calls,
                     temperature=self.temperature,
                     **self.model_kwargs
                 )
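The unsupported-model check above is a plain substring match against the model string; a minimal standalone sketch of the same logic shows what that implies for a few example names.

```python
# Sketch of the substring check used above, outside the agent.
unsupported_models = ["o1-mini", "o1-preview", "o3", "o4-mini"]

def effective_parallel_tool_calls(model: str, requested: bool) -> bool:
    # Any model string containing one of the unsupported substrings is forced to False.
    if any(bad in model for bad in unsupported_models):
        return False
    return requested

print(effective_parallel_tool_calls("gpt-4.1", True))         # True
print(effective_parallel_tool_calls("openai/o4-mini", True))  # False (matches "o4-mini")
print(effective_parallel_tool_calls("o3-mini", True))         # False ("o3" is a substring of "o3-mini")
```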
@@ -645,12 +984,19 @@ class TinyAgent:
                 if has_tool_calls:
                     self.logger.info(f"Tool calls detected: {len(tool_calls)}")

-                    # Process each tool call one by one
-                    for tool_call in tool_calls:
+                    # Create a list to hold all the tool execution tasks
+                    tool_tasks = []
+
+                    # Create a function to process a single tool call
+                    async def process_tool_call(tool_call):
                         tool_call_id = tool_call.id
                         function_info = tool_call.function
                         tool_name = function_info.name

+                        await self._run_callbacks("tool_start", tool_call=tool_call)
+
+                        tool_result_content = ""
+
                         # Create a tool message
                         tool_message = {
                             "role": "tool",
@@ -671,28 +1017,25 @@ class TinyAgent:
                             # Handle control flow tools
                             if tool_name == "final_answer":
                                 # Add a response for this tool call before returning
-                                tool_message["content"] = tool_args.get("content", "Task completed without final answer.!!!")
-                                self.messages.append(tool_message)
-                                await self._run_callbacks("message_add", message=tool_message)
-                                await self._run_callbacks("agent_end", result="Task completed.")
-                                return tool_message["content"]
+                                tool_result_content = tool_args.get("content", "Task completed without final answer.!!!")
                             elif tool_name == "ask_question":
                                 question = tool_args.get("question", "Could you provide more details?")
                                 # Add a response for this tool call before returning
-                                tool_message["content"] = f"Question asked: {question}"
-                                self.messages.append(tool_message)
-                                await self._run_callbacks("message_add", message=tool_message)
-                                await self._run_callbacks("agent_end", result=f"I need more information: {question}")
-                                return f"I need more information: {question}"
+                                tool_result_content = f"Question asked: {question}"
+                            elif tool_name == "notify_user":
+                                message = tool_args.get("message", "No message provided.")
+                                self.logger.info(f"Received notify_user tool call with message: {message}")
+                                # Set the tool result content
+                                tool_result_content = "OK"
                             else:
                                 # Check if it's a custom tool first
                                 if tool_name in self.custom_tool_handlers:
-                                    tool_message["content"] = await self._execute_custom_tool(tool_name, tool_args)
+                                    tool_result_content = await self._execute_custom_tool(tool_name, tool_args)
                                 else:
                                     # Dispatch to the proper MCPClient
                                     client = self.tool_to_client.get(tool_name)
                                     if not client:
-                                        tool_message["content"] = f"No MCP server registered for tool '{tool_name}'"
+                                        tool_result_content = f"No MCP server registered for tool '{tool_name}'"
                                     else:
                                         try:
                                             self.logger.debug(f"Calling tool {tool_name} with args: {tool_args}")
@@ -703,24 +1046,50 @@ class TinyAgent:
                                             if content_list:
                                                 # Try different ways to extract the content
                                                 if hasattr(content_list[0], 'text'):
-                                                    tool_message["content"] = content_list[0].text
+                                                    tool_result_content = content_list[0].text
                                                 elif isinstance(content_list[0], dict) and 'text' in content_list[0]:
-                                                    tool_message["content"] = content_list[0]['text']
+                                                    tool_result_content = content_list[0]['text']
                                                 else:
-                                                    tool_message["content"] = str(content_list)
+                                                    tool_result_content = str(content_list)
                                             else:
-                                                tool_message["content"] = "Tool returned no content"
+                                                tool_result_content = "Tool returned no content"
                                         except Exception as e:
                                             self.logger.error(f"Error calling tool {tool_name}: {str(e)}")
-                                            tool_message["content"] = f"Error executing tool {tool_name}: {str(e)}"
+                                            tool_result_content = f"Error executing tool {tool_name}: {str(e)}"
                         except Exception as e:
                             # If any error occurs during tool call processing, make sure we still have a tool response
                             self.logger.error(f"Unexpected error processing tool call {tool_call_id}: {str(e)}")
-                            tool_message["content"] = f"Error processing tool call: {str(e)}"
-
-                        # Always add the tool message to ensure each tool call has a response
+                            tool_result_content = f"Error processing tool call: {str(e)}"
+                        finally:
+                            # Always add the tool message to ensure each tool call has a response
+                            tool_message["content"] = tool_result_content
+                            await self._run_callbacks("tool_end", tool_call=tool_call, result=tool_result_content)
+                            return tool_message
+
+                    # Create tasks for all tool calls
+                    for tool_call in tool_calls:
+                        tool_tasks.append(process_tool_call(tool_call))
+
+                    # Execute all tool calls concurrently
+                    tool_messages = await asyncio.gather(*tool_tasks)
+
+                    # Process results of tool calls
+                    for tool_message in tool_messages:
                         self.messages.append(tool_message)
                         await self._run_callbacks("message_add", message=tool_message)
+
+                        # Handle special exit tools
+                        if tool_message["name"] == "final_answer":
+                            await self._run_callbacks("agent_end", result="Task completed.")
+                            return tool_message["content"]
+                        elif tool_message["name"] == "ask_question":
+                            # Extract the question from the original tool call
+                            for tc in tool_calls:
+                                if tc.id == tool_message["tool_call_id"]:
+                                    args = json.loads(tc.function.arguments)
+                                    question = args.get("question", "")
+                                    await self._run_callbacks("agent_end", result=f"I need more information: {question}")
+                                    return f"I need more information: {question}"

                     next_turn_should_call_tools = False
                 else:
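The change above replaces the sequential per-tool loop with concurrent execution. Stripped of the agent specifics, the underlying pattern is just asyncio.gather over one coroutine per tool call, as in this self-contained sketch.

```python
import asyncio

# Sketch of the concurrency pattern used above: build one coroutine per tool call,
# run them together, then handle the results in their original order.
async def call_tool(name: str, delay: float) -> str:
    await asyncio.sleep(delay)   # stand-in for an MCP or custom tool call
    return f"{name} done"

async def main():
    calls = [("search_flights", 0.3), ("search_hotels", 0.2), ("get_weather", 0.1)]
    tasks = [call_tool(name, d) for name, d in calls]
    results = await asyncio.gather(*tasks)   # all three run concurrently
    for message in results:                  # gather preserves the submission order
        print(message)

asyncio.run(main())
```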
@@ -866,6 +1235,154 @@ class TinyAgent:
             self._needs_session_load = False

         return self
+
+    def _is_rate_limit_error(self, exception: Exception) -> bool:
+        """
+        Check if an exception is a rate limit error that should be handled with longer backoff.
+
+        Args:
+            exception: The exception to check
+
+        Returns:
+            True if this is a rate limit error, False otherwise
+        """
+        if not exception:
+            return False
+
+        # Check for LiteLLM RateLimitError
+        error_name = exception.__class__.__name__
+        if "RateLimitError" in error_name:
+            return True
+
+        # Check for rate limit in the error message
+        error_message = str(exception).lower()
+        rate_limit_indicators = [
+            "rate limit",
+            "rate_limit_error",
+            "rate-limit",
+            "too many requests",
+            "quota exceeded",
+            "requests per minute",
+            "requests per hour",
+            "requests per day",
+            "rate limiting",
+            "throttled"
+        ]
+
+        for indicator in rate_limit_indicators:
+            if indicator in error_message:
+                return True
+
+        # Check for specific HTTP status codes (429 = Too Many Requests)
+        status_code = getattr(exception, "status_code", None)
+        if status_code == 429:
+            return True
+
+        return False
+
+    async def _litellm_with_retry(self, **kwargs) -> Any:
+        """
+        Execute litellm.acompletion with retry logic for handling transient errors.
+
+        Args:
+            **kwargs: Arguments to pass to litellm.acompletion
+
+        Returns:
+            The response from litellm.acompletion
+
+        Raises:
+            Exception: If all retries fail
+        """
+        max_retries = self.retry_config["max_retries"]
+        min_backoff = self.retry_config["min_backoff"]
+        max_backoff = self.retry_config["max_backoff"]
+        backoff_multiplier = self.retry_config["backoff_multiplier"]
+        jitter = self.retry_config["jitter"]
+        retry_status_codes = self.retry_config["retry_status_codes"]
+        retry_exceptions = self.retry_config["retry_exceptions"]
+
+        # Rate limit specific configuration
+        rate_limit_backoff_min = self.retry_config.get("rate_limit_backoff_min", 60) # 60 seconds
+        rate_limit_backoff_max = self.retry_config.get("rate_limit_backoff_max", 90) # 90 seconds
+
+        attempt = 0
+        last_exception = None
+
+        # Log the model and key parameters being used
+        model_name = kwargs.get('model', 'unknown')
+        self.logger.debug(f"Calling LiteLLM with model: {model_name}")
+        if 'parallel_tool_calls' in kwargs:
+            self.logger.debug(f"Using parallel_tool_calls={kwargs['parallel_tool_calls']}")
+
+        while attempt <= max_retries:
+            try:
+                # First attempt or retry
+                if attempt > 0:
+                    # Check if this is a rate limit error and handle it specially
+                    is_rate_limit_error = self._is_rate_limit_error(last_exception)
+
+                    if is_rate_limit_error:
+                        # Use longer backoff for rate limit errors (60-90 seconds)
+                        backoff = rate_limit_backoff_min + (rate_limit_backoff_max - rate_limit_backoff_min) * random.random()
+                        self.logger.warning(
+                            f"Rate limit error detected. Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+                    else:
+                        # Use normal exponential backoff for other errors
+                        backoff = min(max_backoff, min_backoff * (backoff_multiplier ** (attempt - 1)))
+
+                        # Add jitter if enabled (±20% randomness)
+                        if jitter:
+                            backoff = backoff * (0.8 + 0.4 * random.random())
+
+                        self.logger.warning(
+                            f"Retry attempt {attempt}/{max_retries} for LLM call after {backoff:.2f}s delay. "
+                            f"Previous error: {str(last_exception)}"
+                        )
+
+                    # Wait before retry
+                    await asyncio.sleep(backoff)
+
+                # Make the actual API call
+                return await litellm.acompletion(**kwargs)
+
+            except Exception as e:
+                last_exception = e
+                error_name = e.__class__.__name__
+                full_error_path = f"{e.__class__.__module__}.{error_name}" if hasattr(e, "__module__") else error_name
+
+                # Check if this exception should trigger a retry
+                should_retry = False
+
+                # Check for status code in exception (if available)
+                status_code = getattr(e, "status_code", None)
+                if status_code and status_code in retry_status_codes:
+                    should_retry = True
+
+                # Check exception type against retry list
+                for exception_path in retry_exceptions:
+                    if exception_path in full_error_path:
+                        should_retry = True
+                        break
+
+                if not should_retry or attempt >= max_retries:
+                    # Either not a retryable error or we've exhausted retries
+                    self.logger.error(
+                        f"LLM call failed after {attempt} attempt(s). Error: {str(e)}"
+                    )
+                    raise
+
+                # Log the error and continue to next retry attempt
+                error_type = "rate limit" if self._is_rate_limit_error(e) else "general"
+                self.logger.warning(
+                    f"LLM call failed (attempt {attempt+1}/{max_retries+1}) - {error_type} error: {str(e)}. Will retry."
+                )
+
+            attempt += 1
+
+        # This should not be reached due to the raise in the loop, but just in case:
+        raise last_exception

     @classmethod
     async def create(
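To make the backoff schedule concrete, here is a small standalone sketch that reproduces the delay computation above with the default configuration; printed values will vary because of the jitter.

```python
import random

cfg = {"max_retries": 5, "min_backoff": 1, "max_backoff": 60,
       "backoff_multiplier": 2, "jitter": True}

for attempt in range(1, cfg["max_retries"] + 1):
    backoff = min(cfg["max_backoff"], cfg["min_backoff"] * (cfg["backoff_multiplier"] ** (attempt - 1)))
    if cfg["jitter"]:
        backoff *= 0.8 + 0.4 * random.random()   # ±20% jitter, as in the method above
    print(f"attempt {attempt}: wait ~{backoff:.1f}s")

# Without jitter the waits are 1, 2, 4, 8, 16 seconds (capped at max_backoff); rate-limit
# errors instead wait a flat random value between rate_limit_backoff_min and
# rate_limit_backoff_max (60-90 seconds by default).
```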
@@ -881,11 +1398,39 @@ class TinyAgent:
         session_id: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
         storage: Optional[Storage] = None,
-        persist_tool_configs: bool = False
+        persist_tool_configs: bool = False,
+        retry_config: Optional[Dict[str, Any]] = None,
+        parallel_tool_calls: Optional[bool] = True,
     ) -> "TinyAgent":
         """
         Async factory: constructs the agent, then loads an existing session
         if (storage and session_id) were provided.
+
+        Args:
+            model: The model to use with LiteLLM
+            api_key: The API key for the model provider
+            system_prompt: Custom system prompt for the agent
+            temperature: Temperature parameter for the model (controls randomness)
+            logger: Optional logger to use
+            model_kwargs: Additional keyword arguments to pass to the model
+            user_id: Optional user ID for the session
+            session_id: Optional session ID (if provided with storage, will attempt to load existing session)
+            metadata: Optional metadata for the session
+            storage: Optional storage backend for persistence
+            persist_tool_configs: Whether to persist tool configurations
+            retry_config: Optional configuration for LLM API call retries. Supports:
+                - max_retries: Maximum number of retry attempts (default: 5)
+                - min_backoff: Minimum backoff time in seconds (default: 1)
+                - max_backoff: Maximum backoff time in seconds (default: 60)
+                - backoff_multiplier: Exponential backoff multiplier (default: 2)
+                - jitter: Whether to add randomness to backoff (default: True)
+                - retry_status_codes: HTTP status codes to retry on (default: [429, 500, 502, 503, 504])
+                - retry_exceptions: Exception types to retry on (default: includes RateLimitError, etc.)
+                - rate_limit_backoff_min: Minimum wait time for rate limit errors (default: 60 seconds)
+                - rate_limit_backoff_max: Maximum wait time for rate limit errors (default: 90 seconds)
+            parallel_tool_calls: Whether to enable parallel tool calls. If True, the agent will ask the model
+                to execute multiple tool calls in parallel when possible. Some models like GPT-4
+                and Claude 3 support this feature. Default is None (disabled).
         """
         agent = cls(
             model=model,
@@ -898,7 +1443,9 @@ class TinyAgent:
             session_id=session_id,
             metadata=metadata,
             storage=storage,
-            persist_tool_configs=persist_tool_configs
+            persist_tool_configs=persist_tool_configs,
+            retry_config=retry_config,
+            parallel_tool_calls=parallel_tool_calls
         )
         if agent._needs_session_load:
             await agent.init_async()
@@ -934,6 +1481,162 @@ class TinyAgent:

         # Tool configs would be handled separately if needed

+    async def summarize(self) -> str:
+        """
+        Generate a summary of the current conversation history.
+
+        Args:
+            custom_model: Optional model to use for summary generation (overrides self.summary_model)
+            custom_system_prompt: Optional system prompt for summary generation (overrides self.summary_system_prompt)
+
+        Returns:
+            A string containing the conversation summary
+        """
+        # Skip if there are no messages or just the system message
+        if len(self.messages) <= 1:
+            return "No conversation to summarize."
+
+        # Use provided parameters or defaults
+        system_prompt = self.summary_config.get("system_prompt",DEFAULT_SUMMARY_SYSTEM_PROMPT)
+
+        # Format the conversation into a single string
+        conversation_text = self._format_conversation_for_summary()
+
+        task_prompt = load_template(str(Path(__file__).parent / "prompts" / "summarize.yaml"),"user_prompt")
+
+        # Build the prompt for the summary model
+        summary_messages = [
+            {
+                "role": "system",
+                "content": system_prompt
+            },
+            {
+                "role": "user",
+                #"content": f"Here is the conversation so far:\n{conversation_text}\n\nPlease summarize this conversation, covering:\n0. What is the task its requirments, goals and constraints\n1. Tasks performed and outcomes\n2. Code files, modules, or functions modified or examined\n3. Important decisions or assumptions made\n4. Errors encountered and test or build results\n5. Remaining tasks, open questions, or next steps\nProvide the summary in a clear, concise format."
+                "content":conversation_text
+            },
+            {
+                "role": "user",
+                "content": task_prompt
+            }
+        ]
+
+        try:
+            # Log that we're generating a summary
+            self.logger.info(f"Generating conversation summary using model {self.summary_config.get('model',self.model)}")
+
+            # Call the LLM to generate the summary using our retry wrapper
+            response = await self._litellm_with_retry(
+                model=self.summary_config.get("model",self.model),
+                api_key=self.summary_config.get("api_key",self.api_key),
+                messages=summary_messages,
+                temperature=self.summary_config.get("temperature",self.temperature),
+                max_tokens=self.summary_config.get("max_tokens",8000)
+            )
+
+            # Extract the summary from the response
+            summary = response.choices[0].message.content
+            return summary
+
+        except Exception as e:
+            self.logger.error(f"Error generating conversation summary: {str(e)}")
+            return f"Failed to generate summary: {str(e)}"
+
+    async def compact(self) -> bool:
+        """
+        Compact the conversation history by replacing it with a summary.
+
+        This method:
+        1. Generates a summary of the current conversation
+        2. If successful, replaces the conversation with just [system, user] messages
+           where the user message contains the summary
+        3. Returns True if compaction was successful, False otherwise
+
+        Returns:
+            Boolean indicating whether the compaction was successful
+        """
+        # Skip if there are no messages or just the system message
+        if len(self.messages) <= 1:
+            self.logger.info("No conversation to compact.")
+            return False
+
+        # Generate the summary
+        summary = await self.summarize()
+
+        # Check if the summary generation was successful
+        if summary.startswith("Failed to generate summary:") or summary == "No conversation to summarize.":
+            self.logger.error(f"Compaction failed: {summary}")
+            return False
+
+        # Save the system message
+        system_message = self.messages[0]
+
+
+        # Create a new user message with the summary
+        summary_message = {
+            "role": "user",
+            "content": f"This session is being continued from a previous conversation that ran out of context. The conversation is summarized below:\n{summary}",
+            "created_at": int(time.time())
+        }
+
+        # Replace the conversation with just [system, user] messages
+        self.messages = [system_message, summary_message]
+
+        # Notify about the compaction
+        self.logger.info("🤐Conversation successfully compacted.")
+        await self._run_callbacks("message_add", message=summary_message)
+
+        return True
+
+    def _format_conversation_for_summary(self) -> str:
+        """
+        Format the conversation history into a string for summarization.
+
+        Returns:
+            A string representing the conversation in the format:
+            user: content
+            assistant: content
+            tool_call: tool name and args
+            tool_response: response content
+            ...
+        """
+        formatted_lines = []
+
+        # Skip the system message (index 0)
+        for message in self.messages[1:]:
+            role = message.get("role", "unknown")
+
+            if role == "user":
+                formatted_lines.append(f"user: {message.get('content', '')}")
+
+            elif role == "assistant":
+                content = message.get("content", "")
+                tool_calls = message.get("tool_calls", [])
+
+                # Add assistant message content if present
+                if content:
+                    formatted_lines.append(f"assistant: {content}")
+
+                # Add tool calls if present
+                for tool_call in tool_calls:
+                    function_info = tool_call.get("function", {})
+                    tool_name = function_info.get("name", "unknown_tool")
+                    arguments = function_info.get("arguments", "{}")
+
+                    formatted_lines.append(f"tool_call: {tool_name} with args {arguments}")
+
+            elif role == "tool":
+                tool_name = message.get("name", "unknown_tool")
+                content = message.get("content", "")
+                formatted_lines.append(f"tool_response: {content}")
+
+            else:
+                # Handle any other message types
+                formatted_lines.append(f"{role}: {message.get('content', '')}")
+
+        return [{'type': 'text', 'text': f"{x}"} for x in formatted_lines]
+        #return "\n".join(formatted_lines)
+
 async def run_example():
     """Example usage of TinyAgent with proper logging."""
     import os
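A brief usage sketch of the new summarization API added above; the model, key, and prompt text are placeholders.

```python
import asyncio

from tinyagent.tiny_agent import TinyAgent

async def demo():
    agent = TinyAgent(
        model="gpt-4.1-mini",                                          # placeholder
        api_key="sk-...",                                              # placeholder
        summary_config={"model": "gpt-4.1-mini", "max_tokens": 2000},  # optional overrides
    )
    await agent.run("Refactor the storage layer and list the remaining TODOs", max_turns=10)

    # Either inspect the summary...
    print(await agent.summarize())

    # ...or compact in place: on success the history becomes [system, summary-as-user-message].
    if await agent.compact():
        print(f"History now has {len(agent.messages)} messages")

asyncio.run(demo())
```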
@@ -970,16 +1673,39 @@ async def run_example():
         agent_logger.error("Please set the OPENAI_API_KEY environment variable")
         return

-    # Initialize the agent with our logger
-    agent = await TinyAgent.create(
-        model="gpt-4.1-mini",
+    # Custom retry configuration - more aggressive than default
+    custom_retry_config = {
+        "max_retries": 3, # Fewer retries for the example
+        "min_backoff": 2, # Start with 2 seconds
+        "max_backoff": 30, # Max 30 seconds between retries
+        "retry_exceptions": [
+            "litellm.InternalServerError",
+            "litellm.APIError",
+            "litellm.APIConnectionError",
+            "litellm.RateLimitError",
+            "litellm.ServiceUnavailableError",
+            "litellm.APITimeoutError",
+            "TimeoutError", # Add any additional exceptions
+            "ConnectionError"
+        ],
+        # Rate limit specific configuration
+        "rate_limit_backoff_min": 60, # Wait 60-90 seconds for rate limit errors
+        "rate_limit_backoff_max": 90, # This is the recommended range for most APIs
+    }
+
+    # Example 1: Using a model that supports parallel function calling (GPT-4)
+    agent_logger.info("Example 1: Using a model that supports parallel function calling (GPT-4)")
+    agent1 = await TinyAgent.create(
+        model="gpt-4", # A model that supports parallel function calling
         api_key=api_key,
         logger=agent_logger,
-        session_id="my-session-123",
-        storage=None
+        session_id="parallel-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True, # Explicitly enable parallel function calling
+        drop_unsupported_params=True # Enable dropping unsupported parameters
     )

-    # Add the Rich UI callback with our logger
+    # Add the Rich UI callback
     rich_ui = RichUICallback(
         markdown=True,
         show_message=True,
@@ -987,15 +1713,51 @@ async def run_example():
         show_tool_calls=True,
         logger=ui_logger
     )
-    agent.add_callback(rich_ui)
+    agent1.add_callback(rich_ui)
+
+    # Connect to MCP servers for additional tools
+    try:
+        await agent1.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")
+
+    # Run the agent with a task that would benefit from parallel function calling
+    user_input1 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input1}")
+    result1 = await agent1.run(user_input1, max_turns=10)
+    agent_logger.info(f"Final result from example 1: {result1}")
+
+    # Clean up
+    await agent1.close()
+
+    # Example 2: Using a model that doesn't support parallel function calling (o4-mini)
+    agent_logger.info("\nExample 2: Using a model that doesn't support parallel function calling (o4-mini)")
+    agent2 = await TinyAgent.create(
+        model="o4-mini", # A model that doesn't support parallel function calling
+        api_key=api_key,
+        logger=agent_logger,
+        session_id="o4-mini-example",
+        retry_config=custom_retry_config,
+        parallel_tool_calls=True, # We still set this to True, but it will be automatically disabled
+        drop_unsupported_params=True # Enable dropping unsupported parameters
+    )
+
+    # Add the Rich UI callback
+    agent2.add_callback(rich_ui)

-    # Run the agent with a user query
-    user_input = "What is the capital of France?"
-    agent_logger.info(f"Running agent with input: {user_input}")
-    result = await agent.run(user_input)
+    # Connect to the same MCP server
+    try:
+        await agent2.connect_to_server("npx", ["-y", "@openbnb/mcp-server-airbnb", "--ignore-robots-txt"])
+    except Exception as e:
+        agent_logger.error(f"Failed to connect to MCP servers: {e}")

-    agent_logger.info(f"Final result: {result}")
+    # Run the agent with the same task
+    user_input2 = "Compare the weather in Tokyo, New York, and Paris for planning a trip next week."
+    agent_logger.info(f"Running agent with input: {user_input2}")
+    result2 = await agent2.run(user_input2, max_turns=10)
+    agent_logger.info(f"Final result from example 2: {result2}")

     # Clean up
-    await agent.close()
-    agent_logger.debug("Example completed")
+    await agent2.close()
+
+    agent_logger.debug("Examples completed")