vectara-agentic 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of vectara-agentic might be problematic.

@@ -42,6 +42,35 @@ def get_event_id(event) -> str:
 
     return str(uuid.uuid4())
 
+
+def is_tool_related_event(event) -> bool:
+    """
+    Determine if an event is actually tool-related and should be tracked.
+
+    This should only return True for events that represent actual tool calls or tool outputs,
+    not for streaming text deltas or other LLM response events.
+
+    Args:
+        event: The stream event to check
+
+    Returns:
+        bool: True if this event should be tracked for tool purposes
+    """
+    # Track explicit tool events from LlamaIndex workflow
+    if isinstance(event, (ToolCall, ToolCallResult)):
+        return True
+
+    has_tool_id = getattr(event, "tool_id", None)
+    has_tool_name = getattr(event, "tool_name", None)
+    has_delta = getattr(event, "delta", None)
+
+    # Some providers don't emit ToolCall/ToolCallResult; avoid treating deltas as tool events
+    if (has_tool_id or has_tool_name) and not has_delta:
+        return True
+
+    return False
+
+
 class StreamingResponseAdapter:
     """
     Adapter class that provides a LlamaIndex-compatible streaming response interface.
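
As an illustration of how the new module-level helper behaves, here is a minimal sketch using plain stand-in objects (not LlamaIndex event types) to show the getattr-based fallback:

from types import SimpleNamespace

# Stand-in events for illustration only; real events come from the LlamaIndex workflow.
delta_event = SimpleNamespace(delta="Hel", tool_calls=None)
tool_event = SimpleNamespace(tool_id="call_1", tool_name="search", delta=None)

assert is_tool_related_event(delta_event) is False  # streaming text delta: not a tool event
assert is_tool_related_event(tool_event) is True    # carries tool_id/tool_name and no delta
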
@@ -90,25 +119,6 @@ class StreamingResponseAdapter:
         """
         return AgentResponse(response=self.response, metadata=self.metadata)
 
-    def wait_for_completion(self) -> None:
-        """
-        Wait for post-processing to complete and update metadata.
-        This should be called after streaming finishes but before accessing metadata.
-        """
-        if self.post_process_task and not self.post_process_task.done():
-            return
-        if self.post_process_task and self.post_process_task.done():
-            try:
-                final_response = self.post_process_task.result()
-                if hasattr(final_response, "metadata") and final_response.metadata:
-                    # Update our metadata from the completed task
-                    self.metadata.update(final_response.metadata)
-            except Exception as e:
-                logging.error(
-                    f"Error during post-processing: {e}. "
-                    "Ensure the post-processing task is correctly implemented."
-                )
-
 
 def extract_response_text_from_chat_message(response_text: Any) -> str:
     """
@@ -123,15 +133,15 @@ def extract_response_text_from_chat_message(response_text: Any) -> str:
         str: Extracted text content
     """
     # Handle case where response is a ChatMessage object
-    if hasattr(response_text, "content"):
-        return response_text.content
-    elif hasattr(response_text, "blocks"):
+    if hasattr(response_text, "blocks"):
         # Extract text from ChatMessage blocks
         text_parts = []
         for block in response_text.blocks:
             if hasattr(block, "text"):
                 text_parts.append(block.text)
         return "".join(text_parts)
+    elif hasattr(response_text, "content"):
+        return response_text.content
     elif not isinstance(response_text, str):
         return str(response_text)
 
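
The reordering above matters when a ChatMessage-like object exposes both attributes; a small sketch with a stand-in object (not a real ChatMessage) shows the blocks path now winning:

from types import SimpleNamespace

# Stand-in message for illustration; a real ChatMessage exposes both .blocks and .content.
blocks = [SimpleNamespace(text="Hello, "), SimpleNamespace(text="world")]
msg = SimpleNamespace(blocks=blocks, content=None)

# With blocks checked first, the text is reassembled even when .content is empty or None.
assert extract_response_text_from_chat_message(msg) == "Hello, world"
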
@@ -234,9 +244,8 @@ def create_stream_post_processing_task(
     async def _safe_post_process():
         try:
             return await _post_process()
-        except Exception:
-            traceback.print_exc()
-            # Return empty response on error
+        except Exception as e:
+            logging.error(f"Error {e} occurred during post-processing: {traceback.format_exc()}")
             return AgentResponse(response="", metadata={})
 
     return asyncio.create_task(_safe_post_process())
@@ -244,205 +253,203 @@ def create_stream_post_processing_task(
 
 class FunctionCallingStreamHandler:
     """
-    Handles streaming for function calling agents with proper event processing.
+    Streaming handler for function-calling agents with strict "no leaks" gating.
+
+    Core ideas:
+    - Buffer tokens PER LLM STEP.
+    - Commit the buffer ONLY if that step ends with AgentOutput.tool_calls == [].
+    - Drop the buffer if the step triggers tool calls (planning/tool-selection).
+    - Track pending tool results; handle multi-round (tool -> read -> tool -> ...) loops.
+    - Support return_direct tools (tool output is the final answer, no synthesis step).
+    - Two streaming modes:
+      - final_only: Buffer all tokens and commit only after step completes with no tool calls
+      - optimistic_live: Stream tokens live after all tool calls are complete
     """
 
-    def __init__(self, agent_instance, handler, prompt: str):
+    def __init__(
+        self,
+        agent_instance,
+        handler,
+        prompt: str,
+        *,
+        stream_policy: str = "optimistic_live",  # "final_only" | "optimistic_live"
+    ):
         self.agent_instance = agent_instance
-        self.handler = handler
+        self.handler = handler  # awaitable; also has .stream_events()
         self.prompt = prompt
+
+        self.stream_policy = stream_policy
+
+        # Plumbing for your existing adapter/post-processing
         self.final_response_container = {"resp": None}
         self.stream_complete_event = asyncio.Event()
 
     async def process_stream_events(self) -> AsyncIterator[str]:
         """
-        Process streaming events and yield text tokens.
+        Process streaming events and yield only valid, final tokens.
 
-        Yields:
-            str: Text tokens from the streaming response
+        Contract:
+        - Never surface "planning" tokens (tool arguments, scratchpads, etc).
+        - Only surface tokens produced in the last, post-tool LLM step,
+          or a return_direct tool's output.
         """
-        had_tool_calls = False
-        transitioned_to_prose = False
+        # Step-scoped state
+        step_buffer: list[str] = []
+        step_has_tool_calls = False
+
+        # Run-scoped state
+        pending_tools = 0
+        committed_any_text = False
+
+        def _reset_step():
+            nonlocal step_has_tool_calls
+            step_buffer.clear()
+            step_has_tool_calls = False
 
         async for ev in self.handler.stream_events():
-            # Store tool outputs for VHC regardless of progress callback
+            # ---- 1) Capture tool outputs for downstream logging/telemetry ----
             if isinstance(ev, ToolCallResult):
                 if hasattr(self.agent_instance, "_add_tool_output"):
                     # pylint: disable=W0212
-                    self.agent_instance._add_tool_output(
-                        ev.tool_name, str(ev.tool_output)
-                    )
+                    self.agent_instance._add_tool_output(ev.tool_name, str(ev.tool_output))
+
+                pending_tools = max(0, pending_tools - 1)
+
+                # Return-direct short-circuit: surface tool output as the final answer
+                if getattr(ev, "return_direct", False):
+                    yield str(ev.tool_output)
+                    committed_any_text = True
+                    # Do not early-break; keep draining events safely.
 
-            # Handle progress callbacks if available
+            # ---- 2) Progress callback plumbing (safe and optional) ----
             if self.agent_instance.agent_progress_callback:
-                # Only track events that are actual tool-related events
-                if self._is_tool_related_event(ev):
+                if is_tool_related_event(ev):
                     try:
                         event_id = get_event_id(ev)
                         await self._handle_progress_callback(ev, event_id)
-                    except ValueError as e:
-                        logging.warning(f"Skipping event due to missing ID: {e}")
-                        continue
+                    except Exception as e:
+                        logging.warning(f"[progress-callback] skipping event: {e}")
+
+            # ---- 3) Step boundaries & gating logic ----
+            # New step starts: clear per-step state
+            if isinstance(ev, AgentInput):
+                _reset_step()
+                continue
 
-            # Process streaming text events
+            # Streaming deltas (provisional)
             if hasattr(ev, "__class__") and "AgentStream" in str(ev.__class__):
-                if hasattr(ev, "tool_calls") and ev.tool_calls:
-                    had_tool_calls = True
-                elif (
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and had_tool_calls
-                    and not transitioned_to_prose
-                ):
-                    yield "\n\n"
-                    transitioned_to_prose = True
-                    if hasattr(ev, "delta"):
-                        yield ev.delta
-                elif (
-                    hasattr(ev, "tool_calls")
-                    and not ev.tool_calls
-                    and hasattr(ev, "delta")
-                    and transitioned_to_prose
-                ):
-                    yield ev.delta
+                # If the model is constructing a function call, LlamaIndex will attach tool_calls here
+                if getattr(ev, "tool_calls", None):
+                    step_has_tool_calls = True
 
-        # When stream is done, await the handler to get the final response
+                delta = getattr(ev, "delta", None)
+                if not delta:
+                    continue
+
+                # Always buffer first
+                step_buffer.append(delta)
+
+                # Stream live only after all tools are complete
+                if self.stream_policy == "optimistic_live" and pending_tools == 0:
+                    yield delta
+
+                continue
+
+            # Step end: decide to commit or drop
+            if isinstance(ev, AgentOutput):
+                n_calls = len(getattr(ev, "tool_calls", []) or [])
+
+                if n_calls == 0:
+                    # Final text step -> commit
+                    if self.stream_policy == "final_only":
+                        # We held everything; now stream it out in order.
+                        for chunk in step_buffer:
+                            yield chunk
+                    # In optimistic mode, tokens were streamed live after tools completed.
+
+                    committed_any_text = committed_any_text or bool(step_buffer)
+                    _reset_step()
+
+                else:
+                    # Planning/tool step -> drop buffer
+                    _reset_step()
+                    pending_tools += n_calls
+
+                continue
+
+        # ---- 4) Finish: await the underlying handler for the final result ----
         try:
             self.final_response_container["resp"] = await self.handler
         except Exception as e:
             error_str = str(e).lower()
             if "rate limit" in error_str or "429" in error_str:
-                logging.error(f"[RATE_LIMIT_ERROR] Rate limit exceeded: {e}")
+                logging.error(f"[RATE_LIMIT_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Rate limit exceeded. Please try again later.",
                     source_nodes=[],
                     metadata={"error_type": "rate_limit", "original_error": str(e)},
                 )
             else:
-                logging.error(f"[STREAM_ERROR] Error processing stream events: {e}")
-                logging.error(
-                    f"[STREAM_ERROR] Full traceback: {traceback.format_exc()}"
-                )
+                logging.error(f"[STREAM_ERROR] {e}")
                 self.final_response_container["resp"] = AgentResponse(
                     response="Response completion Error",
                     source_nodes=[],
                     metadata={"error_type": "general", "original_error": str(e)},
                 )
         finally:
-            # Signal that stream processing is complete
+            # If nothing was ever committed and we ended right after a tool,
+            # assume that tool's output is the "final answer" (common with return_direct).
             self.stream_complete_event.set()
 
-    def _is_tool_related_event(self, event) -> bool:
+    async def _handle_progress_callback(self, event, event_id: str):
         """
-        Determine if an event is actually tool-related and should be tracked.
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
+        Fan out progress events to the user's callback (sync or async). Mirrors your existing logic.
         """
-        # Track explicit tool events from LlamaIndex workflow
-        if isinstance(event, (ToolCall, ToolCallResult)):
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
+        cb = self.agent_instance.agent_progress_callback
+        is_async = asyncio.iscoroutinefunction(cb)
 
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
-
-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
-    async def _handle_progress_callback(self, event, event_id: str):
-        """Handle progress callback events for different event types with proper context propagation."""
         try:
             if isinstance(event, ToolCall):
-                # Check if callback is async or sync
-                if asyncio.iscoroutinefunction(
-                    self.agent_instance.agent_progress_callback
-                ):
-                    await self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_CALL,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "arguments": json.dumps(event.tool_kwargs),
-                        },
-                        event_id=event_id,
-                    )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "arguments": json.dumps(getattr(event, "tool_kwargs", {})),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)
                 else:
-                    # For sync callbacks, ensure we call them properly
-                    self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_CALL,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "arguments": json.dumps(event.tool_kwargs),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_CALL, msg=payload, event_id=event_id)
 
             elif isinstance(event, ToolCallResult):
-                # Check if callback is async or sync
-                if asyncio.iscoroutinefunction(
-                    self.agent_instance.agent_progress_callback
-                ):
-                    await self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_OUTPUT,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "content": str(event.tool_output),
-                        },
-                        event_id=event_id,
-                    )
+                payload = {
+                    "tool_name": event.tool_name,
+                    "content": str(event.tool_output),
+                }
+                if is_async:
+                    await cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)
                 else:
-                    self.agent_instance.agent_progress_callback(
-                        status_type=AgentStatusType.TOOL_OUTPUT,
-                        msg={
-                            "tool_name": event.tool_name,
-                            "content": str(event.tool_output),
-                        },
-                        event_id=event_id,
-                    )
+                    cb(status_type=AgentStatusType.TOOL_OUTPUT, msg=payload, event_id=event_id)
 
             elif isinstance(event, AgentInput):
-                self.agent_instance.agent_progress_callback(
-                    status_type=AgentStatusType.AGENT_UPDATE,
-                    msg={"content": f"Agent input: {event.input}"},
-                    event_id=event_id,
-                )
+                payload = {"content": f"Agent input: {getattr(event, 'input', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
 
             elif isinstance(event, AgentOutput):
-                self.agent_instance.agent_progress_callback(
-                    status_type=AgentStatusType.AGENT_UPDATE,
-                    msg={"content": f"Agent output: {event.response}"},
-                    event_id=event_id,
-                )
+                payload = {"content": f"Agent output: {getattr(event, 'response', '')}"}
+                if is_async:
+                    await cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
+                else:
+                    cb(status_type=AgentStatusType.AGENT_UPDATE, msg=payload, event_id=event_id)
 
         except Exception as e:
+            logging.error(f"[progress-callback] Exception: {e}")
+            logging.error(traceback.format_exc())
 
-            logging.error(f"Exception in progress callback: {e}")
-            logging.error(f"Traceback: {traceback.format_exc()}")
-            # Continue execution despite callback errors
-
-    def create_streaming_response(
-        self, user_metadata: Dict[str, Any]
-    ) -> "StreamingResponseAdapter":
+    def create_streaming_response(self, user_metadata: Dict[str, Any]) -> "StreamingResponseAdapter":
         """
-        Create a StreamingResponseAdapter with proper post-processing.
-
-        Args:
-            user_metadata: User metadata dictionary to update
-
-        Returns:
-            StreamingResponseAdapter: Configured streaming adapter
+        Build the adapter with post-processing wired in.
         """
         post_process_task = create_stream_post_processing_task(
             self.stream_complete_event,
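
To show how the reworked handler is meant to be consumed, here is a hedged sketch; the agent instance and the awaitable workflow handler are assumed to come from the caller, and only the constructor arguments and process_stream_events shown in the diff are used:

async def stream_final_answer(agent, workflow_handler, prompt: str) -> str:
    # `agent` and `workflow_handler` are assumptions: the agent instance and the
    # awaitable workflow handler that also exposes .stream_events().
    stream_handler = FunctionCallingStreamHandler(
        agent_instance=agent,
        handler=workflow_handler,
        prompt=prompt,
        stream_policy="final_only",  # hold tokens; commit only the final, post-tool step
    )
    pieces = []
    async for token in stream_handler.process_stream_events():
        pieces.append(token)  # planning/tool-selection tokens are never surfaced here
    return "".join(pieces)
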
@@ -454,8 +461,8 @@ class FunctionCallingStreamHandler:
 
         return StreamingResponseAdapter(
             async_response_gen=self.process_stream_events,
-            response="",  # will be filled post-stream
-            metadata={},
+            response="",  # will be set by post-processing
+            metadata={},  # will be set by post-processing
             post_process_task=post_process_task,
         )
 
@@ -493,7 +500,7 @@ class ReActStreamHandler:
             # Handle progress callbacks if available - this is the key missing piece!
             if self.agent_instance.agent_progress_callback:
                 # Only track events that are actual tool-related events
-                if self._is_tool_related_event(event):
+                if is_tool_related_event(event):
                     try:
                         # Get event ID from LlamaIndex event
                         event_id = get_event_id(event)
@@ -603,36 +610,6 @@ class ReActStreamHandler:
             # Signal that stream processing is complete
             self.stream_complete_event.set()
 
-    def _is_tool_related_event(self, event) -> bool:
-        """
-        Determine if an event is actually tool-related and should be tracked.
-
-        This should only return True for events that represent actual tool calls or tool outputs,
-        not for streaming text deltas or other LLM response events.
-
-        Args:
-            event: The stream event to check
-
-        Returns:
-            bool: True if this event should be tracked for tool purposes
-        """
-        # Track explicit tool events from LlamaIndex workflow
-        if isinstance(event, (ToolCall, ToolCallResult)):
-            return True
-
-        has_tool_id = hasattr(event, "tool_id") and event.tool_id
-        has_delta = hasattr(event, "delta") and event.delta
-        has_tool_name = hasattr(event, "tool_name") and event.tool_name
-
-        # We're not seeing ToolCall/ToolCallResult events in the stream, so let's be more liberal
-        # but still avoid streaming deltas
-        if (has_tool_id or has_tool_name) and not has_delta:
-            return True
-
-        # Everything else (streaming deltas, agent outputs, workflow events, etc.)
-        # should NOT be tracked as tool events
-        return False
-
     def create_streaming_response(
         self, user_metadata: Dict[str, Any]
     ) -> "StreamingResponseAdapter":
@@ -1,12 +1,41 @@
 """Vectara Hallucination Detection and Correction client."""
 
 import logging
+import re
 from typing import List, Optional, Tuple
 import requests
 
 from llama_index.core.llms import MessageRole
 
 
+# Compiled regex patterns for better performance
+_MARKDOWN_LINK_PATTERN = re.compile(r'\[([^\]]*)\]\([^)]*\)')
+_WHITESPACE_CLEANUP_PATTERN = re.compile(r'\s+')
+
+
+def clean_urls_from_text(text: str) -> str:
+    """
+    Remove markdown URLs [text](URL) from text, preserving the link text.
+    This prevents interference with hallucination detection while keeping useful text content.
+
+    Args:
+        text (str): The input text potentially containing markdown URLs
+
+    Returns:
+        str: Text with markdown URLs replaced by their text content
+    """
+    if not text:
+        return text
+
+    # Replace markdown links [text](url) with just the text part
+    cleaned_text = _MARKDOWN_LINK_PATTERN.sub(r'\1', text)
+
+    # Clean up any extra whitespace that might result from the replacement
+    cleaned_text = _WHITESPACE_CLEANUP_PATTERN.sub(' ', cleaned_text).strip()
+
+    return cleaned_text
+
+
 class Hallucination:
     """Vectara Hallucination Correction."""
 
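
A quick sketch of what the new URL cleaning does, with illustrative values:

text = "See the [Vectara docs](https://docs.vectara.com) for details."
assert clean_urls_from_text(text) == "See the Vectara docs for details."
assert clean_urls_from_text("") == ""  # empty input is returned unchanged
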
@@ -143,9 +172,12 @@ def analyze_hallucinations(
         return None, []
 
     try:
+        # Clean URLs from agent response to prevent interference with hallucination detection
+        cleaned_agent_response = clean_urls_from_text(agent_response)
+
         h = Hallucination(vectara_api_key)
         corrected_text, corrections = h.compute(
-            query=query, context=context, hypothesis=agent_response
+            query=query, context=context, hypothesis=cleaned_agent_response
         )
         return corrected_text, corrections
 
@@ -305,3 +305,7 @@ def patch_sync(func_async: AsyncCallable) -> Callable:
         return loop.run_until_complete(func_async(*args, **kwargs))
 
     return patched_sync
+
+
+# Tool name suffixes for pattern matching (with underscore prefix)
+DB_TOOL_SUFFIXES = {f"_{func}" for func in DatabaseTools.spec_functions}
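
For illustration, assuming DatabaseTools.spec_functions contained names such as list_tables, describe_tables, and load_data (hypothetical values), the comprehension would produce suffixes suited to endswith-style matching on generated tool names:

# Hypothetical spec_functions; the real list comes from DatabaseTools.
example_spec_functions = ["list_tables", "describe_tables", "load_data"]
example_suffixes = {f"_{func}" for func in example_spec_functions}

assert example_suffixes == {"_list_tables", "_describe_tables", "_load_data"}
assert any("my_db_list_tables".endswith(suffix) for suffix in example_suffixes)
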
@@ -18,7 +18,7 @@ from .agent_config import AgentConfig
 
 provider_to_default_model_name = {
     ModelProvider.OPENAI: "gpt-4.1-mini",
-    ModelProvider.ANTHROPIC: "claude-sonnet-4-0",
+    ModelProvider.ANTHROPIC: "claude-sonnet-4-5",
     ModelProvider.TOGETHER: "deepseek-ai/DeepSeek-V3",
     ModelProvider.GROQ: "openai/gpt-oss-20b",
     ModelProvider.BEDROCK: "us.anthropic.claude-sonnet-4-20250514-v1:0",
@@ -34,6 +34,7 @@ models_to_max_tokens = {
     "gpt-4.1-mini": 32768,
     "claude-sonnet-4-20250514": 64000,
     "claude-sonnet-4-0": 64000,
+    "claude-sonnet-4-5": 64000,
     "deepseek-ai/deepseek-v3": 8192,
     "models/gemini-2.5-flash": 65536,
     "models/gemini-2.5-flash-lite": 65536,
@@ -117,6 +118,57 @@ def _get_llm_params_for_role(
     return model_provider, model_name
 
 
+def _cleanup_gemini_clients() -> None:
+    """Helper function to cleanup Gemini client sessions."""
+    for llm in _llm_cache.values():
+        try:
+            # Check if this is a GoogleGenAI instance with internal client structure
+            if not hasattr(llm, '_client'):
+                continue
+
+            client = getattr(llm, '_client', None)
+            if not client:
+                continue
+
+            api_client = getattr(client, '_api_client', None)
+            if not api_client:
+                continue
+
+            async_session = getattr(api_client, '_async_session', None)
+            if not async_session:
+                continue
+
+            # Close the aiohttp session if it exists
+            try:
+                import asyncio
+                loop = asyncio.get_event_loop()
+                if not loop.is_closed():
+                    loop.run_until_complete(async_session.close())
+            except Exception:
+                pass
+        except Exception:
+            pass
+
+
+def clear_llm_cache(provider: Optional[ModelProvider] = None) -> None:
+    """
+    Clear the LLM cache, optionally for a specific provider only.
+
+    Args:
+        provider: If specified, only clear cache entries for this provider.
+                  If None, clear the entire cache.
+    """
+    # Before clearing, try to cleanup any Gemini clients
+    _cleanup_gemini_clients()
+
+    if provider is None:
+        # Clear entire cache
+        _llm_cache.clear()
+    else:
+        # For simplicity, just clear all when provider is specified
+        _llm_cache.clear()
+
+
 def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
     """
     Get the LLM for the specified role, using the provided config
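
Based only on the functions added above, usage would look like the following sketch; ModelProvider is already referenced in the clear_llm_cache signature, so it is assumed to be available in this module:

# Drop every cached LLM; Gemini aiohttp sessions are closed first via _cleanup_gemini_clients().
clear_llm_cache()

# Passing a provider is accepted, but per the inline comment it currently clears the whole cache too.
clear_llm_cache(ModelProvider.ANTHROPIC)
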
@@ -159,6 +211,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
                 "google_genai not available. Install with: pip install llama-index-llms-google-genai"
             ) from e
         import google.genai.types as google_types
+
         generation_config = google_types.GenerateContentConfig(
             temperature=0.0,
             seed=123,
@@ -182,7 +235,7 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
             ) from e
         additional_kwargs = {"seed": 42}
         if model_name in [
-            "deepseek-ai/DeepSeek-V3.1", "openai/gpt-oss-120b",
+            "deepseek-ai/DeepSeek-V3.1",
             "deepseek-ai/DeepSeek-R1", "Qwen/Qwen3-235B-A22B-Thinking-2507"
             "openai/gpt-oss-120b", "openai/gpt-oss-20b",
         ]: