vectara-agentic 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {vectara_agentic-0.4.2/vectara_agentic.egg-info → vectara_agentic-0.4.4}/PKG-INFO +133 -36
  2. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/README.md +125 -29
  3. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/requirements.txt +8 -7
  4. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/__init__.py +1 -0
  5. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/benchmark_models.py +547 -372
  6. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/conftest.py +14 -12
  7. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/endpoint.py +9 -5
  8. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/run_tests.py +1 -0
  9. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_agent.py +22 -9
  10. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_agent_fallback_memory.py +4 -4
  11. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_agent_memory_consistency.py +4 -4
  12. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_agent_type.py +2 -0
  13. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_api_endpoint.py +13 -13
  14. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_bedrock.py +9 -1
  15. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_fallback.py +18 -7
  16. vectara_agentic-0.4.4/tests/test_gemini.py +57 -0
  17. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_groq.py +43 -1
  18. vectara_agentic-0.4.4/tests/test_openai.py +160 -0
  19. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_private_llm.py +19 -6
  20. vectara_agentic-0.4.4/tests/test_react_error_handling.py +293 -0
  21. vectara_agentic-0.4.4/tests/test_react_memory.py +257 -0
  22. vectara_agentic-0.4.4/tests/test_react_streaming.py +135 -0
  23. vectara_agentic-0.4.4/tests/test_react_workflow_events.py +395 -0
  24. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_return_direct.py +1 -0
  25. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_serialization.py +58 -20
  26. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_session_memory.py +11 -11
  27. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_streaming.py +0 -44
  28. vectara_agentic-0.4.4/tests/test_together.py +136 -0
  29. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_tools.py +3 -1
  30. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_vectara_llms.py +2 -2
  31. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_vhc.py +7 -2
  32. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/tests/test_workflow.py +17 -11
  33. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/_callback.py +79 -21
  34. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/_version.py +1 -1
  35. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent.py +65 -27
  36. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/serialization.py +5 -9
  37. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/streaming.py +245 -64
  38. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/schemas.py +2 -2
  39. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/llm_utils.py +64 -15
  40. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/tools.py +88 -31
  41. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4/vectara_agentic.egg-info}/PKG-INFO +133 -36
  42. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/SOURCES.txt +5 -0
  43. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/requires.txt +8 -7
  44. vectara_agentic-0.4.2/tests/test_gemini.py +0 -83
  45. vectara_agentic-0.4.2/tests/test_together.py +0 -62
  46. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/LICENSE +0 -0
  47. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/MANIFEST.in +0 -0
  48. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/setup.cfg +0 -0
  49. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/setup.py +0 -0
  50. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/__init__.py +0 -0
  51. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/_observability.py +0 -0
  52. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_config.py +0 -0
  53. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/__init__.py +0 -0
  54. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/factory.py +0 -0
  55. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/prompts.py +0 -0
  56. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/__init__.py +0 -0
  57. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/hallucination.py +0 -0
  58. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/logging.py +0 -0
  59. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_core/utils/tools.py +0 -0
  60. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/agent_endpoint.py +0 -0
  61. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/db_tools.py +0 -0
  62. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/sub_query_workflow.py +0 -0
  63. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/tool_utils.py +0 -0
  64. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/tools_catalog.py +0 -0
  65. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/types.py +0 -0
  66. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic/utils.py +0 -0
  67. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/dependency_links.txt +0 -0
  68. {vectara_agentic-0.4.2 → vectara_agentic-0.4.4}/vectara_agentic.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectara_agentic
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
5
5
  Home-page: https://github.com/vectara/py-vectara-agentic
6
6
  Author: Ofer Mendelevitch
@@ -16,19 +16,20 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: llama-index==0.13.1
20
- Requires-Dist: llama-index-core==0.13.1
21
- Requires-Dist: llama-index-workflow==1.0.1
19
+ Requires-Dist: llama-index==0.13.3
20
+ Requires-Dist: llama-index-core==0.13.3
21
+ Requires-Dist: llama-index-workflows==1.3.0
22
22
  Requires-Dist: llama-index-cli==0.5.0
23
23
  Requires-Dist: llama-index-indices-managed-vectara==0.5.0
24
- Requires-Dist: llama-index-llms-openai==0.5.2
24
+ Requires-Dist: llama-index-llms-openai==0.5.4
25
25
  Requires-Dist: llama-index-llms-openai-like==0.5.0
26
- Requires-Dist: llama-index-llms-anthropic==0.8.2
26
+ Requires-Dist: llama-index-llms-anthropic==0.8.5
27
27
  Requires-Dist: llama-index-llms-together==0.4.0
28
28
  Requires-Dist: llama-index-llms-groq==0.4.0
29
29
  Requires-Dist: llama-index-llms-cohere==0.6.0
30
30
  Requires-Dist: llama-index-llms-google-genai==0.3.0
31
- Requires-Dist: llama-index-llms-bedrock-converse==0.8.0
31
+ Requires-Dist: google_genai>=1.31.0
32
+ Requires-Dist: llama-index-llms-bedrock-converse==0.8.2
32
33
  Requires-Dist: llama-index-tools-yahoo-finance==0.4.0
33
34
  Requires-Dist: llama-index-tools-arxiv==0.4.0
34
35
  Requires-Dist: llama-index-tools-database==0.4.0
@@ -54,7 +55,7 @@ Requires-Dist: protobuf==5.29.5
54
55
  Requires-Dist: tokenizers>=0.20
55
56
  Requires-Dist: pydantic>=2.11.5
56
57
  Requires-Dist: pandas==2.2.3
57
- Requires-Dist: retrying==1.3.4
58
+ Requires-Dist: retrying==1.4.2
58
59
  Requires-Dist: python-dotenv==1.0.1
59
60
  Requires-Dist: cloudpickle>=3.1.1
60
61
  Requires-Dist: httpx==0.28.1
@@ -100,16 +101,17 @@ Dynamic: summary
100
101
 
101
102
  ## 📑 Table of Contents
102
103
 
103
- - [Overview](#-overview)
104
- - [Quick Start](#-quick-start)
105
- - [Using Tools](#using-tools)
106
- - [Advanced Usage: Workflows](#advanced-usage-workflows)
107
- - [Configuration](#️-configuration)
108
- - [Migrating from v0.3.x](#-migrating-from-v03x)
109
- - [Contributing](#-contributing)
110
- - [License](#-license)
104
+ - [Overview](#overview)
105
+ - [🚀 Quick Start](#quick-start)
106
+ - [🗒️ Agent Instructions](#agent-instructions)
107
+ - [🧰 Defining Tools](#defining-tools)
108
+ - [🌊 Streaming & Real-time Responses](#streaming--real-time-responses)
109
+ - [🔍 Vectara Hallucination Correction (VHC)](#vectara-hallucination-correction-vhc)
110
+ - [🔄 Advanced Usage: Workflows](#advanced-usage-workflows)
111
+ - [🛠️ Configuration](#configuration)
112
+ - [📝 Migrating from v0.3.x](#migrating-from-v03x)
111
113
 
112
- ## Overview
114
+ ## Overview
113
115
 
114
116
  `vectara-agentic` is a Python library for developing powerful AI assistants and agents using Vectara and Agentic-RAG. It leverages the LlamaIndex Agent framework and provides helper functions to quickly create tools that connect to Vectara corpora.
115
117
 
@@ -158,7 +160,7 @@ Check out our example AI assistants:
158
160
  pip install vectara-agentic
159
161
  ```
160
162
 
161
- ## 🚀 Quick Start
163
+ ## Quick Start
162
164
 
163
165
  Let's see how we create a simple AI assistant to answer questions about financial data ingested into Vectara, using `vectara-agentic`.
164
166
 
@@ -181,7 +183,7 @@ A RAG tool calls the full Vectara RAG pipeline to provide summarized responses t
181
183
  ```python
182
184
  from pydantic import BaseModel, Field
183
185
 
184
- years = list(range(2020, 2024))
186
+ years = list(range(2020, 2025))
185
187
  tickers = {
186
188
  "AAPL": "Apple Computer",
187
189
  "GOOG": "Google",
@@ -213,7 +215,7 @@ To learn about additional arguments `create_rag_tool`, please see the full [docs
213
215
  In addition to RAG tools or search tools, you can generate additional tools the agent can use. These could be mathematical tools, tools
214
216
  that call other APIs to get more information, or any other type of tool.
215
217
 
216
- See [Agent Tools](#️-agent-tools-at-a-glance) for more information.
218
+ See [Agent Tools](#agent-tools-at-a-glance) for more information.
217
219
 
218
220
  ### 4. Create your agent
219
221
 
@@ -247,26 +249,67 @@ agent = Agent(
247
249
 
248
250
  The `topic` parameter helps identify the agent's area of expertise, while `custom_instructions` lets you customize how the agent behaves and presents information. The agent will combine these with its default general instructions to determine its complete behavior.
249
251
 
250
- The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur, and can be used to track agent steps.
252
+ The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur (tool calls, tool outputs, etc.), and can be used to track agent steps in real-time. This works with both regular chat methods (`chat()`, `achat()`) and streaming methods (`stream_chat()`, `astream_chat()`).
251
253
 
252
254
  ### 5. Run a chat interaction
253
255
 
256
+ You have multiple ways to interact with your agent:
257
+
258
+ **Standard Chat (synchronous)**
254
259
  ```python
255
260
  res = agent.chat("What was the revenue for Apple in 2021?")
256
261
  print(res.response)
257
262
  ```
258
263
 
264
+ **Async Chat**
265
+ ```python
266
+ res = await agent.achat("What was the revenue for Apple in 2021?")
267
+ print(res.response)
268
+ ```
269
+
270
+ **Streaming Chat with AgentStreamingResponse**
271
+ ```python
272
+ # Synchronous streaming
273
+ stream_response = agent.stream_chat("What was the revenue for Apple in 2021?")
274
+
275
+ # Option 1: Process stream manually
276
+ async for chunk in stream_response.async_response_gen():
277
+ print(chunk, end="", flush=True)
278
+
279
+ # Option 2: Get final response without streaming
280
+ # (Note: stream still executes, just not processed chunk by chunk)
281
+
282
+ # Get final response after streaming
283
+ final_response = stream_response.get_response()
284
+ print(f"\nFinal response: {final_response.response}")
285
+ ```
286
+
287
+ **Async Streaming Chat**
288
+ ```python
289
+ # Asynchronous streaming
290
+ stream_response = await agent.astream_chat("What was the revenue for Apple in 2021?")
291
+
292
+ # Process chunks manually
293
+ async for chunk in stream_response.async_response_gen():
294
+ print(chunk, end="", flush=True)
295
+
296
+ # Get final response after streaming
297
+ final_response = await stream_response.aget_response()
298
+ print(f"\nFinal response: {final_response.response}")
299
+ ```
300
+
259
301
  > **Note:**
260
- > 1. `vectara-agentic` also supports `achat()` as well as two streaming variants `stream_chat()` and `astream_chat()`.
261
- > 2. The response types from `chat()` and `achat()` are of type `AgentResponse`. If you just need the actual string
262
- > response it's available as the `response` variable, or just use `str()`. For advanced use-cases you can look
263
- > at other `AgentResponse` variables [such as `sources`](https://github.com/run-llama/llama_index/blob/659f9faaafbecebb6e6c65f42143c0bf19274a37/llama-index-core/llama_index/core/chat_engine/types.py#L53).
302
+ > 1. Both `chat()` and `achat()` return `AgentResponse` objects. Access the text with `.response` or use `str()`.
303
+ > 2. Streaming methods return `AgentStreamingResponse` objects that provide both real-time chunks and final responses.
304
+ > 3. For advanced use-cases, explore other `AgentResponse` properties like `sources` and `metadata`.
305
+ > 4. Streaming is ideal for long responses and real-time user interfaces. See [Streaming & Real-time Responses](#streaming--real-time-responses) for detailed examples.
306
+ > 5. The `agent_progress_callback` works with both regular chat methods (`chat()`, `achat()`) and streaming methods to track tool calls in real-time.
264
307
 
265
308
  ## Agent Instructions
266
309
 
267
- When creating an agent, it already comes with a set of general base instructions, designed carefully to enhance its operation and improve how the agent works.
310
+ When creating an agent, it already comes with a set of general base instructions, designed to enhance its operation and improve how the agent works.
268
311
 
269
- In addition, you can add `custom_instructions` that are specific to your use case that customize how the agent behaves.
312
+ In addition, you can add `custom_instructions` that are specific to your use case to customize how the agent behaves.
270
313
 
271
314
  When writing custom instructions:
272
315
  - Focus on behavior and presentation rather than tool usage (that's what tool descriptions are for)
@@ -279,7 +322,7 @@ The agent will combine both the general instructions and your custom instruction
279
322
 
280
323
  It is not recommended to change the general instructions, but it is possible as well to override them with the optional `general_instructions` parameter. If you do change them, your agent may not work as intended, so be careful if overriding these instructions.
281
324
 
282
- ## 🧰 Defining Tools
325
+ ## Defining Tools
283
326
 
284
327
  ### Vectara tools
285
328
 
@@ -333,7 +376,7 @@ The Vectara search tool allows the agent to list documents that match a query.
333
376
  This can be helpful to the agent to answer queries like "how many documents discuss the iPhone?" or other
334
377
  similar queries that require a response in terms of a list of matching documents.
335
378
 
336
- ### 🛠️ Agent Tools at a Glance
379
+ ### Agent Tools at a Glance
337
380
 
338
381
  `vectara-agentic` provides a few tools out of the box (see `ToolsCatalog` for details):
339
382
 
@@ -481,7 +524,7 @@ mult_tool = ToolsFactory().create_tool(mult_func)
481
524
 
482
525
  #### VHC Eligibility
483
526
 
484
- When creating tools, you can control whether they participate in Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
527
+ When creating tools, you can control whether their output is eligible for Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
485
528
 
486
529
  ```python
487
530
  # Tool that provides factual data - should participate in VHC
@@ -529,7 +572,61 @@ Built-in formatters include `format_as_table`, `format_as_json`, and `format_as_
529
572
 
530
573
  The human-readable format, if available, is used when using Vectara Hallucination Correction.
531
574
 
532
- ## 🔍 Vectara Hallucination Correction (VHC)
575
+ ## Streaming & Real-time Responses
576
+
577
+ `vectara-agentic` provides powerful streaming capabilities for real-time response generation, ideal for interactive applications and long-form content.
578
+
579
+ ### Why Use Streaming?
580
+
581
+ - **Better User Experience**: Users see responses as they're generated instead of waiting for completion
582
+ - **Real-time Feedback**: Perfect for chat interfaces, web applications, and interactive demos
583
+ - **Progress Visibility**: Combined with callbacks, users can see both tool usage and response generation
584
+ - **Reduced Perceived Latency**: Streaming makes applications feel faster and more responsive
585
+
586
+ ### Quick Streaming Example
587
+
588
+ ```python
589
+ # Create streaming response
590
+ stream_response = agent.stream_chat("Analyze the financial performance of tech companies in 2022")
591
+ async for chunk in stream_response.async_response_gen():
592
+ print(chunk, end="", flush=True) # Update your UI here
593
+
594
+ # Get complete response with metadata after streaming completes
595
+ final_response = stream_response.get_response()
596
+ print(f"\nSources consulted: {len(final_response.sources)}")
597
+ ```
598
+
599
+ ### Tool Call Progress Tracking
600
+
601
+ You can track tool calls and outputs in real-time with `agent_progress_callback` - this works with both regular chat and streaming methods:
602
+
603
+ ```python
604
+ from vectara_agentic import AgentStatusType
605
+
606
+ def tool_tracker(status_type, msg, event_id):
607
+ if status_type == AgentStatusType.TOOL_CALL:
608
+ print(f"🔧 Using {msg['tool_name']} with {msg['arguments']}")
609
+ elif status_type == AgentStatusType.TOOL_OUTPUT:
610
+ print(f"📊 {msg['tool_name']} completed")
611
+
612
+ agent = Agent(
613
+ tools=[your_tools],
614
+ agent_progress_callback=tool_tracker
615
+ )
616
+
617
+ # With streaming - see tool calls as they happen, plus streaming response
618
+ stream_response = await agent.astream_chat("Analyze Apple's finances")
619
+ async for chunk in stream_response.async_response_gen():
620
+ print(chunk, end="", flush=True)
621
+
622
+ # With regular chat - see tool calls as they happen, then get final response
623
+ response = await agent.achat("Analyze Apple's finances")
624
+ print(response.response)
625
+ ```
626
+
627
+ For detailed examples including FastAPI integration, Streamlit apps, and decision guidelines, see our [comprehensive streaming documentation](https://vectara.github.io/py-vectara-agentic/latest/usage/#streaming-chat-methods).
628
+
629
+ ## Vectara Hallucination Correction (VHC)
533
630
 
534
631
  `vectara-agentic` provides built-in support for Vectara Hallucination Correction (VHC), which analyzes agent responses and corrects any detected hallucinations based on the factual content retrieved by VHC-eligible tools.
535
632
 
@@ -587,7 +684,7 @@ agent = Agent(
587
684
 
588
685
  This helps catch errors where your instructions reference tools that aren't available to the agent.
589
686
 
590
- ## 🔄 Advanced Usage: Workflows
687
+ ## Advanced Usage: Workflows
591
688
 
592
689
  In addition to standard chat interactions, `vectara-agentic` supports custom workflows via the `run()` method.
593
690
  Workflows allow you to structure multi-step interactions where inputs and outputs are validated using Pydantic models.
@@ -758,7 +855,7 @@ The workflow works in two steps:
758
855
  - You need to implement complex business logic
759
856
  - You want to integrate with external systems or APIs in a specific way
760
857
 
761
- ## 🛠️ Configuration
858
+ ## Configuration
762
859
 
763
860
  ### Configuring Vectara-agentic
764
861
 
@@ -773,7 +870,7 @@ agent_config = AgentConfig(
773
870
  main_llm_provider = ModelProvider.ANTHROPIC,
774
871
  main_llm_model_name = 'claude-3-5-sonnet-20241022',
775
872
  tool_llm_provider = ModelProvider.TOGETHER,
776
- tool_llm_model_name = 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
873
+ tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
777
874
  )
778
875
 
779
876
  agent = Agent(
@@ -789,7 +886,7 @@ The `AgentConfig` object may include the following items:
789
886
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
790
887
 
791
888
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
792
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash).
889
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
793
890
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
794
891
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
795
892
 
@@ -826,7 +923,7 @@ agent = Agent(
826
923
  )
827
924
  ```
828
925
 
829
- ## 🚀 Migrating from v0.3.x
926
+ ## Migrating from v0.3.x
830
927
 
831
928
  If you're upgrading from v0.3.x, please note the following breaking changes in v0.4.0:
832
929
 
@@ -26,16 +26,17 @@
26
26
 
27
27
  ## 📑 Table of Contents
28
28
 
29
- - [Overview](#-overview)
30
- - [Quick Start](#-quick-start)
31
- - [Using Tools](#using-tools)
32
- - [Advanced Usage: Workflows](#advanced-usage-workflows)
33
- - [Configuration](#️-configuration)
34
- - [Migrating from v0.3.x](#-migrating-from-v03x)
35
- - [Contributing](#-contributing)
36
- - [License](#-license)
37
-
38
- ## ✨ Overview
29
+ - [Overview](#overview)
30
+ - [🚀 Quick Start](#quick-start)
31
+ - [🗒️ Agent Instructions](#agent-instructions)
32
+ - [🧰 Defining Tools](#defining-tools)
33
+ - [🌊 Streaming & Real-time Responses](#streaming--real-time-responses)
34
+ - [🔍 Vectara Hallucination Correction (VHC)](#vectara-hallucination-correction-vhc)
35
+ - [🔄 Advanced Usage: Workflows](#advanced-usage-workflows)
36
+ - [🛠️ Configuration](#configuration)
37
+ - [📝 Migrating from v0.3.x](#migrating-from-v03x)
38
+
39
+ ## Overview
39
40
 
40
41
  `vectara-agentic` is a Python library for developing powerful AI assistants and agents using Vectara and Agentic-RAG. It leverages the LlamaIndex Agent framework and provides helper functions to quickly create tools that connect to Vectara corpora.
41
42
 
@@ -84,7 +85,7 @@ Check out our example AI assistants:
84
85
  pip install vectara-agentic
85
86
  ```
86
87
 
87
- ## 🚀 Quick Start
88
+ ## Quick Start
88
89
 
89
90
  Let's see how we create a simple AI assistant to answer questions about financial data ingested into Vectara, using `vectara-agentic`.
90
91
 
@@ -107,7 +108,7 @@ A RAG tool calls the full Vectara RAG pipeline to provide summarized responses t
107
108
  ```python
108
109
  from pydantic import BaseModel, Field
109
110
 
110
- years = list(range(2020, 2024))
111
+ years = list(range(2020, 2025))
111
112
  tickers = {
112
113
  "AAPL": "Apple Computer",
113
114
  "GOOG": "Google",
@@ -139,7 +140,7 @@ To learn about additional arguments `create_rag_tool`, please see the full [docs
139
140
  In addition to RAG tools or search tools, you can generate additional tools the agent can use. These could be mathematical tools, tools
140
141
  that call other APIs to get more information, or any other type of tool.
141
142
 
142
- See [Agent Tools](#️-agent-tools-at-a-glance) for more information.
143
+ See [Agent Tools](#agent-tools-at-a-glance) for more information.
143
144
 
144
145
  ### 4. Create your agent
145
146
 
@@ -173,26 +174,67 @@ agent = Agent(
173
174
 
174
175
  The `topic` parameter helps identify the agent's area of expertise, while `custom_instructions` lets you customize how the agent behaves and presents information. The agent will combine these with its default general instructions to determine its complete behavior.
175
176
 
176
- The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur, and can be used to track agent steps.
177
+ The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur (tool calls, tool outputs, etc.), and can be used to track agent steps in real-time. This works with both regular chat methods (`chat()`, `achat()`) and streaming methods (`stream_chat()`, `astream_chat()`).
177
178
 
178
179
  ### 5. Run a chat interaction
179
180
 
181
+ You have multiple ways to interact with your agent:
182
+
183
+ **Standard Chat (synchronous)**
180
184
  ```python
181
185
  res = agent.chat("What was the revenue for Apple in 2021?")
182
186
  print(res.response)
183
187
  ```
184
188
 
189
+ **Async Chat**
190
+ ```python
191
+ res = await agent.achat("What was the revenue for Apple in 2021?")
192
+ print(res.response)
193
+ ```
194
+
195
+ **Streaming Chat with AgentStreamingResponse**
196
+ ```python
197
+ # Synchronous streaming
198
+ stream_response = agent.stream_chat("What was the revenue for Apple in 2021?")
199
+
200
+ # Option 1: Process stream manually
201
+ async for chunk in stream_response.async_response_gen():
202
+ print(chunk, end="", flush=True)
203
+
204
+ # Option 2: Get final response without streaming
205
+ # (Note: stream still executes, just not processed chunk by chunk)
206
+
207
+ # Get final response after streaming
208
+ final_response = stream_response.get_response()
209
+ print(f"\nFinal response: {final_response.response}")
210
+ ```
211
+
212
+ **Async Streaming Chat**
213
+ ```python
214
+ # Asynchronous streaming
215
+ stream_response = await agent.astream_chat("What was the revenue for Apple in 2021?")
216
+
217
+ # Process chunks manually
218
+ async for chunk in stream_response.async_response_gen():
219
+ print(chunk, end="", flush=True)
220
+
221
+ # Get final response after streaming
222
+ final_response = await stream_response.aget_response()
223
+ print(f"\nFinal response: {final_response.response}")
224
+ ```
225
+
185
226
  > **Note:**
186
- > 1. `vectara-agentic` also supports `achat()` as well as two streaming variants `stream_chat()` and `astream_chat()`.
187
- > 2. The response types from `chat()` and `achat()` are of type `AgentResponse`. If you just need the actual string
188
- > response it's available as the `response` variable, or just use `str()`. For advanced use-cases you can look
189
- > at other `AgentResponse` variables [such as `sources`](https://github.com/run-llama/llama_index/blob/659f9faaafbecebb6e6c65f42143c0bf19274a37/llama-index-core/llama_index/core/chat_engine/types.py#L53).
227
+ > 1. Both `chat()` and `achat()` return `AgentResponse` objects. Access the text with `.response` or use `str()`.
228
+ > 2. Streaming methods return `AgentStreamingResponse` objects that provide both real-time chunks and final responses.
229
+ > 3. For advanced use-cases, explore other `AgentResponse` properties like `sources` and `metadata`.
230
+ > 4. Streaming is ideal for long responses and real-time user interfaces. See [Streaming & Real-time Responses](#streaming--real-time-responses) for detailed examples.
231
+ > 5. The `agent_progress_callback` works with both regular chat methods (`chat()`, `achat()`) and streaming methods to track tool calls in real-time.
190
232
 
191
233
  ## Agent Instructions
192
234
 
193
- When creating an agent, it already comes with a set of general base instructions, designed carefully to enhance its operation and improve how the agent works.
235
+ When creating an agent, it already comes with a set of general base instructions, designed to enhance its operation and improve how the agent works.
194
236
 
195
- In addition, you can add `custom_instructions` that are specific to your use case that customize how the agent behaves.
237
+ In addition, you can add `custom_instructions` that are specific to your use case to customize how the agent behaves.
196
238
 
197
239
  When writing custom instructions:
198
240
  - Focus on behavior and presentation rather than tool usage (that's what tool descriptions are for)
@@ -205,7 +247,7 @@ The agent will combine both the general instructions and your custom instruction
205
247
 
206
248
  It is not recommended to change the general instructions, but it is possible as well to override them with the optional `general_instructions` parameter. If you do change them, your agent may not work as intended, so be careful if overriding these instructions.
207
249
 
208
- ## 🧰 Defining Tools
250
+ ## Defining Tools
209
251
 
210
252
  ### Vectara tools
211
253
 
@@ -259,7 +301,7 @@ The Vectara search tool allows the agent to list documents that match a query.
259
301
  This can be helpful to the agent to answer queries like "how many documents discuss the iPhone?" or other
260
302
  similar queries that require a response in terms of a list of matching documents.
261
303
 
262
- ### 🛠️ Agent Tools at a Glance
304
+ ### Agent Tools at a Glance
263
305
 
264
306
  `vectara-agentic` provides a few tools out of the box (see `ToolsCatalog` for details):
265
307
 
@@ -407,7 +449,7 @@ mult_tool = ToolsFactory().create_tool(mult_func)
407
449
 
408
450
  #### VHC Eligibility
409
451
 
410
- When creating tools, you can control whether they participate in Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
452
+ When creating tools, you can control whether their output is eligible for Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
411
453
 
412
454
  ```python
413
455
  # Tool that provides factual data - should participate in VHC
@@ -455,7 +497,61 @@ Built-in formatters include `format_as_table`, `format_as_json`, and `format_as_
455
497
 
456
498
  The human-readable format, if available, is used when using Vectara Hallucination Correction.
457
499
 
458
- ## 🔍 Vectara Hallucination Correction (VHC)
500
+ ## Streaming & Real-time Responses
501
+
502
+ `vectara-agentic` provides powerful streaming capabilities for real-time response generation, ideal for interactive applications and long-form content.
503
+
504
+ ### Why Use Streaming?
505
+
506
+ - **Better User Experience**: Users see responses as they're generated instead of waiting for completion
507
+ - **Real-time Feedback**: Perfect for chat interfaces, web applications, and interactive demos
508
+ - **Progress Visibility**: Combined with callbacks, users can see both tool usage and response generation
509
+ - **Reduced Perceived Latency**: Streaming makes applications feel faster and more responsive
510
+
511
+ ### Quick Streaming Example
512
+
513
+ ```python
514
+ # Create streaming response
515
+ stream_response = agent.stream_chat("Analyze the financial performance of tech companies in 2022")
516
+ async for chunk in stream_response.async_response_gen():
517
+ print(chunk, end="", flush=True) # Update your UI here
518
+
519
+ # Get complete response with metadata after streaming completes
520
+ final_response = stream_response.get_response()
521
+ print(f"\nSources consulted: {len(final_response.sources)}")
522
+ ```
523
+
524
+ ### Tool Call Progress Tracking
525
+
526
+ You can track tool calls and outputs in real-time with `agent_progress_callback` - this works with both regular chat and streaming methods:
527
+
528
+ ```python
529
+ from vectara_agentic import AgentStatusType
530
+
531
+ def tool_tracker(status_type, msg, event_id):
532
+ if status_type == AgentStatusType.TOOL_CALL:
533
+ print(f"🔧 Using {msg['tool_name']} with {msg['arguments']}")
534
+ elif status_type == AgentStatusType.TOOL_OUTPUT:
535
+ print(f"📊 {msg['tool_name']} completed")
536
+
537
+ agent = Agent(
538
+ tools=[your_tools],
539
+ agent_progress_callback=tool_tracker
540
+ )
541
+
542
+ # With streaming - see tool calls as they happen, plus streaming response
543
+ stream_response = await agent.astream_chat("Analyze Apple's finances")
544
+ async for chunk in stream_response.async_response_gen():
545
+ print(chunk, end="", flush=True)
546
+
547
+ # With regular chat - see tool calls as they happen, then get final response
548
+ response = await agent.achat("Analyze Apple's finances")
549
+ print(response.response)
550
+ ```
551
+
552
+ For detailed examples including FastAPI integration, Streamlit apps, and decision guidelines, see our [comprehensive streaming documentation](https://vectara.github.io/py-vectara-agentic/latest/usage/#streaming-chat-methods).
553
+
554
+ ## Vectara Hallucination Correction (VHC)
459
555
 
460
556
  `vectara-agentic` provides built-in support for Vectara Hallucination Correction (VHC), which analyzes agent responses and corrects any detected hallucinations based on the factual content retrieved by VHC-eligible tools.
461
557
 
@@ -513,7 +609,7 @@ agent = Agent(
513
609
 
514
610
  This helps catch errors where your instructions reference tools that aren't available to the agent.
515
611
 
516
- ## 🔄 Advanced Usage: Workflows
612
+ ## Advanced Usage: Workflows
517
613
 
518
614
  In addition to standard chat interactions, `vectara-agentic` supports custom workflows via the `run()` method.
519
615
  Workflows allow you to structure multi-step interactions where inputs and outputs are validated using Pydantic models.
@@ -684,7 +780,7 @@ The workflow works in two steps:
684
780
  - You need to implement complex business logic
685
781
  - You want to integrate with external systems or APIs in a specific way
686
782
 
687
- ## 🛠️ Configuration
783
+ ## Configuration
688
784
 
689
785
  ### Configuring Vectara-agentic
690
786
 
@@ -699,7 +795,7 @@ agent_config = AgentConfig(
699
795
  main_llm_provider = ModelProvider.ANTHROPIC,
700
796
  main_llm_model_name = 'claude-3-5-sonnet-20241022',
701
797
  tool_llm_provider = ModelProvider.TOGETHER,
702
- tool_llm_model_name = 'meta-llama/Llama-3.3-70B-Instruct-Turbo'
798
+ tool_llm_model_name = 'deepseek-ai/DeepSeek-V3'
703
799
  )
704
800
 
705
801
  agent = Agent(
@@ -715,7 +811,7 @@ The `AgentConfig` object may include the following items:
715
811
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
716
812
 
717
813
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
718
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash).
814
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
719
815
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
720
816
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
721
817
 
@@ -752,7 +848,7 @@ agent = Agent(
752
848
  )
753
849
  ```
754
850
 
755
- ## 🚀 Migrating from v0.3.x
851
+ ## Migrating from v0.3.x
756
852
 
757
853
  If you're upgrading from v0.3.x, please note the following breaking changes in v0.4.0:
758
854
 
@@ -1,16 +1,17 @@
1
- llama-index==0.13.1
2
- llama-index-core==0.13.1
3
- llama-index-workflow==1.0.1
1
+ llama-index==0.13.3
2
+ llama-index-core==0.13.3
3
+ llama-index-workflows==1.3.0
4
4
  llama-index-cli==0.5.0
5
5
  llama-index-indices-managed-vectara==0.5.0
6
- llama-index-llms-openai==0.5.2
6
+ llama-index-llms-openai==0.5.4
7
7
  llama-index-llms-openai-like==0.5.0
8
- llama-index-llms-anthropic==0.8.2
8
+ llama-index-llms-anthropic==0.8.5
9
9
  llama-index-llms-together==0.4.0
10
10
  llama-index-llms-groq==0.4.0
11
11
  llama-index-llms-cohere==0.6.0
12
12
  llama-index-llms-google-genai==0.3.0
13
- llama-index-llms-bedrock-converse==0.8.0
13
+ google_genai>=1.31.0
14
+ llama-index-llms-bedrock-converse==0.8.2
14
15
  llama-index-tools-yahoo-finance==0.4.0
15
16
  llama-index-tools-arxiv==0.4.0
16
17
  llama-index-tools-database==0.4.0
@@ -36,7 +37,7 @@ protobuf==5.29.5
36
37
  tokenizers>=0.20
37
38
  pydantic>=2.11.5
38
39
  pandas==2.2.3
39
- retrying==1.3.4
40
+ retrying==1.4.2
40
41
  python-dotenv==1.0.1
41
42
  cloudpickle>=3.1.1
42
43
  httpx==0.28.1
@@ -4,4 +4,5 @@ Tests package for vectara_agentic.
4
4
 
5
5
  # Suppress external dependency warnings globally for all tests
6
6
  import warnings
7
+
7
8
  warnings.simplefilter("ignore", DeprecationWarning)