vectara-agentic 0.4.2__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {vectara_agentic-0.4.2/vectara_agentic.egg-info → vectara_agentic-0.4.3}/PKG-INFO +127 -31
  2. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/README.md +124 -28
  3. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/requirements.txt +3 -3
  4. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/__init__.py +1 -0
  5. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/benchmark_models.py +547 -372
  6. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/conftest.py +14 -12
  7. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/endpoint.py +9 -5
  8. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/run_tests.py +1 -0
  9. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_agent.py +22 -9
  10. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_agent_fallback_memory.py +4 -4
  11. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_agent_memory_consistency.py +4 -4
  12. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_agent_type.py +2 -0
  13. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_api_endpoint.py +13 -13
  14. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_bedrock.py +9 -1
  15. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_fallback.py +18 -7
  16. vectara_agentic-0.4.3/tests/test_gemini.py +57 -0
  17. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_groq.py +9 -1
  18. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_private_llm.py +19 -6
  19. vectara_agentic-0.4.3/tests/test_react_error_handling.py +293 -0
  20. vectara_agentic-0.4.3/tests/test_react_memory.py +257 -0
  21. vectara_agentic-0.4.3/tests/test_react_streaming.py +135 -0
  22. vectara_agentic-0.4.3/tests/test_react_workflow_events.py +395 -0
  23. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_return_direct.py +1 -0
  24. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_serialization.py +58 -20
  25. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_session_memory.py +11 -11
  26. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_together.py +9 -1
  27. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_tools.py +3 -1
  28. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_vectara_llms.py +2 -2
  29. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_vhc.py +7 -2
  30. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_workflow.py +17 -11
  31. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/_callback.py +79 -21
  32. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/_version.py +1 -1
  33. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent.py +65 -27
  34. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/serialization.py +5 -9
  35. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/streaming.py +245 -64
  36. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/utils/schemas.py +2 -2
  37. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/llm_utils.py +4 -2
  38. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3/vectara_agentic.egg-info}/PKG-INFO +127 -31
  39. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic.egg-info/SOURCES.txt +4 -0
  40. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic.egg-info/requires.txt +3 -3
  41. vectara_agentic-0.4.2/tests/test_gemini.py +0 -83
  42. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/LICENSE +0 -0
  43. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/MANIFEST.in +0 -0
  44. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/setup.cfg +0 -0
  45. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/setup.py +0 -0
  46. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/tests/test_streaming.py +0 -0
  47. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/__init__.py +0 -0
  48. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/_observability.py +0 -0
  49. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_config.py +0 -0
  50. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/__init__.py +0 -0
  51. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/factory.py +0 -0
  52. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/prompts.py +0 -0
  53. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/utils/__init__.py +0 -0
  54. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/utils/hallucination.py +0 -0
  55. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/utils/logging.py +0 -0
  56. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_core/utils/tools.py +0 -0
  57. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/agent_endpoint.py +0 -0
  58. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/db_tools.py +0 -0
  59. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/sub_query_workflow.py +0 -0
  60. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/tool_utils.py +0 -0
  61. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/tools.py +0 -0
  62. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/tools_catalog.py +0 -0
  63. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/types.py +0 -0
  64. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic/utils.py +0 -0
  65. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic.egg-info/dependency_links.txt +0 -0
  66. {vectara_agentic-0.4.2 → vectara_agentic-0.4.3}/vectara_agentic.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectara_agentic
3
- Version: 0.4.2
3
+ Version: 0.4.3
4
4
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
5
5
  Home-page: https://github.com/vectara/py-vectara-agentic
6
6
  Author: Ofer Mendelevitch
@@ -16,9 +16,9 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
16
  Requires-Python: >=3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: llama-index==0.13.1
20
- Requires-Dist: llama-index-core==0.13.1
21
- Requires-Dist: llama-index-workflow==1.0.1
19
+ Requires-Dist: llama-index==0.13.2
20
+ Requires-Dist: llama-index-core==0.13.2
21
+ Requires-Dist: llama-index-workflows==1.3.0
22
22
  Requires-Dist: llama-index-cli==0.5.0
23
23
  Requires-Dist: llama-index-indices-managed-vectara==0.5.0
24
24
  Requires-Dist: llama-index-llms-openai==0.5.2
@@ -100,16 +100,17 @@ Dynamic: summary
100
100
 
101
101
  ## 📑 Table of Contents
102
102
 
103
- - [Overview](#-overview)
104
- - [Quick Start](#-quick-start)
105
- - [Using Tools](#using-tools)
106
- - [Advanced Usage: Workflows](#advanced-usage-workflows)
107
- - [Configuration](#️-configuration)
108
- - [Migrating from v0.3.x](#-migrating-from-v03x)
109
- - [Contributing](#-contributing)
110
- - [License](#-license)
103
+ - [Overview](#overview)
104
+ - [🚀 Quick Start](#quick-start)
105
+ - [🗒️ Agent Instructions](#agent-instructions)
106
+ - [🧰 Defining Tools](#defining-tools)
107
+ - [🌊 Streaming & Real-time Responses](#streaming--real-time-responses)
108
+ - [🔍 Vectara Hallucination Correction (VHC)](#vectara-hallucination-correction-vhc)
109
+ - [🔄 Advanced Usage: Workflows](#advanced-usage-workflows)
110
+ - [🛠️ Configuration](#configuration)
111
+ - [📝 Migrating from v0.3.x](#migrating-from-v03x)
111
112
 
112
- ## Overview
113
+ ## Overview
113
114
 
114
115
  `vectara-agentic` is a Python library for developing powerful AI assistants and agents using Vectara and Agentic-RAG. It leverages the LlamaIndex Agent framework and provides helper functions to quickly create tools that connect to Vectara corpora.
115
116
 
@@ -158,7 +159,7 @@ Check out our example AI assistants:
158
159
  pip install vectara-agentic
159
160
  ```
160
161
 
161
- ## 🚀 Quick Start
162
+ ## Quick Start
162
163
 
163
164
  Let's see how we create a simple AI assistant to answer questions about financial data ingested into Vectara, using `vectara-agentic`.
164
165
 
@@ -181,7 +182,7 @@ A RAG tool calls the full Vectara RAG pipeline to provide summarized responses t
181
182
  ```python
182
183
  from pydantic import BaseModel, Field
183
184
 
184
- years = list(range(2020, 2024))
185
+ years = list(range(2020, 2025))
185
186
  tickers = {
186
187
  "AAPL": "Apple Computer",
187
188
  "GOOG": "Google",
@@ -213,7 +214,7 @@ To learn about additional arguments `create_rag_tool`, please see the full [docs
213
214
  In addition to RAG tools or search tools, you can generate additional tools the agent can use. These could be mathematical tools, tools
214
215
  that call other APIs to get more information, or any other type of tool.
215
216
 
216
- See [Agent Tools](#️-agent-tools-at-a-glance) for more information.
217
+ See [Agent Tools](#agent-tools-at-a-glance) for more information.
217
218
 
218
219
  ### 4. Create your agent
219
220
 
@@ -247,26 +248,67 @@ agent = Agent(
247
248
 
248
249
  The `topic` parameter helps identify the agent's area of expertise, while `custom_instructions` lets you customize how the agent behaves and presents information. The agent will combine these with its default general instructions to determine its complete behavior.
249
250
 
250
- The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur, and can be used to track agent steps.
251
+ The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur (tool calls, tool outputs, etc.), and can be used to track agent steps in real-time. This works with both regular chat methods (`chat()`, `achat()`) and streaming methods (`stream_chat()`, `astream_chat()`).
251
252
 
252
253
  ### 5. Run a chat interaction
253
254
 
255
+ You have multiple ways to interact with your agent:
256
+
257
+ **Standard Chat (synchronous)**
254
258
  ```python
255
259
  res = agent.chat("What was the revenue for Apple in 2021?")
256
260
  print(res.response)
257
261
  ```
258
262
 
263
+ **Async Chat**
264
+ ```python
265
+ res = await agent.achat("What was the revenue for Apple in 2021?")
266
+ print(res.response)
267
+ ```
268
+
269
+ **Streaming Chat with AgentStreamingResponse**
270
+ ```python
271
+ # Synchronous streaming
272
+ stream_response = agent.stream_chat("What was the revenue for Apple in 2021?")
273
+
274
+ # Option 1: Process stream manually
275
+ async for chunk in stream_response.async_response_gen():
276
+ print(chunk, end="", flush=True)
277
+
278
+ # Option 2: Get final response without streaming
279
+ # (Note: stream still executes, just not processed chunk by chunk)
280
+
281
+ # Get final response after streaming
282
+ final_response = stream_response.get_response()
283
+ print(f"\nFinal response: {final_response.response}")
284
+ ```
285
+
286
+ **Async Streaming Chat**
287
+ ```python
288
+ # Asynchronous streaming
289
+ stream_response = await agent.astream_chat("What was the revenue for Apple in 2021?")
290
+
291
+ # Process chunks manually
292
+ async for chunk in stream_response.async_response_gen():
293
+ print(chunk, end="", flush=True)
294
+
295
+ # Get final response after streaming
296
+ final_response = await stream_response.aget_response()
297
+ print(f"\nFinal response: {final_response.response}")
298
+ ```
299
+
259
300
  > **Note:**
260
- > 1. `vectara-agentic` also supports `achat()` as well as two streaming variants `stream_chat()` and `astream_chat()`.
261
- > 2. The response types from `chat()` and `achat()` are of type `AgentResponse`. If you just need the actual string
262
- > response it's available as the `response` variable, or just use `str()`. For advanced use-cases you can look
263
- > at other `AgentResponse` variables [such as `sources`](https://github.com/run-llama/llama_index/blob/659f9faaafbecebb6e6c65f42143c0bf19274a37/llama-index-core/llama_index/core/chat_engine/types.py#L53).
301
+ > 1. Both `chat()` and `achat()` return `AgentResponse` objects. Access the text with `.response` or use `str()`.
302
+ > 2. Streaming methods return `AgentStreamingResponse` objects that provide both real-time chunks and final responses.
303
+ > 3. For advanced use-cases, explore other `AgentResponse` properties like `sources` and `metadata`.
304
+ > 4. Streaming is ideal for long responses and real-time user interfaces. See [Streaming & Real-time Responses](#streaming--real-time-responses) for detailed examples.
305
+ > 5. The `agent_progress_callback` works with both regular chat methods (`chat()`, `achat()`) and streaming methods to track tool calls in real-time.
264
306
 
265
307
  ## Agent Instructions
266
308
 
267
- When creating an agent, it already comes with a set of general base instructions, designed carefully to enhance its operation and improve how the agent works.
309
+ When creating an agent, it already comes with a set of general base instructions, designed to enhance its operation and improve how the agent works.
268
310
 
269
- In addition, you can add `custom_instructions` that are specific to your use case that customize how the agent behaves.
311
+ In addition, you can add `custom_instructions` that are specific to your use case to customize how the agent behaves.
270
312
 
271
313
  When writing custom instructions:
272
314
  - Focus on behavior and presentation rather than tool usage (that's what tool descriptions are for)
@@ -279,7 +321,7 @@ The agent will combine both the general instructions and your custom instruction
279
321
 
280
322
  It is not recommended to change the general instructions, but it is possible as well to override them with the optional `general_instructions` parameter. If you do change them, your agent may not work as intended, so be careful if overriding these instructions.
281
323
 
282
- ## 🧰 Defining Tools
324
+ ## Defining Tools
283
325
 
284
326
  ### Vectara tools
285
327
 
@@ -333,7 +375,7 @@ The Vectara search tool allows the agent to list documents that match a query.
333
375
  This can be helpful to the agent to answer queries like "how many documents discuss the iPhone?" or other
334
376
  similar queries that require a response in terms of a list of matching documents.
335
377
 
336
- ### 🛠️ Agent Tools at a Glance
378
+ ### Agent Tools at a Glance
337
379
 
338
380
  `vectara-agentic` provides a few tools out of the box (see `ToolsCatalog` for details):
339
381
 
@@ -481,7 +523,7 @@ mult_tool = ToolsFactory().create_tool(mult_func)
481
523
 
482
524
  #### VHC Eligibility
483
525
 
484
- When creating tools, you can control whether they participate in Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
526
+ When creating tools, you can control whether their output is eligible for Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
485
527
 
486
528
  ```python
487
529
  # Tool that provides factual data - should participate in VHC
@@ -529,7 +571,61 @@ Built-in formatters include `format_as_table`, `format_as_json`, and `format_as_
529
571
 
530
572
  The human-readable format, if available, is used when using Vectara Hallucination Correction.
531
573
 
532
- ## 🔍 Vectara Hallucination Correction (VHC)
574
+ ## Streaming & Real-time Responses
575
+
576
+ `vectara-agentic` provides powerful streaming capabilities for real-time response generation, ideal for interactive applications and long-form content.
577
+
578
+ ### Why Use Streaming?
579
+
580
+ - **Better User Experience**: Users see responses as they're generated instead of waiting for completion
581
+ - **Real-time Feedback**: Perfect for chat interfaces, web applications, and interactive demos
582
+ - **Progress Visibility**: Combined with callbacks, users can see both tool usage and response generation
583
+ - **Reduced Perceived Latency**: Streaming makes applications feel faster and more responsive
584
+
585
+ ### Quick Streaming Example
586
+
587
+ ```python
588
+ # Create streaming response
589
+ stream_response = agent.stream_chat("Analyze the financial performance of tech companies in 2022")
590
+ async for chunk in stream_response.async_response_gen():
591
+ print(chunk, end="", flush=True) # Update your UI here
592
+
593
+ # Get complete response with metadata after streaming completes
594
+ final_response = stream_response.get_response()
595
+ print(f"\nSources consulted: {len(final_response.sources)}")
596
+ ```
597
+
598
+ ### Tool Call Progress Tracking
599
+
600
+ You can track tool calls and outputs in real-time with `agent_progress_callback` - this works with both regular chat and streaming methods:
601
+
602
+ ```python
603
+ from vectara_agentic import AgentStatusType
604
+
605
+ def tool_tracker(status_type, msg, event_id):
606
+ if status_type == AgentStatusType.TOOL_CALL:
607
+ print(f"🔧 Using {msg['tool_name']} with {msg['arguments']}")
608
+ elif status_type == AgentStatusType.TOOL_OUTPUT:
609
+ print(f"📊 {msg['tool_name']} completed")
610
+
611
+ agent = Agent(
612
+ tools=[your_tools],
613
+ agent_progress_callback=tool_tracker
614
+ )
615
+
616
+ # With streaming - see tool calls as they happen, plus streaming response
617
+ stream_response = await agent.astream_chat("Analyze Apple's finances")
618
+ async for chunk in stream_response.async_response_gen():
619
+ print(chunk, end="", flush=True)
620
+
621
+ # With regular chat - see tool calls as they happen, then get final response
622
+ response = await agent.achat("Analyze Apple's finances")
623
+ print(response.response)
624
+ ```
625
+
626
+ For detailed examples including FastAPI integration, Streamlit apps, and decision guidelines, see our [comprehensive streaming documentation](https://vectara.github.io/py-vectara-agentic/latest/usage/#streaming-chat-methods).
627
+
628
+ ## Vectara Hallucination Correction (VHC)
533
629
 
534
630
  `vectara-agentic` provides built-in support for Vectara Hallucination Correction (VHC), which analyzes agent responses and corrects any detected hallucinations based on the factual content retrieved by VHC-eligible tools.
535
631
 
@@ -587,7 +683,7 @@ agent = Agent(
587
683
 
588
684
  This helps catch errors where your instructions reference tools that aren't available to the agent.
589
685
 
590
- ## 🔄 Advanced Usage: Workflows
686
+ ## Advanced Usage: Workflows
591
687
 
592
688
  In addition to standard chat interactions, `vectara-agentic` supports custom workflows via the `run()` method.
593
689
  Workflows allow you to structure multi-step interactions where inputs and outputs are validated using Pydantic models.
@@ -758,7 +854,7 @@ The workflow works in two steps:
758
854
  - You need to implement complex business logic
759
855
  - You want to integrate with external systems or APIs in a specific way
760
856
 
761
- ## 🛠️ Configuration
857
+ ## Configuration
762
858
 
763
859
  ### Configuring Vectara-agentic
764
860
 
@@ -789,7 +885,7 @@ The `AgentConfig` object may include the following items:
789
885
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
790
886
 
791
887
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
792
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash).
888
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
793
889
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
794
890
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
795
891
 
@@ -826,7 +922,7 @@ agent = Agent(
826
922
  )
827
923
  ```
828
924
 
829
- ## 🚀 Migrating from v0.3.x
925
+ ## Migrating from v0.3.x
830
926
 
831
927
  If you're upgrading from v0.3.x, please note the following breaking changes in v0.4.0:
832
928
 
@@ -26,16 +26,17 @@
26
26
 
27
27
  ## 📑 Table of Contents
28
28
 
29
- - [Overview](#-overview)
30
- - [Quick Start](#-quick-start)
31
- - [Using Tools](#using-tools)
32
- - [Advanced Usage: Workflows](#advanced-usage-workflows)
33
- - [Configuration](#️-configuration)
34
- - [Migrating from v0.3.x](#-migrating-from-v03x)
35
- - [Contributing](#-contributing)
36
- - [License](#-license)
37
-
38
- ## ✨ Overview
29
+ - [Overview](#overview)
30
+ - [🚀 Quick Start](#quick-start)
31
+ - [🗒️ Agent Instructions](#agent-instructions)
32
+ - [🧰 Defining Tools](#defining-tools)
33
+ - [🌊 Streaming & Real-time Responses](#streaming--real-time-responses)
34
+ - [🔍 Vectara Hallucination Correction (VHC)](#vectara-hallucination-correction-vhc)
35
+ - [🔄 Advanced Usage: Workflows](#advanced-usage-workflows)
36
+ - [🛠️ Configuration](#configuration)
37
+ - [📝 Migrating from v0.3.x](#migrating-from-v03x)
38
+
39
+ ## Overview
39
40
 
40
41
  `vectara-agentic` is a Python library for developing powerful AI assistants and agents using Vectara and Agentic-RAG. It leverages the LlamaIndex Agent framework and provides helper functions to quickly create tools that connect to Vectara corpora.
41
42
 
@@ -84,7 +85,7 @@ Check out our example AI assistants:
84
85
  pip install vectara-agentic
85
86
  ```
86
87
 
87
- ## 🚀 Quick Start
88
+ ## Quick Start
88
89
 
89
90
  Let's see how we create a simple AI assistant to answer questions about financial data ingested into Vectara, using `vectara-agentic`.
90
91
 
@@ -107,7 +108,7 @@ A RAG tool calls the full Vectara RAG pipeline to provide summarized responses t
107
108
  ```python
108
109
  from pydantic import BaseModel, Field
109
110
 
110
- years = list(range(2020, 2024))
111
+ years = list(range(2020, 2025))
111
112
  tickers = {
112
113
  "AAPL": "Apple Computer",
113
114
  "GOOG": "Google",
@@ -139,7 +140,7 @@ To learn about additional arguments `create_rag_tool`, please see the full [docs
139
140
  In addition to RAG tools or search tools, you can generate additional tools the agent can use. These could be mathematical tools, tools
140
141
  that call other APIs to get more information, or any other type of tool.
141
142
 
142
- See [Agent Tools](#️-agent-tools-at-a-glance) for more information.
143
+ See [Agent Tools](#agent-tools-at-a-glance) for more information.
143
144
 
144
145
  ### 4. Create your agent
145
146
 
@@ -173,26 +174,67 @@ agent = Agent(
173
174
 
174
175
  The `topic` parameter helps identify the agent's area of expertise, while `custom_instructions` lets you customize how the agent behaves and presents information. The agent will combine these with its default general instructions to determine its complete behavior.
175
176
 
176
- The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur, and can be used to track agent steps.
177
+ The `agent_progress_callback` argument is an optional function that will be called when various Agent events occur (tool calls, tool outputs, etc.), and can be used to track agent steps in real-time. This works with both regular chat methods (`chat()`, `achat()`) and streaming methods (`stream_chat()`, `astream_chat()`).
177
178
 
178
179
  ### 5. Run a chat interaction
179
180
 
181
+ You have multiple ways to interact with your agent:
182
+
183
+ **Standard Chat (synchronous)**
180
184
  ```python
181
185
  res = agent.chat("What was the revenue for Apple in 2021?")
182
186
  print(res.response)
183
187
  ```
184
188
 
189
+ **Async Chat**
190
+ ```python
191
+ res = await agent.achat("What was the revenue for Apple in 2021?")
192
+ print(res.response)
193
+ ```
194
+
195
+ **Streaming Chat with AgentStreamingResponse**
196
+ ```python
197
+ # Synchronous streaming
198
+ stream_response = agent.stream_chat("What was the revenue for Apple in 2021?")
199
+
200
+ # Option 1: Process stream manually
201
+ async for chunk in stream_response.async_response_gen():
202
+ print(chunk, end="", flush=True)
203
+
204
+ # Option 2: Get final response without streaming
205
+ # (Note: stream still executes, just not processed chunk by chunk)
206
+
207
+ # Get final response after streaming
208
+ final_response = stream_response.get_response()
209
+ print(f"\nFinal response: {final_response.response}")
210
+ ```
211
+
212
+ **Async Streaming Chat**
213
+ ```python
214
+ # Asynchronous streaming
215
+ stream_response = await agent.astream_chat("What was the revenue for Apple in 2021?")
216
+
217
+ # Process chunks manually
218
+ async for chunk in stream_response.async_response_gen():
219
+ print(chunk, end="", flush=True)
220
+
221
+ # Get final response after streaming
222
+ final_response = await stream_response.aget_response()
223
+ print(f"\nFinal response: {final_response.response}")
224
+ ```
225
+
185
226
  > **Note:**
186
- > 1. `vectara-agentic` also supports `achat()` as well as two streaming variants `stream_chat()` and `astream_chat()`.
187
- > 2. The response types from `chat()` and `achat()` are of type `AgentResponse`. If you just need the actual string
188
- > response it's available as the `response` variable, or just use `str()`. For advanced use-cases you can look
189
- > at other `AgentResponse` variables [such as `sources`](https://github.com/run-llama/llama_index/blob/659f9faaafbecebb6e6c65f42143c0bf19274a37/llama-index-core/llama_index/core/chat_engine/types.py#L53).
227
+ > 1. Both `chat()` and `achat()` return `AgentResponse` objects. Access the text with `.response` or use `str()`.
228
+ > 2. Streaming methods return `AgentStreamingResponse` objects that provide both real-time chunks and final responses.
229
+ > 3. For advanced use-cases, explore other `AgentResponse` properties like `sources` and `metadata`.
230
+ > 4. Streaming is ideal for long responses and real-time user interfaces. See [Streaming & Real-time Responses](#streaming--real-time-responses) for detailed examples.
231
+ > 5. The `agent_progress_callback` works with both regular chat methods (`chat()`, `achat()`) and streaming methods to track tool calls in real-time.
190
232
 
191
233
  ## Agent Instructions
192
234
 
193
- When creating an agent, it already comes with a set of general base instructions, designed carefully to enhance its operation and improve how the agent works.
235
+ When creating an agent, it already comes with a set of general base instructions, designed to enhance its operation and improve how the agent works.
194
236
 
195
- In addition, you can add `custom_instructions` that are specific to your use case that customize how the agent behaves.
237
+ In addition, you can add `custom_instructions` that are specific to your use case to customize how the agent behaves.
196
238
 
197
239
  When writing custom instructions:
198
240
  - Focus on behavior and presentation rather than tool usage (that's what tool descriptions are for)
@@ -205,7 +247,7 @@ The agent will combine both the general instructions and your custom instruction
205
247
 
206
248
  It is not recommended to change the general instructions, but it is possible as well to override them with the optional `general_instructions` parameter. If you do change them, your agent may not work as intended, so be careful if overriding these instructions.
207
249
 
208
- ## 🧰 Defining Tools
250
+ ## Defining Tools
209
251
 
210
252
  ### Vectara tools
211
253
 
@@ -259,7 +301,7 @@ The Vectara search tool allows the agent to list documents that match a query.
259
301
  This can be helpful to the agent to answer queries like "how many documents discuss the iPhone?" or other
260
302
  similar queries that require a response in terms of a list of matching documents.
261
303
 
262
- ### 🛠️ Agent Tools at a Glance
304
+ ### Agent Tools at a Glance
263
305
 
264
306
  `vectara-agentic` provides a few tools out of the box (see `ToolsCatalog` for details):
265
307
 
@@ -407,7 +449,7 @@ mult_tool = ToolsFactory().create_tool(mult_func)
407
449
 
408
450
  #### VHC Eligibility
409
451
 
410
- When creating tools, you can control whether they participate in Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
452
+ When creating tools, you can control whether their output is eligible for Vectara Hallucination Correction, by using the `vhc_eligible` parameter:
411
453
 
412
454
  ```python
413
455
  # Tool that provides factual data - should participate in VHC
@@ -455,7 +497,61 @@ Built-in formatters include `format_as_table`, `format_as_json`, and `format_as_
455
497
 
456
498
  The human-readable format, if available, is used when using Vectara Hallucination Correction.
457
499
 
458
- ## 🔍 Vectara Hallucination Correction (VHC)
500
+ ## Streaming & Real-time Responses
501
+
502
+ `vectara-agentic` provides powerful streaming capabilities for real-time response generation, ideal for interactive applications and long-form content.
503
+
504
+ ### Why Use Streaming?
505
+
506
+ - **Better User Experience**: Users see responses as they're generated instead of waiting for completion
507
+ - **Real-time Feedback**: Perfect for chat interfaces, web applications, and interactive demos
508
+ - **Progress Visibility**: Combined with callbacks, users can see both tool usage and response generation
509
+ - **Reduced Perceived Latency**: Streaming makes applications feel faster and more responsive
510
+
511
+ ### Quick Streaming Example
512
+
513
+ ```python
514
+ # Create streaming response
515
+ stream_response = agent.stream_chat("Analyze the financial performance of tech companies in 2022")
516
+ async for chunk in stream_response.async_response_gen():
517
+ print(chunk, end="", flush=True) # Update your UI here
518
+
519
+ # Get complete response with metadata after streaming completes
520
+ final_response = stream_response.get_response()
521
+ print(f"\nSources consulted: {len(final_response.sources)}")
522
+ ```
523
+
524
+ ### Tool Call Progress Tracking
525
+
526
+ You can track tool calls and outputs in real-time with `agent_progress_callback` - this works with both regular chat and streaming methods:
527
+
528
+ ```python
529
+ from vectara_agentic import AgentStatusType
530
+
531
+ def tool_tracker(status_type, msg, event_id):
532
+ if status_type == AgentStatusType.TOOL_CALL:
533
+ print(f"🔧 Using {msg['tool_name']} with {msg['arguments']}")
534
+ elif status_type == AgentStatusType.TOOL_OUTPUT:
535
+ print(f"📊 {msg['tool_name']} completed")
536
+
537
+ agent = Agent(
538
+ tools=[your_tools],
539
+ agent_progress_callback=tool_tracker
540
+ )
541
+
542
+ # With streaming - see tool calls as they happen, plus streaming response
543
+ stream_response = await agent.astream_chat("Analyze Apple's finances")
544
+ async for chunk in stream_response.async_response_gen():
545
+ print(chunk, end="", flush=True)
546
+
547
+ # With regular chat - see tool calls as they happen, then get final response
548
+ response = await agent.achat("Analyze Apple's finances")
549
+ print(response.response)
550
+ ```
551
+
552
+ For detailed examples including FastAPI integration, Streamlit apps, and decision guidelines, see our [comprehensive streaming documentation](https://vectara.github.io/py-vectara-agentic/latest/usage/#streaming-chat-methods).
553
+
554
+ ## Vectara Hallucination Correction (VHC)
459
555
 
460
556
  `vectara-agentic` provides built-in support for Vectara Hallucination Correction (VHC), which analyzes agent responses and corrects any detected hallucinations based on the factual content retrieved by VHC-eligible tools.
461
557
 
@@ -513,7 +609,7 @@ agent = Agent(
513
609
 
514
610
  This helps catch errors where your instructions reference tools that aren't available to the agent.
515
611
 
516
- ## 🔄 Advanced Usage: Workflows
612
+ ## Advanced Usage: Workflows
517
613
 
518
614
  In addition to standard chat interactions, `vectara-agentic` supports custom workflows via the `run()` method.
519
615
  Workflows allow you to structure multi-step interactions where inputs and outputs are validated using Pydantic models.
@@ -684,7 +780,7 @@ The workflow works in two steps:
684
780
  - You need to implement complex business logic
685
781
  - You want to integrate with external systems or APIs in a specific way
686
782
 
687
- ## 🛠️ Configuration
783
+ ## Configuration
688
784
 
689
785
  ### Configuring Vectara-agentic
690
786
 
@@ -715,7 +811,7 @@ The `AgentConfig` object may include the following items:
715
811
  - `main_llm_provider` and `tool_llm_provider`: the LLM provider for main agent and for the tools. Valid values are `OPENAI`, `ANTHROPIC`, `TOGETHER`, `GROQ`, `COHERE`, `BEDROCK`, `GEMINI` (default: `OPENAI`).
716
812
 
717
813
  > **Note:** Fireworks AI support has been removed. If you were using Fireworks, please migrate to one of the supported providers listed above.
718
- - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash).
814
+ - `main_llm_model_name` and `tool_llm_model_name`: agent model name for agent and tools (default depends on provider: OpenAI uses gpt-4.1-mini, Gemini uses gemini-2.5-flash-lite).
719
815
  - `observer`: the observer type; should be `ARIZE_PHOENIX` or if undefined no observation framework will be used.
720
816
  - `endpoint_api_key`: a secret key if using the API endpoint option (defaults to `dev-api-key`)
721
817
 
@@ -752,7 +848,7 @@ agent = Agent(
752
848
  )
753
849
  ```
754
850
 
755
- ## 🚀 Migrating from v0.3.x
851
+ ## Migrating from v0.3.x
756
852
 
757
853
  If you're upgrading from v0.3.x, please note the following breaking changes in v0.4.0:
758
854
 
@@ -1,6 +1,6 @@
1
- llama-index==0.13.1
2
- llama-index-core==0.13.1
3
- llama-index-workflow==1.0.1
1
+ llama-index==0.13.2
2
+ llama-index-core==0.13.2
3
+ llama-index-workflows==1.3.0
4
4
  llama-index-cli==0.5.0
5
5
  llama-index-indices-managed-vectara==0.5.0
6
6
  llama-index-llms-openai==0.5.2
@@ -4,4 +4,5 @@ Tests package for vectara_agentic.
4
4
 
5
5
  # Suppress external dependency warnings globally for all tests
6
6
  import warnings
7
+
7
8
  warnings.simplefilter("ignore", DeprecationWarning)