vectara-agentic 0.2.17__tar.gz → 0.2.18__tar.gz

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.

Potentially problematic release: this version of vectara-agentic might be problematic.

Files changed (44)
  1. {vectara_agentic-0.2.17/vectara_agentic.egg-info → vectara_agentic-0.2.18}/PKG-INFO +4 -3
  2. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/requirements.txt +3 -2
  3. vectara_agentic-0.2.18/tests/test_api_endpoint.py +126 -0
  4. vectara_agentic-0.2.18/tests/test_gemini.py +115 -0
  5. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_groq.py +0 -7
  6. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_tools.py +9 -33
  7. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/_version.py +1 -1
  8. vectara_agentic-0.2.18/vectara_agentic/agent_endpoint.py +252 -0
  9. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/llm_utils.py +0 -17
  10. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/tool_utils.py +1 -43
  11. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18/vectara_agentic.egg-info}/PKG-INFO +4 -3
  12. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic.egg-info/SOURCES.txt +2 -0
  13. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic.egg-info/requires.txt +3 -2
  14. vectara_agentic-0.2.17/vectara_agentic/agent_endpoint.py +0 -62
  15. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/LICENSE +0 -0
  16. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/MANIFEST.in +0 -0
  17. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/README.md +0 -0
  18. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/setup.cfg +0 -0
  19. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/setup.py +0 -0
  20. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/__init__.py +0 -0
  21. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/endpoint.py +0 -0
  22. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_agent.py +0 -0
  23. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_agent_planning.py +0 -0
  24. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_agent_type.py +0 -0
  25. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_fallback.py +0 -0
  26. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_private_llm.py +0 -0
  27. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_return_direct.py +0 -0
  28. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_serialization.py +0 -0
  29. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_vectara_llms.py +0 -0
  30. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/tests/test_workflow.py +0 -0
  31. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/__init__.py +0 -0
  32. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/_callback.py +0 -0
  33. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/_observability.py +0 -0
  34. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/_prompts.py +0 -0
  35. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/agent.py +0 -0
  36. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/agent_config.py +0 -0
  37. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/db_tools.py +0 -0
  38. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/sub_query_workflow.py +0 -0
  39. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/tools.py +0 -0
  40. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/tools_catalog.py +0 -0
  41. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/types.py +0 -0
  42. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic/utils.py +0 -0
  43. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic.egg-info/dependency_links.txt +0 -0
  44. {vectara_agentic-0.2.17 → vectara_agentic-0.2.18}/vectara_agentic.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vectara_agentic
- Version: 0.2.17
+ Version: 0.2.18
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
  Home-page: https://github.com/vectara/py-vectara-agentic
  Author: Ofer Mendelevitch
@@ -20,8 +20,9 @@ Requires-Dist: llama-index==0.12.35
  Requires-Dist: llama-index-indices-managed-vectara==0.4.5
  Requires-Dist: llama-index-agent-llm-compiler==0.3.0
  Requires-Dist: llama-index-agent-lats==0.3.0
- Requires-Dist: llama-index-agent-openai==0.4.7
- Requires-Dist: llama-index-llms-openai==0.3.38
+ Requires-Dist: llama-index-agent-openai==0.4.8
+ Requires-Dist: llama-index-llms-openai==0.3.42
+ Requires-Dist: llama-index-llms-openai-like>=0.3.5
  Requires-Dist: llama-index-llms-anthropic==0.6.10
  Requires-Dist: llama-index-llms-together==0.3.1
  Requires-Dist: llama-index-llms-groq==0.3.1
@@ -2,8 +2,9 @@ llama-index==0.12.35
  llama-index-indices-managed-vectara==0.4.5
  llama-index-agent-llm-compiler==0.3.0
  llama-index-agent-lats==0.3.0
- llama-index-agent-openai==0.4.7
- llama-index-llms-openai==0.3.38
+ llama-index-agent-openai==0.4.8
+ llama-index-llms-openai==0.3.42
+ llama-index-llms-openai-like>=0.3.5
  llama-index-llms-anthropic==0.6.10
  llama-index-llms-together==0.3.1
  llama-index-llms-groq==0.3.1
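
Beyond the version bumps, the notable change here is the new llama-index-llms-openai-like dependency, which provides LlamaIndex's OpenAILike client for OpenAI-compatible endpoints (for example, self-hosted or private LLMs — this package already has a private-LLM path in llm_utils.py). A minimal sketch of how that client is typically instantiated; the model name, base URL, and key below are placeholders, not values taken from this package:

from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(
    model="my-private-model",             # placeholder model name
    api_base="http://localhost:8000/v1",  # placeholder OpenAI-compatible endpoint
    api_key="fake-key",                   # placeholder key
    is_chat_model=True,                   # treat the endpoint as a chat-completions API
    is_function_calling_model=True,       # advertise tool-calling support
)
print(llm.complete("Hello"))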
@@ -0,0 +1,126 @@
+ import unittest
+ from uuid import UUID
+
+ from fastapi.testclient import TestClient
+
+ # Adjust this import to point at the file where you put create_app
+ from vectara_agentic.agent_endpoint import create_app
+ from vectara_agentic.agent import Agent
+ from vectara_agentic.agent_config import AgentConfig
+
+
+ class DummyAgent(Agent):
+     def __init__(self):
+         # satisfy Agent.__init__(tools: ...)
+         super().__init__(tools=[])
+
+     def chat(self, message: str) -> str:
+         return f"Echo: {message}"
+
+ class APITestCase(unittest.TestCase):
+     @classmethod
+     def setUpClass(cls):
+         cls.agent = DummyAgent()
+         # Override only the endpoint_api_key, leave everything else as default
+         cls.config = AgentConfig(endpoint_api_key="testkey")
+         app = create_app(cls.agent, cls.config)
+         cls.client = TestClient(app)
+         cls.headers = {"X-API-Key": cls.config.endpoint_api_key}
+
+     def test_chat_success(self):
+         r = self.client.get("/chat", params={"message": "hello"}, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         self.assertEqual(r.json(), {"response": "Echo: hello"})
+
+     def test_chat_empty_message(self):
+         r = self.client.get("/chat", params={"message": ""}, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("No message provided", r.json()["detail"])
+
+     def test_chat_unauthorized(self):
+         r = self.client.get("/chat", params={"message": "hello"}, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+     def test_completions_success(self):
+         payload = {"model": "m1", "prompt": "test"}
+         r = self.client.post("/v1/completions", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # ID prefix + valid UUID check
+         self.assertTrue(data["id"].startswith("cmpl-"))
+         UUID(data["id"].split("-", 1)[1])
+
+         self.assertEqual(data["model"], "m1")
+         self.assertEqual(data["choices"][0]["text"], "Echo: test")
+         # prompt_tokens=1, completion_tokens=2 ("Echo:", "test")
+         self.assertEqual(data["usage"]["prompt_tokens"], 1)
+         self.assertEqual(data["usage"]["completion_tokens"], 2)
+
+     def test_completions_no_prompt(self):
+         payload = {"model": "m1"}  # missing prompt
+         r = self.client.post("/v1/completions", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("`prompt` is required", r.json()["detail"])
+
+     def test_completions_unauthorized(self):
+         payload = {"model": "m1", "prompt": "hi"}
+         r = self.client.post("/v1/completions", json=payload, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+     def test_chat_completion_success(self):
+         payload = {
+             "model": "m1",
+             "messages": [{"role": "user", "content": "hello"}]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # ID prefix + valid UUID check
+         self.assertTrue(data["id"].startswith("chatcmpl-"))
+         UUID(data["id"].split("-", 1)[1])
+
+         self.assertEqual(data["model"], "m1")
+         self.assertEqual(data["choices"][0]["message"]["content"], "Echo: hello")
+
+         # prompt_tokens=1, completion_tokens=2 ("Echo:", "hello")
+         self.assertEqual(data["usage"]["prompt_tokens"], 1)
+         self.assertEqual(data["usage"]["completion_tokens"], 2)
+
+     def test_chat_completion_multiple_user_messages(self):
+         payload = {
+             "model": "m1",
+             "messages": [
+                 {"role": "system", "content": "ignore me"},
+                 {"role": "user", "content": "foo"},
+                 {"role": "assistant", "content": "pong"},
+                 {"role": "user", "content": "bar"}
+             ]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # Should concatenate only user messages: "foo bar"
+         self.assertEqual(data["choices"][0]["message"]["content"], "Echo: foo bar")
+         self.assertEqual(data["usage"]["prompt_tokens"], 2)  # "foo","bar"
+         self.assertEqual(data["usage"]["completion_tokens"], 3)  # "Echo:","foo","bar"
+
+     def test_chat_completion_no_messages(self):
+         payload = {"model": "m1", "messages": []}
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("`messages` is required", r.json()["detail"])
+
+     def test_chat_completion_unauthorized(self):
+         payload = {
+             "model": "m1",
+             "messages": [{"role": "user", "content": "oops"}]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+
+ if __name__ == "__main__":
+     unittest.main()
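
The tests above double as documentation of the new HTTP surface. For reference, a sketch of exercising the same three routes against a running server with the requests library; the host, port, and key are placeholders, while the routes, payloads, and X-API-Key header match the tests:

import requests

BASE = "http://localhost:8000"        # placeholder host/port
HEADERS = {"X-API-Key": "testkey"}    # placeholder key

# GET /chat with a `message` query parameter
r = requests.get(f"{BASE}/chat", params={"message": "hello"}, headers=HEADERS)
print(r.json())  # {"response": "..."}

# POST /v1/completions with an OpenAI-style completion payload
r = requests.post(
    f"{BASE}/v1/completions",
    json={"model": "m1", "prompt": "hello"},
    headers=HEADERS,
)
print(r.json()["choices"][0]["text"])

# POST /v1/chat with an OpenAI-style chat payload
r = requests.post(
    f"{BASE}/v1/chat",
    json={"model": "m1", "messages": [{"role": "user", "content": "hello"}]},
    headers=HEADERS,
)
print(r.json()["choices"][0]["message"]["content"])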
@@ -0,0 +1,115 @@
+ import unittest
+
+ from pydantic import Field, BaseModel
+
+ from vectara_agentic.agent import Agent, AgentType
+ from vectara_agentic.agent_config import AgentConfig
+ from vectara_agentic.tools import VectaraToolFactory
+ from vectara_agentic.types import ModelProvider
+
+
+ import nest_asyncio
+ nest_asyncio.apply()
+
+ tickers = {
+     "C": "Citigroup",
+     "COF": "Capital One",
+     "JPM": "JPMorgan Chase",
+     "AAPL": "Apple Computer",
+     "GOOG": "Google",
+     "AMZN": "Amazon",
+     "SNOW": "Snowflake",
+     "TEAM": "Atlassian",
+     "TSLA": "Tesla",
+     "NVDA": "Nvidia",
+     "MSFT": "Microsoft",
+     "AMD": "Advanced Micro Devices",
+     "INTC": "Intel",
+     "NFLX": "Netflix",
+     "STT": "State Street",
+     "BK": "Bank of New York Mellon",
+ }
+ years = list(range(2015, 2025))
+
+
+ def mult(x: float, y: float) -> float:
+     "Multiply two numbers"
+     return x * y
+
+
+ def get_company_info() -> list[str]:
+     """
+     Returns a dictionary of companies you can query about. Always check this before using any other tool.
+     The output is a dictionary of valid ticker symbols mapped to company names.
+     You can use this to identify the companies you can query about, and their ticker information.
+     """
+     return tickers
+
+
+ def get_valid_years() -> list[str]:
+     """
+     Returns a list of the years for which financial reports are available.
+     Always check this before using any other tool.
+     """
+     return years
+
+
+ fc_config_gemini = AgentConfig(
+     agent_type=AgentType.FUNCTION_CALLING,
+     main_llm_provider=ModelProvider.GEMINI,
+     tool_llm_provider=ModelProvider.GEMINI,
+ )
+
+
+ class TestGEMINI(unittest.TestCase):
+
+     def test_tool_with_many_arguments(self):
+
+         vectara_corpus_key = "vectara-docs_1"
+         vectara_api_key = "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA"
+         vec_factory = VectaraToolFactory(vectara_corpus_key, vectara_api_key)
+
+         class QueryToolArgs(BaseModel):
+             arg1: str = Field(description="the first argument", examples=["val1"])
+             arg2: str = Field(description="the second argument", examples=["val2"])
+             arg3: str = Field(description="the third argument", examples=["val3"])
+             arg4: str = Field(description="the fourth argument", examples=["val4"])
+             arg5: str = Field(description="the fifth argument", examples=["val5"])
+             arg6: str = Field(description="the sixth argument", examples=["val6"])
+             arg7: str = Field(description="the seventh argument", examples=["val7"])
+             arg8: str = Field(description="the eighth argument", examples=["val8"])
+             arg9: str = Field(description="the ninth argument", examples=["val9"])
+             arg10: str = Field(description="the tenth argument", examples=["val10"])
+             arg11: str = Field(description="the eleventh argument", examples=["val11"])
+             arg12: str = Field(description="the twelfth argument", examples=["val12"])
+             arg13: str = Field(
+                 description="the thirteenth argument", examples=["val13"]
+             )
+             arg14: str = Field(
+                 description="the fourteenth argument", examples=["val14"]
+             )
+             arg15: str = Field(description="the fifteenth argument", examples=["val15"])
+
+         query_tool_1 = vec_factory.create_rag_tool(
+             tool_name="rag_tool",
+             tool_description="""
+             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
+             We are using this tool to test the tool factory works and does not crash with OpenAI.
+             """,
+             tool_args_schema=QueryToolArgs,
+         )
+
+         agent = Agent(
+             tools=[query_tool_1],
+             topic="Sample topic",
+             custom_instructions="Call the tool with 15 arguments",
+             agent_config=fc_config_gemini,
+         )
+         res = agent.chat("What is the stock price?")
+         self.assertTrue(
+             any(sub in str(res) for sub in ["I don't know", "I do not have"])
+         )
+
+
+ if __name__ == "__main__":
+     unittest.main()
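
Note that this test talks to a live Gemini model, so running it locally requires Gemini credentials; llama-index's Gemini integration typically reads a GOOGLE_API_KEY environment variable (an assumption about the environment, not something this diff configures). A sketch of a preflight check before running just this file:

# Assumes GOOGLE_API_KEY holds valid Gemini credentials (see note above).
import os

assert "GOOGLE_API_KEY" in os.environ, "Gemini credentials required"
# Then, from the repository root:
#   python -m unittest tests.test_gemini -v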
@@ -54,13 +54,6 @@ def get_valid_years() -> list[str]:
      return years


- config_gemini = AgentConfig(
-     agent_type=AgentType.FUNCTION_CALLING,
-     main_llm_provider=ModelProvider.GEMINI,
-     tool_llm_provider=ModelProvider.GEMINI,
- )
-
-
  fc_config_groq = AgentConfig(
      agent_type=AgentType.FUNCTION_CALLING,
      main_llm_provider=ModelProvider.GROQ,
@@ -251,17 +251,19 @@ class TestToolsPackage(unittest.TestCase):
              arg11: str = Field(description="the eleventh argument", examples=["val11"])
              arg12: str = Field(description="the twelfth argument", examples=["val12"])
              arg13: str = Field(description="the thirteenth argument", examples=["val13"])
+             arg14: str = Field(description="the fourteenth argument", examples=["val14"])
+             arg15: str = Field(description="the fifteenth argument", examples=["val15"])

          query_tool_1 = vec_factory.create_rag_tool(
              tool_name="rag_tool",
              tool_description="""
-             A dummy tool that takes 13 arguments and returns a response (str) to the user query based on the data in this corpus.
+             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
              We are using this tool to test the tool factory works and does not crash with OpenAI.
              """,
              tool_args_schema=QueryToolArgs,
          )

-         # Test with 13 arguments which go over the 1024 limit.
+         # Test with 15 arguments to make sure no issues occur
          config = AgentConfig(
              agent_type=AgentType.OPENAI
          )
@@ -272,9 +274,9 @@ class TestToolsPackage(unittest.TestCase):
              agent_config=config,
          )
          res = agent.chat("What is the stock price for Yahoo on 12/31/22?")
-         self.assertIn("maximum length of 1024 characters", str(res))
+         self.assertNotIn("maximum length of 1024 characters", str(res))

-         # Same test but with GROQ
+         # Same test but with GROQ, should not have this limit
          config = AgentConfig(
              agent_type=AgentType.FUNCTION_CALLING,
              main_llm_provider=ModelProvider.GROQ,
@@ -283,13 +285,13 @@ class TestToolsPackage(unittest.TestCase):
          agent = Agent(
              tools=[query_tool_1],
              topic="Sample topic",
-             custom_instructions="Call the tool with 13 arguments for GROQ",
+             custom_instructions="Call the tool with 15 arguments for GROQ",
              agent_config=config,
          )
          res = agent.chat("What is the stock price?")
          self.assertNotIn("maximum length of 1024 characters", str(res))

-         # Same test but with ANTHROPIC
+         # Same test but with ANTHROPIC, should not have this limit
          config = AgentConfig(
              agent_type=AgentType.FUNCTION_CALLING,
              main_llm_provider=ModelProvider.ANTHROPIC,
@@ -298,38 +300,12 @@ class TestToolsPackage(unittest.TestCase):
          agent = Agent(
              tools=[query_tool_1],
              topic="Sample topic",
-             custom_instructions="Call the tool with 13 arguments for ANTHROPIC",
+             custom_instructions="Call the tool with 15 arguments for ANTHROPIC",
              agent_config=config,
          )
          res = agent.chat("What is the stock price?")
-         # ANTHROPIC does not have that 1024 limit
          self.assertIn("stock price", str(res))

-         # But using Compact_docstring=True, we can pass 13 arguments successfully.
-         vec_factory = VectaraToolFactory(
-             vectara_corpus_key, vectara_api_key, compact_docstring=True
-         )
-         query_tool_2 = vec_factory.create_rag_tool(
-             tool_name="rag_tool",
-             tool_description="""
-             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
-             We are using this tool to test the tool factory works and doesn not crash with OpenAI.
-             """,
-             tool_args_schema=QueryToolArgs,
-         )
-
-         config = AgentConfig()
-         agent = Agent(
-             tools=[query_tool_2],
-             topic="Sample topic",
-             custom_instructions="Call the tool with 15 arguments",
-             agent_config=config,
-         )
-         res = agent.chat("What is the stock price?")
-         self.assertTrue(
-             any(sub in str(res) for sub in ["I don't know", "stock price"])
-         )
-
      def test_public_repo(self):
          vectara_corpus_key = "vectara-docs_1"
          vectara_api_key = "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA"
@@ -1,4 +1,4 @@
  """
  Define the version of the package.
  """
- __version__ = "0.2.17"
+ __version__ = "0.2.18"
@@ -0,0 +1,252 @@
+ """
+ agent_endpoint.py
+ """
+
+ import logging
+ import time
+ import uuid
+ from typing import Any, List, Literal, Optional, Union
+
+ from fastapi import Depends, FastAPI, HTTPException
+ from fastapi.security.api_key import APIKeyHeader
+ from pydantic import BaseModel, Field
+ import uvicorn
+
+ from .agent import Agent
+ from .agent_config import AgentConfig
+
+
+ class ChatRequest(BaseModel):
+     """Request schema for the /chat endpoint."""
+
+     message: str
+
+
+ class CompletionRequest(BaseModel):
+     """Request schema for the /v1/completions endpoint."""
+
+     model: str
+     prompt: Optional[Union[str, List[str]]] = None
+     max_tokens: Optional[int] = Field(16, ge=1)
+     temperature: Optional[float] = Field(1.0, ge=0.0, le=2.0)
+     top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0)
+     n: Optional[int] = Field(1, ge=1)
+     stop: Optional[Union[str, List[str]]] = None
+
+
+ class Choice(BaseModel):
+     """Choice schema returned in CompletionResponse."""
+
+     text: str
+     index: int
+     logprobs: Optional[Any] = None
+     finish_reason: Literal["stop", "length", "error", None]
+
+
+ class CompletionUsage(BaseModel):
+     """Token usage details in CompletionResponse."""
+
+     prompt_tokens: int
+     completion_tokens: int
+     total_tokens: int
+
+
+ class CompletionResponse(BaseModel):
+     """Response schema for the /v1/completions endpoint."""
+
+     id: str
+     object: Literal["text_completion"]
+     created: int
+     model: str
+     choices: List[Choice]
+     usage: CompletionUsage
+
+
+ class ChatMessage(BaseModel):
+     """Schema for individual chat messages in ChatCompletionRequest."""
+     role: Literal["system", "user", "assistant"]
+     content: str
+
+
+ class ChatCompletionRequest(BaseModel):
+     """Request schema for the /v1/chat endpoint."""
+     model: str
+     messages: List[ChatMessage]
+     temperature: Optional[float] = Field(1.0, ge=0.0, le=2.0)
+     top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0)
+     n: Optional[int] = Field(1, ge=1)
+
+
+ class ChatCompletionChoice(BaseModel):
+     """Choice schema returned in ChatCompletionResponse."""
+     index: int
+     message: ChatMessage
+     finish_reason: Literal["stop", "length", "error", None]
+
+
+ class ChatCompletionResponse(BaseModel):
+     """Response schema for the /v1/chat endpoint."""
+     id: str
+     object: Literal["chat.completion"]
+     created: int
+     model: str
+     choices: List[ChatCompletionChoice]
+     usage: CompletionUsage
+
+
+ def create_app(agent: Agent, config: AgentConfig) -> FastAPI:
+     """
+     Create and configure the FastAPI app.
+
+     Args:
+         agent (Agent): The agent instance to handle chat/completion.
+         config (AgentConfig): Configuration containing the API key.
+
+     Returns:
+         FastAPI: Configured FastAPI application.
+     """
+     app = FastAPI()
+     logger = logging.getLogger("uvicorn.error")
+     logging.basicConfig(level=logging.INFO)
+
+     api_key_header = APIKeyHeader(name="X-API-Key")
+
+     async def _verify_api_key(api_key: str = Depends(api_key_header)):
+         """
+         Dependency that verifies the X-API-Key header.
+
+         Raises:
+             HTTPException(403): If the provided key does not match.
+
+         Returns:
+             bool: True if key is valid.
+         """
+         if api_key != config.endpoint_api_key:
+             raise HTTPException(status_code=403, detail="Unauthorized")
+         return True
+
+     @app.get(
+         "/chat", summary="Chat with the agent", dependencies=[Depends(_verify_api_key)]
+     )
+     async def chat(message: str):
+         """
+         Handle GET /chat requests.
+
+         Args:
+             message (str): The user's message to the agent.
+
+         Returns:
+             dict: Contains the agent's response under 'response'.
+
+         Raises:
+             HTTPException(400): If message is empty.
+             HTTPException(500): On internal errors.
+         """
+         if not message:
+             raise HTTPException(status_code=400, detail="No message provided")
+         try:
+             res = agent.chat(message)
+             return {"response": res}
+         except Exception as e:
+             raise HTTPException(status_code=500, detail="Internal server error") from e
+
+     @app.post(
+         "/v1/completions",
+         response_model=CompletionResponse,
+         dependencies=[Depends(_verify_api_key)],
+     )
+     async def completions(req: CompletionRequest):
+         """
+         Handle POST /v1/completions requests.
+
+         Args:
+             req (CompletionRequest): The completion request payload.
+
+         Returns:
+             CompletionResponse: The generated completion and usage stats.
+
+         Raises:
+             HTTPException(400): If prompt is missing.
+             HTTPException(500): On internal errors.
+         """
+         if not req.prompt:
+             raise HTTPException(status_code=400, detail="`prompt` is required")
+         raw = req.prompt if isinstance(req.prompt, str) else req.prompt[0]
+         try:
+             start = time.time()
+             text = agent.chat(raw)
+             logger.info(f"Agent returned in {time.time()-start:.2f}s")
+         except Exception as e:
+             raise HTTPException(status_code=500, detail="Internal server error") from e
+
+         p_tokens = len(raw.split())
+         c_tokens = len(text.split())
+
+         return CompletionResponse(
+             id=f"cmpl-{uuid.uuid4()}",
+             object="text_completion",
+             created=int(time.time()),
+             model=req.model,
+             choices=[Choice(text=text, index=0, logprobs=None, finish_reason="stop")],
+             usage=CompletionUsage(
+                 prompt_tokens=p_tokens,
+                 completion_tokens=c_tokens,
+                 total_tokens=p_tokens + c_tokens,
+             ),
+         )
+
+     @app.post(
+         "/v1/chat",
+         response_model=ChatCompletionResponse,
+         dependencies=[Depends(_verify_api_key)],
+     )
+     async def chat_completion(req: ChatCompletionRequest):
+         if not req.messages:
+             raise HTTPException(status_code=400, detail="`messages` is required")
+
+         # concatenate all user messages into a single prompt
+         raw = " ".join(m.content for m in req.messages if m.role == "user")
+
+         try:
+             start = time.time()
+             text = agent.chat(raw)
+             logger.info(f"Agent returned in {time.time()-start:.2f}s")
+         except Exception as e:
+             raise HTTPException(status_code=500, detail="Internal server error") from e
+
+         p_tokens = len(raw.split())
+         c_tokens = len(text.split())
+
+         return ChatCompletionResponse(
+             id=f"chatcmpl-{uuid.uuid4()}",
+             object="chat.completion",
+             created=int(time.time()),
+             model=req.model,
+             choices=[
+                 ChatCompletionChoice(
+                     index=0,
+                     message=ChatMessage(role="assistant", content=text),
+                     finish_reason="stop",
+                 )
+             ],
+             usage=CompletionUsage(
+                 prompt_tokens=p_tokens,
+                 completion_tokens=c_tokens,
+                 total_tokens=p_tokens + c_tokens,
+             ),
+         )
+
+     return app
+
+
+ def start_app(agent: Agent, host="0.0.0.0", port=8000):
+     """
+     Launch the FastAPI application using Uvicorn.
+
+     Args:
+         agent (Agent): The agent instance for request handling.
+         host (str, optional): Host interface. Defaults to "0.0.0.0".
+         port (int, optional): Port number. Defaults to 8000.
+     """
+     app = create_app(agent, config=AgentConfig())
+     uvicorn.run(app, host=host, port=port)
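
The rewrite keeps the module's two public entry points, create_app and start_app, so existing callers keep working while gaining the two OpenAI-style routes. A minimal sketch of serving an agent with it (the tool list, topic, and key are placeholders); note that start_app builds a fresh default AgentConfig internally, so pass your own config through create_app when the endpoint key matters:

from vectara_agentic.agent import Agent
from vectara_agentic.agent_config import AgentConfig
from vectara_agentic.agent_endpoint import create_app
import uvicorn

config = AgentConfig(endpoint_api_key="my-secret-key")  # placeholder key
agent = Agent(tools=[], topic="Sample topic", agent_config=config)  # placeholder agent

# Build the app explicitly so the API-key check uses *this* config.
app = create_app(agent, config)
uvicorn.run(app, host="127.0.0.1", port=8000)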
@@ -1,7 +1,6 @@
  """
  Utilities for the Vectara agentic.
  """
- from types import MethodType
  from typing import Tuple, Callable, Optional
  from functools import lru_cache
  import tiktoken
@@ -12,7 +11,6 @@ from llama_index.llms.anthropic import Anthropic

  from .types import LLMRole, AgentType, ModelProvider
  from .agent_config import AgentConfig
- from .tool_utils import _updated_openai_prepare_chat_with_tools

  provider_to_default_model_name = {
      ModelProvider.OPENAI: "gpt-4o",
@@ -124,11 +122,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              is_function_calling_model=True,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )
      elif model_provider == ModelProvider.GROQ:
          from llama_index.llms.groq import Groq

@@ -138,11 +131,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              is_function_calling_model=True,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )
      elif model_provider == ModelProvider.FIREWORKS:
          from llama_index.llms.fireworks import Fireworks

@@ -167,11 +155,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              api_key=config.private_llm_api_key,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )

      else:
          raise ValueError(f"Unknown LLM provider: {model_provider}")
@@ -7,7 +7,7 @@ import re

  from typing import (
      Callable, List, Dict, Any, Optional, Union, Type, Tuple,
-     Sequence, get_origin, get_args
+     get_origin, get_args
  )
  from pydantic import BaseModel, create_model
  from pydantic_core import PydanticUndefined
@@ -17,52 +17,10 @@ from llama_index.core.tools.function_tool import AsyncCallable
  from llama_index.core.tools.types import ToolMetadata, ToolOutput
  from llama_index.core.workflow.context import Context

- from llama_index.core.tools.types import BaseTool
- from llama_index.core.base.llms.types import ChatMessage, MessageRole
- from llama_index.llms.openai.utils import resolve_tool_choice
-
  from .types import ToolType
  from .utils import is_float


- def _updated_openai_prepare_chat_with_tools(
-     self,
-     tools: Sequence["BaseTool"],
-     user_msg: Optional[Union[str, ChatMessage]] = None,
-     chat_history: Optional[List[ChatMessage]] = None,
-     verbose: bool = False,
-     allow_parallel_tool_calls: bool = False,
-     tool_choice: Union[str, dict] = "auto",
-     strict: Optional[bool] = None,
-     **kwargs: Any,
- ) -> Dict[str, Any]:
-     """Predict and call the tool."""
-     tool_specs = [tool.metadata.to_openai_tool(skip_length_check=True) for tool in tools]
-
-     # if strict is passed in, use, else default to the class-level attribute, else default to True`
-     strict = strict if strict is not None else self.strict
-
-     if self.metadata.is_function_calling_model:
-         for tool_spec in tool_specs:
-             if tool_spec["type"] == "function":
-                 tool_spec["function"]["strict"] = strict
-                 # in current openai 1.40.0 it is always false.
-                 tool_spec["function"]["parameters"]["additionalProperties"] = False
-
-     if isinstance(user_msg, str):
-         user_msg = ChatMessage(role=MessageRole.USER, content=user_msg)
-
-     messages = chat_history or []
-     if user_msg:
-         messages.append(user_msg)
-
-     return {
-         "messages": messages,
-         "tools": tool_specs or None,
-         "tool_choice": resolve_tool_choice(tool_choice) if tool_specs else None,
-         **kwargs,
-     }
-
  class VectaraToolMetadata(ToolMetadata):
      """
      A subclass of ToolMetadata adding the tool_type attribute.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vectara_agentic
- Version: 0.2.17
+ Version: 0.2.18
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
  Home-page: https://github.com/vectara/py-vectara-agentic
  Author: Ofer Mendelevitch
@@ -20,8 +20,9 @@ Requires-Dist: llama-index==0.12.35
  Requires-Dist: llama-index-indices-managed-vectara==0.4.5
  Requires-Dist: llama-index-agent-llm-compiler==0.3.0
  Requires-Dist: llama-index-agent-lats==0.3.0
- Requires-Dist: llama-index-agent-openai==0.4.7
- Requires-Dist: llama-index-llms-openai==0.3.38
+ Requires-Dist: llama-index-agent-openai==0.4.8
+ Requires-Dist: llama-index-llms-openai==0.3.42
+ Requires-Dist: llama-index-llms-openai-like>=0.3.5
  Requires-Dist: llama-index-llms-anthropic==0.6.10
  Requires-Dist: llama-index-llms-together==0.3.1
  Requires-Dist: llama-index-llms-groq==0.3.1
@@ -8,7 +8,9 @@ tests/endpoint.py
  tests/test_agent.py
  tests/test_agent_planning.py
  tests/test_agent_type.py
+ tests/test_api_endpoint.py
  tests/test_fallback.py
+ tests/test_gemini.py
  tests/test_groq.py
  tests/test_private_llm.py
  tests/test_return_direct.py
@@ -2,8 +2,9 @@ llama-index==0.12.35
  llama-index-indices-managed-vectara==0.4.5
  llama-index-agent-llm-compiler==0.3.0
  llama-index-agent-lats==0.3.0
- llama-index-agent-openai==0.4.7
- llama-index-llms-openai==0.3.38
+ llama-index-agent-openai==0.4.8
+ llama-index-llms-openai==0.3.42
+ llama-index-llms-openai-like>=0.3.5
  llama-index-llms-anthropic==0.6.10
  llama-index-llms-together==0.3.1
  llama-index-llms-groq==0.3.1
@@ -1,62 +0,0 @@
- """
- This module contains functions to start the agent behind an API endpoint.
- """
- import logging
- from fastapi import FastAPI, HTTPException, Depends
- from fastapi.security.api_key import APIKeyHeader
- from pydantic import BaseModel
- import uvicorn
-
- from .agent import Agent
- from .agent_config import AgentConfig
-
- api_key_header = APIKeyHeader(name="X-API-Key")
-
- class ChatRequest(BaseModel):
-     """
-     A request model for the chat endpoint.
-     """
-     message: str
-
-
- def create_app(agent: Agent, config: AgentConfig) -> FastAPI:
-     """
-     Create a FastAPI application with a chat endpoint.
-     """
-     app = FastAPI()
-     logger = logging.getLogger("uvicorn.error")
-     logging.basicConfig(level=logging.INFO)
-     endpoint_api_key = config.endpoint_api_key
-
-     @app.get("/chat", summary="Chat with the agent")
-     async def chat(message: str, api_key: str = Depends(api_key_header)):
-         logger.info(f"Received message: {message}")
-         if api_key != endpoint_api_key:
-             logger.warning("Unauthorized access attempt")
-             raise HTTPException(status_code=403, detail="Unauthorized")
-
-         if not message:
-             logger.error("No message provided in the request")
-             raise HTTPException(status_code=400, detail="No message provided")
-
-         try:
-             response = agent.chat(message)
-             logger.info(f"Generated response: {response}")
-             return {"response": response}
-         except Exception as e:
-             logger.error(f"Error during agent processing: {e}")
-             raise HTTPException(status_code=500, detail="Internal server error") from e
-
-     return app
-
-
- def start_app(agent: Agent, host='0.0.0.0', port=8000):
-     """
-     Start the FastAPI server.
-
-     Args:
-         host (str, optional): The host address for the API. Defaults to '127.0.0.1'.
-         port (int, optional): The port for the API. Defaults to 8000.
-     """
-     app = create_app(agent, config=AgentConfig())
-     uvicorn.run(app, host=host, port=port)