vectara-agentic 0.2.17__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of vectara-agentic might be problematic.

tests/test_api_endpoint.py ADDED
@@ -0,0 +1,126 @@
+ import unittest
+ from uuid import UUID
+
+ from fastapi.testclient import TestClient
+
+ # Adjust this import to point at the file where you put create_app
+ from vectara_agentic.agent_endpoint import create_app
+ from vectara_agentic.agent import Agent
+ from vectara_agentic.agent_config import AgentConfig
+
+
+ class DummyAgent(Agent):
+     def __init__(self):
+         # satisfy Agent.__init__(tools: ...)
+         super().__init__(tools=[])
+
+     def chat(self, message: str) -> str:
+         return f"Echo: {message}"
+
+ class APITestCase(unittest.TestCase):
+     @classmethod
+     def setUpClass(cls):
+         cls.agent = DummyAgent()
+         # Override only the endpoint_api_key, leave everything else as default
+         cls.config = AgentConfig(endpoint_api_key="testkey")
+         app = create_app(cls.agent, cls.config)
+         cls.client = TestClient(app)
+         cls.headers = {"X-API-Key": cls.config.endpoint_api_key}
+
+     def test_chat_success(self):
+         r = self.client.get("/chat", params={"message": "hello"}, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         self.assertEqual(r.json(), {"response": "Echo: hello"})
+
+     def test_chat_empty_message(self):
+         r = self.client.get("/chat", params={"message": ""}, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("No message provided", r.json()["detail"])
+
+     def test_chat_unauthorized(self):
+         r = self.client.get("/chat", params={"message": "hello"}, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+     def test_completions_success(self):
+         payload = {"model": "m1", "prompt": "test"}
+         r = self.client.post("/v1/completions", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # ID prefix + valid UUID check
+         self.assertTrue(data["id"].startswith("cmpl-"))
+         UUID(data["id"].split("-", 1)[1])
+
+         self.assertEqual(data["model"], "m1")
+         self.assertEqual(data["choices"][0]["text"], "Echo: test")
+         # prompt_tokens=1, completion_tokens=2 ("Echo:", "test")
+         self.assertEqual(data["usage"]["prompt_tokens"], 1)
+         self.assertEqual(data["usage"]["completion_tokens"], 2)
+
+     def test_completions_no_prompt(self):
+         payload = {"model": "m1"}  # missing prompt
+         r = self.client.post("/v1/completions", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("`prompt` is required", r.json()["detail"])
+
+     def test_completions_unauthorized(self):
+         payload = {"model": "m1", "prompt": "hi"}
+         r = self.client.post("/v1/completions", json=payload, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+     def test_chat_completion_success(self):
+         payload = {
+             "model": "m1",
+             "messages": [{"role": "user", "content": "hello"}]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # ID prefix + valid UUID check
+         self.assertTrue(data["id"].startswith("chatcmpl-"))
+         UUID(data["id"].split("-", 1)[1])
+
+         self.assertEqual(data["model"], "m1")
+         self.assertEqual(data["choices"][0]["message"]["content"], "Echo: hello")
+
+         # prompt_tokens=1, completion_tokens=2 ("Echo:", "hello")
+         self.assertEqual(data["usage"]["prompt_tokens"], 1)
+         self.assertEqual(data["usage"]["completion_tokens"], 2)
+
+     def test_chat_completion_multiple_user_messages(self):
+         payload = {
+             "model": "m1",
+             "messages": [
+                 {"role": "system", "content": "ignore me"},
+                 {"role": "user", "content": "foo"},
+                 {"role": "assistant", "content": "pong"},
+                 {"role": "user", "content": "bar"}
+             ]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 200)
+         data = r.json()
+
+         # Should concatenate only user messages: "foo bar"
+         self.assertEqual(data["choices"][0]["message"]["content"], "Echo: foo bar")
+         self.assertEqual(data["usage"]["prompt_tokens"], 2)  # "foo","bar"
+         self.assertEqual(data["usage"]["completion_tokens"], 3)  # "Echo:","foo","bar"
+
+     def test_chat_completion_no_messages(self):
+         payload = {"model": "m1", "messages": []}
+         r = self.client.post("/v1/chat", json=payload, headers=self.headers)
+         self.assertEqual(r.status_code, 400)
+         self.assertIn("`messages` is required", r.json()["detail"])
+
+     def test_chat_completion_unauthorized(self):
+         payload = {
+             "model": "m1",
+             "messages": [{"role": "user", "content": "oops"}]
+         }
+         r = self.client.post("/v1/chat", json=payload, headers={"X-API-Key": "bad"})
+         self.assertEqual(r.status_code, 403)
+
+
+ if __name__ == "__main__":
+     unittest.main()
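
The suite above drives all three endpoints through FastAPI's TestClient without opening a socket. For orientation, serving the same app for real is a thin wrapper around create_app; a minimal sketch (the empty tool list, key, host, and port are illustrative values, not taken from this diff):

    import uvicorn
    from vectara_agentic.agent import Agent
    from vectara_agentic.agent_config import AgentConfig
    from vectara_agentic.agent_endpoint import create_app

    # Any Agent instance works; the tests use a DummyAgent subclass instead.
    agent = Agent(tools=[])
    config = AgentConfig(endpoint_api_key="testkey")  # illustrative key
    app = create_app(agent, config)
    uvicorn.run(app, host="127.0.0.1", port=8000)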
tests/test_gemini.py ADDED
@@ -0,0 +1,115 @@
+ import unittest
+
+ from pydantic import Field, BaseModel
+
+ from vectara_agentic.agent import Agent, AgentType
+ from vectara_agentic.agent_config import AgentConfig
+ from vectara_agentic.tools import VectaraToolFactory
+ from vectara_agentic.types import ModelProvider
+
+
+ import nest_asyncio
+ nest_asyncio.apply()
+
+ tickers = {
+     "C": "Citigroup",
+     "COF": "Capital One",
+     "JPM": "JPMorgan Chase",
+     "AAPL": "Apple Computer",
+     "GOOG": "Google",
+     "AMZN": "Amazon",
+     "SNOW": "Snowflake",
+     "TEAM": "Atlassian",
+     "TSLA": "Tesla",
+     "NVDA": "Nvidia",
+     "MSFT": "Microsoft",
+     "AMD": "Advanced Micro Devices",
+     "INTC": "Intel",
+     "NFLX": "Netflix",
+     "STT": "State Street",
+     "BK": "Bank of New York Mellon",
+ }
+ years = list(range(2015, 2025))
+
+
+ def mult(x: float, y: float) -> float:
+     "Multiply two numbers"
+     return x * y
+
+
+ def get_company_info() -> list[str]:
+     """
+     Returns a dictionary of companies you can query about. Always check this before using any other tool.
+     The output is a dictionary of valid ticker symbols mapped to company names.
+     You can use this to identify the companies you can query about, and their ticker information.
+     """
+     return tickers
+
+
+ def get_valid_years() -> list[str]:
+     """
+     Returns a list of the years for which financial reports are available.
+     Always check this before using any other tool.
+     """
+     return years
+
+
+ fc_config_gemini = AgentConfig(
+     agent_type=AgentType.FUNCTION_CALLING,
+     main_llm_provider=ModelProvider.GEMINI,
+     tool_llm_provider=ModelProvider.GEMINI,
+ )
+
+
+ class TestGEMINI(unittest.TestCase):
+
+     def test_tool_with_many_arguments(self):
+
+         vectara_corpus_key = "vectara-docs_1"
+         vectara_api_key = "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA"
+         vec_factory = VectaraToolFactory(vectara_corpus_key, vectara_api_key)
+
+         class QueryToolArgs(BaseModel):
+             arg1: str = Field(description="the first argument", examples=["val1"])
+             arg2: str = Field(description="the second argument", examples=["val2"])
+             arg3: str = Field(description="the third argument", examples=["val3"])
+             arg4: str = Field(description="the fourth argument", examples=["val4"])
+             arg5: str = Field(description="the fifth argument", examples=["val5"])
+             arg6: str = Field(description="the sixth argument", examples=["val6"])
+             arg7: str = Field(description="the seventh argument", examples=["val7"])
+             arg8: str = Field(description="the eighth argument", examples=["val8"])
+             arg9: str = Field(description="the ninth argument", examples=["val9"])
+             arg10: str = Field(description="the tenth argument", examples=["val10"])
+             arg11: str = Field(description="the eleventh argument", examples=["val11"])
+             arg12: str = Field(description="the twelfth argument", examples=["val12"])
+             arg13: str = Field(
+                 description="the thirteenth argument", examples=["val13"]
+             )
+             arg14: str = Field(
+                 description="the fourteenth argument", examples=["val14"]
+             )
+             arg15: str = Field(description="the fifteenth argument", examples=["val15"])
+
+         query_tool_1 = vec_factory.create_rag_tool(
+             tool_name="rag_tool",
+             tool_description="""
+             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
+             We are using this tool to test the tool factory works and does not crash with OpenAI.
+             """,
+             tool_args_schema=QueryToolArgs,
+         )
+
+         agent = Agent(
+             tools=[query_tool_1],
+             topic="Sample topic",
+             custom_instructions="Call the tool with 15 arguments",
+             agent_config=fc_config_gemini,
+         )
+         res = agent.chat("What is the stock price?")
+         self.assertTrue(
+             any(sub in str(res) for sub in ["I don't know", "I do not have"])
+         )
+
+
+ if __name__ == "__main__":
+     unittest.main()
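
Unlike the endpoint tests, this test calls a live Gemini model through the FUNCTION_CALLING agent, so it presumes credentials in the environment; a hedged sketch (GOOGLE_API_KEY is the conventional variable for Google GenAI clients and is assumed here, not shown in this diff):

    import os

    # Assumed credential setup; the exact variable depends on your Gemini integration.
    os.environ.setdefault("GOOGLE_API_KEY", "<your-gemini-api-key>")

    # Then run the suite, e.g.: python -m unittest tests.test_gemini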
tests/test_groq.py CHANGED
@@ -54,13 +54,6 @@ def get_valid_years() -> list[str]:
      return years
 
 
- config_gemini = AgentConfig(
-     agent_type=AgentType.FUNCTION_CALLING,
-     main_llm_provider=ModelProvider.GEMINI,
-     tool_llm_provider=ModelProvider.GEMINI,
- )
-
-
  fc_config_groq = AgentConfig(
      agent_type=AgentType.FUNCTION_CALLING,
      main_llm_provider=ModelProvider.GROQ,
tests/test_tools.py CHANGED
@@ -251,17 +251,19 @@ class TestToolsPackage(unittest.TestCase):
              arg11: str = Field(description="the eleventh argument", examples=["val11"])
              arg12: str = Field(description="the twelfth argument", examples=["val12"])
              arg13: str = Field(description="the thirteenth argument", examples=["val13"])
+             arg14: str = Field(description="the fourteenth argument", examples=["val14"])
+             arg15: str = Field(description="the fifteenth argument", examples=["val15"])
 
          query_tool_1 = vec_factory.create_rag_tool(
              tool_name="rag_tool",
              tool_description="""
-             A dummy tool that takes 13 arguments and returns a response (str) to the user query based on the data in this corpus.
+             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
              We are using this tool to test the tool factory works and does not crash with OpenAI.
              """,
              tool_args_schema=QueryToolArgs,
          )
 
-         # Test with 13 arguments which go over the 1024 limit.
+         # Test with 15 arguments to make sure no issues occur
          config = AgentConfig(
              agent_type=AgentType.OPENAI
          )
@@ -272,9 +274,9 @@ class TestToolsPackage(unittest.TestCase):
              agent_config=config,
          )
          res = agent.chat("What is the stock price for Yahoo on 12/31/22?")
-         self.assertIn("maximum length of 1024 characters", str(res))
+         self.assertNotIn("maximum length of 1024 characters", str(res))
 
-         # Same test but with GROQ
+         # Same test but with GROQ, should not have this limit
          config = AgentConfig(
              agent_type=AgentType.FUNCTION_CALLING,
              main_llm_provider=ModelProvider.GROQ,
@@ -283,13 +285,13 @@ class TestToolsPackage(unittest.TestCase):
          agent = Agent(
              tools=[query_tool_1],
              topic="Sample topic",
-             custom_instructions="Call the tool with 13 arguments for GROQ",
+             custom_instructions="Call the tool with 15 arguments for GROQ",
              agent_config=config,
          )
          res = agent.chat("What is the stock price?")
          self.assertNotIn("maximum length of 1024 characters", str(res))
 
-         # Same test but with ANTHROPIC
+         # Same test but with ANTHROPIC, should not have this limit
          config = AgentConfig(
              agent_type=AgentType.FUNCTION_CALLING,
              main_llm_provider=ModelProvider.ANTHROPIC,
@@ -298,38 +300,12 @@ class TestToolsPackage(unittest.TestCase):
          agent = Agent(
              tools=[query_tool_1],
              topic="Sample topic",
-             custom_instructions="Call the tool with 13 arguments for ANTHROPIC",
+             custom_instructions="Call the tool with 15 arguments for ANTHROPIC",
              agent_config=config,
          )
          res = agent.chat("What is the stock price?")
-         # ANTHROPIC does not have that 1024 limit
          self.assertIn("stock price", str(res))
 
-         # But using Compact_docstring=True, we can pass 13 arguments successfully.
-         vec_factory = VectaraToolFactory(
-             vectara_corpus_key, vectara_api_key, compact_docstring=True
-         )
-         query_tool_2 = vec_factory.create_rag_tool(
-             tool_name="rag_tool",
-             tool_description="""
-             A dummy tool that takes 15 arguments and returns a response (str) to the user query based on the data in this corpus.
-             We are using this tool to test the tool factory works and doesn not crash with OpenAI.
-             """,
-             tool_args_schema=QueryToolArgs,
-         )
-
-         config = AgentConfig()
-         agent = Agent(
-             tools=[query_tool_2],
-             topic="Sample topic",
-             custom_instructions="Call the tool with 15 arguments",
-             agent_config=config,
-         )
-         res = agent.chat("What is the stock price?")
-         self.assertTrue(
-             any(sub in str(res) for sub in ["I don't know", "stock price"])
-         )
-
      def test_public_repo(self):
          vectara_corpus_key = "vectara-docs_1"
          vectara_api_key = "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA"
vectara_agentic/_version.py CHANGED
@@ -1,4 +1,4 @@
  """
  Define the version of the package.
  """
- __version__ = "0.2.17"
+ __version__ = "0.2.18"
vectara_agentic/agent_endpoint.py CHANGED
@@ -1,62 +1,252 @@
  """
- This module contains functions to start the agent behind an API endpoint.
+ agent_endpoint.py
  """
+
  import logging
- from fastapi import FastAPI, HTTPException, Depends
+ import time
+ import uuid
+ from typing import Any, List, Literal, Optional, Union
+
+ from fastapi import Depends, FastAPI, HTTPException
  from fastapi.security.api_key import APIKeyHeader
- from pydantic import BaseModel
+ from pydantic import BaseModel, Field
  import uvicorn
 
  from .agent import Agent
  from .agent_config import AgentConfig
 
- api_key_header = APIKeyHeader(name="X-API-Key")
 
  class ChatRequest(BaseModel):
-     """
-     A request model for the chat endpoint.
-     """
+     """Request schema for the /chat endpoint."""
+
      message: str
 
 
+ class CompletionRequest(BaseModel):
+     """Request schema for the /v1/completions endpoint."""
+
+     model: str
+     prompt: Optional[Union[str, List[str]]] = None
+     max_tokens: Optional[int] = Field(16, ge=1)
+     temperature: Optional[float] = Field(1.0, ge=0.0, le=2.0)
+     top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0)
+     n: Optional[int] = Field(1, ge=1)
+     stop: Optional[Union[str, List[str]]] = None
+
+
+ class Choice(BaseModel):
+     """Choice schema returned in CompletionResponse."""
+
+     text: str
+     index: int
+     logprobs: Optional[Any] = None
+     finish_reason: Literal["stop", "length", "error", None]
+
+
+ class CompletionUsage(BaseModel):
+     """Token usage details in CompletionResponse."""
+
+     prompt_tokens: int
+     completion_tokens: int
+     total_tokens: int
+
+
+ class CompletionResponse(BaseModel):
+     """Response schema for the /v1/completions endpoint."""
+
+     id: str
+     object: Literal["text_completion"]
+     created: int
+     model: str
+     choices: List[Choice]
+     usage: CompletionUsage
+
+
+ class ChatMessage(BaseModel):
+     """Schema for individual chat messages in ChatCompletionRequest."""
+     role: Literal["system", "user", "assistant"]
+     content: str
+
+
+ class ChatCompletionRequest(BaseModel):
+     """Request schema for the /v1/chat endpoint."""
+     model: str
+     messages: List[ChatMessage]
+     temperature: Optional[float] = Field(1.0, ge=0.0, le=2.0)
+     top_p: Optional[float] = Field(1.0, ge=0.0, le=1.0)
+     n: Optional[int] = Field(1, ge=1)
+
+
+ class ChatCompletionChoice(BaseModel):
+     """Choice schema returned in ChatCompletionResponse."""
+     index: int
+     message: ChatMessage
+     finish_reason: Literal["stop", "length", "error", None]
+
+
+ class ChatCompletionResponse(BaseModel):
+     """Response schema for the /v1/chat endpoint."""
+     id: str
+     object: Literal["chat.completion"]
+     created: int
+     model: str
+     choices: List[ChatCompletionChoice]
+     usage: CompletionUsage
+
+
  def create_app(agent: Agent, config: AgentConfig) -> FastAPI:
      """
-     Create a FastAPI application with a chat endpoint.
+     Create and configure the FastAPI app.
+
+     Args:
+         agent (Agent): The agent instance to handle chat/completion.
+         config (AgentConfig): Configuration containing the API key.
+
+     Returns:
+         FastAPI: Configured FastAPI application.
      """
      app = FastAPI()
      logger = logging.getLogger("uvicorn.error")
      logging.basicConfig(level=logging.INFO)
-     endpoint_api_key = config.endpoint_api_key
 
-     @app.get("/chat", summary="Chat with the agent")
-     async def chat(message: str, api_key: str = Depends(api_key_header)):
-         logger.info(f"Received message: {message}")
-         if api_key != endpoint_api_key:
-             logger.warning("Unauthorized access attempt")
+     api_key_header = APIKeyHeader(name="X-API-Key")
+
+     async def _verify_api_key(api_key: str = Depends(api_key_header)):
+         """
+         Dependency that verifies the X-API-Key header.
+
+         Raises:
+             HTTPException(403): If the provided key does not match.
+
+         Returns:
+             bool: True if key is valid.
+         """
+         if api_key != config.endpoint_api_key:
              raise HTTPException(status_code=403, detail="Unauthorized")
+         return True
+
+     @app.get(
+         "/chat", summary="Chat with the agent", dependencies=[Depends(_verify_api_key)]
+     )
+     async def chat(message: str):
+         """
+         Handle GET /chat requests.
 
+         Args:
+             message (str): The user's message to the agent.
+
+         Returns:
+             dict: Contains the agent's response under 'response'.
+
+         Raises:
+             HTTPException(400): If message is empty.
+             HTTPException(500): On internal errors.
+         """
          if not message:
-             logger.error("No message provided in the request")
              raise HTTPException(status_code=400, detail="No message provided")
+         try:
+             res = agent.chat(message)
+             return {"response": res}
+         except Exception as e:
+             raise HTTPException(status_code=500, detail="Internal server error") from e
+
+     @app.post(
+         "/v1/completions",
+         response_model=CompletionResponse,
+         dependencies=[Depends(_verify_api_key)],
+     )
+     async def completions(req: CompletionRequest):
+         """
+         Handle POST /v1/completions requests.
+
+         Args:
+             req (CompletionRequest): The completion request payload.
+
+         Returns:
+             CompletionResponse: The generated completion and usage stats.
+
+         Raises:
+             HTTPException(400): If prompt is missing.
+             HTTPException(500): On internal errors.
+         """
+         if not req.prompt:
+             raise HTTPException(status_code=400, detail="`prompt` is required")
+         raw = req.prompt if isinstance(req.prompt, str) else req.prompt[0]
+         try:
+             start = time.time()
+             text = agent.chat(raw)
+             logger.info(f"Agent returned in {time.time()-start:.2f}s")
+         except Exception as e:
+             raise HTTPException(status_code=500, detail="Internal server error") from e
+
+         p_tokens = len(raw.split())
+         c_tokens = len(text.split())
+
+         return CompletionResponse(
+             id=f"cmpl-{uuid.uuid4()}",
+             object="text_completion",
+             created=int(time.time()),
+             model=req.model,
+             choices=[Choice(text=text, index=0, logprobs=None, finish_reason="stop")],
+             usage=CompletionUsage(
+                 prompt_tokens=p_tokens,
+                 completion_tokens=c_tokens,
+                 total_tokens=p_tokens + c_tokens,
+             ),
+         )
+
+     @app.post(
+         "/v1/chat",
+         response_model=ChatCompletionResponse,
+         dependencies=[Depends(_verify_api_key)],
+     )
+     async def chat_completion(req: ChatCompletionRequest):
+         if not req.messages:
+             raise HTTPException(status_code=400, detail="`messages` is required")
+
+         # concatenate all user messages into a single prompt
+         raw = " ".join(m.content for m in req.messages if m.role == "user")
 
          try:
-             response = agent.chat(message)
-             logger.info(f"Generated response: {response}")
-             return {"response": response}
+             start = time.time()
+             text = agent.chat(raw)
+             logger.info(f"Agent returned in {time.time()-start:.2f}s")
          except Exception as e:
-             logger.error(f"Error during agent processing: {e}")
              raise HTTPException(status_code=500, detail="Internal server error") from e
 
+         p_tokens = len(raw.split())
+         c_tokens = len(text.split())
+
+         return ChatCompletionResponse(
+             id=f"chatcmpl-{uuid.uuid4()}",
+             object="chat.completion",
+             created=int(time.time()),
+             model=req.model,
+             choices=[
+                 ChatCompletionChoice(
+                     index=0,
+                     message=ChatMessage(role="assistant", content=text),
+                     finish_reason="stop",
+                 )
+             ],
+             usage=CompletionUsage(
+                 prompt_tokens=p_tokens,
+                 completion_tokens=c_tokens,
+                 total_tokens=p_tokens + c_tokens,
+             ),
+         )
+
      return app
 
 
- def start_app(agent: Agent, host='0.0.0.0', port=8000):
+ def start_app(agent: Agent, host="0.0.0.0", port=8000):
      """
-     Start the FastAPI server.
+     Launch the FastAPI application using Uvicorn.
 
      Args:
-         host (str, optional): The host address for the API. Defaults to '127.0.0.1'.
-         port (int, optional): The port for the API. Defaults to 8000.
+         agent (Agent): The agent instance for request handling.
+         host (str, optional): Host interface. Defaults to "0.0.0.0".
+         port (int, optional): Port number. Defaults to 8000.
      """
      app = create_app(agent, config=AgentConfig())
      uvicorn.run(app, host=host, port=port)
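
Taken together, the rewrite replaces the single inline-authenticated /chat route with a shared _verify_api_key dependency plus two OpenAI-style routes. A minimal client sketch against a locally running app (base URL and key are illustrative):

    import requests

    base = "http://127.0.0.1:8000"
    headers = {"X-API-Key": "testkey"}  # must match AgentConfig.endpoint_api_key

    # GET /chat -> {"response": ...}
    r = requests.get(f"{base}/chat", params={"message": "hello"}, headers=headers)

    # POST /v1/completions -> OpenAI-style text_completion object
    r = requests.post(f"{base}/v1/completions",
                      json={"model": "m1", "prompt": "hello"}, headers=headers)

    # POST /v1/chat -> OpenAI-style chat.completion object
    r = requests.post(f"{base}/v1/chat",
                      json={"model": "m1",
                            "messages": [{"role": "user", "content": "hello"}]},
                      headers=headers)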
vectara_agentic/llm_utils.py CHANGED
@@ -1,7 +1,6 @@
  """
  Utilities for the Vectara agentic.
  """
- from types import MethodType
  from typing import Tuple, Callable, Optional
  from functools import lru_cache
  import tiktoken
@@ -12,7 +11,6 @@ from llama_index.llms.anthropic import Anthropic
 
  from .types import LLMRole, AgentType, ModelProvider
  from .agent_config import AgentConfig
- from .tool_utils import _updated_openai_prepare_chat_with_tools
 
  provider_to_default_model_name = {
      ModelProvider.OPENAI: "gpt-4o",
@@ -124,11 +122,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              is_function_calling_model=True,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )
      elif model_provider == ModelProvider.GROQ:
          from llama_index.llms.groq import Groq
 
@@ -138,11 +131,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              is_function_calling_model=True,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )
      elif model_provider == ModelProvider.FIREWORKS:
          from llama_index.llms.fireworks import Fireworks
 
@@ -167,11 +155,6 @@ def get_llm(role: LLMRole, config: Optional[AgentConfig] = None) -> LLM:
              api_key=config.private_llm_api_key,
              max_tokens=max_tokens,
          )
-         # pylint: disable=protected-access
-         llm._prepare_chat_with_tools = MethodType(
-             _updated_openai_prepare_chat_with_tools,
-             llm,
-         )
 
      else:
          raise ValueError(f"Unknown LLM provider: {model_provider}")
vectara_agentic/tool_utils.py CHANGED
@@ -7,7 +7,7 @@ import re
 
  from typing import (
      Callable, List, Dict, Any, Optional, Union, Type, Tuple,
-     Sequence, get_origin, get_args
+     get_origin, get_args
  )
  from pydantic import BaseModel, create_model
  from pydantic_core import PydanticUndefined
@@ -17,52 +17,10 @@ from llama_index.core.tools.function_tool import AsyncCallable
  from llama_index.core.tools.types import ToolMetadata, ToolOutput
  from llama_index.core.workflow.context import Context
 
- from llama_index.core.tools.types import BaseTool
- from llama_index.core.base.llms.types import ChatMessage, MessageRole
- from llama_index.llms.openai.utils import resolve_tool_choice
-
  from .types import ToolType
  from .utils import is_float
 
 
- def _updated_openai_prepare_chat_with_tools(
-     self,
-     tools: Sequence["BaseTool"],
-     user_msg: Optional[Union[str, ChatMessage]] = None,
-     chat_history: Optional[List[ChatMessage]] = None,
-     verbose: bool = False,
-     allow_parallel_tool_calls: bool = False,
-     tool_choice: Union[str, dict] = "auto",
-     strict: Optional[bool] = None,
-     **kwargs: Any,
- ) -> Dict[str, Any]:
-     """Predict and call the tool."""
-     tool_specs = [tool.metadata.to_openai_tool(skip_length_check=True) for tool in tools]
-
-     # if strict is passed in, use, else default to the class-level attribute, else default to True`
-     strict = strict if strict is not None else self.strict
-
-     if self.metadata.is_function_calling_model:
-         for tool_spec in tool_specs:
-             if tool_spec["type"] == "function":
-                 tool_spec["function"]["strict"] = strict
-                 # in current openai 1.40.0 it is always false.
-                 tool_spec["function"]["parameters"]["additionalProperties"] = False
-
-     if isinstance(user_msg, str):
-         user_msg = ChatMessage(role=MessageRole.USER, content=user_msg)
-
-     messages = chat_history or []
-     if user_msg:
-         messages.append(user_msg)
-
-     return {
-         "messages": messages,
-         "tools": tool_specs or None,
-         "tool_choice": resolve_tool_choice(tool_choice) if tool_specs else None,
-         **kwargs,
-     }
-
  class VectaraToolMetadata(ToolMetadata):
      """
      A subclass of ToolMetadata adding the tool_type attribute.
{vectara_agentic-0.2.17.dist-info → vectara_agentic-0.2.18.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vectara_agentic
- Version: 0.2.17
+ Version: 0.2.18
  Summary: A Python package for creating AI Assistants and AI Agents with Vectara
  Home-page: https://github.com/vectara/py-vectara-agentic
  Author: Ofer Mendelevitch
@@ -20,8 +20,9 @@ Requires-Dist: llama-index==0.12.35
  Requires-Dist: llama-index-indices-managed-vectara==0.4.5
  Requires-Dist: llama-index-agent-llm-compiler==0.3.0
  Requires-Dist: llama-index-agent-lats==0.3.0
- Requires-Dist: llama-index-agent-openai==0.4.7
- Requires-Dist: llama-index-llms-openai==0.3.38
+ Requires-Dist: llama-index-agent-openai==0.4.8
+ Requires-Dist: llama-index-llms-openai==0.3.42
+ Requires-Dist: llama-index-llms-openai-like>=0.3.5
  Requires-Dist: llama-index-llms-anthropic==0.6.10
  Requires-Dist: llama-index-llms-together==0.3.1
  Requires-Dist: llama-index-llms-groq==0.3.1
{vectara_agentic-0.2.17.dist-info → vectara_agentic-0.2.18.dist-info}/RECORD RENAMED
@@ -3,32 +3,34 @@ tests/endpoint.py,sha256=frnpdZQpnuQNNKNYgAn2rFTarNG8MCJaNA77Bw_W22A,1420
  tests/test_agent.py,sha256=o5U3K1AJllsSDvucrgFJPQRdAmHPq3LCuFpsnECUTFk,5483
  tests/test_agent_planning.py,sha256=JwEebGooROAvsQ9JZoaH6KEcrSyv1F0lL4TD4FjP8a8,2213
  tests/test_agent_type.py,sha256=mWo-pTQNDj4fWFPETm5jnb7Y5N48aW35keTVvxdIaCc,7173
+ tests/test_api_endpoint.py,sha256=M9YGFCy_Jphzq9JznP4ftHqxZ_yu6dgWdX1jRvdsORA,5002
  tests/test_fallback.py,sha256=M5YD7NHZ0joVU1frYIr9_OiRAIje5mrXrYVcekzlyGs,2829
- tests/test_groq.py,sha256=Knsz-xEBY-eoq8T0DzAC09UJWZqwtLmcjbx6QY37rJg,4235
+ tests/test_gemini.py,sha256=QUBYWhZkX9AjnhPn5qa7sREf6YHZWeJEmYzKwVC23Io,4081
+ tests/test_groq.py,sha256=5RA6uFC6qra-Do55f6HUotk3EQqOosw0GjOGiHDBS4o,4071
  tests/test_private_llm.py,sha256=CY-_rCpxGUuxnZ3ypkodw5Jj-sJCNdh6rLbCvULwuJI,2247
  tests/test_return_direct.py,sha256=Y_K_v88eS_kJfxE6A0Yghma0nUT8u6COitj0SNnZGNs,1523
  tests/test_serialization.py,sha256=Ed23GN2zhSJNdPFrVK4aqLkOhJKviczR_o0t-r9TuRI,4762
- tests/test_tools.py,sha256=MWExM3n1oKmVpLmayIgHXqF6_hOPq44KPkRphitBKik,15709
+ tests/test_tools.py,sha256=sCgV74LZSRU1zKBhv_emUNe1ZmWIeGVrelNXpd9UV1c,14872
  tests/test_vectara_llms.py,sha256=m-fDAamJR1I5IdV0IpXuTegerTUNCVRm27lsHd4wQjg,2367
  tests/test_workflow.py,sha256=06NvgUQMzPb2b2mrxtVo7xribZEDQM1LdcXNJdiOfPc,4391
  vectara_agentic/__init__.py,sha256=2GLDS3U6KckK-dBRl9v_x1kSV507gEhjOfuMmmu0Qxg,850
  vectara_agentic/_callback.py,sha256=ron49t1t-ox-736WaXzrZ99vhN4NI9bMiHFyj0iIPqg,13062
  vectara_agentic/_observability.py,sha256=UbJxiOJFOdLq3b1t0-Y7swMC3BzJu3IOlTUM-c1oUk8,4328
  vectara_agentic/_prompts.py,sha256=vAb02oahA7GKRgLOsDGqgKl-BLBop2AjOlCTgLrf3M4,9694
- vectara_agentic/_version.py,sha256=o3KLIOSUALmaTPqzIK1UP5BKpJQc99yysN5Rcv3m8Qk,66
+ vectara_agentic/_version.py,sha256=6bWhPhOhATgGaKBsmcgPdRKvZBguw3zZOhD6CJaNJPs,66
  vectara_agentic/agent.py,sha256=zJ7ucFf8jc0VO4mTFqujfwREz2B-rJCpIgCJKAtNlEk,54884
  vectara_agentic/agent_config.py,sha256=E-rtYMcpoGxnEAyy8231bizo2n0uGQ2qWxuSgTEfwdQ,4327
- vectara_agentic/agent_endpoint.py,sha256=QIMejCLlpW2qzXxeDAxv3anF46XMDdVMdKGWhJh3azY,1996
+ vectara_agentic/agent_endpoint.py,sha256=PzIN7HhEHv8Mq_Zo5cZ2xYrgdv2AN6kx6dc_2AJq28I,7497
  vectara_agentic/db_tools.py,sha256=Kfz6n-rSj5TQEbAiJnWGmqWtcwB0A5GpxD7d1UwGzlc,11194
- vectara_agentic/llm_utils.py,sha256=FOQG6if6D7l1eVRx_r-HSUhh5wBguIaxsYMKrZl2fJo,6302
+ vectara_agentic/llm_utils.py,sha256=_dkxA9DcBwyIzg-BOTi7NwZZhhjV5G2cnaVlzd9J7do,5687
  vectara_agentic/sub_query_workflow.py,sha256=cPeossVPFajpSAwy45fSXhTXbQOfzv_l66pxSa4molM,12366
- vectara_agentic/tool_utils.py,sha256=fQFjbWc-ucHRiQ06vTbvNma7gwxYLKMAL41KZm4_MLs,20506
+ vectara_agentic/tool_utils.py,sha256=jv98vCMYb9afFa-HaPxI2A8BXxplfQRv2Z9b5w7ztZc,18919
  vectara_agentic/tools.py,sha256=2_9YBqszFqYDpvlTIZfdfplRKffe660jQRxp0akM-cE,32918
  vectara_agentic/tools_catalog.py,sha256=cAN_kDOWZUoW4GNFwY5GdS6ImMUQNnF2sggx9OGK9Cg,4906
  vectara_agentic/types.py,sha256=HcS7vR8P2v2xQTlOc6ZFV2vvlr3OpzSNWhtcLMxqUZc,1792
  vectara_agentic/utils.py,sha256=R9HitEG5K3Q_p2M_teosT181OUxkhs1-hnj98qDYGbE,2545
- vectara_agentic-0.2.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- vectara_agentic-0.2.17.dist-info/METADATA,sha256=uqgVpTya69UeykCim5Ur-V-r3tFSqJPMMYLDnKZHagk,29895
- vectara_agentic-0.2.17.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
- vectara_agentic-0.2.17.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
- vectara_agentic-0.2.17.dist-info/RECORD,,
+ vectara_agentic-0.2.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ vectara_agentic-0.2.18.dist-info/METADATA,sha256=FO_4pmBWrl_-7DWPWF32HwOHbbicHdB86YvRi4cA67Y,29946
+ vectara_agentic-0.2.18.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+ vectara_agentic-0.2.18.dist-info/top_level.txt,sha256=Y7TQTFdOYGYodQRltUGRieZKIYuzeZj2kHqAUpfCUfg,22
+ vectara_agentic-0.2.18.dist-info/RECORD,,
{vectara_agentic-0.2.17.dist-info → vectara_agentic-0.2.18.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.7.1)
+ Generator: setuptools (80.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 