vectara-agentic 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vectara-agentic might be problematic. Click here for more details.
- tests/test_bedrock.py +101 -0
- tests/test_gemini.py +64 -0
- tests/test_groq.py +196 -11
- tests/test_openai.py +101 -0
- tests/test_tools.py +161 -0
- vectara_agentic/_version.py +1 -1
- vectara_agentic/agent.py +1 -1
- vectara_agentic/agent_core/prompts.py +12 -11
- vectara_agentic/agent_core/streaming.py +178 -195
- vectara_agentic/llm_utils.py +1 -1
- vectara_agentic/sub_query_workflow.py +31 -31
- vectara_agentic/tools.py +108 -4
- {vectara_agentic-0.4.6.dist-info → vectara_agentic-0.4.8.dist-info}/METADATA +32 -31
- {vectara_agentic-0.4.6.dist-info → vectara_agentic-0.4.8.dist-info}/RECORD +17 -17
- {vectara_agentic-0.4.6.dist-info → vectara_agentic-0.4.8.dist-info}/WHEEL +0 -0
- {vectara_agentic-0.4.6.dist-info → vectara_agentic-0.4.8.dist-info}/licenses/LICENSE +0 -0
- {vectara_agentic-0.4.6.dist-info → vectara_agentic-0.4.8.dist-info}/top_level.txt +0 -0
tests/test_tools.py
CHANGED
|
@@ -15,6 +15,8 @@ from vectara_agentic.tools import (
|
|
|
15
15
|
VectaraToolFactory,
|
|
16
16
|
ToolsFactory,
|
|
17
17
|
ToolType,
|
|
18
|
+
normalize_url,
|
|
19
|
+
citation_appears_in_text,
|
|
18
20
|
)
|
|
19
21
|
from vectara_agentic.agent import Agent
|
|
20
22
|
from vectara_agentic.agent_config import AgentConfig
|
|
@@ -367,6 +369,165 @@ class TestToolsPackage(unittest.TestCase):
|
|
|
367
369
|
self.assertIn("Returns:", doc)
|
|
368
370
|
self.assertIn("dict[str, Any]: A dictionary containing the result data.", doc)
|
|
369
371
|
|
|
372
|
+
def test_normalize_url(self):
|
|
373
|
+
"""Test URL normalization function"""
|
|
374
|
+
# Test space encoding normalization
|
|
375
|
+
self.assertEqual(
|
|
376
|
+
normalize_url("http://example.com/file with spaces.pdf"),
|
|
377
|
+
"http://example.com/file%20with%20spaces.pdf",
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
# Test that already encoded URLs remain normalized
|
|
381
|
+
self.assertEqual(
|
|
382
|
+
normalize_url("http://example.com/file%20with%20spaces.pdf"),
|
|
383
|
+
"http://example.com/file%20with%20spaces.pdf",
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
# Test special characters
|
|
387
|
+
self.assertEqual(
|
|
388
|
+
normalize_url("http://example.com/path?query=hello world&foo=bar"),
|
|
389
|
+
"http://example.com/path?query=hello%20world&foo=bar",
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Test empty/None input
|
|
393
|
+
self.assertEqual(normalize_url(""), "")
|
|
394
|
+
self.assertEqual(normalize_url(None), None)
|
|
395
|
+
|
|
396
|
+
# Test complex URL with multiple encodable characters
|
|
397
|
+
result = normalize_url("http://example.com/docs/My Document [v2].pdf#section 1")
|
|
398
|
+
expected = "http://example.com/docs/My%20Document%20[v2].pdf#section%201"
|
|
399
|
+
self.assertEqual(result, expected)
|
|
400
|
+
|
|
401
|
+
def test_citation_appears_in_text_exact_match(self):
|
|
402
|
+
"""Test citation matching with exact format"""
|
|
403
|
+
response_text = "Here's the info [Document Title](http://example.com/doc.pdf) for reference."
|
|
404
|
+
|
|
405
|
+
# Should match exact citation
|
|
406
|
+
self.assertTrue(
|
|
407
|
+
citation_appears_in_text(
|
|
408
|
+
"Document Title", "http://example.com/doc.pdf", response_text
|
|
409
|
+
)
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
# Should not match different text with different URL
|
|
413
|
+
self.assertFalse(
|
|
414
|
+
citation_appears_in_text(
|
|
415
|
+
"Wrong Title", "http://different.com/other.pdf", response_text
|
|
416
|
+
)
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
def test_citation_appears_in_text_url_encoding(self):
|
|
420
|
+
"""Test citation matching with URL encoding differences"""
|
|
421
|
+
# Response text with percent-encoded URL
|
|
422
|
+
response_text_encoded = (
|
|
423
|
+
"See [My Doc](http://example.com/my%20document.pdf) for details."
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
# Should match when citation URL has spaces
|
|
427
|
+
self.assertTrue(
|
|
428
|
+
citation_appears_in_text(
|
|
429
|
+
"My Doc", "http://example.com/my document.pdf", response_text_encoded
|
|
430
|
+
)
|
|
431
|
+
)
|
|
432
|
+
|
|
433
|
+
# Response text with spaces in URL
|
|
434
|
+
response_text_spaces = (
|
|
435
|
+
"See [My Doc](http://example.com/my document.pdf) for details."
|
|
436
|
+
)
|
|
437
|
+
|
|
438
|
+
# Should match when citation URL is encoded
|
|
439
|
+
self.assertTrue(
|
|
440
|
+
citation_appears_in_text(
|
|
441
|
+
"My Doc", "http://example.com/my%20document.pdf", response_text_spaces
|
|
442
|
+
)
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
def test_citation_appears_in_text_url_presence(self):
|
|
446
|
+
"""Test fallback URL presence matching"""
|
|
447
|
+
# Response text that contains URL but not in exact citation format
|
|
448
|
+
response_text = (
|
|
449
|
+
"The document at http://example.com/report.pdf contains the analysis."
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
# Should match based on URL presence
|
|
453
|
+
self.assertTrue(
|
|
454
|
+
citation_appears_in_text(
|
|
455
|
+
"Report", "http://example.com/report.pdf", response_text
|
|
456
|
+
)
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
# Should work with encoded URL in response
|
|
460
|
+
response_encoded = (
|
|
461
|
+
"The document at http://example.com/my%20report.pdf contains data."
|
|
462
|
+
)
|
|
463
|
+
self.assertTrue(
|
|
464
|
+
citation_appears_in_text(
|
|
465
|
+
"Report", "http://example.com/my report.pdf", response_encoded
|
|
466
|
+
)
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
def test_citation_appears_in_text_edge_cases(self):
|
|
470
|
+
"""Test edge cases and error conditions"""
|
|
471
|
+
response_text = "Some text with [citations](http://example.com/doc.pdf) here."
|
|
472
|
+
|
|
473
|
+
# Empty inputs should return False
|
|
474
|
+
self.assertFalse(
|
|
475
|
+
citation_appears_in_text("", "http://example.com/doc.pdf", response_text)
|
|
476
|
+
)
|
|
477
|
+
self.assertFalse(citation_appears_in_text("Title", "", response_text))
|
|
478
|
+
self.assertFalse(
|
|
479
|
+
citation_appears_in_text("Title", "http://example.com/doc.pdf", "")
|
|
480
|
+
)
|
|
481
|
+
self.assertFalse(
|
|
482
|
+
citation_appears_in_text(None, "http://example.com/doc.pdf", response_text)
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
# Both None should return False
|
|
486
|
+
self.assertFalse(citation_appears_in_text(None, None, response_text))
|
|
487
|
+
|
|
488
|
+
# Very short filename should not trigger filename matching
|
|
489
|
+
self.assertFalse(
|
|
490
|
+
citation_appears_in_text(
|
|
491
|
+
"Title", "http://example.com/x.y", "Different content"
|
|
492
|
+
)
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
def test_citation_appears_in_text_complex_encoding(self):
|
|
496
|
+
"""Test complex URL encoding scenarios"""
|
|
497
|
+
# Test case with multiple special characters
|
|
498
|
+
response_text = "Document: [Legal Doc](http://example.com/docs/Contract%20%5B2024%5D%20%26%20Agreement.pdf)"
|
|
499
|
+
|
|
500
|
+
# Should match with unencoded URL
|
|
501
|
+
self.assertTrue(
|
|
502
|
+
citation_appears_in_text(
|
|
503
|
+
"Legal Doc",
|
|
504
|
+
"http://example.com/docs/Contract [2024] & Agreement.pdf",
|
|
505
|
+
response_text,
|
|
506
|
+
)
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
def test_citation_appears_in_text_url_only(self):
|
|
510
|
+
"""Test citation matching when only URL is available (no text)"""
|
|
511
|
+
# Test the [(url)] format when only URL is available
|
|
512
|
+
response_text = "Reference: [(http://example.com/report.pdf)] shows data."
|
|
513
|
+
|
|
514
|
+
# Should match with URL-only citation format
|
|
515
|
+
self.assertTrue(
|
|
516
|
+
citation_appears_in_text(
|
|
517
|
+
None, "http://example.com/report.pdf", response_text
|
|
518
|
+
)
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
# Should also work with URL encoding differences
|
|
522
|
+
response_encoded = (
|
|
523
|
+
"Reference: [(http://example.com/my%20report.pdf)] shows data."
|
|
524
|
+
)
|
|
525
|
+
self.assertTrue(
|
|
526
|
+
citation_appears_in_text(
|
|
527
|
+
None, "http://example.com/my report.pdf", response_encoded
|
|
528
|
+
)
|
|
529
|
+
)
|
|
530
|
+
|
|
370
531
|
|
|
371
532
|
if __name__ == "__main__":
|
|
372
533
|
unittest.main()
|
vectara_agentic/_version.py
CHANGED
vectara_agentic/agent.py
CHANGED
|
@@ -1096,7 +1096,7 @@ class Agent:
|
|
|
1096
1096
|
model_fields = outputs_model_on_fail_cls.model_fields
|
|
1097
1097
|
input_dict = {}
|
|
1098
1098
|
for key in model_fields:
|
|
1099
|
-
value = await workflow_context.get(key, default=_missing)
|
|
1099
|
+
value = await workflow_context.store.get(key, default=_missing) # pylint: disable=no-member
|
|
1100
1100
|
if value is not _missing:
|
|
1101
1101
|
input_dict[key] = value
|
|
1102
1102
|
output = outputs_model_on_fail_cls.model_validate(input_dict)
|
vectara_agentic/agent_core/prompts.py
CHANGED
|
@@ -23,7 +23,7 @@ GENERAL_INSTRUCTIONS = """
|
|
|
23
23
|
In rephrasing, aim for alternative queries that may work better for searching for the information.
|
|
24
24
|
For example, you can rephrase "CEO" with "Chief Executive Officer".
|
|
25
25
|
2) Break the question into sub-questions and call this tool or another tool for each sub-question, then combine the answers to provide a complete response.
|
|
26
|
-
For example if asked "what is the population of France and Germany", you can call the tool twice, once for France and once for Germany
|
|
26
|
+
For example if asked "what is the population of France and Germany", you can call the tool twice, once for France and once for Germany,
|
|
27
27
|
and then combine the responses to provide the full answer.
|
|
28
28
|
3) If a tool fails, try other tools that might be appropriate to gain the information you need.
|
|
29
29
|
- If after retrying you can't get the information or answer the question, respond with "I don't know".
|
|
@@ -31,22 +31,22 @@ GENERAL_INSTRUCTIONS = """
|
|
|
31
31
|
Be consistent with the format of numbers and dates across multi turn conversations.
|
|
32
32
|
- Handling citations - IMPORTANT:
|
|
33
33
|
1) Always embed citations inline with the text of your response, using valid URLs provided by tools.
|
|
34
|
-
Never omit a legitimate
|
|
35
|
-
|
|
34
|
+
Never omit a legitimate citation.
|
|
35
|
+
Never repeat the same citation multiple times in a response.
|
|
36
|
+
2) Avoid creating a bibliography or a list of sources at the end of your response, and referring the reader to that list.
|
|
36
37
|
Instead, embed citations directly in the text where the information is presented.
|
|
37
38
|
For example, "According to the [Nvidia 10-K report](https://www.nvidia.com/doc.pdf#page=8), revenue in 2021 was $10B."
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
3) When including URLs in the citation, only use well-formed, non-empty URLs (beginning with “http://” or “https://”) and ignore any malformed or placeholder links.
|
|
40
|
+
4) Use descriptive link text for citations whenever possible, falling back to numeric labels only when necessary.
|
|
40
41
|
Preferred: "According to the [Nvidia 10-K report](https://www.nvidia.com/doc.pdf#page=8), revenue in 2021 was $10B."
|
|
41
42
|
Fallback: "According to the Nvidia 10-K report, revenue in 2021 was $10B [1](https://www.nvidia.com/doc.pdf#page=8)."
|
|
42
|
-
|
|
43
|
+
5) If a URL is for a PDF file, and the tool also provided a page number, append "#page=X" to the URL.
|
|
43
44
|
For example, if the URL is "https://www.xxx.com/doc.pdf" and "page='5'", then the URL used in the citation would be "https://www.xxx.com/doc.pdf#page=5".
|
|
44
45
|
Always include the page number in the URL, whether you use anchor text or a numeric label.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
Here's an example where there is no proper spacing, and the citation is shown right after "10-K": "As shown in the [Nvidia 10-K](https://www.nvidia.com), the revenue in 2021 was $10B".
|
|
46
|
+
6) When citing images, figures, or tables, link directly to the file (or PDF page) just as you would for text.
|
|
47
|
+
7) Give each discrete fact its own citation (or citations), even if multiple facts come from the same document.
|
|
48
|
+
8) Ensure a space or punctuation precedes and follows every citation.
|
|
49
|
+
Here's an example where there is no proper spacing, and the citation is shown right after "10-K": "As shown in the[Nvidia 10-K](https://www.nvidia.com), the revenue in 2021 was $10B".
|
|
50
50
|
Instead use spacing properly: "As shown in the [Nvidia 10-K](https://www.nvidia.com), the revenue in 2021 was $10B".
|
|
51
51
|
- If a tool returns a "Malfunction" error - notify the user that you cannot respond due a tool not operating properly (and the tool name).
|
|
52
52
|
- Your response should never be the input to a tool, only the output.
|
|
@@ -58,6 +58,7 @@ GENERAL_INSTRUCTIONS = """
|
|
|
58
58
|
- Always respond in the language of the question, and in text (no images, videos or code).
|
|
59
59
|
- If you are provided with database tools use them for analytical queries (such as counting, calculating max, min, average, sum, or other statistics).
|
|
60
60
|
For each database, the database tools include: x_list_tables, x_load_data, x_describe_tables, x_load_unique_values, and x_load_sample_data, where 'x' in the database name.
|
|
61
|
+
Do not call any database tool unless it is included in your list of available tools.
|
|
61
62
|
for example, if the database name is "ev", the tools are: ev_list_tables, ev_load_data, ev_describe_tables, ev_load_unique_values, and ev_load_sample_data.
|
|
62
63
|
Use ANSI SQL-92 syntax for the SQL queries, and do not use any other SQL dialect.
|
|
63
64
|
Before using the x_load_data with a SQL query, always follow these discovery steps:
|