vanna 0.7.9__py3-none-any.whl → 2.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. vanna/__init__.py +167 -395
  2. vanna/agents/__init__.py +7 -0
  3. vanna/capabilities/__init__.py +17 -0
  4. vanna/capabilities/agent_memory/__init__.py +21 -0
  5. vanna/capabilities/agent_memory/base.py +103 -0
  6. vanna/capabilities/agent_memory/models.py +53 -0
  7. vanna/capabilities/file_system/__init__.py +14 -0
  8. vanna/capabilities/file_system/base.py +71 -0
  9. vanna/capabilities/file_system/models.py +25 -0
  10. vanna/capabilities/sql_runner/__init__.py +13 -0
  11. vanna/capabilities/sql_runner/base.py +37 -0
  12. vanna/capabilities/sql_runner/models.py +13 -0
  13. vanna/components/__init__.py +92 -0
  14. vanna/components/base.py +11 -0
  15. vanna/components/rich/__init__.py +83 -0
  16. vanna/components/rich/containers/__init__.py +7 -0
  17. vanna/components/rich/containers/card.py +20 -0
  18. vanna/components/rich/data/__init__.py +9 -0
  19. vanna/components/rich/data/chart.py +17 -0
  20. vanna/components/rich/data/dataframe.py +93 -0
  21. vanna/components/rich/feedback/__init__.py +21 -0
  22. vanna/components/rich/feedback/badge.py +16 -0
  23. vanna/components/rich/feedback/icon_text.py +14 -0
  24. vanna/components/rich/feedback/log_viewer.py +41 -0
  25. vanna/components/rich/feedback/notification.py +19 -0
  26. vanna/components/rich/feedback/progress.py +37 -0
  27. vanna/components/rich/feedback/status_card.py +28 -0
  28. vanna/components/rich/feedback/status_indicator.py +14 -0
  29. vanna/components/rich/interactive/__init__.py +21 -0
  30. vanna/components/rich/interactive/button.py +95 -0
  31. vanna/components/rich/interactive/task_list.py +58 -0
  32. vanna/components/rich/interactive/ui_state.py +93 -0
  33. vanna/components/rich/specialized/__init__.py +7 -0
  34. vanna/components/rich/specialized/artifact.py +20 -0
  35. vanna/components/rich/text.py +16 -0
  36. vanna/components/simple/__init__.py +15 -0
  37. vanna/components/simple/image.py +15 -0
  38. vanna/components/simple/link.py +15 -0
  39. vanna/components/simple/text.py +11 -0
  40. vanna/core/__init__.py +193 -0
  41. vanna/core/_compat.py +19 -0
  42. vanna/core/agent/__init__.py +10 -0
  43. vanna/core/agent/agent.py +1407 -0
  44. vanna/core/agent/config.py +123 -0
  45. vanna/core/audit/__init__.py +28 -0
  46. vanna/core/audit/base.py +299 -0
  47. vanna/core/audit/models.py +131 -0
  48. vanna/core/component_manager.py +329 -0
  49. vanna/core/components.py +53 -0
  50. vanna/core/enhancer/__init__.py +11 -0
  51. vanna/core/enhancer/base.py +94 -0
  52. vanna/core/enhancer/default.py +118 -0
  53. vanna/core/enricher/__init__.py +10 -0
  54. vanna/core/enricher/base.py +59 -0
  55. vanna/core/errors.py +47 -0
  56. vanna/core/evaluation/__init__.py +81 -0
  57. vanna/core/evaluation/base.py +186 -0
  58. vanna/core/evaluation/dataset.py +254 -0
  59. vanna/core/evaluation/evaluators.py +376 -0
  60. vanna/core/evaluation/report.py +289 -0
  61. vanna/core/evaluation/runner.py +313 -0
  62. vanna/core/filter/__init__.py +10 -0
  63. vanna/core/filter/base.py +67 -0
  64. vanna/core/lifecycle/__init__.py +10 -0
  65. vanna/core/lifecycle/base.py +83 -0
  66. vanna/core/llm/__init__.py +16 -0
  67. vanna/core/llm/base.py +40 -0
  68. vanna/core/llm/models.py +61 -0
  69. vanna/core/middleware/__init__.py +10 -0
  70. vanna/core/middleware/base.py +69 -0
  71. vanna/core/observability/__init__.py +11 -0
  72. vanna/core/observability/base.py +88 -0
  73. vanna/core/observability/models.py +47 -0
  74. vanna/core/recovery/__init__.py +11 -0
  75. vanna/core/recovery/base.py +84 -0
  76. vanna/core/recovery/models.py +32 -0
  77. vanna/core/registry.py +278 -0
  78. vanna/core/rich_component.py +156 -0
  79. vanna/core/simple_component.py +27 -0
  80. vanna/core/storage/__init__.py +14 -0
  81. vanna/core/storage/base.py +46 -0
  82. vanna/core/storage/models.py +46 -0
  83. vanna/core/system_prompt/__init__.py +13 -0
  84. vanna/core/system_prompt/base.py +36 -0
  85. vanna/core/system_prompt/default.py +157 -0
  86. vanna/core/tool/__init__.py +18 -0
  87. vanna/core/tool/base.py +70 -0
  88. vanna/core/tool/models.py +84 -0
  89. vanna/core/user/__init__.py +17 -0
  90. vanna/core/user/base.py +29 -0
  91. vanna/core/user/models.py +25 -0
  92. vanna/core/user/request_context.py +70 -0
  93. vanna/core/user/resolver.py +42 -0
  94. vanna/core/validation.py +164 -0
  95. vanna/core/workflow/__init__.py +12 -0
  96. vanna/core/workflow/base.py +254 -0
  97. vanna/core/workflow/default.py +789 -0
  98. vanna/examples/__init__.py +1 -0
  99. vanna/examples/__main__.py +44 -0
  100. vanna/examples/anthropic_quickstart.py +80 -0
  101. vanna/examples/artifact_example.py +293 -0
  102. vanna/examples/claude_sqlite_example.py +236 -0
  103. vanna/examples/coding_agent_example.py +300 -0
  104. vanna/examples/custom_system_prompt_example.py +174 -0
  105. vanna/examples/default_workflow_handler_example.py +208 -0
  106. vanna/examples/email_auth_example.py +340 -0
  107. vanna/examples/evaluation_example.py +269 -0
  108. vanna/examples/extensibility_example.py +262 -0
  109. vanna/examples/minimal_example.py +67 -0
  110. vanna/examples/mock_auth_example.py +227 -0
  111. vanna/examples/mock_custom_tool.py +311 -0
  112. vanna/examples/mock_quickstart.py +79 -0
  113. vanna/examples/mock_quota_example.py +145 -0
  114. vanna/examples/mock_rich_components_demo.py +396 -0
  115. vanna/examples/mock_sqlite_example.py +223 -0
  116. vanna/examples/openai_quickstart.py +83 -0
  117. vanna/examples/primitive_components_demo.py +305 -0
  118. vanna/examples/quota_lifecycle_example.py +139 -0
  119. vanna/examples/visualization_example.py +251 -0
  120. vanna/integrations/__init__.py +17 -0
  121. vanna/integrations/anthropic/__init__.py +9 -0
  122. vanna/integrations/anthropic/llm.py +270 -0
  123. vanna/integrations/azureopenai/__init__.py +9 -0
  124. vanna/integrations/azureopenai/llm.py +329 -0
  125. vanna/integrations/azuresearch/__init__.py +7 -0
  126. vanna/integrations/azuresearch/agent_memory.py +413 -0
  127. vanna/integrations/bigquery/__init__.py +5 -0
  128. vanna/integrations/bigquery/sql_runner.py +81 -0
  129. vanna/integrations/chromadb/__init__.py +104 -0
  130. vanna/integrations/chromadb/agent_memory.py +416 -0
  131. vanna/integrations/clickhouse/__init__.py +5 -0
  132. vanna/integrations/clickhouse/sql_runner.py +82 -0
  133. vanna/integrations/duckdb/__init__.py +5 -0
  134. vanna/integrations/duckdb/sql_runner.py +65 -0
  135. vanna/integrations/faiss/__init__.py +7 -0
  136. vanna/integrations/faiss/agent_memory.py +431 -0
  137. vanna/integrations/google/__init__.py +9 -0
  138. vanna/integrations/google/gemini.py +370 -0
  139. vanna/integrations/hive/__init__.py +5 -0
  140. vanna/integrations/hive/sql_runner.py +87 -0
  141. vanna/integrations/local/__init__.py +17 -0
  142. vanna/integrations/local/agent_memory/__init__.py +7 -0
  143. vanna/integrations/local/agent_memory/in_memory.py +285 -0
  144. vanna/integrations/local/audit.py +59 -0
  145. vanna/integrations/local/file_system.py +242 -0
  146. vanna/integrations/local/file_system_conversation_store.py +255 -0
  147. vanna/integrations/local/storage.py +62 -0
  148. vanna/integrations/marqo/__init__.py +7 -0
  149. vanna/integrations/marqo/agent_memory.py +354 -0
  150. vanna/integrations/milvus/__init__.py +7 -0
  151. vanna/integrations/milvus/agent_memory.py +458 -0
  152. vanna/integrations/mock/__init__.py +9 -0
  153. vanna/integrations/mock/llm.py +65 -0
  154. vanna/integrations/mssql/__init__.py +5 -0
  155. vanna/integrations/mssql/sql_runner.py +66 -0
  156. vanna/integrations/mysql/__init__.py +5 -0
  157. vanna/integrations/mysql/sql_runner.py +92 -0
  158. vanna/integrations/ollama/__init__.py +7 -0
  159. vanna/integrations/ollama/llm.py +252 -0
  160. vanna/integrations/openai/__init__.py +10 -0
  161. vanna/integrations/openai/llm.py +267 -0
  162. vanna/integrations/openai/responses.py +163 -0
  163. vanna/integrations/opensearch/__init__.py +7 -0
  164. vanna/integrations/opensearch/agent_memory.py +411 -0
  165. vanna/integrations/oracle/__init__.py +5 -0
  166. vanna/integrations/oracle/sql_runner.py +75 -0
  167. vanna/integrations/pinecone/__init__.py +7 -0
  168. vanna/integrations/pinecone/agent_memory.py +329 -0
  169. vanna/integrations/plotly/__init__.py +5 -0
  170. vanna/integrations/plotly/chart_generator.py +313 -0
  171. vanna/integrations/postgres/__init__.py +9 -0
  172. vanna/integrations/postgres/sql_runner.py +112 -0
  173. vanna/integrations/premium/agent_memory/__init__.py +7 -0
  174. vanna/integrations/premium/agent_memory/premium.py +186 -0
  175. vanna/integrations/presto/__init__.py +5 -0
  176. vanna/integrations/presto/sql_runner.py +107 -0
  177. vanna/integrations/qdrant/__init__.py +7 -0
  178. vanna/integrations/qdrant/agent_memory.py +439 -0
  179. vanna/integrations/snowflake/__init__.py +5 -0
  180. vanna/integrations/snowflake/sql_runner.py +147 -0
  181. vanna/integrations/sqlite/__init__.py +9 -0
  182. vanna/integrations/sqlite/sql_runner.py +65 -0
  183. vanna/integrations/weaviate/__init__.py +7 -0
  184. vanna/integrations/weaviate/agent_memory.py +428 -0
  185. vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_embeddings.py +11 -11
  186. vanna/legacy/__init__.py +403 -0
  187. vanna/legacy/adapter.py +463 -0
  188. vanna/{advanced → legacy/advanced}/__init__.py +3 -1
  189. vanna/{anthropic → legacy/anthropic}/anthropic_chat.py +9 -7
  190. vanna/{azuresearch → legacy/azuresearch}/azuresearch_vector.py +79 -41
  191. vanna/{base → legacy/base}/base.py +224 -217
  192. vanna/legacy/bedrock/__init__.py +1 -0
  193. vanna/{bedrock → legacy/bedrock}/bedrock_converse.py +13 -12
  194. vanna/{chromadb → legacy/chromadb}/chromadb_vector.py +3 -1
  195. vanna/legacy/cohere/__init__.py +2 -0
  196. vanna/{cohere → legacy/cohere}/cohere_chat.py +19 -14
  197. vanna/{cohere → legacy/cohere}/cohere_embeddings.py +25 -19
  198. vanna/{deepseek → legacy/deepseek}/deepseek_chat.py +5 -6
  199. vanna/legacy/faiss/__init__.py +1 -0
  200. vanna/{faiss → legacy/faiss}/faiss.py +113 -59
  201. vanna/{flask → legacy/flask}/__init__.py +84 -43
  202. vanna/{flask → legacy/flask}/assets.py +5 -5
  203. vanna/{flask → legacy/flask}/auth.py +5 -4
  204. vanna/{google → legacy/google}/bigquery_vector.py +75 -42
  205. vanna/{google → legacy/google}/gemini_chat.py +7 -3
  206. vanna/{hf → legacy/hf}/hf.py +0 -1
  207. vanna/{milvus → legacy/milvus}/milvus_vector.py +58 -35
  208. vanna/{mock → legacy/mock}/llm.py +0 -1
  209. vanna/legacy/mock/vectordb.py +67 -0
  210. vanna/legacy/ollama/ollama.py +110 -0
  211. vanna/{openai → legacy/openai}/openai_chat.py +2 -6
  212. vanna/legacy/opensearch/opensearch_vector.py +369 -0
  213. vanna/legacy/opensearch/opensearch_vector_semantic.py +200 -0
  214. vanna/legacy/oracle/oracle_vector.py +584 -0
  215. vanna/{pgvector → legacy/pgvector}/pgvector.py +42 -13
  216. vanna/{qdrant → legacy/qdrant}/qdrant.py +2 -6
  217. vanna/legacy/qianfan/Qianfan_Chat.py +170 -0
  218. vanna/legacy/qianfan/Qianfan_embeddings.py +36 -0
  219. vanna/legacy/qianwen/QianwenAI_chat.py +132 -0
  220. vanna/{remote.py → legacy/remote.py} +28 -26
  221. vanna/{utils.py → legacy/utils.py} +6 -11
  222. vanna/{vannadb → legacy/vannadb}/vannadb_vector.py +115 -46
  223. vanna/{vllm → legacy/vllm}/vllm.py +5 -6
  224. vanna/{weaviate → legacy/weaviate}/weaviate_vector.py +59 -40
  225. vanna/{xinference → legacy/xinference}/xinference.py +6 -6
  226. vanna/py.typed +0 -0
  227. vanna/servers/__init__.py +16 -0
  228. vanna/servers/__main__.py +8 -0
  229. vanna/servers/base/__init__.py +18 -0
  230. vanna/servers/base/chat_handler.py +65 -0
  231. vanna/servers/base/models.py +111 -0
  232. vanna/servers/base/rich_chat_handler.py +141 -0
  233. vanna/servers/base/templates.py +331 -0
  234. vanna/servers/cli/__init__.py +7 -0
  235. vanna/servers/cli/server_runner.py +204 -0
  236. vanna/servers/fastapi/__init__.py +7 -0
  237. vanna/servers/fastapi/app.py +163 -0
  238. vanna/servers/fastapi/routes.py +183 -0
  239. vanna/servers/flask/__init__.py +7 -0
  240. vanna/servers/flask/app.py +132 -0
  241. vanna/servers/flask/routes.py +137 -0
  242. vanna/tools/__init__.py +41 -0
  243. vanna/tools/agent_memory.py +322 -0
  244. vanna/tools/file_system.py +879 -0
  245. vanna/tools/python.py +222 -0
  246. vanna/tools/run_sql.py +165 -0
  247. vanna/tools/visualize_data.py +195 -0
  248. vanna/utils/__init__.py +0 -0
  249. vanna/web_components/__init__.py +44 -0
  250. vanna-2.0.0rc1.dist-info/METADATA +868 -0
  251. vanna-2.0.0rc1.dist-info/RECORD +289 -0
  252. vanna-2.0.0rc1.dist-info/entry_points.txt +3 -0
  253. vanna/bedrock/__init__.py +0 -1
  254. vanna/cohere/__init__.py +0 -2
  255. vanna/faiss/__init__.py +0 -1
  256. vanna/mock/vectordb.py +0 -55
  257. vanna/ollama/ollama.py +0 -103
  258. vanna/opensearch/opensearch_vector.py +0 -392
  259. vanna/opensearch/opensearch_vector_semantic.py +0 -175
  260. vanna/oracle/oracle_vector.py +0 -585
  261. vanna/qianfan/Qianfan_Chat.py +0 -165
  262. vanna/qianfan/Qianfan_embeddings.py +0 -36
  263. vanna/qianwen/QianwenAI_chat.py +0 -133
  264. vanna-0.7.9.dist-info/METADATA +0 -408
  265. vanna-0.7.9.dist-info/RECORD +0 -79
  266. /vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_Chat.py +0 -0
  267. /vanna/{ZhipuAI → legacy/ZhipuAI}/__init__.py +0 -0
  268. /vanna/{anthropic → legacy/anthropic}/__init__.py +0 -0
  269. /vanna/{azuresearch → legacy/azuresearch}/__init__.py +0 -0
  270. /vanna/{base → legacy/base}/__init__.py +0 -0
  271. /vanna/{chromadb → legacy/chromadb}/__init__.py +0 -0
  272. /vanna/{deepseek → legacy/deepseek}/__init__.py +0 -0
  273. /vanna/{exceptions → legacy/exceptions}/__init__.py +0 -0
  274. /vanna/{google → legacy/google}/__init__.py +0 -0
  275. /vanna/{hf → legacy/hf}/__init__.py +0 -0
  276. /vanna/{local.py → legacy/local.py} +0 -0
  277. /vanna/{marqo → legacy/marqo}/__init__.py +0 -0
  278. /vanna/{marqo → legacy/marqo}/marqo.py +0 -0
  279. /vanna/{milvus → legacy/milvus}/__init__.py +0 -0
  280. /vanna/{mistral → legacy/mistral}/__init__.py +0 -0
  281. /vanna/{mistral → legacy/mistral}/mistral.py +0 -0
  282. /vanna/{mock → legacy/mock}/__init__.py +0 -0
  283. /vanna/{mock → legacy/mock}/embedding.py +0 -0
  284. /vanna/{ollama → legacy/ollama}/__init__.py +0 -0
  285. /vanna/{openai → legacy/openai}/__init__.py +0 -0
  286. /vanna/{openai → legacy/openai}/openai_embeddings.py +0 -0
  287. /vanna/{opensearch → legacy/opensearch}/__init__.py +0 -0
  288. /vanna/{oracle → legacy/oracle}/__init__.py +0 -0
  289. /vanna/{pgvector → legacy/pgvector}/__init__.py +0 -0
  290. /vanna/{pinecone → legacy/pinecone}/__init__.py +0 -0
  291. /vanna/{pinecone → legacy/pinecone}/pinecone_vector.py +0 -0
  292. /vanna/{qdrant → legacy/qdrant}/__init__.py +0 -0
  293. /vanna/{qianfan → legacy/qianfan}/__init__.py +0 -0
  294. /vanna/{qianwen → legacy/qianwen}/QianwenAI_embeddings.py +0 -0
  295. /vanna/{qianwen → legacy/qianwen}/__init__.py +0 -0
  296. /vanna/{types → legacy/types}/__init__.py +0 -0
  297. /vanna/{vannadb → legacy/vannadb}/__init__.py +0 -0
  298. /vanna/{vllm → legacy/vllm}/__init__.py +0 -0
  299. /vanna/{weaviate → legacy/weaviate}/__init__.py +0 -0
  300. /vanna/{xinference → legacy/xinference}/__init__.py +0 -0
  301. {vanna-0.7.9.dist-info → vanna-2.0.0rc1.dist-info}/WHEEL +0 -0
  302. {vanna-0.7.9.dist-info → vanna-2.0.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,313 @@
1
+ """
2
+ Evaluation runner with parallel execution support.
3
+
4
+ This module provides the EvaluationRunner class that executes test cases
5
+ against agents with configurable parallelism for efficient evaluation,
6
+ especially when comparing multiple LLMs or model versions.
7
+ """
8
+
9
+ import asyncio
10
+ from typing import Any, List, Dict, Optional, AsyncGenerator, TYPE_CHECKING
11
+ from datetime import datetime
12
+
13
+ from .base import (
14
+ TestCase,
15
+ AgentResult,
16
+ TestCaseResult,
17
+ AgentVariant,
18
+ Evaluator,
19
+ )
20
+ from vanna.core import UiComponent
21
+ from vanna.core.user.request_context import RequestContext
22
+ from vanna.core.observability import ObservabilityProvider
23
+
24
+ if TYPE_CHECKING:
25
+ from vanna import Agent
26
+ from .report import EvaluationReport, ComparisonReport
27
+
28
+
29
class EvaluationRunner:
    """Run evaluations with parallel execution support.

    The primary use case is comparing multiple agent variants (e.g., different
    LLMs) on the same set of test cases. Test cases are executed concurrently,
    with at most ``max_concurrency`` of them in flight at any time across all
    work started through this runner on the current event loop.

    Example:
        >>> runner = EvaluationRunner(
        ...     evaluators=[TrajectoryEvaluator(), OutputEvaluator()],
        ...     max_concurrency=20
        ... )
        >>> comparison = await runner.compare_agents(
        ...     agent_variants=[claude_variant, gpt_variant],
        ...     test_cases=dataset.test_cases
        ... )
    """

    def __init__(
        self,
        evaluators: List[Evaluator],
        max_concurrency: int = 10,
        observability_provider: Optional[ObservabilityProvider] = None,
    ):
        """Initialize the evaluation runner.

        Args:
            evaluators: List of evaluators to apply to each test case
            max_concurrency: Maximum number of concurrent test case executions
            observability_provider: Optional observability for tracking eval runs

        Raises:
            ValueError: If ``max_concurrency`` is smaller than 1. A limit of
                zero would make every test case wait forever.
        """
        if max_concurrency < 1:
            raise ValueError("max_concurrency must be at least 1")

        self.evaluators = evaluators
        self.max_concurrency = max_concurrency
        self.observability = observability_provider
        # asyncio synchronization primitives belong to a single event loop, so
        # the semaphore is created lazily inside the loop that actually runs
        # the evaluation (see _get_semaphore) instead of here.
        self._semaphore: Optional[asyncio.Semaphore] = None
        self._semaphore_loop: Optional[asyncio.AbstractEventLoop] = None

    def _get_semaphore(self) -> asyncio.Semaphore:
        """Return the concurrency limiter for the currently running loop.

        A new semaphore is created the first time this is called and whenever
        the running loop differs from the one the cached semaphore was made
        for (e.g. the runner is reused across several ``asyncio.run()`` calls).
        Must be called from inside a running event loop.
        """
        loop = asyncio.get_running_loop()
        if self._semaphore is None or self._semaphore_loop is not loop:
            self._semaphore = asyncio.Semaphore(self.max_concurrency)
            self._semaphore_loop = loop
        return self._semaphore

    async def run_evaluation(
        self,
        agent: "Agent",
        test_cases: List[TestCase],
    ) -> "EvaluationReport":
        """Run evaluation on a single agent.

        Args:
            agent: The agent to evaluate
            test_cases: List of test cases to run

        Returns:
            EvaluationReport with results for all test cases. The report's
            ``agent_name`` is the fixed label ``"agent"``.
        """
        # Imported lazily, matching the TYPE_CHECKING-only import at file level.
        from .report import EvaluationReport

        results = await self._run_test_cases_parallel(agent, test_cases)
        return EvaluationReport(
            agent_name="agent",
            results=results,
            evaluators=self.evaluators,
            timestamp=datetime.now(),
        )

    async def compare_agents(
        self,
        agent_variants: List[AgentVariant],
        test_cases: List[TestCase],
    ) -> "ComparisonReport":
        """Compare multiple agent variants on same test cases.

        This is the PRIMARY use case for LLM comparison. All variants are
        started together; the shared concurrency limit still applies to the
        individual test cases.

        Args:
            agent_variants: List of agent variants to compare
            test_cases: Test cases to run on each variant

        Returns:
            ComparisonReport with results for all variants. Reports are keyed
            by variant name, so variants sharing a name overwrite each other.
        """
        from .report import ComparisonReport

        # Snapshot the provider so the span is closed by the same object that
        # opened it, and so the span is always closed even if a variant fails.
        observability = self.observability
        span = None
        if observability:
            span = await observability.create_span(
                "agent_comparison",
                attributes={
                    "num_variants": len(agent_variants),
                    "num_test_cases": len(test_cases),
                },
            )

        try:
            variant_reports = await asyncio.gather(
                *(
                    self._run_agent_variant(variant, test_cases)
                    for variant in agent_variants
                )
            )
        finally:
            if observability:
                await observability.end_span(span)

        return ComparisonReport(
            variants=agent_variants,
            reports={
                variant.name: report
                for variant, report in zip(agent_variants, variant_reports)
            },
            test_cases=test_cases,
            timestamp=datetime.now(),
        )

    async def compare_agents_streaming(
        self,
        agent_variants: List[AgentVariant],
        test_cases: List[TestCase],
    ) -> AsyncGenerator[tuple[str, TestCaseResult, int, int], None]:
        """Stream comparison results as they complete.

        Useful for long-running evaluations where you want to see
        progress updates in real-time (e.g., for UI display). Each
        (variant, test case) pair is yielded as soon as it finishes, so the
        order of results follows completion order, not input order.

        Args:
            agent_variants: Agent variants to compare
            test_cases: Test cases to run

        Yields:
            Tuples of (variant_name, result, completed_count, total_count)

        Raises:
            Exception: Whatever a test case run raises is re-raised to the
                consumer; the remaining runs are cancelled.
        """
        total = len(agent_variants) * len(test_cases)

        async def run_one(
            variant: AgentVariant, test_case: TestCase
        ) -> tuple[str, TestCaseResult]:
            """Run one test case and tag the result with its variant name."""
            result = await self._run_single_test_case(variant.agent, test_case)
            return variant.name, result

        pending = [
            asyncio.create_task(run_one(variant, test_case))
            for variant in agent_variants
            for test_case in test_cases
        ]

        completed = 0
        try:
            for finished in asyncio.as_completed(pending):
                variant_name, result = await finished
                completed += 1
                yield variant_name, result, completed, total
        finally:
            # Reached on normal exhaustion, when a run fails, and when the
            # consumer stops iterating early. Cancel whatever is still running
            # and wait for it so no task outlives the generator.
            for task in pending:
                if not task.done():
                    task.cancel()
            if pending:
                await asyncio.gather(*pending, return_exceptions=True)

    async def _run_agent_variant(
        self,
        variant: AgentVariant,
        test_cases: List[TestCase],
    ) -> "EvaluationReport":
        """Run a single agent variant on all test cases.

        Args:
            variant: The agent variant to evaluate
            test_cases: Test cases to run

        Returns:
            EvaluationReport for this variant
        """
        from .report import EvaluationReport

        observability = self.observability
        span = None
        if observability:
            span = await observability.create_span(
                f"variant_{variant.name}",
                attributes={
                    "variant": variant.name,
                    "num_test_cases": len(test_cases),
                    # Variant metadata is merged last, so it can override the
                    # two keys above.
                    **variant.metadata,
                },
            )

        try:
            results = await self._run_test_cases_parallel(variant.agent, test_cases)
        finally:
            # Close the span even when a test case run raises.
            if observability:
                await observability.end_span(span)

        return EvaluationReport(
            agent_name=variant.name,
            results=results,
            evaluators=self.evaluators,
            metadata=variant.metadata,
            timestamp=datetime.now(),
        )

    async def _run_test_cases_parallel(
        self,
        agent: "Agent",
        test_cases: List[TestCase],
    ) -> List[TestCaseResult]:
        """Run test cases in parallel with concurrency limit.

        Args:
            agent: The agent to run test cases on
            test_cases: Test cases to execute

        Returns:
            List of TestCaseResult, one per test case, in the same order as
            ``test_cases``
        """
        return await asyncio.gather(
            *(self._run_single_test_case(agent, test_case) for test_case in test_cases)
        )

    async def _run_single_test_case(
        self,
        agent: "Agent",
        test_case: TestCase,
    ) -> TestCaseResult:
        """Run a single test case with semaphore to limit concurrency.

        The semaphore slot is held for both the agent execution and the
        evaluators; only the agent execution is included in the reported time.

        Args:
            agent: The agent to execute
            test_case: The test case to run

        Returns:
            TestCaseResult with agent execution and evaluations
        """
        async with self._get_semaphore():
            loop = asyncio.get_running_loop()

            # Time the agent with the loop's monotonic clock (seconds).
            started = loop.time()
            agent_result = await self._execute_agent(agent, test_case)
            elapsed_seconds = loop.time() - started

            # Evaluators run one after another, in the configured order.
            evaluations = []
            for evaluator in self.evaluators:
                evaluations.append(await evaluator.evaluate(test_case, agent_result))

            return TestCaseResult(
                test_case=test_case,
                agent_result=agent_result,
                evaluations=evaluations,
                execution_time_ms=elapsed_seconds * 1000,
            )

    async def _execute_agent(
        self,
        agent: "Agent",
        test_case: TestCase,
    ) -> AgentResult:
        """Execute agent and capture full trajectory.

        Any exception raised while the agent is running is recorded on the
        result instead of being propagated, so one failing test case does not
        abort the whole evaluation. Components received before the failure are
        kept.

        Args:
            agent: The agent to execute
            test_case: The test case to run

        Returns:
            AgentResult with all captured data
        """
        components: List[UiComponent] = []
        tool_calls: List[Dict[str, Any]] = []
        error: Optional[str] = None

        try:
            # Create request context with user info from test case
            # This allows the agent's UserResolver to resolve the correct user
            request_context = RequestContext(
                cookies={"user_id": test_case.user.id},
                headers={},
                metadata={"test_case_user": test_case.user},
            )

            async for component in agent.send_message(
                request_context=request_context,
                message=test_case.message,
                conversation_id=test_case.conversation_id,
            ):
                components.append(component)

        except Exception as e:
            # Some exceptions carry no message; fall back to repr() so the
            # recorded error is never an empty (falsy) string.
            error = str(e) or repr(e)

        # TODO: Extract tool calls and LLM requests from observability
        # For now, these will be empty unless we hook into observability

        return AgentResult(
            test_case_id=test_case.id,
            components=components,
            tool_calls=tool_calls,
            llm_requests=[],
            error=error,
        )
@@ -0,0 +1,10 @@
1
+ """
2
+ Conversation filtering system for managing conversation history.
3
+
4
+ This module provides interfaces for filtering and transforming conversation
5
+ history before it's sent to the LLM.
6
+ """
7
+
8
+ from .base import ConversationFilter
9
+
10
+ __all__ = ["ConversationFilter"]
@@ -0,0 +1,67 @@
1
+ """
2
+ Base conversation filter interface.
3
+
4
+ Conversation filters allow you to transform conversation history before
5
+ it's sent to the LLM for processing.
6
+ """
7
+
8
+ from abc import ABC
9
+ from typing import TYPE_CHECKING, List
10
+
11
+ if TYPE_CHECKING:
12
+ from ..storage import Message
13
+
14
+
15
class ConversationFilter(ABC):
    """Hook for rewriting conversation history before it is sent to the LLM.

    Derive from this class to build filters that, for example:
    - Remove sensitive information
    - Summarize long conversations
    - Manage context window limits
    - Deduplicate similar messages
    - Prioritize recent or relevant messages

    Example:
        class ContextWindowFilter(ConversationFilter):
            def __init__(self, max_tokens: int = 8000):
                self.max_tokens = max_tokens

            async def filter_messages(self, messages: List[Message]) -> List[Message]:
                # Estimate tokens (rough approximation: 4 characters per token)
                total_tokens = 0
                filtered = []

                # Walk from newest to oldest and keep messages until the
                # token budget is used up
                for msg in reversed(messages):
                    msg_tokens = len(msg.content or "") // 4
                    if total_tokens + msg_tokens > self.max_tokens:
                        break
                    filtered.insert(0, msg)
                    total_tokens += msg_tokens

                return filtered

        agent = AgentRunner(
            llm_service=...,
            conversation_filters=[
                SensitiveDataFilter(),
                ContextWindowFilter(max_tokens=8000)
            ]
        )
    """

    async def filter_messages(self, messages: List["Message"]) -> List["Message"]:
        """Return the messages that should be forwarded to the LLM.

        The base implementation is a pass-through: it hands back the very
        list it was given, unchanged.

        Args:
            messages: List of conversation messages

        Returns:
            Filtered/transformed list of messages

        Note:
            Filters are applied in order, so messages passed to later
            filters may already be modified by earlier filters.
        """
        unchanged = messages
        return unchanged
@@ -0,0 +1,10 @@
1
+ """
2
+ Lifecycle hook system for agent execution.
3
+
4
+ This module provides hooks for intercepting and modifying agent behavior
5
+ at various points in the execution lifecycle.
6
+ """
7
+
8
+ from .base import LifecycleHook
9
+
10
+ __all__ = ["LifecycleHook"]
@@ -0,0 +1,83 @@
1
+ """
2
+ Base lifecycle hook interface.
3
+
4
+ Lifecycle hooks allow you to intercept and customize agent behavior
5
+ at key points in the execution flow.
6
+ """
7
+
8
+ from abc import ABC
9
+ from typing import TYPE_CHECKING, Any, Optional
10
+
11
+ if TYPE_CHECKING:
12
+ from ..user.models import User
13
+ from ..tool import Tool
14
+ from ..tool.models import ToolContext, ToolResult
15
+
16
+
17
class LifecycleHook(ABC):
    """Extension point for observing or altering agent execution.

    Every hook method has a do-nothing default, so subclasses override only
    the stages they care about. Typical uses:
    - Modify messages before processing
    - Add logging or telemetry
    - Enforce quotas or rate limits
    - Transform tool results
    - Add custom validation

    Example:
        class LoggingHook(LifecycleHook):
            async def before_message(self, user: User, message: str) -> Optional[str]:
                print(f"User {user.username} sent: {message}")
                return None  # Don't modify

        agent = AgentRunner(
            llm_service=...,
            lifecycle_hooks=[LoggingHook(), QuotaCheckHook()]
        )
    """

    async def before_message(self, user: "User", message: str) -> Optional[str]:
        """Run before a user message is processed.

        Args:
            user: User sending the message
            message: Original message content

        Returns:
            Modified message string, or None to keep original. The default
            implementation returns None.

        Raises:
            AgentError: To halt message processing (e.g., quota exceeded)
        """
        replacement: Optional[str] = None
        return replacement

    async def after_message(self, result: Any) -> None:
        """Run once a message has been fully processed.

        The default implementation does nothing.

        Args:
            result: Final result from message processing
        """
        return None

    async def before_tool(self, tool: "Tool[Any]", context: "ToolContext") -> None:
        """Run before a tool is executed.

        The default implementation does nothing.

        Args:
            tool: Tool about to be executed
            context: Tool execution context

        Raises:
            AgentError: To prevent tool execution
        """
        return None

    async def after_tool(self, result: "ToolResult") -> Optional["ToolResult"]:
        """Run after a tool has been executed.

        Args:
            result: Result from tool execution

        Returns:
            Modified ToolResult, or None to keep original. The default
            implementation returns None.
        """
        replacement: Optional["ToolResult"] = None
        return replacement
@@ -0,0 +1,16 @@
1
+ """
2
+ LLM domain.
3
+
4
+ This module provides the core abstractions for LLM services in the Vanna Agents framework.
5
+ """
6
+
7
+ from .base import LlmService
8
+ from .models import LlmMessage, LlmRequest, LlmResponse, LlmStreamChunk
9
+
10
+ __all__ = [
11
+ "LlmService",
12
+ "LlmMessage",
13
+ "LlmRequest",
14
+ "LlmResponse",
15
+ "LlmStreamChunk",
16
+ ]
vanna/core/llm/base.py ADDED
@@ -0,0 +1,40 @@
1
+ """
2
+ LLM domain interface.
3
+
4
+ This module contains the abstract base class for LLM services.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import Any, AsyncGenerator, List
9
+
10
+ from .models import LlmRequest, LlmResponse, LlmStreamChunk
11
+
12
+
13
class LlmService(ABC):
    """Abstract interface every LLM backend must implement."""

    @abstractmethod
    async def send_request(self, request: LlmRequest) -> LlmResponse:
        """Send a request to the LLM and return its complete response."""

    @abstractmethod
    async def stream_request(
        self, request: LlmRequest
    ) -> AsyncGenerator[LlmStreamChunk, None]:
        """Send a request to the LLM and stream the response.

        Args:
            request: The LLM request to stream

        Yields:
            LlmStreamChunk instances as they arrive
        """
        # The base implementation only fails; the unreachable ``yield`` below
        # exists so that Python compiles this method as an async generator.
        raise NotImplementedError
        yield  # pragma: no cover

    @abstractmethod
    async def validate_tools(self, tools: List[Any]) -> List[str]:
        """Validate tool schemas and return any errors found."""
@@ -0,0 +1,61 @@
1
+ """
2
+ LLM domain models.
3
+
4
+ This module contains data models for LLM communication.
5
+ """
6
+
7
+ from typing import Any, Dict, List, Optional
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from ..tool.models import ToolCall
12
+ from ..user.models import User
13
+
14
+
15
class LlmMessage(BaseModel):
    """Message format for LLM communication."""

    # Required: the role of the message and its text.
    role: str = Field(description="Message role")
    content: str = Field(description="Message content")

    # Optional tool-call data; both fields default to None.
    tool_calls: Optional[List[ToolCall]] = None
    tool_call_id: Optional[str] = None
+
23
+
24
class LlmRequest(BaseModel):
    """Request to LLM service."""

    # --- Payload -----------------------------------------------------------
    messages: List[LlmMessage] = Field(description="Messages to send")
    # Typed as Any: the entries will be ToolSchema objects, but that type is
    # not imported here in order to avoid a circular import.
    tools: Optional[List[Any]] = Field(default=None, description="Available tools")
    user: User = Field(description="User making the request")

    # --- Generation controls -----------------------------------------------
    stream: bool = Field(default=False, description="Whether to stream response")
    # Constrained to the closed interval [0.0, 2.0].
    temperature: float = Field(default=0.7, ge=0.0, le=2.0)
    # When provided, must be strictly positive.
    max_tokens: Optional[int] = Field(default=None, gt=0)
    system_prompt: Optional[str] = Field(
        default=None,
        description="System prompt for the LLM",
    )

    # --- Extras ------------------------------------------------------------
    # default_factory builds a fresh dict per instance.
    metadata: Dict[str, Any] = Field(default_factory=dict)
39
+
40
+
41
class LlmResponse(BaseModel):
    """Response from LLM."""

    # Every field is optional; metadata gets a fresh dict per instance.
    content: Optional[str] = None
    tool_calls: Optional[List[ToolCall]] = None
    finish_reason: Optional[str] = None
    usage: Optional[Dict[str, int]] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)

    def is_tool_call(self) -> bool:
        """Return True when the response carries at least one tool call."""
        # None and an empty list are both falsy, so truthiness covers the
        # "missing" and the "present but empty" cases in one check.
        return bool(self.tool_calls)
53
+
54
+
55
class LlmStreamChunk(BaseModel):
    """Streaming chunk from LLM."""

    # A chunk may carry text, tool calls, a finish reason, or any mix of
    # these; each one defaults to None when absent.
    content: Optional[str] = None
    tool_calls: Optional[List[ToolCall]] = None
    finish_reason: Optional[str] = None

    # default_factory builds a fresh dict per chunk.
    metadata: Dict[str, Any] = Field(default_factory=dict)
@@ -0,0 +1,10 @@
1
+ """
2
+ Middleware system for LLM request/response interception.
3
+
4
+ This module provides middleware interfaces for intercepting and transforming
5
+ LLM requests and responses.
6
+ """
7
+
8
+ from .base import LlmMiddleware
9
+
10
# The middleware base class is the only public name of this package.
__all__ = [
    "LlmMiddleware",
]
@@ -0,0 +1,69 @@
1
+ """
2
+ Base LLM middleware interface.
3
+
4
+ Middleware allows you to intercept and transform LLM requests and responses
5
+ for caching, monitoring, content filtering, and more.
6
+ """
7
+
8
+ from abc import ABC
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from ..llm import LlmRequest, LlmResponse
13
+
14
+
15
class LlmMiddleware(ABC):
    """Base class for hooks that run around an LLM call.

    Subclass this and override a hook to, for example:
    - cache LLM responses
    - log requests and responses
    - filter or rewrite content
    - track cost and usage
    - implement fallback strategies

    Both hooks are pass-through by default, so a subclass only has to
    override the one it needs.

    Example:
        class CachingMiddleware(LlmMiddleware):
            def __init__(self):
                self.cache = {}

            def _compute_key(self, request: LlmRequest) -> str:
                return repr(request.messages)

            async def after_llm_response(
                self, request: LlmRequest, response: LlmResponse
            ) -> LlmResponse:
                # Remember the response for this request.
                self.cache[self._compute_key(request)] = response
                return response

        agent = AgentRunner(
            llm_service=...,
            llm_middlewares=[CachingMiddleware()],
        )
    """

    async def before_llm_request(self, request: "LlmRequest") -> "LlmRequest":
        """Hook invoked before a request is sent to the LLM.

        Args:
            request: The LLM request about to be sent.

        Returns:
            The request to send. This default returns ``request`` unchanged.
        """
        # Pass-through: hand back the very same object.
        return request

    async def after_llm_response(
        self, request: "LlmRequest", response: "LlmResponse"
    ) -> "LlmResponse":
        """Hook invoked after a response is received from the LLM.

        Args:
            request: The original request.
            response: The LLM response.

        Returns:
            The response to use. This default returns ``response`` unchanged.
        """
        # Pass-through: ``request`` is available for context but unused here.
        return response
@@ -0,0 +1,11 @@
1
+ """
2
+ Observability system for telemetry and monitoring.
3
+
4
+ This module provides interfaces for collecting metrics, traces, and
5
+ monitoring agent behavior.
6
+ """
7
+
8
+ from .base import ObservabilityProvider
9
+ from .models import Span, Metric
10
+
11
# Public API: the provider interface plus its two data models.
__all__ = [
    "ObservabilityProvider",
    "Span",
    "Metric",
]