vanna 0.7.9__py3-none-any.whl → 2.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. vanna/__init__.py +167 -395
  2. vanna/agents/__init__.py +7 -0
  3. vanna/capabilities/__init__.py +17 -0
  4. vanna/capabilities/agent_memory/__init__.py +21 -0
  5. vanna/capabilities/agent_memory/base.py +103 -0
  6. vanna/capabilities/agent_memory/models.py +53 -0
  7. vanna/capabilities/file_system/__init__.py +14 -0
  8. vanna/capabilities/file_system/base.py +71 -0
  9. vanna/capabilities/file_system/models.py +25 -0
  10. vanna/capabilities/sql_runner/__init__.py +13 -0
  11. vanna/capabilities/sql_runner/base.py +37 -0
  12. vanna/capabilities/sql_runner/models.py +13 -0
  13. vanna/components/__init__.py +92 -0
  14. vanna/components/base.py +11 -0
  15. vanna/components/rich/__init__.py +83 -0
  16. vanna/components/rich/containers/__init__.py +7 -0
  17. vanna/components/rich/containers/card.py +20 -0
  18. vanna/components/rich/data/__init__.py +9 -0
  19. vanna/components/rich/data/chart.py +17 -0
  20. vanna/components/rich/data/dataframe.py +93 -0
  21. vanna/components/rich/feedback/__init__.py +21 -0
  22. vanna/components/rich/feedback/badge.py +16 -0
  23. vanna/components/rich/feedback/icon_text.py +14 -0
  24. vanna/components/rich/feedback/log_viewer.py +41 -0
  25. vanna/components/rich/feedback/notification.py +19 -0
  26. vanna/components/rich/feedback/progress.py +37 -0
  27. vanna/components/rich/feedback/status_card.py +28 -0
  28. vanna/components/rich/feedback/status_indicator.py +14 -0
  29. vanna/components/rich/interactive/__init__.py +21 -0
  30. vanna/components/rich/interactive/button.py +95 -0
  31. vanna/components/rich/interactive/task_list.py +58 -0
  32. vanna/components/rich/interactive/ui_state.py +93 -0
  33. vanna/components/rich/specialized/__init__.py +7 -0
  34. vanna/components/rich/specialized/artifact.py +20 -0
  35. vanna/components/rich/text.py +16 -0
  36. vanna/components/simple/__init__.py +15 -0
  37. vanna/components/simple/image.py +15 -0
  38. vanna/components/simple/link.py +15 -0
  39. vanna/components/simple/text.py +11 -0
  40. vanna/core/__init__.py +193 -0
  41. vanna/core/_compat.py +19 -0
  42. vanna/core/agent/__init__.py +10 -0
  43. vanna/core/agent/agent.py +1407 -0
  44. vanna/core/agent/config.py +123 -0
  45. vanna/core/audit/__init__.py +28 -0
  46. vanna/core/audit/base.py +299 -0
  47. vanna/core/audit/models.py +131 -0
  48. vanna/core/component_manager.py +329 -0
  49. vanna/core/components.py +53 -0
  50. vanna/core/enhancer/__init__.py +11 -0
  51. vanna/core/enhancer/base.py +94 -0
  52. vanna/core/enhancer/default.py +118 -0
  53. vanna/core/enricher/__init__.py +10 -0
  54. vanna/core/enricher/base.py +59 -0
  55. vanna/core/errors.py +47 -0
  56. vanna/core/evaluation/__init__.py +81 -0
  57. vanna/core/evaluation/base.py +186 -0
  58. vanna/core/evaluation/dataset.py +254 -0
  59. vanna/core/evaluation/evaluators.py +376 -0
  60. vanna/core/evaluation/report.py +289 -0
  61. vanna/core/evaluation/runner.py +313 -0
  62. vanna/core/filter/__init__.py +10 -0
  63. vanna/core/filter/base.py +67 -0
  64. vanna/core/lifecycle/__init__.py +10 -0
  65. vanna/core/lifecycle/base.py +83 -0
  66. vanna/core/llm/__init__.py +16 -0
  67. vanna/core/llm/base.py +40 -0
  68. vanna/core/llm/models.py +61 -0
  69. vanna/core/middleware/__init__.py +10 -0
  70. vanna/core/middleware/base.py +69 -0
  71. vanna/core/observability/__init__.py +11 -0
  72. vanna/core/observability/base.py +88 -0
  73. vanna/core/observability/models.py +47 -0
  74. vanna/core/recovery/__init__.py +11 -0
  75. vanna/core/recovery/base.py +84 -0
  76. vanna/core/recovery/models.py +32 -0
  77. vanna/core/registry.py +278 -0
  78. vanna/core/rich_component.py +156 -0
  79. vanna/core/simple_component.py +27 -0
  80. vanna/core/storage/__init__.py +14 -0
  81. vanna/core/storage/base.py +46 -0
  82. vanna/core/storage/models.py +46 -0
  83. vanna/core/system_prompt/__init__.py +13 -0
  84. vanna/core/system_prompt/base.py +36 -0
  85. vanna/core/system_prompt/default.py +157 -0
  86. vanna/core/tool/__init__.py +18 -0
  87. vanna/core/tool/base.py +70 -0
  88. vanna/core/tool/models.py +84 -0
  89. vanna/core/user/__init__.py +17 -0
  90. vanna/core/user/base.py +29 -0
  91. vanna/core/user/models.py +25 -0
  92. vanna/core/user/request_context.py +70 -0
  93. vanna/core/user/resolver.py +42 -0
  94. vanna/core/validation.py +164 -0
  95. vanna/core/workflow/__init__.py +12 -0
  96. vanna/core/workflow/base.py +254 -0
  97. vanna/core/workflow/default.py +789 -0
  98. vanna/examples/__init__.py +1 -0
  99. vanna/examples/__main__.py +44 -0
  100. vanna/examples/anthropic_quickstart.py +80 -0
  101. vanna/examples/artifact_example.py +293 -0
  102. vanna/examples/claude_sqlite_example.py +236 -0
  103. vanna/examples/coding_agent_example.py +300 -0
  104. vanna/examples/custom_system_prompt_example.py +174 -0
  105. vanna/examples/default_workflow_handler_example.py +208 -0
  106. vanna/examples/email_auth_example.py +340 -0
  107. vanna/examples/evaluation_example.py +269 -0
  108. vanna/examples/extensibility_example.py +262 -0
  109. vanna/examples/minimal_example.py +67 -0
  110. vanna/examples/mock_auth_example.py +227 -0
  111. vanna/examples/mock_custom_tool.py +311 -0
  112. vanna/examples/mock_quickstart.py +79 -0
  113. vanna/examples/mock_quota_example.py +145 -0
  114. vanna/examples/mock_rich_components_demo.py +396 -0
  115. vanna/examples/mock_sqlite_example.py +223 -0
  116. vanna/examples/openai_quickstart.py +83 -0
  117. vanna/examples/primitive_components_demo.py +305 -0
  118. vanna/examples/quota_lifecycle_example.py +139 -0
  119. vanna/examples/visualization_example.py +251 -0
  120. vanna/integrations/__init__.py +17 -0
  121. vanna/integrations/anthropic/__init__.py +9 -0
  122. vanna/integrations/anthropic/llm.py +270 -0
  123. vanna/integrations/azureopenai/__init__.py +9 -0
  124. vanna/integrations/azureopenai/llm.py +329 -0
  125. vanna/integrations/azuresearch/__init__.py +7 -0
  126. vanna/integrations/azuresearch/agent_memory.py +413 -0
  127. vanna/integrations/bigquery/__init__.py +5 -0
  128. vanna/integrations/bigquery/sql_runner.py +81 -0
  129. vanna/integrations/chromadb/__init__.py +104 -0
  130. vanna/integrations/chromadb/agent_memory.py +416 -0
  131. vanna/integrations/clickhouse/__init__.py +5 -0
  132. vanna/integrations/clickhouse/sql_runner.py +82 -0
  133. vanna/integrations/duckdb/__init__.py +5 -0
  134. vanna/integrations/duckdb/sql_runner.py +65 -0
  135. vanna/integrations/faiss/__init__.py +7 -0
  136. vanna/integrations/faiss/agent_memory.py +431 -0
  137. vanna/integrations/google/__init__.py +9 -0
  138. vanna/integrations/google/gemini.py +370 -0
  139. vanna/integrations/hive/__init__.py +5 -0
  140. vanna/integrations/hive/sql_runner.py +87 -0
  141. vanna/integrations/local/__init__.py +17 -0
  142. vanna/integrations/local/agent_memory/__init__.py +7 -0
  143. vanna/integrations/local/agent_memory/in_memory.py +285 -0
  144. vanna/integrations/local/audit.py +59 -0
  145. vanna/integrations/local/file_system.py +242 -0
  146. vanna/integrations/local/file_system_conversation_store.py +255 -0
  147. vanna/integrations/local/storage.py +62 -0
  148. vanna/integrations/marqo/__init__.py +7 -0
  149. vanna/integrations/marqo/agent_memory.py +354 -0
  150. vanna/integrations/milvus/__init__.py +7 -0
  151. vanna/integrations/milvus/agent_memory.py +458 -0
  152. vanna/integrations/mock/__init__.py +9 -0
  153. vanna/integrations/mock/llm.py +65 -0
  154. vanna/integrations/mssql/__init__.py +5 -0
  155. vanna/integrations/mssql/sql_runner.py +66 -0
  156. vanna/integrations/mysql/__init__.py +5 -0
  157. vanna/integrations/mysql/sql_runner.py +92 -0
  158. vanna/integrations/ollama/__init__.py +7 -0
  159. vanna/integrations/ollama/llm.py +252 -0
  160. vanna/integrations/openai/__init__.py +10 -0
  161. vanna/integrations/openai/llm.py +267 -0
  162. vanna/integrations/openai/responses.py +163 -0
  163. vanna/integrations/opensearch/__init__.py +7 -0
  164. vanna/integrations/opensearch/agent_memory.py +411 -0
  165. vanna/integrations/oracle/__init__.py +5 -0
  166. vanna/integrations/oracle/sql_runner.py +75 -0
  167. vanna/integrations/pinecone/__init__.py +7 -0
  168. vanna/integrations/pinecone/agent_memory.py +329 -0
  169. vanna/integrations/plotly/__init__.py +5 -0
  170. vanna/integrations/plotly/chart_generator.py +313 -0
  171. vanna/integrations/postgres/__init__.py +9 -0
  172. vanna/integrations/postgres/sql_runner.py +112 -0
  173. vanna/integrations/premium/agent_memory/__init__.py +7 -0
  174. vanna/integrations/premium/agent_memory/premium.py +186 -0
  175. vanna/integrations/presto/__init__.py +5 -0
  176. vanna/integrations/presto/sql_runner.py +107 -0
  177. vanna/integrations/qdrant/__init__.py +7 -0
  178. vanna/integrations/qdrant/agent_memory.py +439 -0
  179. vanna/integrations/snowflake/__init__.py +5 -0
  180. vanna/integrations/snowflake/sql_runner.py +147 -0
  181. vanna/integrations/sqlite/__init__.py +9 -0
  182. vanna/integrations/sqlite/sql_runner.py +65 -0
  183. vanna/integrations/weaviate/__init__.py +7 -0
  184. vanna/integrations/weaviate/agent_memory.py +428 -0
  185. vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_embeddings.py +11 -11
  186. vanna/legacy/__init__.py +403 -0
  187. vanna/legacy/adapter.py +463 -0
  188. vanna/{advanced → legacy/advanced}/__init__.py +3 -1
  189. vanna/{anthropic → legacy/anthropic}/anthropic_chat.py +9 -7
  190. vanna/{azuresearch → legacy/azuresearch}/azuresearch_vector.py +79 -41
  191. vanna/{base → legacy/base}/base.py +224 -217
  192. vanna/legacy/bedrock/__init__.py +1 -0
  193. vanna/{bedrock → legacy/bedrock}/bedrock_converse.py +13 -12
  194. vanna/{chromadb → legacy/chromadb}/chromadb_vector.py +3 -1
  195. vanna/legacy/cohere/__init__.py +2 -0
  196. vanna/{cohere → legacy/cohere}/cohere_chat.py +19 -14
  197. vanna/{cohere → legacy/cohere}/cohere_embeddings.py +25 -19
  198. vanna/{deepseek → legacy/deepseek}/deepseek_chat.py +5 -6
  199. vanna/legacy/faiss/__init__.py +1 -0
  200. vanna/{faiss → legacy/faiss}/faiss.py +113 -59
  201. vanna/{flask → legacy/flask}/__init__.py +84 -43
  202. vanna/{flask → legacy/flask}/assets.py +5 -5
  203. vanna/{flask → legacy/flask}/auth.py +5 -4
  204. vanna/{google → legacy/google}/bigquery_vector.py +75 -42
  205. vanna/{google → legacy/google}/gemini_chat.py +7 -3
  206. vanna/{hf → legacy/hf}/hf.py +0 -1
  207. vanna/{milvus → legacy/milvus}/milvus_vector.py +58 -35
  208. vanna/{mock → legacy/mock}/llm.py +0 -1
  209. vanna/legacy/mock/vectordb.py +67 -0
  210. vanna/legacy/ollama/ollama.py +110 -0
  211. vanna/{openai → legacy/openai}/openai_chat.py +2 -6
  212. vanna/legacy/opensearch/opensearch_vector.py +369 -0
  213. vanna/legacy/opensearch/opensearch_vector_semantic.py +200 -0
  214. vanna/legacy/oracle/oracle_vector.py +584 -0
  215. vanna/{pgvector → legacy/pgvector}/pgvector.py +42 -13
  216. vanna/{qdrant → legacy/qdrant}/qdrant.py +2 -6
  217. vanna/legacy/qianfan/Qianfan_Chat.py +170 -0
  218. vanna/legacy/qianfan/Qianfan_embeddings.py +36 -0
  219. vanna/legacy/qianwen/QianwenAI_chat.py +132 -0
  220. vanna/{remote.py → legacy/remote.py} +28 -26
  221. vanna/{utils.py → legacy/utils.py} +6 -11
  222. vanna/{vannadb → legacy/vannadb}/vannadb_vector.py +115 -46
  223. vanna/{vllm → legacy/vllm}/vllm.py +5 -6
  224. vanna/{weaviate → legacy/weaviate}/weaviate_vector.py +59 -40
  225. vanna/{xinference → legacy/xinference}/xinference.py +6 -6
  226. vanna/py.typed +0 -0
  227. vanna/servers/__init__.py +16 -0
  228. vanna/servers/__main__.py +8 -0
  229. vanna/servers/base/__init__.py +18 -0
  230. vanna/servers/base/chat_handler.py +65 -0
  231. vanna/servers/base/models.py +111 -0
  232. vanna/servers/base/rich_chat_handler.py +141 -0
  233. vanna/servers/base/templates.py +331 -0
  234. vanna/servers/cli/__init__.py +7 -0
  235. vanna/servers/cli/server_runner.py +204 -0
  236. vanna/servers/fastapi/__init__.py +7 -0
  237. vanna/servers/fastapi/app.py +163 -0
  238. vanna/servers/fastapi/routes.py +183 -0
  239. vanna/servers/flask/__init__.py +7 -0
  240. vanna/servers/flask/app.py +132 -0
  241. vanna/servers/flask/routes.py +137 -0
  242. vanna/tools/__init__.py +41 -0
  243. vanna/tools/agent_memory.py +322 -0
  244. vanna/tools/file_system.py +879 -0
  245. vanna/tools/python.py +222 -0
  246. vanna/tools/run_sql.py +165 -0
  247. vanna/tools/visualize_data.py +195 -0
  248. vanna/utils/__init__.py +0 -0
  249. vanna/web_components/__init__.py +44 -0
  250. vanna-2.0.0rc1.dist-info/METADATA +868 -0
  251. vanna-2.0.0rc1.dist-info/RECORD +289 -0
  252. vanna-2.0.0rc1.dist-info/entry_points.txt +3 -0
  253. vanna/bedrock/__init__.py +0 -1
  254. vanna/cohere/__init__.py +0 -2
  255. vanna/faiss/__init__.py +0 -1
  256. vanna/mock/vectordb.py +0 -55
  257. vanna/ollama/ollama.py +0 -103
  258. vanna/opensearch/opensearch_vector.py +0 -392
  259. vanna/opensearch/opensearch_vector_semantic.py +0 -175
  260. vanna/oracle/oracle_vector.py +0 -585
  261. vanna/qianfan/Qianfan_Chat.py +0 -165
  262. vanna/qianfan/Qianfan_embeddings.py +0 -36
  263. vanna/qianwen/QianwenAI_chat.py +0 -133
  264. vanna-0.7.9.dist-info/METADATA +0 -408
  265. vanna-0.7.9.dist-info/RECORD +0 -79
  266. /vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_Chat.py +0 -0
  267. /vanna/{ZhipuAI → legacy/ZhipuAI}/__init__.py +0 -0
  268. /vanna/{anthropic → legacy/anthropic}/__init__.py +0 -0
  269. /vanna/{azuresearch → legacy/azuresearch}/__init__.py +0 -0
  270. /vanna/{base → legacy/base}/__init__.py +0 -0
  271. /vanna/{chromadb → legacy/chromadb}/__init__.py +0 -0
  272. /vanna/{deepseek → legacy/deepseek}/__init__.py +0 -0
  273. /vanna/{exceptions → legacy/exceptions}/__init__.py +0 -0
  274. /vanna/{google → legacy/google}/__init__.py +0 -0
  275. /vanna/{hf → legacy/hf}/__init__.py +0 -0
  276. /vanna/{local.py → legacy/local.py} +0 -0
  277. /vanna/{marqo → legacy/marqo}/__init__.py +0 -0
  278. /vanna/{marqo → legacy/marqo}/marqo.py +0 -0
  279. /vanna/{milvus → legacy/milvus}/__init__.py +0 -0
  280. /vanna/{mistral → legacy/mistral}/__init__.py +0 -0
  281. /vanna/{mistral → legacy/mistral}/mistral.py +0 -0
  282. /vanna/{mock → legacy/mock}/__init__.py +0 -0
  283. /vanna/{mock → legacy/mock}/embedding.py +0 -0
  284. /vanna/{ollama → legacy/ollama}/__init__.py +0 -0
  285. /vanna/{openai → legacy/openai}/__init__.py +0 -0
  286. /vanna/{openai → legacy/openai}/openai_embeddings.py +0 -0
  287. /vanna/{opensearch → legacy/opensearch}/__init__.py +0 -0
  288. /vanna/{oracle → legacy/oracle}/__init__.py +0 -0
  289. /vanna/{pgvector → legacy/pgvector}/__init__.py +0 -0
  290. /vanna/{pinecone → legacy/pinecone}/__init__.py +0 -0
  291. /vanna/{pinecone → legacy/pinecone}/pinecone_vector.py +0 -0
  292. /vanna/{qdrant → legacy/qdrant}/__init__.py +0 -0
  293. /vanna/{qianfan → legacy/qianfan}/__init__.py +0 -0
  294. /vanna/{qianwen → legacy/qianwen}/QianwenAI_embeddings.py +0 -0
  295. /vanna/{qianwen → legacy/qianwen}/__init__.py +0 -0
  296. /vanna/{types → legacy/types}/__init__.py +0 -0
  297. /vanna/{vannadb → legacy/vannadb}/__init__.py +0 -0
  298. /vanna/{vllm → legacy/vllm}/__init__.py +0 -0
  299. /vanna/{weaviate → legacy/weaviate}/__init__.py +0 -0
  300. /vanna/{xinference → legacy/xinference}/__init__.py +0 -0
  301. {vanna-0.7.9.dist-info → vanna-2.0.0rc1.dist-info}/WHEEL +0 -0
  302. {vanna-0.7.9.dist-info → vanna-2.0.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,369 @@
1
+ import base64
2
+ import uuid
3
+ from typing import List
4
+
5
+ import pandas as pd
6
+ from opensearchpy import OpenSearch
7
+
8
+ from ..base import VannaBase
9
+
10
+
11
class OpenSearch_VectorStore(VannaBase):
    """Training-data store that keeps Vanna data in three OpenSearch indices.

    DDL statements, documentation snippets and question/SQL pairs each live in
    their own index. Retrieval uses plain ``match`` text queries; this class
    does not compute embeddings.

    Recognised ``config`` keys (all optional):

    * ``es_document_index``, ``es_ddl_index``, ``es_question_sql_index`` --
      index names.
    * ``es_document_index_settings``, ``es_ddl_index_settings``,
      ``es_question_sql_index_settings`` -- request bodies used when an index
      has to be created.
    * ``es_urls`` -- a single URL or a list/tuple of URLs. When absent,
      ``es_host`` (default ``"localhost"``) and ``es_port`` (default ``9200``)
      are used instead.
    * ``es_ssl``, ``es_verify_certs``, ``es_timeout``, ``es_max_retries``,
      ``es_http_compress`` -- forwarded to the ``OpenSearch`` client.
    * ``es_user`` / ``es_password`` -- basic-auth credentials.
    * ``es_encoded_base64`` -- when truthy, the credentials are sent as a
      pre-built ``Authorization: Basic ...`` header instead of ``http_auth``.
    * ``es_headers`` -- extra request headers, merged over any generated
      ``Authorization`` header.
    """

    def __init__(self, config=None):
        """Read the configuration, build the client and ensure the indices exist.

        Raises:
            KeyError: if ``es_user`` is configured without ``es_password``.
        """
        VannaBase.__init__(self, config=config)
        # A missing config behaves like an empty one, so every option below
        # can be read with a plain ``.get(key, default)``.
        cfg = {} if config is None else config

        self.document_index = cfg.get("es_document_index", "vanna_document_index")
        self.ddl_index = cfg.get("es_ddl_index", "vanna_ddl_index")
        self.question_sql_index = cfg.get(
            "es_question_sql_index", "vanna_questions_sql_index"
        )
        print(
            "OpenSearch_VectorStore initialized with document_index: ",
            self.document_index,
            " ddl_index: ",
            self.ddl_index,
            " question_sql_index: ",
            self.question_sql_index,
        )

        self.document_index_settings = cfg.get(
            "es_document_index_settings",
            self._default_index_settings("question", "doc"),
        )
        self.ddl_index_settings = cfg.get(
            "es_ddl_index_settings",
            self._default_index_settings("ddl", "doc"),
        )
        self.question_sql_index_settings = cfg.get(
            "es_question_sql_index_settings",
            self._default_index_settings("question", "sql"),
        )

        es_urls = cfg.get("es_urls")
        host = cfg.get("es_host", "localhost")
        port = cfg.get("es_port", 9200)
        ssl = cfg.get("es_ssl", False)
        verify_certs = cfg.get("es_verify_certs", False)
        timeout = cfg.get("es_timeout", 60)
        max_retries = cfg.get("es_max_retries", 10)
        es_http_compress = cfg.get("es_http_compress", False)

        # No credentials are sent unless ``es_user`` is configured; in that
        # case ``es_password`` is mandatory and a missing key fails fast.
        auth = (cfg["es_user"], cfg["es_password"]) if "es_user" in cfg else None

        headers = None
        if auth is not None and cfg.get("es_encoded_base64"):
            # Send the credentials as a ready-made Basic header and stop
            # passing them through ``http_auth``.
            encoded_credentials = base64.b64encode(
                (cfg["es_user"] + ":" + cfg["es_password"]).encode("utf-8")
            ).decode("utf-8")
            headers = {"Authorization": "Basic " + encoded_credentials}
            auth = None

        # Custom headers are merged over the generated one. Replacing the
        # dict outright would drop the Authorization header after ``auth``
        # has already been cleared, leaving the client unauthenticated.
        custom_headers = cfg.get("es_headers")
        if custom_headers:
            headers = {**(headers or {}), **custom_headers}

        print(
            "OpenSearch_VectorStore initialized with es_urls: ",
            es_urls,
            " host: ",
            host,
            " port: ",
            port,
            " ssl: ",
            ssl,
            " verify_certs: ",
            verify_certs,
            " timeout: ",
            timeout,
            " max_retries: ",
            max_retries,
        )

        if es_urls is not None:
            # Accept either one URL or a collection of URLs without nesting
            # an existing list inside another list.
            if isinstance(es_urls, (list, tuple)):
                hosts = list(es_urls)
            else:
                hosts = [es_urls]
        else:
            hosts = [{"host": host, "port": port}]

        self.client = OpenSearch(
            hosts=hosts,
            http_compress=es_http_compress,
            use_ssl=ssl,
            verify_certs=verify_certs,
            timeout=timeout,
            max_retries=max_retries,
            retry_on_timeout=True,
            http_auth=auth,
            headers=headers,
        )

        print("OpenSearch_VectorStore initialized with client over ")

        # Run a trivial request to check the connection. A failure is only
        # reported, not raised, so construction stays best-effort.
        try:
            info = self.client.info()
            print("Connected to OpenSearch cluster:")
            print("OpenSearch cluster info:", info)
        except Exception as e:
            print("Error connecting to OpenSearch cluster:", e)

        # Create the indices if they don't exist
        self.create_index_if_not_exists(
            self.document_index, self.document_index_settings
        )
        self.create_index_if_not_exists(self.ddl_index, self.ddl_index_settings)
        self.create_index_if_not_exists(
            self.question_sql_index, self.question_sql_index_settings
        )

    @staticmethod
    def _default_index_settings(*text_fields):
        """Return the default creation body with ``text_fields`` mapped as text."""
        return {
            "settings": {"index": {"number_of_shards": 6, "number_of_replicas": 2}},
            "mappings": {
                "properties": {name: {"type": "text"} for name in text_fields}
            },
        }

    def create_index(self):
        """Try to create all three indices with server-side default settings.

        Any error (including "already exists") is printed and swallowed.
        """
        for index in [self.document_index, self.ddl_index, self.question_sql_index]:
            try:
                self.client.indices.create(index=index)
            except Exception as e:
                print("Error creating index: ", e)
                print(f"opensearch index {index} already exists")

    def create_index_if_not_exists(self, index_name: str, index_settings: dict) -> bool:
        """Create ``index_name`` with ``index_settings`` unless it already exists.

        Returns:
            True if the index was created, False if it already existed or the
            request failed (the error is printed).
        """
        try:
            if self.client.indices.exists(index=index_name):
                print(f"Index {index_name} already exists.")
                return False
            print(f"Index {index_name} does not exist. Creating...")
            self.client.indices.create(index=index_name, body=index_settings)
            return True
        except Exception as e:
            print(f"Error creating index: {index_name} ", e)
            return False

    def add_ddl(self, ddl: str, **kwargs) -> str:
        """Index a DDL statement and return its id (random UUID + ``-ddl``)."""
        doc_id = str(uuid.uuid4()) + "-ddl"
        response = self.client.index(
            index=self.ddl_index, body={"ddl": ddl}, id=doc_id, **kwargs
        )
        return response["_id"]

    def add_documentation(self, doc: str, **kwargs) -> str:
        """Index a documentation snippet and return its id (UUID + ``-doc``)."""
        doc_id = str(uuid.uuid4()) + "-doc"
        response = self.client.index(
            index=self.document_index, id=doc_id, body={"doc": doc}, **kwargs
        )
        return response["_id"]

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        """Index a question/SQL pair and return its id (UUID + ``-sql``)."""
        doc_id = str(uuid.uuid4()) + "-sql"
        response = self.client.index(
            index=self.question_sql_index,
            body={"question": question, "sql": sql},
            id=doc_id,
            **kwargs,
        )
        return response["_id"]

    def get_related_ddl(self, question: str, **kwargs) -> List[str]:
        """Return DDL statements whose text matches ``question``."""
        query = {"query": {"match": {"ddl": question}}}
        print(query)
        response = self.client.search(index=self.ddl_index, body=query, **kwargs)
        return [hit["_source"]["ddl"] for hit in response["hits"]["hits"]]

    def get_related_documentation(self, question: str, **kwargs) -> List[str]:
        """Return documentation snippets whose text matches ``question``."""
        query = {"query": {"match": {"doc": question}}}
        print(query)
        response = self.client.search(index=self.document_index, body=query, **kwargs)
        return [hit["_source"]["doc"] for hit in response["hits"]["hits"]]

    def get_similar_question_sql(self, question: str, **kwargs) -> list:
        """Return ``(question, sql)`` tuples whose question matches ``question``.

        NOTE(review): the result items are tuples, not dicts -- confirm this
        is the shape the prompt-building code in the base class expects.
        """
        query = {"query": {"match": {"question": question}}}
        print(query)
        response = self.client.search(
            index=self.question_sql_index, body=query, **kwargs
        )
        return [
            (hit["_source"]["question"], hit["_source"]["sql"])
            for hit in response["hits"]["hits"]
        ]

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        """Return up to 1000 entries per index as one DataFrame.

        Columns are ``id``, ``training_data_type``, ``question`` and
        ``content``. Rows are ordered documentation, then sql, then ddl.

        WARNING: each index is read with a single ``size=1000`` search, so
        larger indices are silently truncated.
        """
        # (index, training_data_type, question field or None, content field)
        sources = (
            (self.document_index, "documentation", None, "doc"),
            (self.question_sql_index, "sql", "question", "sql"),
            (self.ddl_index, "ddl", None, "ddl"),
        )

        data = []
        for index, data_type, question_field, content_field in sources:
            response = self.client.search(
                index=index, body={"query": {"match_all": {}}}, size=1000
            )
            for hit in response["hits"]["hits"]:
                # Tolerate documents that lack the expected fields instead of
                # failing the whole export on one malformed entry.
                source = hit.get("_source", {})
                data.append(
                    {
                        "id": hit["_id"],
                        "training_data_type": data_type,
                        "question": (
                            source.get(question_field, "") if question_field else ""
                        ),
                        "content": source.get(content_field, ""),
                    }
                )

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        """Delete the entry ``id`` from the index selected by its suffix.

        The suffix (``-sql``, ``-ddl`` or ``-doc``) is the one appended by the
        ``add_*`` methods.

        Returns:
            True on success; False for an unknown suffix or when the delete
            request fails (the error is printed).
        """
        index_by_suffix = {
            "-sql": self.question_sql_index,
            "-ddl": self.ddl_index,
            "-doc": self.document_index,
        }
        try:
            for suffix, index in index_by_suffix.items():
                if id.endswith(suffix):
                    self.client.delete(index=index, id=id, **kwargs)
                    return True
            return False
        except Exception as e:
            print("Error deleting training data: ", e)
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        """Do nothing and return ``None``; text matching needs no embeddings."""
        # opensearch doesn't need to generate embeddings
        pass
360
+
361
+
362
+ # OpenSearch_VectorStore.__init__(self, config={'es_urls':
363
+ # "https://opensearch-node.test.com:9200", 'es_encoded_base64': True, 'es_user':
364
+ # "admin", 'es_password': "admin", 'es_verify_certs': True})
365
+
366
+
367
+ # OpenSearch_VectorStore.__init__(self, config={'es_host':
368
+ # "https://opensearch-node.test.com", 'es_port': 9200, 'es_user': "admin",
369
+ # 'es_password': "admin", 'es_verify_certs': True})
@@ -0,0 +1,200 @@
1
+ import json
2
+
3
+ import pandas as pd
4
+ from langchain_community.vectorstores import OpenSearchVectorSearch
5
+
6
+ from ..base import VannaBase
7
+ from ..utils import deterministic_uuid
8
+
9
+
10
class OpenSearch_Semantic_VectorStore(VannaBase):
    """Training-data store using three ``OpenSearchVectorSearch`` indices.

    DDL statements, documentation snippets and question/SQL pairs each live in
    their own index and are retrieved with ``similarity_search``.
    Question/SQL pairs are stored as a JSON string with ``question`` and
    ``sql`` keys.

    Recognised ``config`` keys (all optional):

    * ``embedding_function`` -- embedding object handed to the vector stores;
      defaults to ``HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")``.
    * ``n_results_sql``, ``n_results_documentation``, ``n_results_ddl`` --
      number of results per lookup; each falls back to ``n_results``, then 10.
    * ``es_document_index``, ``es_ddl_index``, ``es_question_sql_index`` --
      index names.
    * ``es_urls`` (default ``"https://localhost:9200"``), ``es_ssl``,
      ``es_verify_certs``, ``es_user`` / ``es_password``, ``es_headers``,
      ``es_timeout``, ``es_max_retries`` -- connection options.
    """

    def __init__(self, config=None):
        """Read the configuration and build one vector store per index.

        Raises:
            KeyError: if ``es_user`` is configured without ``es_password``.
        """
        VannaBase.__init__(self, config=config)
        if config is None:
            config = {}

        if "embedding_function" in config:
            self.embedding_function = config.get("embedding_function")
        else:
            # Imported lazily so the dependency is only needed when no
            # embedding function is supplied.
            from langchain_huggingface import HuggingFaceEmbeddings

            self.embedding_function = HuggingFaceEmbeddings(
                model_name="all-MiniLM-L6-v2"
            )

        default_n_results = config.get("n_results", 10)
        self.n_results_sql = config.get("n_results_sql", default_n_results)
        self.n_results_documentation = config.get(
            "n_results_documentation", default_n_results
        )
        self.n_results_ddl = config.get("n_results_ddl", default_n_results)

        self.document_index = config.get("es_document_index", "vanna_document_index")
        self.ddl_index = config.get("es_ddl_index", "vanna_ddl_index")
        self.question_sql_index = config.get(
            "es_question_sql_index", "vanna_questions_sql_index"
        )

        self.log(
            f"OpenSearch_Semantic_VectorStore initialized with document_index: {self.document_index}, ddl_index: {self.ddl_index}, question_sql_index: {self.question_sql_index}"
        )

        # ``es_password`` is mandatory once ``es_user`` is given; a missing
        # key fails fast with KeyError.
        if "es_user" in config:
            auth = (config["es_user"], config["es_password"])
        else:
            auth = None

        common_args = {
            "opensearch_url": config.get("es_urls", "https://localhost:9200"),
            "embedding_function": self.embedding_function,
            "engine": "faiss",
            "http_auth": auth,
            "use_ssl": config.get("es_ssl", True),
            "verify_certs": config.get("es_verify_certs", True),
            "timeout": config.get("es_timeout", 60),
            "max_retries": config.get("es_max_retries", 10),
            "retry_on_timeout": True,
            "headers": config.get("es_headers", None),
        }

        self.documentation_store = OpenSearchVectorSearch(
            index_name=self.document_index, **common_args
        )
        self.ddl_store = OpenSearchVectorSearch(
            index_name=self.ddl_index, **common_args
        )
        self.sql_store = OpenSearchVectorSearch(
            index_name=self.question_sql_index, **common_args
        )

    def add_ddl(self, ddl: str, **kwargs) -> str:
        """Store a DDL statement; the id is ``deterministic_uuid(ddl)`` + ``-ddl``."""
        _id = deterministic_uuid(ddl) + "-ddl"
        self.ddl_store.add_texts(texts=[ddl], ids=[_id], **kwargs)
        return _id

    def add_documentation(self, documentation: str, **kwargs) -> str:
        """Store a documentation snippet; the id ends in ``-doc``."""
        _id = deterministic_uuid(documentation) + "-doc"
        self.documentation_store.add_texts(texts=[documentation], ids=[_id], **kwargs)
        return _id

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        """Store a question/SQL pair as a JSON string; the id ends in ``-sql``."""
        question_sql_json = json.dumps(
            {
                "question": question,
                "sql": sql,
            },
            ensure_ascii=False,
        )

        _id = deterministic_uuid(question_sql_json) + "-sql"
        self.sql_store.add_texts(texts=[question_sql_json], ids=[_id], **kwargs)
        return _id

    def get_related_ddl(self, question: str, **kwargs) -> list:
        """Return the ``n_results_ddl`` DDL texts most similar to ``question``."""
        documents = self.ddl_store.similarity_search(
            query=question, k=self.n_results_ddl
        )
        return [document.page_content for document in documents]

    def get_related_documentation(self, question: str, **kwargs) -> list:
        """Return the ``n_results_documentation`` most similar snippets."""
        documents = self.documentation_store.similarity_search(
            query=question, k=self.n_results_documentation
        )
        return [document.page_content for document in documents]

    def get_similar_question_sql(self, question: str, **kwargs) -> list:
        """Return the ``n_results_sql`` most similar pairs as decoded JSON."""
        documents = self.sql_store.similarity_search(
            query=question, k=self.n_results_sql
        )
        return [json.loads(document.page_content) for document in documents]

    def _iter_index_hits(self, client, index_name, query, scroll="1m", size=1000):
        """Yield every hit of ``query`` on ``index_name`` via the scroll API.

        The scroll context is cleared when iteration ends, whether normally
        or through an error, instead of being left to expire on the server.
        """
        response = client.search(
            index=index_name,
            ignore_unavailable=True,
            body=query,
            scroll=scroll,
            size=size,
        )
        scroll_id = response.get("_scroll_id")
        # Remember the last id the server handed out so it can be released
        # even if a later page comes back without one.
        open_scroll_id = scroll_id
        try:
            while scroll_id:
                hits = response["hits"]["hits"]
                if not hits:
                    break  # No more hits, exit loop
                yield from hits
                response = client.scroll(scroll_id=scroll_id, scroll=scroll)
                scroll_id = response.get("_scroll_id")
                open_scroll_id = scroll_id or open_scroll_id
        finally:
            if open_scroll_id:
                try:
                    client.clear_scroll(scroll_id=open_scroll_id)
                except Exception as e:
                    # Cleanup is best-effort; the context expires by itself.
                    self.log(
                        f"Could not clear scroll context for {index_name}: {e}",
                        "Error",
                    )

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        """Return every stored entry of all three indices as one DataFrame.

        Columns are ``id``, ``training_data_type``, ``question`` and
        ``content``. Question/SQL rows whose stored text is missing or is not
        valid JSON are logged and skipped.
        """
        data = []
        query = {"query": {"match_all": {}}}

        indices = [
            {"index": self.document_index, "type": "documentation"},
            {"index": self.question_sql_index, "type": "sql"},
            {"index": self.ddl_index, "type": "ddl"},
        ]

        # Use documentation_store.client consistently for search on all indices
        opensearch_client = self.documentation_store.client

        for index_info in indices:
            training_data_type = index_info["type"]
            for hit in self._iter_index_hits(
                opensearch_client, index_info["index"], query
            ):
                # ``.get`` keeps one document without a "text" field from
                # aborting the whole export.
                text = hit.get("_source", {}).get("text")
                if training_data_type == "sql":
                    try:
                        doc_dict = json.loads(text)
                        content = doc_dict.get("sql")
                        question = doc_dict.get("question")
                    except (TypeError, json.JSONDecodeError) as e:
                        self.log(
                            f"Skipping row with custom_id {hit['_id']} due to JSON parsing error: {e}",
                            "Error",
                        )
                        continue
                else:  # documentation or ddl
                    content = text
                    question = None

                data.append(
                    {
                        "id": hit["_id"],
                        "training_data_type": training_data_type,
                        "question": question,
                        "content": content,
                    }
                )

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        """Delete the entry ``id`` from the store selected by its suffix.

        Returns:
            The truthiness of the store's ``delete`` result; False for an
            unknown suffix or when deletion raises (the error is logged).
        """
        store_by_suffix = {
            "-sql": self.sql_store,
            "-ddl": self.ddl_store,
            "-doc": self.documentation_store,
        }
        try:
            for suffix, store in store_by_suffix.items():
                if id.endswith(suffix):
                    # ``bool`` honours the annotated return type even if the
                    # store reports ``None``.
                    return bool(store.delete(ids=[id], **kwargs))
            return False
        except Exception as e:
            self.log(
                f"Error deleting training data: {e}",
                "Error",
            )
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        """Do nothing and return ``None``.

        Nothing in this class calls this method; the vector stores are given
        ``embedding_function`` directly.
        """
        pass