vanna 0.7.8__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. vanna/__init__.py +167 -395
  2. vanna/agents/__init__.py +7 -0
  3. vanna/capabilities/__init__.py +17 -0
  4. vanna/capabilities/agent_memory/__init__.py +21 -0
  5. vanna/capabilities/agent_memory/base.py +103 -0
  6. vanna/capabilities/agent_memory/models.py +53 -0
  7. vanna/capabilities/file_system/__init__.py +14 -0
  8. vanna/capabilities/file_system/base.py +71 -0
  9. vanna/capabilities/file_system/models.py +25 -0
  10. vanna/capabilities/sql_runner/__init__.py +13 -0
  11. vanna/capabilities/sql_runner/base.py +37 -0
  12. vanna/capabilities/sql_runner/models.py +13 -0
  13. vanna/components/__init__.py +92 -0
  14. vanna/components/base.py +11 -0
  15. vanna/components/rich/__init__.py +83 -0
  16. vanna/components/rich/containers/__init__.py +7 -0
  17. vanna/components/rich/containers/card.py +20 -0
  18. vanna/components/rich/data/__init__.py +9 -0
  19. vanna/components/rich/data/chart.py +17 -0
  20. vanna/components/rich/data/dataframe.py +93 -0
  21. vanna/components/rich/feedback/__init__.py +21 -0
  22. vanna/components/rich/feedback/badge.py +16 -0
  23. vanna/components/rich/feedback/icon_text.py +14 -0
  24. vanna/components/rich/feedback/log_viewer.py +41 -0
  25. vanna/components/rich/feedback/notification.py +19 -0
  26. vanna/components/rich/feedback/progress.py +37 -0
  27. vanna/components/rich/feedback/status_card.py +28 -0
  28. vanna/components/rich/feedback/status_indicator.py +14 -0
  29. vanna/components/rich/interactive/__init__.py +21 -0
  30. vanna/components/rich/interactive/button.py +95 -0
  31. vanna/components/rich/interactive/task_list.py +58 -0
  32. vanna/components/rich/interactive/ui_state.py +93 -0
  33. vanna/components/rich/specialized/__init__.py +7 -0
  34. vanna/components/rich/specialized/artifact.py +20 -0
  35. vanna/components/rich/text.py +16 -0
  36. vanna/components/simple/__init__.py +15 -0
  37. vanna/components/simple/image.py +15 -0
  38. vanna/components/simple/link.py +15 -0
  39. vanna/components/simple/text.py +11 -0
  40. vanna/core/__init__.py +193 -0
  41. vanna/core/_compat.py +19 -0
  42. vanna/core/agent/__init__.py +10 -0
  43. vanna/core/agent/agent.py +1407 -0
  44. vanna/core/agent/config.py +123 -0
  45. vanna/core/audit/__init__.py +28 -0
  46. vanna/core/audit/base.py +299 -0
  47. vanna/core/audit/models.py +131 -0
  48. vanna/core/component_manager.py +329 -0
  49. vanna/core/components.py +53 -0
  50. vanna/core/enhancer/__init__.py +11 -0
  51. vanna/core/enhancer/base.py +94 -0
  52. vanna/core/enhancer/default.py +118 -0
  53. vanna/core/enricher/__init__.py +10 -0
  54. vanna/core/enricher/base.py +59 -0
  55. vanna/core/errors.py +47 -0
  56. vanna/core/evaluation/__init__.py +81 -0
  57. vanna/core/evaluation/base.py +186 -0
  58. vanna/core/evaluation/dataset.py +254 -0
  59. vanna/core/evaluation/evaluators.py +376 -0
  60. vanna/core/evaluation/report.py +289 -0
  61. vanna/core/evaluation/runner.py +313 -0
  62. vanna/core/filter/__init__.py +10 -0
  63. vanna/core/filter/base.py +67 -0
  64. vanna/core/lifecycle/__init__.py +10 -0
  65. vanna/core/lifecycle/base.py +83 -0
  66. vanna/core/llm/__init__.py +16 -0
  67. vanna/core/llm/base.py +40 -0
  68. vanna/core/llm/models.py +61 -0
  69. vanna/core/middleware/__init__.py +10 -0
  70. vanna/core/middleware/base.py +69 -0
  71. vanna/core/observability/__init__.py +11 -0
  72. vanna/core/observability/base.py +88 -0
  73. vanna/core/observability/models.py +47 -0
  74. vanna/core/recovery/__init__.py +11 -0
  75. vanna/core/recovery/base.py +84 -0
  76. vanna/core/recovery/models.py +32 -0
  77. vanna/core/registry.py +278 -0
  78. vanna/core/rich_component.py +156 -0
  79. vanna/core/simple_component.py +27 -0
  80. vanna/core/storage/__init__.py +14 -0
  81. vanna/core/storage/base.py +46 -0
  82. vanna/core/storage/models.py +46 -0
  83. vanna/core/system_prompt/__init__.py +13 -0
  84. vanna/core/system_prompt/base.py +36 -0
  85. vanna/core/system_prompt/default.py +157 -0
  86. vanna/core/tool/__init__.py +18 -0
  87. vanna/core/tool/base.py +70 -0
  88. vanna/core/tool/models.py +84 -0
  89. vanna/core/user/__init__.py +17 -0
  90. vanna/core/user/base.py +29 -0
  91. vanna/core/user/models.py +25 -0
  92. vanna/core/user/request_context.py +70 -0
  93. vanna/core/user/resolver.py +42 -0
  94. vanna/core/validation.py +164 -0
  95. vanna/core/workflow/__init__.py +12 -0
  96. vanna/core/workflow/base.py +254 -0
  97. vanna/core/workflow/default.py +789 -0
  98. vanna/examples/__init__.py +1 -0
  99. vanna/examples/__main__.py +44 -0
  100. vanna/examples/anthropic_quickstart.py +80 -0
  101. vanna/examples/artifact_example.py +293 -0
  102. vanna/examples/claude_sqlite_example.py +236 -0
  103. vanna/examples/coding_agent_example.py +300 -0
  104. vanna/examples/custom_system_prompt_example.py +174 -0
  105. vanna/examples/default_workflow_handler_example.py +208 -0
  106. vanna/examples/email_auth_example.py +340 -0
  107. vanna/examples/evaluation_example.py +269 -0
  108. vanna/examples/extensibility_example.py +262 -0
  109. vanna/examples/minimal_example.py +67 -0
  110. vanna/examples/mock_auth_example.py +227 -0
  111. vanna/examples/mock_custom_tool.py +311 -0
  112. vanna/examples/mock_quickstart.py +79 -0
  113. vanna/examples/mock_quota_example.py +145 -0
  114. vanna/examples/mock_rich_components_demo.py +396 -0
  115. vanna/examples/mock_sqlite_example.py +223 -0
  116. vanna/examples/openai_quickstart.py +83 -0
  117. vanna/examples/primitive_components_demo.py +305 -0
  118. vanna/examples/quota_lifecycle_example.py +139 -0
  119. vanna/examples/visualization_example.py +251 -0
  120. vanna/integrations/__init__.py +17 -0
  121. vanna/integrations/anthropic/__init__.py +9 -0
  122. vanna/integrations/anthropic/llm.py +270 -0
  123. vanna/integrations/azureopenai/__init__.py +9 -0
  124. vanna/integrations/azureopenai/llm.py +329 -0
  125. vanna/integrations/azuresearch/__init__.py +7 -0
  126. vanna/integrations/azuresearch/agent_memory.py +413 -0
  127. vanna/integrations/bigquery/__init__.py +5 -0
  128. vanna/integrations/bigquery/sql_runner.py +81 -0
  129. vanna/integrations/chromadb/__init__.py +104 -0
  130. vanna/integrations/chromadb/agent_memory.py +416 -0
  131. vanna/integrations/clickhouse/__init__.py +5 -0
  132. vanna/integrations/clickhouse/sql_runner.py +82 -0
  133. vanna/integrations/duckdb/__init__.py +5 -0
  134. vanna/integrations/duckdb/sql_runner.py +65 -0
  135. vanna/integrations/faiss/__init__.py +7 -0
  136. vanna/integrations/faiss/agent_memory.py +431 -0
  137. vanna/integrations/google/__init__.py +9 -0
  138. vanna/integrations/google/gemini.py +370 -0
  139. vanna/integrations/hive/__init__.py +5 -0
  140. vanna/integrations/hive/sql_runner.py +87 -0
  141. vanna/integrations/local/__init__.py +17 -0
  142. vanna/integrations/local/agent_memory/__init__.py +7 -0
  143. vanna/integrations/local/agent_memory/in_memory.py +285 -0
  144. vanna/integrations/local/audit.py +59 -0
  145. vanna/integrations/local/file_system.py +242 -0
  146. vanna/integrations/local/file_system_conversation_store.py +255 -0
  147. vanna/integrations/local/storage.py +62 -0
  148. vanna/integrations/marqo/__init__.py +7 -0
  149. vanna/integrations/marqo/agent_memory.py +354 -0
  150. vanna/integrations/milvus/__init__.py +7 -0
  151. vanna/integrations/milvus/agent_memory.py +458 -0
  152. vanna/integrations/mock/__init__.py +9 -0
  153. vanna/integrations/mock/llm.py +65 -0
  154. vanna/integrations/mssql/__init__.py +5 -0
  155. vanna/integrations/mssql/sql_runner.py +66 -0
  156. vanna/integrations/mysql/__init__.py +5 -0
  157. vanna/integrations/mysql/sql_runner.py +92 -0
  158. vanna/integrations/ollama/__init__.py +7 -0
  159. vanna/integrations/ollama/llm.py +252 -0
  160. vanna/integrations/openai/__init__.py +10 -0
  161. vanna/integrations/openai/llm.py +267 -0
  162. vanna/integrations/openai/responses.py +163 -0
  163. vanna/integrations/opensearch/__init__.py +7 -0
  164. vanna/integrations/opensearch/agent_memory.py +411 -0
  165. vanna/integrations/oracle/__init__.py +5 -0
  166. vanna/integrations/oracle/sql_runner.py +75 -0
  167. vanna/integrations/pinecone/__init__.py +7 -0
  168. vanna/integrations/pinecone/agent_memory.py +329 -0
  169. vanna/integrations/plotly/__init__.py +5 -0
  170. vanna/integrations/plotly/chart_generator.py +313 -0
  171. vanna/integrations/postgres/__init__.py +9 -0
  172. vanna/integrations/postgres/sql_runner.py +112 -0
  173. vanna/integrations/premium/agent_memory/__init__.py +7 -0
  174. vanna/integrations/premium/agent_memory/premium.py +186 -0
  175. vanna/integrations/presto/__init__.py +5 -0
  176. vanna/integrations/presto/sql_runner.py +107 -0
  177. vanna/integrations/qdrant/__init__.py +7 -0
  178. vanna/integrations/qdrant/agent_memory.py +461 -0
  179. vanna/integrations/snowflake/__init__.py +5 -0
  180. vanna/integrations/snowflake/sql_runner.py +147 -0
  181. vanna/integrations/sqlite/__init__.py +9 -0
  182. vanna/integrations/sqlite/sql_runner.py +65 -0
  183. vanna/integrations/weaviate/__init__.py +7 -0
  184. vanna/integrations/weaviate/agent_memory.py +428 -0
  185. vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_embeddings.py +11 -11
  186. vanna/legacy/__init__.py +403 -0
  187. vanna/legacy/adapter.py +463 -0
  188. vanna/{advanced → legacy/advanced}/__init__.py +3 -1
  189. vanna/{anthropic → legacy/anthropic}/anthropic_chat.py +9 -7
  190. vanna/{azuresearch → legacy/azuresearch}/azuresearch_vector.py +79 -41
  191. vanna/{base → legacy/base}/base.py +247 -223
  192. vanna/legacy/bedrock/__init__.py +1 -0
  193. vanna/{bedrock → legacy/bedrock}/bedrock_converse.py +13 -12
  194. vanna/{chromadb → legacy/chromadb}/chromadb_vector.py +3 -1
  195. vanna/legacy/cohere/__init__.py +2 -0
  196. vanna/{cohere → legacy/cohere}/cohere_chat.py +19 -14
  197. vanna/{cohere → legacy/cohere}/cohere_embeddings.py +25 -19
  198. vanna/{deepseek → legacy/deepseek}/deepseek_chat.py +5 -6
  199. vanna/legacy/faiss/__init__.py +1 -0
  200. vanna/{faiss → legacy/faiss}/faiss.py +113 -59
  201. vanna/{flask → legacy/flask}/__init__.py +84 -43
  202. vanna/{flask → legacy/flask}/assets.py +5 -5
  203. vanna/{flask → legacy/flask}/auth.py +5 -4
  204. vanna/{google → legacy/google}/bigquery_vector.py +75 -42
  205. vanna/{google → legacy/google}/gemini_chat.py +7 -3
  206. vanna/{hf → legacy/hf}/hf.py +0 -1
  207. vanna/{milvus → legacy/milvus}/milvus_vector.py +58 -35
  208. vanna/{mock → legacy/mock}/llm.py +0 -1
  209. vanna/legacy/mock/vectordb.py +67 -0
  210. vanna/legacy/ollama/ollama.py +110 -0
  211. vanna/{openai → legacy/openai}/openai_chat.py +2 -6
  212. vanna/legacy/opensearch/opensearch_vector.py +369 -0
  213. vanna/legacy/opensearch/opensearch_vector_semantic.py +200 -0
  214. vanna/legacy/oracle/oracle_vector.py +584 -0
  215. vanna/{pgvector → legacy/pgvector}/pgvector.py +42 -13
  216. vanna/{qdrant → legacy/qdrant}/qdrant.py +2 -6
  217. vanna/legacy/qianfan/Qianfan_Chat.py +170 -0
  218. vanna/legacy/qianfan/Qianfan_embeddings.py +36 -0
  219. vanna/legacy/qianwen/QianwenAI_chat.py +132 -0
  220. vanna/{remote.py → legacy/remote.py} +28 -26
  221. vanna/{utils.py → legacy/utils.py} +6 -11
  222. vanna/{vannadb → legacy/vannadb}/vannadb_vector.py +115 -46
  223. vanna/{vllm → legacy/vllm}/vllm.py +5 -6
  224. vanna/{weaviate → legacy/weaviate}/weaviate_vector.py +59 -40
  225. vanna/{xinference → legacy/xinference}/xinference.py +6 -6
  226. vanna/py.typed +0 -0
  227. vanna/servers/__init__.py +16 -0
  228. vanna/servers/__main__.py +8 -0
  229. vanna/servers/base/__init__.py +18 -0
  230. vanna/servers/base/chat_handler.py +65 -0
  231. vanna/servers/base/models.py +111 -0
  232. vanna/servers/base/rich_chat_handler.py +141 -0
  233. vanna/servers/base/templates.py +331 -0
  234. vanna/servers/cli/__init__.py +7 -0
  235. vanna/servers/cli/server_runner.py +204 -0
  236. vanna/servers/fastapi/__init__.py +7 -0
  237. vanna/servers/fastapi/app.py +163 -0
  238. vanna/servers/fastapi/routes.py +183 -0
  239. vanna/servers/flask/__init__.py +7 -0
  240. vanna/servers/flask/app.py +132 -0
  241. vanna/servers/flask/routes.py +137 -0
  242. vanna/tools/__init__.py +41 -0
  243. vanna/tools/agent_memory.py +322 -0
  244. vanna/tools/file_system.py +879 -0
  245. vanna/tools/python.py +222 -0
  246. vanna/tools/run_sql.py +165 -0
  247. vanna/tools/visualize_data.py +195 -0
  248. vanna/utils/__init__.py +0 -0
  249. vanna/web_components/__init__.py +44 -0
  250. vanna-2.0.0.dist-info/METADATA +485 -0
  251. vanna-2.0.0.dist-info/RECORD +289 -0
  252. vanna-2.0.0.dist-info/entry_points.txt +3 -0
  253. vanna/bedrock/__init__.py +0 -1
  254. vanna/cohere/__init__.py +0 -2
  255. vanna/faiss/__init__.py +0 -1
  256. vanna/mock/vectordb.py +0 -55
  257. vanna/ollama/ollama.py +0 -103
  258. vanna/opensearch/opensearch_vector.py +0 -392
  259. vanna/opensearch/opensearch_vector_semantic.py +0 -175
  260. vanna/oracle/oracle_vector.py +0 -585
  261. vanna/qianfan/Qianfan_Chat.py +0 -165
  262. vanna/qianfan/Qianfan_embeddings.py +0 -36
  263. vanna/qianwen/QianwenAI_chat.py +0 -133
  264. vanna-0.7.8.dist-info/METADATA +0 -408
  265. vanna-0.7.8.dist-info/RECORD +0 -79
  266. /vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_Chat.py +0 -0
  267. /vanna/{ZhipuAI → legacy/ZhipuAI}/__init__.py +0 -0
  268. /vanna/{anthropic → legacy/anthropic}/__init__.py +0 -0
  269. /vanna/{azuresearch → legacy/azuresearch}/__init__.py +0 -0
  270. /vanna/{base → legacy/base}/__init__.py +0 -0
  271. /vanna/{chromadb → legacy/chromadb}/__init__.py +0 -0
  272. /vanna/{deepseek → legacy/deepseek}/__init__.py +0 -0
  273. /vanna/{exceptions → legacy/exceptions}/__init__.py +0 -0
  274. /vanna/{google → legacy/google}/__init__.py +0 -0
  275. /vanna/{hf → legacy/hf}/__init__.py +0 -0
  276. /vanna/{local.py → legacy/local.py} +0 -0
  277. /vanna/{marqo → legacy/marqo}/__init__.py +0 -0
  278. /vanna/{marqo → legacy/marqo}/marqo.py +0 -0
  279. /vanna/{milvus → legacy/milvus}/__init__.py +0 -0
  280. /vanna/{mistral → legacy/mistral}/__init__.py +0 -0
  281. /vanna/{mistral → legacy/mistral}/mistral.py +0 -0
  282. /vanna/{mock → legacy/mock}/__init__.py +0 -0
  283. /vanna/{mock → legacy/mock}/embedding.py +0 -0
  284. /vanna/{ollama → legacy/ollama}/__init__.py +0 -0
  285. /vanna/{openai → legacy/openai}/__init__.py +0 -0
  286. /vanna/{openai → legacy/openai}/openai_embeddings.py +0 -0
  287. /vanna/{opensearch → legacy/opensearch}/__init__.py +0 -0
  288. /vanna/{oracle → legacy/oracle}/__init__.py +0 -0
  289. /vanna/{pgvector → legacy/pgvector}/__init__.py +0 -0
  290. /vanna/{pinecone → legacy/pinecone}/__init__.py +0 -0
  291. /vanna/{pinecone → legacy/pinecone}/pinecone_vector.py +0 -0
  292. /vanna/{qdrant → legacy/qdrant}/__init__.py +0 -0
  293. /vanna/{qianfan → legacy/qianfan}/__init__.py +0 -0
  294. /vanna/{qianwen → legacy/qianwen}/QianwenAI_embeddings.py +0 -0
  295. /vanna/{qianwen → legacy/qianwen}/__init__.py +0 -0
  296. /vanna/{types → legacy/types}/__init__.py +0 -0
  297. /vanna/{vannadb → legacy/vannadb}/__init__.py +0 -0
  298. /vanna/{vllm → legacy/vllm}/__init__.py +0 -0
  299. /vanna/{weaviate → legacy/weaviate}/__init__.py +0 -0
  300. /vanna/{xinference → legacy/xinference}/__init__.py +0 -0
  301. {vanna-0.7.8.dist-info → vanna-2.0.0.dist-info}/WHEEL +0 -0
  302. {vanna-0.7.8.dist-info → vanna-2.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,392 +0,0 @@
1
- import base64
2
- import uuid
3
- from typing import List
4
-
5
- import pandas as pd
6
- from opensearchpy import OpenSearch
7
-
8
- from ..base import VannaBase
9
-
10
-
11
class OpenSearch_VectorStore(VannaBase):
    """Keep Vanna training data in OpenSearch and look it up with ``match`` queries.

    Three indices are used: one for documentation, one for DDL statements and
    one for question/SQL pairs. Retrieval is plain full-text matching; no
    embeddings are computed by this class.

    Recognised ``config`` keys (all optional):

    * ``es_document_index``, ``es_ddl_index``, ``es_question_sql_index`` --
      index names.
    * ``es_document_index_settings``, ``es_ddl_index_settings``,
      ``es_question_sql_index_settings`` -- bodies used when creating an index.
    * ``es_urls`` -- passed as the single entry of ``hosts``; when absent,
      ``es_host`` / ``es_port`` are used instead.
    * ``es_ssl``, ``es_verify_certs``, ``es_timeout``, ``es_max_retries``,
      ``es_http_compress`` -- forwarded to the ``OpenSearch`` client.
    * ``es_user`` / ``es_password`` -- credentials; with ``es_encoded_base64``
      truthy they are sent as a pre-built ``Authorization: Basic`` header
      instead of through ``http_auth``.
    * ``es_headers`` -- extra request headers.
    """

    @staticmethod
    def _default_index_settings(*text_fields: str) -> dict:
        """Build the default index body: 6 shards, 2 replicas, ``text`` fields."""
        return {
            "settings": {
                "index": {
                    "number_of_shards": 6,
                    "number_of_replicas": 2
                }
            },
            "mappings": {
                "properties": {
                    field: {"type": "text"} for field in text_fields
                }
            }
        }

    def __init__(self, config=None):
        """Read ``config``, connect to OpenSearch and create missing indices.

        Args:
            config: Optional mapping of settings (see the class docstring).

        Raises:
            KeyError: If ``es_user`` is configured without ``es_password``.
        """
        VannaBase.__init__(self, config=config)
        # Work on a never-None view so every lookup can use .get().
        cfg = {} if config is None else config

        # --- index names -------------------------------------------------
        self.document_index = cfg.get("es_document_index",
                                      "vanna_document_index")
        self.ddl_index = cfg.get("es_ddl_index", "vanna_ddl_index")
        self.question_sql_index = cfg.get("es_question_sql_index",
                                          "vanna_questions_sql_index")
        print("OpenSearch_VectorStore initialized with document_index: ",
              self.document_index, " ddl_index: ", self.ddl_index,
              " question_sql_index: ", self.question_sql_index)

        # --- index creation bodies ----------------------------------------
        self.document_index_settings = cfg.get(
            "es_document_index_settings",
            self._default_index_settings("question", "doc"))
        self.ddl_index_settings = cfg.get(
            "es_ddl_index_settings",
            self._default_index_settings("ddl", "doc"))
        self.question_sql_index_settings = cfg.get(
            "es_question_sql_index_settings",
            self._default_index_settings("question", "sql"))

        # --- connection parameters ------------------------------------------
        es_urls = cfg.get("es_urls")
        host = cfg.get("es_host", "localhost")
        port = cfg.get("es_port", 9200)
        ssl = cfg.get("es_ssl", False)
        verify_certs = cfg.get("es_verify_certs", False)
        timeout = cfg.get("es_timeout", 60)
        max_retries = cfg.get("es_max_retries", 10)
        es_http_compress = cfg.get("es_http_compress", False)

        # --- authentication -------------------------------------------------
        if "es_user" in cfg:
            auth = (cfg["es_user"], cfg["es_password"])
        else:
            # No credentials configured: connect without authentication.
            auth = None

        headers = None
        if (cfg.get("es_encoded_base64") and "es_user" in cfg
                and "es_password" in cfg):
            # Send the credentials as a ready-made Basic header and do not
            # hand them to the client a second time through http_auth.
            encoded_credentials = base64.b64encode(
                (cfg["es_user"] + ":" + cfg["es_password"]).encode("utf-8")
            ).decode("utf-8")
            headers = {
                'Authorization': 'Basic ' + encoded_credentials
            }
            auth = None

        if "es_headers" in cfg:
            custom_headers = cfg["es_headers"]
            if headers is None:
                headers = custom_headers
            elif custom_headers:
                # Keep the Basic header built above; caller-supplied headers
                # win on conflicting keys. Replacing the dict outright would
                # silently drop the credentials, since auth is already None.
                headers = {**headers, **custom_headers}

        print("OpenSearch_VectorStore initialized with es_urls: ", es_urls,
              " host: ", host, " port: ", port, " ssl: ", ssl,
              " verify_certs: ", verify_certs, " timeout: ", timeout,
              " max_retries: ", max_retries)

        # Either the configured URL value or a host/port pair becomes the
        # single entry of ``hosts``; every other argument is identical.
        if es_urls is not None:
            hosts = [es_urls]
        else:
            hosts = [{'host': host, 'port': port}]
        self.client = OpenSearch(
            hosts=hosts,
            http_compress=es_http_compress,
            use_ssl=ssl,
            verify_certs=verify_certs,
            timeout=timeout,
            max_retries=max_retries,
            retry_on_timeout=True,
            http_auth=auth,
            headers=headers
        )

        print("OpenSearch_VectorStore initialized with client over ")

        # Run a simple query to check the connection. A failure is only
        # reported; construction continues either way.
        try:
            info = self.client.info()
            print('Connected to OpenSearch cluster:')
            print('OpenSearch cluster info:', info)
        except Exception as e:
            print('Error connecting to OpenSearch cluster:', e)

        # Create the indices if they don't exist
        self.create_index_if_not_exists(self.document_index,
                                        self.document_index_settings)
        self.create_index_if_not_exists(self.ddl_index,
                                        self.ddl_index_settings)
        self.create_index_if_not_exists(self.question_sql_index,
                                        self.question_sql_index_settings)

    def create_index(self):
        """Try to create all three indices with no explicit settings.

        Any exception raised by a creation call is printed and the loop moves
        on to the next index.
        """
        for index in [self.document_index, self.ddl_index,
                      self.question_sql_index]:
            try:
                self.client.indices.create(index)
            except Exception as e:
                print("Error creating index: ", e)
                print(f"opensearch index {index} already exists")

    def create_index_if_not_exists(self, index_name: str,
                                   index_settings: dict) -> bool:
        """Create ``index_name`` with ``index_settings`` unless it already exists.

        Returns:
            ``True`` if the index was created; ``False`` if it already existed
            or if any exception was raised (the exception is printed).
        """
        try:
            if not self.client.indices.exists(index_name):
                print(f"Index {index_name} does not exist. Creating...")
                self.client.indices.create(index=index_name,
                                           body=index_settings)
                return True
            else:
                print(f"Index {index_name} already exists.")
                return False
        except Exception as e:
            print(f"Error creating index: {index_name} ", e)
            return False

    def add_ddl(self, ddl: str, **kwargs) -> str:
        """Index a DDL statement and return the ``_id`` reported by OpenSearch.

        The id is a random UUID with a ``-ddl`` suffix; the suffix is what
        ``remove_training_data`` uses to pick the index. Extra keyword
        arguments are forwarded to ``client.index``.
        """
        doc_id = str(uuid.uuid4()) + "-ddl"
        ddl_dict = {
            "ddl": ddl
        }
        response = self.client.index(index=self.ddl_index, body=ddl_dict,
                                     id=doc_id, **kwargs)
        return response['_id']

    def add_documentation(self, doc: str, **kwargs) -> str:
        """Index a documentation string and return the ``_id`` from OpenSearch.

        The id is a random UUID with a ``-doc`` suffix. Extra keyword
        arguments are forwarded to ``client.index``.
        """
        doc_id = str(uuid.uuid4()) + "-doc"
        doc_dict = {
            "doc": doc
        }
        response = self.client.index(index=self.document_index, id=doc_id,
                                     body=doc_dict, **kwargs)
        return response['_id']

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        """Index a question/SQL pair and return the ``_id`` from OpenSearch.

        The id is a random UUID with a ``-sql`` suffix. Extra keyword
        arguments are forwarded to ``client.index``.
        """
        doc_id = str(uuid.uuid4()) + "-sql"
        question_sql_dict = {
            "question": question,
            "sql": sql
        }
        response = self.client.index(index=self.question_sql_index,
                                     body=question_sql_dict, id=doc_id,
                                     **kwargs)
        return response['_id']

    def get_related_ddl(self, question: str, **kwargs) -> List[str]:
        """Return DDL strings whose ``ddl`` field matches ``question``.

        This is a full-text ``match`` query, not a vector search. Extra
        keyword arguments are forwarded to ``client.search``.
        """
        query = {
            "query": {
                "match": {
                    "ddl": question
                }
            }
        }
        print(query)
        response = self.client.search(index=self.ddl_index, body=query,
                                      **kwargs)
        return [hit['_source']['ddl'] for hit in response['hits']['hits']]

    def get_related_documentation(self, question: str, **kwargs) -> List[str]:
        """Return documentation strings whose ``doc`` field matches ``question``.

        Extra keyword arguments are forwarded to ``client.search``.
        """
        query = {
            "query": {
                "match": {
                    "doc": question
                }
            }
        }
        print(query)
        response = self.client.search(index=self.document_index,
                                      body=query,
                                      **kwargs)
        return [hit['_source']['doc'] for hit in response['hits']['hits']]

    def get_similar_question_sql(self, question: str, **kwargs) -> List[tuple]:
        """Return ``(question, sql)`` tuples whose question matches ``question``.

        Extra keyword arguments are forwarded to ``client.search``.
        """
        # NOTE(review): each element is a (question, sql) tuple, not a mapping;
        # confirm this is the shape the prompt-building code expects.
        query = {
            "query": {
                "match": {
                    "question": question
                }
            }
        }
        print(query)
        response = self.client.search(index=self.question_sql_index,
                                      body=query,
                                      **kwargs)
        return [(hit['_source']['question'], hit['_source']['sql'])
                for hit in response['hits']['hits']]

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        """Return stored training data from all three indices as a DataFrame.

        Columns are ``id``, ``training_data_type`` (``documentation``,
        ``sql`` or ``ddl``), ``question`` and ``content``. Rows appear in that
        index order.

        WARNING: each index is read with a single ``size=1000`` request, so
        at most 1000 hits per index are returned. Do not rely on this for
        large indices.
        """
        query = {"query": {"match_all": {}}}
        print(query)

        def fetch_hits(index_name: str) -> list:
            # One page only; see the size limit noted in the docstring.
            response = self.client.search(
                index=index_name,
                body=query,
                size=1000
            )
            return response['hits']['hits']

        data = []

        data.extend(
            {
                "id": hit["_id"],
                "training_data_type": "documentation",
                "question": "",
                "content": hit["_source"]['doc'],
            }
            for hit in fetch_hits(self.document_index)
        )

        data.extend(
            {
                "id": hit["_id"],
                "training_data_type": "sql",
                # Missing fields fall back to an empty string.
                "question": hit.get("_source", {}).get("question", ""),
                "content": hit.get("_source", {}).get("sql", ""),
            }
            for hit in fetch_hits(self.question_sql_index)
        )

        data.extend(
            {
                "id": hit["_id"],
                "training_data_type": "ddl",
                "question": "",
                "content": hit["_source"]['ddl'],
            }
            for hit in fetch_hits(self.ddl_index)
        )

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        """Delete one training-data document, choosing the index by id suffix.

        Args:
            id: Document id ending in ``-sql``, ``-ddl`` or ``-doc``.
            **kwargs: Forwarded to ``client.delete``.

        Returns:
            ``True`` if a delete call completed; ``False`` for an unknown
            suffix or if any exception was raised (the exception is printed).
        """
        index_by_suffix = (
            ("-sql", self.question_sql_index),
            ("-ddl", self.ddl_index),
            ("-doc", self.document_index),
        )
        try:
            for suffix, index_name in index_by_suffix:
                if id.endswith(suffix):
                    self.client.delete(index=index_name, id=id, **kwargs)
                    return True
            return False
        except Exception as e:
            print("Error deleting training data: ", e)
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        """Do nothing and return ``None``.

        This store retrieves by text match, so it computes no embeddings.
        """
        # opensearch doesn't need to generate embeddings
        pass
384
-
385
- # OpenSearch_VectorStore.__init__(self, config={'es_urls':
386
- # "https://opensearch-node.test.com:9200", 'es_encoded_base64': True, 'es_user':
387
- # "admin", 'es_password': "admin", 'es_verify_certs': True})
388
-
389
-
390
- # OpenSearch_VectorStore.__init__(self, config={'es_host':
391
- # "https://opensearch-node.test.com", 'es_port': 9200, 'es_user': "admin",
392
- # 'es_password': "admin", 'es_verify_certs': True})
@@ -1,175 +0,0 @@
1
- import json
2
-
3
- import pandas as pd
4
- from langchain_community.vectorstores import OpenSearchVectorSearch
5
-
6
- from ..base import VannaBase
7
- from ..utils import deterministic_uuid
8
-
9
-
10
class OpenSearch_Semantic_VectorStore(VannaBase):
    """Keep Vanna training data in OpenSearch and retrieve it by similarity search.

    Documentation, DDL and question/SQL pairs each live in their own index,
    accessed through a separate ``OpenSearchVectorSearch`` store. Question/SQL
    pairs are stored as a JSON string with ``question`` and ``sql`` keys.

    Recognised ``config`` keys (all optional): ``embedding_function``,
    ``n_results``, ``n_results_sql``, ``n_results_documentation``,
    ``n_results_ddl``, ``es_document_index``, ``es_ddl_index``,
    ``es_question_sql_index``, ``es_urls``, ``es_ssl``, ``es_verify_certs``,
    ``es_user`` / ``es_password``, ``es_headers``, ``es_timeout``,
    ``es_max_retries``.
    """

    def __init__(self, config=None):
        """Read ``config`` and build the three vector stores.

        Args:
            config: Optional dict of settings (see the class docstring).

        Raises:
            KeyError: If ``es_user`` is configured without ``es_password``.
        """
        VannaBase.__init__(self, config=config)
        if config is None:
            config = {}

        if "embedding_function" in config:
            self.embedding_function = config.get("embedding_function")
        else:
            # Imported lazily so the dependency is only needed when no
            # embedding function is supplied.
            from langchain_huggingface import HuggingFaceEmbeddings
            self.embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

        # Per-type result counts fall back to ``n_results``, then to 10.
        default_n_results = config.get("n_results", 10)
        self.n_results_sql = config.get("n_results_sql", default_n_results)
        self.n_results_documentation = config.get("n_results_documentation", default_n_results)
        self.n_results_ddl = config.get("n_results_ddl", default_n_results)

        self.document_index = config.get("es_document_index", "vanna_document_index")
        self.ddl_index = config.get("es_ddl_index", "vanna_ddl_index")
        self.question_sql_index = config.get("es_question_sql_index", "vanna_questions_sql_index")

        self.log(f"OpenSearch_Semantic_VectorStore initialized with document_index: {self.document_index}, ddl_index: {self.ddl_index}, question_sql_index: {self.question_sql_index}")

        if "es_user" in config:
            auth = (config["es_user"], config["es_password"])
        else:
            auth = None

        # Arguments shared by all three stores; only the index name differs.
        common_args = {
            "opensearch_url": config.get("es_urls", "https://localhost:9200"),
            "embedding_function": self.embedding_function,
            "engine": "faiss",
            "http_auth": auth,
            "use_ssl": config.get("es_ssl", True),
            "verify_certs": config.get("es_verify_certs", True),
            "timeout": config.get("es_timeout", 60),
            "max_retries": config.get("es_max_retries", 10),
            "retry_on_timeout": True,
            "headers": config.get("es_headers", None),
        }

        self.documentation_store = OpenSearchVectorSearch(index_name=self.document_index, **common_args)
        self.ddl_store = OpenSearchVectorSearch(index_name=self.ddl_index, **common_args)
        self.sql_store = OpenSearchVectorSearch(index_name=self.question_sql_index, **common_args)

    def add_ddl(self, ddl: str, **kwargs) -> str:
        """Store a DDL statement and return its id.

        The id is ``deterministic_uuid(ddl)`` with a ``-ddl`` suffix. Extra
        keyword arguments are forwarded to ``add_texts``.
        """
        _id = deterministic_uuid(ddl) + "-ddl"
        self.ddl_store.add_texts(texts=[ddl], ids=[_id], **kwargs)
        return _id

    def add_documentation(self, documentation: str, **kwargs) -> str:
        """Store a documentation string and return its id.

        The id is ``deterministic_uuid(documentation)`` with a ``-doc``
        suffix. Extra keyword arguments are forwarded to ``add_texts``.
        """
        _id = deterministic_uuid(documentation) + "-doc"
        self.documentation_store.add_texts(texts=[documentation], ids=[_id], **kwargs)
        return _id

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        """Store a question/SQL pair as one JSON document and return its id.

        The id is ``deterministic_uuid`` of the JSON text with a ``-sql``
        suffix. Extra keyword arguments are forwarded to ``add_texts``.
        """
        question_sql_json = json.dumps(
            {
                "question": question,
                "sql": sql,
            },
            ensure_ascii=False,
        )

        _id = deterministic_uuid(question_sql_json) + "-sql"
        self.sql_store.add_texts(texts=[question_sql_json], ids=[_id], **kwargs)
        return _id

    def get_related_ddl(self, question: str, **kwargs) -> list:
        """Return the text of up to ``n_results_ddl`` DDL entries similar to ``question``."""
        documents = self.ddl_store.similarity_search(query=question, k=self.n_results_ddl)
        return [document.page_content for document in documents]

    def get_related_documentation(self, question: str, **kwargs) -> list:
        """Return the text of up to ``n_results_documentation`` similar documentation entries."""
        documents = self.documentation_store.similarity_search(query=question, k=self.n_results_documentation)
        return [document.page_content for document in documents]

    def get_similar_question_sql(self, question: str, **kwargs) -> list:
        """Return up to ``n_results_sql`` similar entries, each decoded from its stored JSON."""
        documents = self.sql_store.similarity_search(query=question, k=self.n_results_sql)
        return [json.loads(document.page_content) for document in documents]

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        """Return every stored training-data document as a DataFrame.

        Columns are ``id``, ``training_data_type`` (``documentation``,
        ``sql`` or ``ddl``), ``question`` and ``content``. Each index is read
        with the scroll API in pages of 1000. SQL rows whose stored text is
        not valid JSON are logged and skipped.
        """
        data = []
        query = {
            "query": {
                "match_all": {}
            }
        }

        indices = [
            {"index": self.document_index, "type": "documentation"},
            {"index": self.question_sql_index, "type": "sql"},
            {"index": self.ddl_index, "type": "ddl"},
        ]

        # Use documentation_store.client consistently for search on all indices
        opensearch_client = self.documentation_store.client
        scroll = '1m'  # keep scroll context for 1 minute

        for index_info in indices:
            index_name = index_info["index"]
            training_data_type = index_info["type"]
            response = opensearch_client.search(
                index=index_name,
                ignore_unavailable=True,
                body=query,
                scroll=scroll,
                size=1000
            )

            scroll_id = response.get('_scroll_id')

            try:
                while scroll_id:
                    hits = response['hits']['hits']
                    if not hits:
                        break  # No more hits, exit loop

                    for hit in hits:
                        source = hit['_source']
                        if training_data_type == "sql":
                            try:
                                doc_dict = json.loads(source['text'])
                                content = doc_dict.get("sql")
                                question = doc_dict.get("question")
                            except json.JSONDecodeError as e:
                                self.log(f"Skipping row with custom_id {hit['_id']} due to JSON parsing error: {e}","Error")
                                continue
                        else:  # documentation or ddl
                            content = source['text']
                            question = None

                        data.append({
                            "id": hit["_id"],
                            "training_data_type": training_data_type,
                            "question": question,
                            "content": content,
                        })

                    # Get next batch of results, using documentation_store.client.scroll
                    response = opensearch_client.scroll(scroll_id=scroll_id, scroll=scroll)
                    scroll_id = response.get('_scroll_id')
            finally:
                # Release the scroll context now rather than leaving it open
                # until the keep-alive expires. Cleanup is best-effort: a
                # failure here must not hide the data or an earlier error.
                if scroll_id:
                    try:
                        opensearch_client.clear_scroll(scroll_id=scroll_id)
                    except Exception as e:
                        self.log(f"Could not clear scroll context for index {index_name}: {e}", "Error")

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        """Delete one training-data document, choosing the store by id suffix.

        Args:
            id: Document id ending in ``-sql``, ``-ddl`` or ``-doc``.
            **kwargs: Forwarded to the store's ``delete``.

        Returns:
            Whatever the store's ``delete`` returns; ``False`` for an unknown
            suffix or if any exception was raised (the exception is logged).
        """
        store_by_suffix = (
            ("-sql", self.sql_store),
            ("-ddl", self.ddl_store),
            ("-doc", self.documentation_store),
        )
        try:
            for suffix, store in store_by_suffix:
                if id.endswith(suffix):
                    return store.delete(ids=[id], **kwargs)
            return False
        except Exception as e:
            self.log(f"Error deleting training data: {e}", "Error")
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        """Do nothing and return ``None``; this class does not compute embeddings here."""
        pass