vanna 0.7.9__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- vanna/__init__.py +167 -395
- vanna/agents/__init__.py +7 -0
- vanna/capabilities/__init__.py +17 -0
- vanna/capabilities/agent_memory/__init__.py +21 -0
- vanna/capabilities/agent_memory/base.py +103 -0
- vanna/capabilities/agent_memory/models.py +53 -0
- vanna/capabilities/file_system/__init__.py +14 -0
- vanna/capabilities/file_system/base.py +71 -0
- vanna/capabilities/file_system/models.py +25 -0
- vanna/capabilities/sql_runner/__init__.py +13 -0
- vanna/capabilities/sql_runner/base.py +37 -0
- vanna/capabilities/sql_runner/models.py +13 -0
- vanna/components/__init__.py +92 -0
- vanna/components/base.py +11 -0
- vanna/components/rich/__init__.py +83 -0
- vanna/components/rich/containers/__init__.py +7 -0
- vanna/components/rich/containers/card.py +20 -0
- vanna/components/rich/data/__init__.py +9 -0
- vanna/components/rich/data/chart.py +17 -0
- vanna/components/rich/data/dataframe.py +93 -0
- vanna/components/rich/feedback/__init__.py +21 -0
- vanna/components/rich/feedback/badge.py +16 -0
- vanna/components/rich/feedback/icon_text.py +14 -0
- vanna/components/rich/feedback/log_viewer.py +41 -0
- vanna/components/rich/feedback/notification.py +19 -0
- vanna/components/rich/feedback/progress.py +37 -0
- vanna/components/rich/feedback/status_card.py +28 -0
- vanna/components/rich/feedback/status_indicator.py +14 -0
- vanna/components/rich/interactive/__init__.py +21 -0
- vanna/components/rich/interactive/button.py +95 -0
- vanna/components/rich/interactive/task_list.py +58 -0
- vanna/components/rich/interactive/ui_state.py +93 -0
- vanna/components/rich/specialized/__init__.py +7 -0
- vanna/components/rich/specialized/artifact.py +20 -0
- vanna/components/rich/text.py +16 -0
- vanna/components/simple/__init__.py +15 -0
- vanna/components/simple/image.py +15 -0
- vanna/components/simple/link.py +15 -0
- vanna/components/simple/text.py +11 -0
- vanna/core/__init__.py +193 -0
- vanna/core/_compat.py +19 -0
- vanna/core/agent/__init__.py +10 -0
- vanna/core/agent/agent.py +1407 -0
- vanna/core/agent/config.py +123 -0
- vanna/core/audit/__init__.py +28 -0
- vanna/core/audit/base.py +299 -0
- vanna/core/audit/models.py +131 -0
- vanna/core/component_manager.py +329 -0
- vanna/core/components.py +53 -0
- vanna/core/enhancer/__init__.py +11 -0
- vanna/core/enhancer/base.py +94 -0
- vanna/core/enhancer/default.py +118 -0
- vanna/core/enricher/__init__.py +10 -0
- vanna/core/enricher/base.py +59 -0
- vanna/core/errors.py +47 -0
- vanna/core/evaluation/__init__.py +81 -0
- vanna/core/evaluation/base.py +186 -0
- vanna/core/evaluation/dataset.py +254 -0
- vanna/core/evaluation/evaluators.py +376 -0
- vanna/core/evaluation/report.py +289 -0
- vanna/core/evaluation/runner.py +313 -0
- vanna/core/filter/__init__.py +10 -0
- vanna/core/filter/base.py +67 -0
- vanna/core/lifecycle/__init__.py +10 -0
- vanna/core/lifecycle/base.py +83 -0
- vanna/core/llm/__init__.py +16 -0
- vanna/core/llm/base.py +40 -0
- vanna/core/llm/models.py +61 -0
- vanna/core/middleware/__init__.py +10 -0
- vanna/core/middleware/base.py +69 -0
- vanna/core/observability/__init__.py +11 -0
- vanna/core/observability/base.py +88 -0
- vanna/core/observability/models.py +47 -0
- vanna/core/recovery/__init__.py +11 -0
- vanna/core/recovery/base.py +84 -0
- vanna/core/recovery/models.py +32 -0
- vanna/core/registry.py +278 -0
- vanna/core/rich_component.py +156 -0
- vanna/core/simple_component.py +27 -0
- vanna/core/storage/__init__.py +14 -0
- vanna/core/storage/base.py +46 -0
- vanna/core/storage/models.py +46 -0
- vanna/core/system_prompt/__init__.py +13 -0
- vanna/core/system_prompt/base.py +36 -0
- vanna/core/system_prompt/default.py +157 -0
- vanna/core/tool/__init__.py +18 -0
- vanna/core/tool/base.py +70 -0
- vanna/core/tool/models.py +84 -0
- vanna/core/user/__init__.py +17 -0
- vanna/core/user/base.py +29 -0
- vanna/core/user/models.py +25 -0
- vanna/core/user/request_context.py +70 -0
- vanna/core/user/resolver.py +42 -0
- vanna/core/validation.py +164 -0
- vanna/core/workflow/__init__.py +12 -0
- vanna/core/workflow/base.py +254 -0
- vanna/core/workflow/default.py +789 -0
- vanna/examples/__init__.py +1 -0
- vanna/examples/__main__.py +44 -0
- vanna/examples/anthropic_quickstart.py +80 -0
- vanna/examples/artifact_example.py +293 -0
- vanna/examples/claude_sqlite_example.py +236 -0
- vanna/examples/coding_agent_example.py +300 -0
- vanna/examples/custom_system_prompt_example.py +174 -0
- vanna/examples/default_workflow_handler_example.py +208 -0
- vanna/examples/email_auth_example.py +340 -0
- vanna/examples/evaluation_example.py +269 -0
- vanna/examples/extensibility_example.py +262 -0
- vanna/examples/minimal_example.py +67 -0
- vanna/examples/mock_auth_example.py +227 -0
- vanna/examples/mock_custom_tool.py +311 -0
- vanna/examples/mock_quickstart.py +79 -0
- vanna/examples/mock_quota_example.py +145 -0
- vanna/examples/mock_rich_components_demo.py +396 -0
- vanna/examples/mock_sqlite_example.py +223 -0
- vanna/examples/openai_quickstart.py +83 -0
- vanna/examples/primitive_components_demo.py +305 -0
- vanna/examples/quota_lifecycle_example.py +139 -0
- vanna/examples/visualization_example.py +251 -0
- vanna/integrations/__init__.py +17 -0
- vanna/integrations/anthropic/__init__.py +9 -0
- vanna/integrations/anthropic/llm.py +270 -0
- vanna/integrations/azureopenai/__init__.py +9 -0
- vanna/integrations/azureopenai/llm.py +329 -0
- vanna/integrations/azuresearch/__init__.py +7 -0
- vanna/integrations/azuresearch/agent_memory.py +413 -0
- vanna/integrations/bigquery/__init__.py +5 -0
- vanna/integrations/bigquery/sql_runner.py +81 -0
- vanna/integrations/chromadb/__init__.py +104 -0
- vanna/integrations/chromadb/agent_memory.py +416 -0
- vanna/integrations/clickhouse/__init__.py +5 -0
- vanna/integrations/clickhouse/sql_runner.py +82 -0
- vanna/integrations/duckdb/__init__.py +5 -0
- vanna/integrations/duckdb/sql_runner.py +65 -0
- vanna/integrations/faiss/__init__.py +7 -0
- vanna/integrations/faiss/agent_memory.py +431 -0
- vanna/integrations/google/__init__.py +9 -0
- vanna/integrations/google/gemini.py +370 -0
- vanna/integrations/hive/__init__.py +5 -0
- vanna/integrations/hive/sql_runner.py +87 -0
- vanna/integrations/local/__init__.py +17 -0
- vanna/integrations/local/agent_memory/__init__.py +7 -0
- vanna/integrations/local/agent_memory/in_memory.py +285 -0
- vanna/integrations/local/audit.py +59 -0
- vanna/integrations/local/file_system.py +242 -0
- vanna/integrations/local/file_system_conversation_store.py +255 -0
- vanna/integrations/local/storage.py +62 -0
- vanna/integrations/marqo/__init__.py +7 -0
- vanna/integrations/marqo/agent_memory.py +354 -0
- vanna/integrations/milvus/__init__.py +7 -0
- vanna/integrations/milvus/agent_memory.py +458 -0
- vanna/integrations/mock/__init__.py +9 -0
- vanna/integrations/mock/llm.py +65 -0
- vanna/integrations/mssql/__init__.py +5 -0
- vanna/integrations/mssql/sql_runner.py +66 -0
- vanna/integrations/mysql/__init__.py +5 -0
- vanna/integrations/mysql/sql_runner.py +92 -0
- vanna/integrations/ollama/__init__.py +7 -0
- vanna/integrations/ollama/llm.py +252 -0
- vanna/integrations/openai/__init__.py +10 -0
- vanna/integrations/openai/llm.py +267 -0
- vanna/integrations/openai/responses.py +163 -0
- vanna/integrations/opensearch/__init__.py +7 -0
- vanna/integrations/opensearch/agent_memory.py +411 -0
- vanna/integrations/oracle/__init__.py +5 -0
- vanna/integrations/oracle/sql_runner.py +75 -0
- vanna/integrations/pinecone/__init__.py +7 -0
- vanna/integrations/pinecone/agent_memory.py +329 -0
- vanna/integrations/plotly/__init__.py +5 -0
- vanna/integrations/plotly/chart_generator.py +313 -0
- vanna/integrations/postgres/__init__.py +9 -0
- vanna/integrations/postgres/sql_runner.py +112 -0
- vanna/integrations/premium/agent_memory/__init__.py +7 -0
- vanna/integrations/premium/agent_memory/premium.py +186 -0
- vanna/integrations/presto/__init__.py +5 -0
- vanna/integrations/presto/sql_runner.py +107 -0
- vanna/integrations/qdrant/__init__.py +7 -0
- vanna/integrations/qdrant/agent_memory.py +461 -0
- vanna/integrations/snowflake/__init__.py +5 -0
- vanna/integrations/snowflake/sql_runner.py +147 -0
- vanna/integrations/sqlite/__init__.py +9 -0
- vanna/integrations/sqlite/sql_runner.py +65 -0
- vanna/integrations/weaviate/__init__.py +7 -0
- vanna/integrations/weaviate/agent_memory.py +428 -0
- vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_embeddings.py +11 -11
- vanna/legacy/__init__.py +403 -0
- vanna/legacy/adapter.py +463 -0
- vanna/{advanced → legacy/advanced}/__init__.py +3 -1
- vanna/{anthropic → legacy/anthropic}/anthropic_chat.py +9 -7
- vanna/{azuresearch → legacy/azuresearch}/azuresearch_vector.py +79 -41
- vanna/{base → legacy/base}/base.py +224 -217
- vanna/legacy/bedrock/__init__.py +1 -0
- vanna/{bedrock → legacy/bedrock}/bedrock_converse.py +13 -12
- vanna/{chromadb → legacy/chromadb}/chromadb_vector.py +3 -1
- vanna/legacy/cohere/__init__.py +2 -0
- vanna/{cohere → legacy/cohere}/cohere_chat.py +19 -14
- vanna/{cohere → legacy/cohere}/cohere_embeddings.py +25 -19
- vanna/{deepseek → legacy/deepseek}/deepseek_chat.py +5 -6
- vanna/legacy/faiss/__init__.py +1 -0
- vanna/{faiss → legacy/faiss}/faiss.py +113 -59
- vanna/{flask → legacy/flask}/__init__.py +84 -43
- vanna/{flask → legacy/flask}/assets.py +5 -5
- vanna/{flask → legacy/flask}/auth.py +5 -4
- vanna/{google → legacy/google}/bigquery_vector.py +75 -42
- vanna/{google → legacy/google}/gemini_chat.py +7 -3
- vanna/{hf → legacy/hf}/hf.py +0 -1
- vanna/{milvus → legacy/milvus}/milvus_vector.py +58 -35
- vanna/{mock → legacy/mock}/llm.py +0 -1
- vanna/legacy/mock/vectordb.py +67 -0
- vanna/legacy/ollama/ollama.py +110 -0
- vanna/{openai → legacy/openai}/openai_chat.py +2 -6
- vanna/legacy/opensearch/opensearch_vector.py +369 -0
- vanna/legacy/opensearch/opensearch_vector_semantic.py +200 -0
- vanna/legacy/oracle/oracle_vector.py +584 -0
- vanna/{pgvector → legacy/pgvector}/pgvector.py +42 -13
- vanna/{qdrant → legacy/qdrant}/qdrant.py +2 -6
- vanna/legacy/qianfan/Qianfan_Chat.py +170 -0
- vanna/legacy/qianfan/Qianfan_embeddings.py +36 -0
- vanna/legacy/qianwen/QianwenAI_chat.py +132 -0
- vanna/{remote.py → legacy/remote.py} +28 -26
- vanna/{utils.py → legacy/utils.py} +6 -11
- vanna/{vannadb → legacy/vannadb}/vannadb_vector.py +115 -46
- vanna/{vllm → legacy/vllm}/vllm.py +5 -6
- vanna/{weaviate → legacy/weaviate}/weaviate_vector.py +59 -40
- vanna/{xinference → legacy/xinference}/xinference.py +6 -6
- vanna/py.typed +0 -0
- vanna/servers/__init__.py +16 -0
- vanna/servers/__main__.py +8 -0
- vanna/servers/base/__init__.py +18 -0
- vanna/servers/base/chat_handler.py +65 -0
- vanna/servers/base/models.py +111 -0
- vanna/servers/base/rich_chat_handler.py +141 -0
- vanna/servers/base/templates.py +331 -0
- vanna/servers/cli/__init__.py +7 -0
- vanna/servers/cli/server_runner.py +204 -0
- vanna/servers/fastapi/__init__.py +7 -0
- vanna/servers/fastapi/app.py +163 -0
- vanna/servers/fastapi/routes.py +183 -0
- vanna/servers/flask/__init__.py +7 -0
- vanna/servers/flask/app.py +132 -0
- vanna/servers/flask/routes.py +137 -0
- vanna/tools/__init__.py +41 -0
- vanna/tools/agent_memory.py +322 -0
- vanna/tools/file_system.py +879 -0
- vanna/tools/python.py +222 -0
- vanna/tools/run_sql.py +165 -0
- vanna/tools/visualize_data.py +195 -0
- vanna/utils/__init__.py +0 -0
- vanna/web_components/__init__.py +44 -0
- vanna-2.0.0.dist-info/METADATA +485 -0
- vanna-2.0.0.dist-info/RECORD +289 -0
- vanna-2.0.0.dist-info/entry_points.txt +3 -0
- vanna/bedrock/__init__.py +0 -1
- vanna/cohere/__init__.py +0 -2
- vanna/faiss/__init__.py +0 -1
- vanna/mock/vectordb.py +0 -55
- vanna/ollama/ollama.py +0 -103
- vanna/opensearch/opensearch_vector.py +0 -392
- vanna/opensearch/opensearch_vector_semantic.py +0 -175
- vanna/oracle/oracle_vector.py +0 -585
- vanna/qianfan/Qianfan_Chat.py +0 -165
- vanna/qianfan/Qianfan_embeddings.py +0 -36
- vanna/qianwen/QianwenAI_chat.py +0 -133
- vanna-0.7.9.dist-info/METADATA +0 -408
- vanna-0.7.9.dist-info/RECORD +0 -79
- /vanna/{ZhipuAI → legacy/ZhipuAI}/ZhipuAI_Chat.py +0 -0
- /vanna/{ZhipuAI → legacy/ZhipuAI}/__init__.py +0 -0
- /vanna/{anthropic → legacy/anthropic}/__init__.py +0 -0
- /vanna/{azuresearch → legacy/azuresearch}/__init__.py +0 -0
- /vanna/{base → legacy/base}/__init__.py +0 -0
- /vanna/{chromadb → legacy/chromadb}/__init__.py +0 -0
- /vanna/{deepseek → legacy/deepseek}/__init__.py +0 -0
- /vanna/{exceptions → legacy/exceptions}/__init__.py +0 -0
- /vanna/{google → legacy/google}/__init__.py +0 -0
- /vanna/{hf → legacy/hf}/__init__.py +0 -0
- /vanna/{local.py → legacy/local.py} +0 -0
- /vanna/{marqo → legacy/marqo}/__init__.py +0 -0
- /vanna/{marqo → legacy/marqo}/marqo.py +0 -0
- /vanna/{milvus → legacy/milvus}/__init__.py +0 -0
- /vanna/{mistral → legacy/mistral}/__init__.py +0 -0
- /vanna/{mistral → legacy/mistral}/mistral.py +0 -0
- /vanna/{mock → legacy/mock}/__init__.py +0 -0
- /vanna/{mock → legacy/mock}/embedding.py +0 -0
- /vanna/{ollama → legacy/ollama}/__init__.py +0 -0
- /vanna/{openai → legacy/openai}/__init__.py +0 -0
- /vanna/{openai → legacy/openai}/openai_embeddings.py +0 -0
- /vanna/{opensearch → legacy/opensearch}/__init__.py +0 -0
- /vanna/{oracle → legacy/oracle}/__init__.py +0 -0
- /vanna/{pgvector → legacy/pgvector}/__init__.py +0 -0
- /vanna/{pinecone → legacy/pinecone}/__init__.py +0 -0
- /vanna/{pinecone → legacy/pinecone}/pinecone_vector.py +0 -0
- /vanna/{qdrant → legacy/qdrant}/__init__.py +0 -0
- /vanna/{qianfan → legacy/qianfan}/__init__.py +0 -0
- /vanna/{qianwen → legacy/qianwen}/QianwenAI_embeddings.py +0 -0
- /vanna/{qianwen → legacy/qianwen}/__init__.py +0 -0
- /vanna/{types → legacy/types}/__init__.py +0 -0
- /vanna/{vannadb → legacy/vannadb}/__init__.py +0 -0
- /vanna/{vllm → legacy/vllm}/__init__.py +0 -0
- /vanna/{weaviate → legacy/weaviate}/__init__.py +0 -0
- /vanna/{xinference → legacy/xinference}/__init__.py +0 -0
- {vanna-0.7.9.dist-info → vanna-2.0.0.dist-info}/WHEEL +0 -0
- {vanna-0.7.9.dist-info → vanna-2.0.0.dist-info}/licenses/LICENSE +0 -0
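The restructuring above moves every 0.7.x provider module under a new `vanna.legacy` package, while the 2.0 code lives in `vanna.core`, `vanna.integrations`, `vanna.tools`, and `vanna.servers`. Below is a minimal sketch of what those file moves imply for import paths; whether 2.0.0 also re-exports the old paths (for example via `vanna/legacy/__init__.py` or the new `vanna/legacy/adapter.py`) is not shown in this diff, so the new paths are an assumption drawn only from the renames listed above.

```python
# Import paths implied by the renames above (assumption: the old paths are not aliased).

# vanna 0.7.9
# from vanna.openai.openai_chat import OpenAI_Chat
# from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore

# vanna 2.0.0 -- the same modules now live under vanna.legacy
from vanna.legacy.openai.openai_chat import OpenAI_Chat
from vanna.legacy.chromadb.chromadb_vector import ChromaDB_VectorStore
```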
vanna/legacy/opensearch/opensearch_vector.py (new file)

@@ -0,0 +1,369 @@
import base64
import uuid
from typing import List

import pandas as pd
from opensearchpy import OpenSearch

from ..base import VannaBase


class OpenSearch_VectorStore(VannaBase):
    def __init__(self, config=None):
        VannaBase.__init__(self, config=config)
        document_index = "vanna_document_index"
        ddl_index = "vanna_ddl_index"
        question_sql_index = "vanna_questions_sql_index"
        if config is not None and "es_document_index" in config:
            document_index = config["es_document_index"]
        if config is not None and "es_ddl_index" in config:
            ddl_index = config["es_ddl_index"]
        if config is not None and "es_question_sql_index" in config:
            question_sql_index = config["es_question_sql_index"]

        self.document_index = document_index
        self.ddl_index = ddl_index
        self.question_sql_index = question_sql_index
        print(
            "OpenSearch_VectorStore initialized with document_index: ",
            document_index,
            " ddl_index: ",
            ddl_index,
            " question_sql_index: ",
            question_sql_index,
        )

        document_index_settings = {
            "settings": {"index": {"number_of_shards": 6, "number_of_replicas": 2}},
            "mappings": {
                "properties": {
                    "question": {
                        "type": "text",
                    },
                    "doc": {
                        "type": "text",
                    },
                }
            },
        }

        ddl_index_settings = {
            "settings": {"index": {"number_of_shards": 6, "number_of_replicas": 2}},
            "mappings": {
                "properties": {
                    "ddl": {
                        "type": "text",
                    },
                    "doc": {
                        "type": "text",
                    },
                }
            },
        }

        question_sql_index_settings = {
            "settings": {"index": {"number_of_shards": 6, "number_of_replicas": 2}},
            "mappings": {
                "properties": {
                    "question": {
                        "type": "text",
                    },
                    "sql": {
                        "type": "text",
                    },
                }
            },
        }

        if config is not None and "es_document_index_settings" in config:
            document_index_settings = config["es_document_index_settings"]
        if config is not None and "es_ddl_index_settings" in config:
            ddl_index_settings = config["es_ddl_index_settings"]
        if config is not None and "es_question_sql_index_settings" in config:
            question_sql_index_settings = config["es_question_sql_index_settings"]

        self.document_index_settings = document_index_settings
        self.ddl_index_settings = ddl_index_settings
        self.question_sql_index_settings = question_sql_index_settings

        es_urls = None
        if config is not None and "es_urls" in config:
            es_urls = config["es_urls"]

        # Host and port
        if config is not None and "es_host" in config:
            host = config["es_host"]
        else:
            host = "localhost"

        if config is not None and "es_port" in config:
            port = config["es_port"]
        else:
            port = 9200

        if config is not None and "es_ssl" in config:
            ssl = config["es_ssl"]
        else:
            ssl = False

        if config is not None and "es_verify_certs" in config:
            verify_certs = config["es_verify_certs"]
        else:
            verify_certs = False

        # Authentication
        if config is not None and "es_user" in config:
            auth = (config["es_user"], config["es_password"])
        else:
            auth = None

        headers = None
        # base64 authentication
        if (
            config is not None
            and "es_encoded_base64" in config
            and "es_user" in config
            and "es_password" in config
        ):
            if config["es_encoded_base64"]:
                encoded_credentials = base64.b64encode(
                    (config["es_user"] + ":" + config["es_password"]).encode("utf-8")
                ).decode("utf-8")
                headers = {"Authorization": "Basic " + encoded_credentials}
                # remove auth from config
                auth = None

        # custom headers
        if config is not None and "es_headers" in config:
            headers = config["es_headers"]

        if config is not None and "es_timeout" in config:
            timeout = config["es_timeout"]
        else:
            timeout = 60

        if config is not None and "es_max_retries" in config:
            max_retries = config["es_max_retries"]
        else:
            max_retries = 10

        if config is not None and "es_http_compress" in config:
            es_http_compress = config["es_http_compress"]
        else:
            es_http_compress = False

        print(
            "OpenSearch_VectorStore initialized with es_urls: ",
            es_urls,
            " host: ",
            host,
            " port: ",
            port,
            " ssl: ",
            ssl,
            " verify_certs: ",
            verify_certs,
            " timeout: ",
            timeout,
            " max_retries: ",
            max_retries,
        )
        if es_urls is not None:
            # Initialize the OpenSearch client by passing a list of URLs
            self.client = OpenSearch(
                hosts=[es_urls],
                http_compress=es_http_compress,
                use_ssl=ssl,
                verify_certs=verify_certs,
                timeout=timeout,
                max_retries=max_retries,
                retry_on_timeout=True,
                http_auth=auth,
                headers=headers,
            )
        else:
            # Initialize the OpenSearch client by passing a host and port
            self.client = OpenSearch(
                hosts=[{"host": host, "port": port}],
                http_compress=es_http_compress,
                use_ssl=ssl,
                verify_certs=verify_certs,
                timeout=timeout,
                max_retries=max_retries,
                retry_on_timeout=True,
                http_auth=auth,
                headers=headers,
            )

        print("OpenSearch_VectorStore initialized with client")

        # Run a simple query to check the connection
        try:
            print("Connected to OpenSearch cluster:")
            info = self.client.info()
            print("OpenSearch cluster info:", info)
        except Exception as e:
            print("Error connecting to OpenSearch cluster:", e)

        # Create the indices if they don't exist
        self.create_index_if_not_exists(
            self.document_index, self.document_index_settings
        )
        self.create_index_if_not_exists(self.ddl_index, self.ddl_index_settings)
        self.create_index_if_not_exists(
            self.question_sql_index, self.question_sql_index_settings
        )

    def create_index(self):
        for index in [self.document_index, self.ddl_index, self.question_sql_index]:
            try:
                self.client.indices.create(index)
            except Exception as e:
                print("Error creating index: ", e)
                print(f"opensearch index {index} already exists")

    def create_index_if_not_exists(self, index_name: str, index_settings: dict) -> bool:
        try:
            if not self.client.indices.exists(index_name):
                print(f"Index {index_name} does not exist. Creating...")
                self.client.indices.create(index=index_name, body=index_settings)
                return True
            else:
                print(f"Index {index_name} already exists.")
                return False
        except Exception as e:
            print(f"Error creating index: {index_name} ", e)
            return False

    def add_ddl(self, ddl: str, **kwargs) -> str:
        # Assuming that you have a DDL index in your OpenSearch
        id = str(uuid.uuid4()) + "-ddl"
        ddl_dict = {"ddl": ddl}
        response = self.client.index(
            index=self.ddl_index, body=ddl_dict, id=id, **kwargs
        )
        return response["_id"]

    def add_documentation(self, doc: str, **kwargs) -> str:
        # Assuming you have a documentation index in your OpenSearch
        id = str(uuid.uuid4()) + "-doc"
        doc_dict = {"doc": doc}
        response = self.client.index(
            index=self.document_index, id=id, body=doc_dict, **kwargs
        )
        return response["_id"]

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        # Assuming you have a Questions and SQL index in your OpenSearch
        id = str(uuid.uuid4()) + "-sql"
        question_sql_dict = {"question": question, "sql": sql}
        response = self.client.index(
            index=self.question_sql_index, body=question_sql_dict, id=id, **kwargs
        )
        return response["_id"]

    def get_related_ddl(self, question: str, **kwargs) -> List[str]:
        # Assume you have some vector search mechanism associated with your data
        query = {"query": {"match": {"ddl": question}}}
        print(query)
        response = self.client.search(index=self.ddl_index, body=query, **kwargs)
        return [hit["_source"]["ddl"] for hit in response["hits"]["hits"]]

    def get_related_documentation(self, question: str, **kwargs) -> List[str]:
        query = {"query": {"match": {"doc": question}}}
        print(query)
        response = self.client.search(index=self.document_index, body=query, **kwargs)
        return [hit["_source"]["doc"] for hit in response["hits"]["hits"]]

    def get_similar_question_sql(self, question: str, **kwargs) -> List[str]:
        query = {"query": {"match": {"question": question}}}
        print(query)
        response = self.client.search(
            index=self.question_sql_index, body=query, **kwargs
        )
        return [
            (hit["_source"]["question"], hit["_source"]["sql"])
            for hit in response["hits"]["hits"]
        ]

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        # This will be a simple example pulling all data from an index
        # WARNING: Do not use this approach in production for large indices!
        data = []
        response = self.client.search(
            index=self.document_index, body={"query": {"match_all": {}}}, size=1000
        )
        print(response)
        for hit in response["hits"]["hits"]:
            data.append(
                {
                    "id": hit["_id"],
                    "training_data_type": "documentation",
                    "question": "",
                    "content": hit["_source"]["doc"],
                }
            )

        response = self.client.search(
            index=self.question_sql_index, body={"query": {"match_all": {}}}, size=1000
        )
        for hit in response["hits"]["hits"]:
            data.append(
                {
                    "id": hit["_id"],
                    "training_data_type": "sql",
                    "question": hit.get("_source", {}).get("question", ""),
                    "content": hit.get("_source", {}).get("sql", ""),
                }
            )

        response = self.client.search(
            index=self.ddl_index, body={"query": {"match_all": {}}}, size=1000
        )
        for hit in response["hits"]["hits"]:
            data.append(
                {
                    "id": hit["_id"],
                    "training_data_type": "ddl",
                    "question": "",
                    "content": hit["_source"]["ddl"],
                }
            )

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        try:
            if id.endswith("-sql"):
                self.client.delete(index=self.question_sql_index, id=id)
                return True
            elif id.endswith("-ddl"):
                self.client.delete(index=self.ddl_index, id=id, **kwargs)
                return True
            elif id.endswith("-doc"):
                self.client.delete(index=self.document_index, id=id, **kwargs)
                return True
            else:
                return False
        except Exception as e:
            print("Error deleting training data: ", e)
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        # opensearch doesn't need to generate embeddings
        pass


# OpenSearch_VectorStore.__init__(self, config={'es_urls':
# "https://opensearch-node.test.com:9200", 'es_encoded_base64': True, 'es_user':
# "admin", 'es_password': "admin", 'es_verify_certs': True})

# OpenSearch_VectorStore.__init__(self, config={'es_host':
# "https://opensearch-node.test.com", 'es_port': 9200, 'es_user': "admin",
# 'es_password': "admin", 'es_verify_certs': True})
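The commented-out configurations at the bottom of the file show the two ways the keyword-match store can be pointed at a cluster (a full `es_urls` URL, or `es_host`/`es_port`). Below is a minimal usage sketch following the usual Vanna pattern of mixing a vector store with a chat class; the combination with `OpenAI_Chat`, the 2.0 import locations, and the `api_key`/`model` keys are assumptions, not something this diff demonstrates.

```python
from vanna.legacy.openai.openai_chat import OpenAI_Chat
from vanna.legacy.opensearch.opensearch_vector import OpenSearch_VectorStore


# Assumed composition: a Vanna class is typically built by mixing a vector store
# with an LLM chat class and passing the same config dict to both initializers.
class MyVanna(OpenSearch_VectorStore, OpenAI_Chat):
    def __init__(self, config=None):
        OpenSearch_VectorStore.__init__(self, config=config)
        OpenAI_Chat.__init__(self, config=config)


vn = MyVanna(config={
    "es_host": "localhost",   # keys taken from the config handling above
    "es_port": 9200,
    "es_user": "admin",
    "es_password": "admin",
    "api_key": "sk-...",      # hypothetical OpenAI_Chat settings
    "model": "gpt-4o-mini",
})
vn.train(ddl="CREATE TABLE customers (id INT, name TEXT)")
print(vn.generate_sql("How many customers are there?"))
```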
vanna/legacy/opensearch/opensearch_vector_semantic.py (new file)

@@ -0,0 +1,200 @@
import json

import pandas as pd
from langchain_community.vectorstores import OpenSearchVectorSearch

from ..base import VannaBase
from ..utils import deterministic_uuid


class OpenSearch_Semantic_VectorStore(VannaBase):
    def __init__(self, config=None):
        VannaBase.__init__(self, config=config)
        if config is None:
            config = {}

        if "embedding_function" in config:
            self.embedding_function = config.get("embedding_function")
        else:
            from langchain_huggingface import HuggingFaceEmbeddings

            self.embedding_function = HuggingFaceEmbeddings(
                model_name="all-MiniLM-L6-v2"
            )

        self.n_results_sql = config.get("n_results_sql", config.get("n_results", 10))
        self.n_results_documentation = config.get(
            "n_results_documentation", config.get("n_results", 10)
        )
        self.n_results_ddl = config.get("n_results_ddl", config.get("n_results", 10))

        self.document_index = config.get("es_document_index", "vanna_document_index")
        self.ddl_index = config.get("es_ddl_index", "vanna_ddl_index")
        self.question_sql_index = config.get(
            "es_question_sql_index", "vanna_questions_sql_index"
        )

        self.log(
            f"OpenSearch_Semantic_VectorStore initialized with document_index: {self.document_index}, ddl_index: {self.ddl_index}, question_sql_index: {self.question_sql_index}"
        )

        es_urls = config.get("es_urls", "https://localhost:9200")
        ssl = config.get("es_ssl", True)
        verify_certs = config.get("es_verify_certs", True)

        if "es_user" in config:
            auth = (config["es_user"], config["es_password"])
        else:
            auth = None

        headers = config.get("es_headers", None)
        timeout = config.get("es_timeout", 60)
        max_retries = config.get("es_max_retries", 10)

        common_args = {
            "opensearch_url": es_urls,
            "embedding_function": self.embedding_function,
            "engine": "faiss",
            "http_auth": auth,
            "use_ssl": ssl,
            "verify_certs": verify_certs,
            "timeout": timeout,
            "max_retries": max_retries,
            "retry_on_timeout": True,
            "headers": headers,
        }

        self.documentation_store = OpenSearchVectorSearch(
            index_name=self.document_index, **common_args
        )
        self.ddl_store = OpenSearchVectorSearch(
            index_name=self.ddl_index, **common_args
        )
        self.sql_store = OpenSearchVectorSearch(
            index_name=self.question_sql_index, **common_args
        )

    def add_ddl(self, ddl: str, **kwargs) -> str:
        _id = deterministic_uuid(ddl) + "-ddl"
        self.ddl_store.add_texts(texts=[ddl], ids=[_id], **kwargs)
        return _id

    def add_documentation(self, documentation: str, **kwargs) -> str:
        _id = deterministic_uuid(documentation) + "-doc"
        self.documentation_store.add_texts(texts=[documentation], ids=[_id], **kwargs)
        return _id

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        question_sql_json = json.dumps(
            {
                "question": question,
                "sql": sql,
            },
            ensure_ascii=False,
        )

        _id = deterministic_uuid(question_sql_json) + "-sql"
        self.sql_store.add_texts(texts=[question_sql_json], ids=[_id], **kwargs)
        return _id

    def get_related_ddl(self, question: str, **kwargs) -> list:
        documents = self.ddl_store.similarity_search(
            query=question, k=self.n_results_ddl
        )
        return [document.page_content for document in documents]

    def get_related_documentation(self, question: str, **kwargs) -> list:
        documents = self.documentation_store.similarity_search(
            query=question, k=self.n_results_documentation
        )
        return [document.page_content for document in documents]

    def get_similar_question_sql(self, question: str, **kwargs) -> list:
        documents = self.sql_store.similarity_search(
            query=question, k=self.n_results_sql
        )
        return [json.loads(document.page_content) for document in documents]

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        data = []
        query = {"query": {"match_all": {}}}

        indices = [
            {"index": self.document_index, "type": "documentation"},
            {"index": self.question_sql_index, "type": "sql"},
            {"index": self.ddl_index, "type": "ddl"},
        ]

        # Use documentation_store.client consistently for search on all indices
        opensearch_client = self.documentation_store.client

        for index_info in indices:
            index_name = index_info["index"]
            training_data_type = index_info["type"]
            scroll = "1m"  # keep scroll context for 1 minute
            response = opensearch_client.search(
                index=index_name,
                ignore_unavailable=True,
                body=query,
                scroll=scroll,
                size=1000,
            )

            scroll_id = response.get("_scroll_id")

            while scroll_id:
                hits = response["hits"]["hits"]
                if not hits:
                    break  # No more hits, exit loop

                for hit in hits:
                    source = hit["_source"]
                    if training_data_type == "sql":
                        try:
                            doc_dict = json.loads(source["text"])
                            content = doc_dict.get("sql")
                            question = doc_dict.get("question")
                        except json.JSONDecodeError as e:
                            self.log(
                                f"Skipping row with custom_id {hit['_id']} due to JSON parsing error: {e}",
                                "Error",
                            )
                            continue
                    else:  # documentation or ddl
                        content = source["text"]
                        question = None

                    data.append(
                        {
                            "id": hit["_id"],
                            "training_data_type": training_data_type,
                            "question": question,
                            "content": content,
                        }
                    )

                # Get next batch of results, using documentation_store.client.scroll
                response = opensearch_client.scroll(scroll_id=scroll_id, scroll=scroll)
                scroll_id = response.get("_scroll_id")

        return pd.DataFrame(data)

    def remove_training_data(self, id: str, **kwargs) -> bool:
        try:
            if id.endswith("-sql"):
                return self.sql_store.delete(ids=[id], **kwargs)
            elif id.endswith("-ddl"):
                return self.ddl_store.delete(ids=[id], **kwargs)
            elif id.endswith("-doc"):
                return self.documentation_store.delete(ids=[id], **kwargs)
            else:
                return False
        except Exception as e:
            self.log(
                f"Error deleting training data: {e}",
                "Error",
            )
            return False

    def generate_embedding(self, data: str, **kwargs) -> list[float]:
        pass
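Unlike the keyword store above, which indexes `question` and `sql` as separate text fields, the semantic store serializes each pair into a single JSON string before embedding it, and `get_similar_question_sql` parses that string back into a dict. A small standalone sketch of that round-trip (plain `json`, no OpenSearch involved; the sample question and SQL are illustrative):

```python
import json

# What add_question_sql writes as the embedded text for one training pair
question_sql_json = json.dumps(
    {
        "question": "How many customers are there?",
        "sql": "SELECT COUNT(*) FROM customers;",
    },
    ensure_ascii=False,
)

# What get_similar_question_sql returns for each matching document:
# the parsed dict, so callers get {"question": ..., "sql": ...} back
pair = json.loads(question_sql_json)
print(pair["sql"])  # SELECT COUNT(*) FROM customers;
```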