vanna 0.7.5.tar.gz → 0.7.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {vanna-0.7.5 → vanna-0.7.6}/PKG-INFO +52 -5
  2. {vanna-0.7.5 → vanna-0.7.6}/README.md +43 -3
  3. {vanna-0.7.5 → vanna-0.7.6}/pyproject.toml +4 -3
  4. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/base/base.py +5 -2
  5. vanna-0.7.6/src/vanna/deepseek/__init__.py +1 -0
  6. vanna-0.7.6/src/vanna/deepseek/deepseek_chat.py +60 -0
  7. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/google/gemini_chat.py +23 -1
  8. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/ollama/ollama.py +1 -1
  9. vanna-0.7.6/src/vanna/opensearch/__init__.py +2 -0
  10. vanna-0.7.6/src/vanna/opensearch/opensearch_vector_semantic.py +175 -0
  11. vanna-0.7.6/src/vanna/oracle/__init__.py +1 -0
  12. vanna-0.7.6/src/vanna/oracle/oracle_vector.py +585 -0
  13. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/remote.py +1 -1
  14. vanna-0.7.5/src/vanna/opensearch/__init__.py +0 -1
  15. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/ZhipuAI/ZhipuAI_Chat.py +0 -0
  16. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/ZhipuAI/ZhipuAI_embeddings.py +0 -0
  17. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/ZhipuAI/__init__.py +0 -0
  18. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/__init__.py +0 -0
  19. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/advanced/__init__.py +0 -0
  20. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/anthropic/__init__.py +0 -0
  21. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/anthropic/anthropic_chat.py +0 -0
  22. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/azuresearch/__init__.py +0 -0
  23. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/azuresearch/azuresearch_vector.py +0 -0
  24. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/base/__init__.py +0 -0
  25. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/bedrock/__init__.py +0 -0
  26. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/bedrock/bedrock_converse.py +0 -0
  27. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/chromadb/__init__.py +0 -0
  28. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/chromadb/chromadb_vector.py +0 -0
  29. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/exceptions/__init__.py +0 -0
  30. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/faiss/__init__.py +0 -0
  31. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/faiss/faiss.py +0 -0
  32. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/flask/__init__.py +0 -0
  33. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/flask/assets.py +0 -0
  34. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/flask/auth.py +0 -0
  35. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/google/__init__.py +0 -0
  36. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/google/bigquery_vector.py +0 -0
  37. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/hf/__init__.py +0 -0
  38. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/hf/hf.py +0 -0
  39. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/local.py +0 -0
  40. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/marqo/__init__.py +0 -0
  41. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/marqo/marqo.py +0 -0
  42. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/milvus/__init__.py +0 -0
  43. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/milvus/milvus_vector.py +0 -0
  44. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mistral/__init__.py +0 -0
  45. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mistral/mistral.py +0 -0
  46. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mock/__init__.py +0 -0
  47. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mock/embedding.py +0 -0
  48. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mock/llm.py +0 -0
  49. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/mock/vectordb.py +0 -0
  50. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/ollama/__init__.py +0 -0
  51. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/openai/__init__.py +0 -0
  52. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/openai/openai_chat.py +0 -0
  53. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/openai/openai_embeddings.py +0 -0
  54. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/opensearch/opensearch_vector.py +0 -0
  55. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/pgvector/__init__.py +0 -0
  56. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/pgvector/pgvector.py +0 -0
  57. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/pinecone/__init__.py +0 -0
  58. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/pinecone/pinecone_vector.py +0 -0
  59. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qdrant/__init__.py +0 -0
  60. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qdrant/qdrant.py +0 -0
  61. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianfan/Qianfan_Chat.py +0 -0
  62. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianfan/Qianfan_embeddings.py +0 -0
  63. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianfan/__init__.py +0 -0
  64. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianwen/QianwenAI_chat.py +0 -0
  65. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianwen/QianwenAI_embeddings.py +0 -0
  66. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/qianwen/__init__.py +0 -0
  67. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/types/__init__.py +0 -0
  68. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/utils.py +0 -0
  69. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/vannadb/__init__.py +0 -0
  70. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/vannadb/vannadb_vector.py +0 -0
  71. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/vllm/__init__.py +0 -0
  72. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/vllm/vllm.py +0 -0
  73. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/weaviate/__init__.py +0 -0
  74. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/weaviate/weaviate_vector.py +0 -0
  75. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/xinference/__init__.py +0 -0
  76. {vanna-0.7.5 → vanna-0.7.6}/src/vanna/xinference/xinference.py +0 -0
{vanna-0.7.5 → vanna-0.7.6}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: vanna
- Version: 0.7.5
+ Version: 0.7.6
  Summary: Generate SQL queries from natural language
  Author-email: Zain Hoda <zain@vanna.ai>
  Requires-Python: >=3.9
@@ -52,6 +52,8 @@ Requires-Dist: boto3 ; extra == "all"
  Requires-Dist: botocore ; extra == "all"
  Requires-Dist: langchain_core ; extra == "all"
  Requires-Dist: langchain_postgres ; extra == "all"
+ Requires-Dist: langchain-community ; extra == "all"
+ Requires-Dist: langchain-huggingface ; extra == "all"
  Requires-Dist: xinference-client ; extra == "all"
  Requires-Dist: anthropic ; extra == "anthropic"
  Requires-Dist: azure-search-documents ; extra == "azuresearch"
@@ -79,6 +81,10 @@ Requires-Dist: httpx ; extra == "ollama"
  Requires-Dist: openai ; extra == "openai"
  Requires-Dist: opensearch-py ; extra == "opensearch"
  Requires-Dist: opensearch-dsl ; extra == "opensearch"
+ Requires-Dist: langchain-community ; extra == "opensearch"
+ Requires-Dist: langchain-huggingface ; extra == "opensearch"
+ Requires-Dist: oracledb ; extra == "oracle"
+ Requires-Dist: chromadb ; extra == "oracle"
  Requires-Dist: langchain-postgres>=0.0.12 ; extra == "pgvector"
  Requires-Dist: pinecone-client ; extra == "pinecone"
  Requires-Dist: fastembed ; extra == "pinecone"
@@ -115,6 +121,7 @@ Provides-Extra: mysql
  Provides-Extra: ollama
  Provides-Extra: openai
  Provides-Extra: opensearch
+ Provides-Extra: oracle
  Provides-Extra: pgvector
  Provides-Extra: pinecone
  Provides-Extra: postgres
@@ -129,9 +136,9 @@ Provides-Extra: zhipuai



- | GitHub | PyPI | Documentation |
- | ------ | ---- | ------------- |
- | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) |
+ | GitHub | PyPI | Documentation | Gurubase |
+ | ------ | ---- | ------------- | -------- |
+ | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) | [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20Vanna%20Guru-006BFF)](https://gurubase.io/g/vanna) |

  # Vanna
  Vanna is an MIT-licensed open-source Python RAG (Retrieval-Augmented Generation) framework for SQL generation and related functionality.
@@ -164,6 +171,46 @@ These are some of the user interfaces that we've built using Vanna. You can use
  - [vanna-ai/vanna-flask](https://github.com/vanna-ai/vanna-flask)
  - [vanna-ai/vanna-slack](https://github.com/vanna-ai/vanna-slack)

+ ## Supported LLMs
+
+ - [OpenAI](https://github.com/vanna-ai/vanna/tree/main/src/vanna/openai)
+ - [Anthropic](https://github.com/vanna-ai/vanna/tree/main/src/vanna/anthropic)
+ - [Gemini](https://github.com/vanna-ai/vanna/blob/main/src/vanna/google/gemini_chat.py)
+ - [HuggingFace](https://github.com/vanna-ai/vanna/blob/main/src/vanna/hf/hf.py)
+ - [AWS Bedrock](https://github.com/vanna-ai/vanna/tree/main/src/vanna/bedrock)
+ - [Ollama](https://github.com/vanna-ai/vanna/tree/main/src/vanna/ollama)
+ - [Qianwen](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qianwen)
+ - [Qianfan](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qianfan)
+ - [Zhipu](https://github.com/vanna-ai/vanna/tree/main/src/vanna/ZhipuAI)
+
+ ## Supported VectorStores
+
+ - [AzureSearch](https://github.com/vanna-ai/vanna/tree/main/src/vanna/azuresearch)
+ - [Opensearch](https://github.com/vanna-ai/vanna/tree/main/src/vanna/opensearch)
+ - [PgVector](https://github.com/vanna-ai/vanna/tree/main/src/vanna/pgvector)
+ - [PineCone](https://github.com/vanna-ai/vanna/tree/main/src/vanna/pinecone)
+ - [ChromaDB](https://github.com/vanna-ai/vanna/tree/main/src/vanna/chromadb)
+ - [FAISS](https://github.com/vanna-ai/vanna/tree/main/src/vanna/faiss)
+ - [Marqo](https://github.com/vanna-ai/vanna/tree/main/src/vanna/marqo)
+ - [Milvus](https://github.com/vanna-ai/vanna/tree/main/src/vanna/milvus)
+ - [Qdrant](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qdrant)
+ - [Weaviate](https://github.com/vanna-ai/vanna/tree/main/src/vanna/weaviate)
+ - [Oracle](https://github.com/vanna-ai/vanna/tree/main/src/vanna/oracle)
+
+ ## Supported Databases
+
+ - [PostgreSQL](https://www.postgresql.org/)
+ - [MySQL](https://www.mysql.com/)
+ - [PrestoDB](https://prestodb.io/)
+ - [Apache Hive](https://hive.apache.org/)
+ - [ClickHouse](https://clickhouse.com/)
+ - [Snowflake](https://www.snowflake.com/en/)
+ - [Oracle](https://www.oracle.com/)
+ - [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+ - [BigQuery](https://cloud.google.com/bigquery)
+ - [SQLite](https://www.sqlite.org/)
+ - [DuckDB](https://duckdb.org/)
+

  ## Getting started
  See the [documentation](https://vanna.ai/docs/) for specifics on your desired database, LLM, etc.
{vanna-0.7.5 → vanna-0.7.6}/README.md

@@ -1,8 +1,8 @@


- | GitHub | PyPI | Documentation |
- | ------ | ---- | ------------- |
- | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) |
+ | GitHub | PyPI | Documentation | Gurubase |
+ | ------ | ---- | ------------- | -------- |
+ | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) | [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20Vanna%20Guru-006BFF)](https://gurubase.io/g/vanna) |

  # Vanna
  Vanna is an MIT-licensed open-source Python RAG (Retrieval-Augmented Generation) framework for SQL generation and related functionality.
@@ -35,6 +35,46 @@ These are some of the user interfaces that we've built using Vanna. You can use
  - [vanna-ai/vanna-flask](https://github.com/vanna-ai/vanna-flask)
  - [vanna-ai/vanna-slack](https://github.com/vanna-ai/vanna-slack)

+ ## Supported LLMs
+
+ - [OpenAI](https://github.com/vanna-ai/vanna/tree/main/src/vanna/openai)
+ - [Anthropic](https://github.com/vanna-ai/vanna/tree/main/src/vanna/anthropic)
+ - [Gemini](https://github.com/vanna-ai/vanna/blob/main/src/vanna/google/gemini_chat.py)
+ - [HuggingFace](https://github.com/vanna-ai/vanna/blob/main/src/vanna/hf/hf.py)
+ - [AWS Bedrock](https://github.com/vanna-ai/vanna/tree/main/src/vanna/bedrock)
+ - [Ollama](https://github.com/vanna-ai/vanna/tree/main/src/vanna/ollama)
+ - [Qianwen](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qianwen)
+ - [Qianfan](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qianfan)
+ - [Zhipu](https://github.com/vanna-ai/vanna/tree/main/src/vanna/ZhipuAI)
+
+ ## Supported VectorStores
+
+ - [AzureSearch](https://github.com/vanna-ai/vanna/tree/main/src/vanna/azuresearch)
+ - [Opensearch](https://github.com/vanna-ai/vanna/tree/main/src/vanna/opensearch)
+ - [PgVector](https://github.com/vanna-ai/vanna/tree/main/src/vanna/pgvector)
+ - [PineCone](https://github.com/vanna-ai/vanna/tree/main/src/vanna/pinecone)
+ - [ChromaDB](https://github.com/vanna-ai/vanna/tree/main/src/vanna/chromadb)
+ - [FAISS](https://github.com/vanna-ai/vanna/tree/main/src/vanna/faiss)
+ - [Marqo](https://github.com/vanna-ai/vanna/tree/main/src/vanna/marqo)
+ - [Milvus](https://github.com/vanna-ai/vanna/tree/main/src/vanna/milvus)
+ - [Qdrant](https://github.com/vanna-ai/vanna/tree/main/src/vanna/qdrant)
+ - [Weaviate](https://github.com/vanna-ai/vanna/tree/main/src/vanna/weaviate)
+ - [Oracle](https://github.com/vanna-ai/vanna/tree/main/src/vanna/oracle)
+
+ ## Supported Databases
+
+ - [PostgreSQL](https://www.postgresql.org/)
+ - [MySQL](https://www.mysql.com/)
+ - [PrestoDB](https://prestodb.io/)
+ - [Apache Hive](https://hive.apache.org/)
+ - [ClickHouse](https://clickhouse.com/)
+ - [Snowflake](https://www.snowflake.com/en/)
+ - [Oracle](https://www.oracle.com/)
+ - [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+ - [BigQuery](https://cloud.google.com/bigquery)
+ - [SQLite](https://www.sqlite.org/)
+ - [DuckDB](https://duckdb.org/)
+

  ## Getting started
  See the [documentation](https://vanna.ai/docs/) for specifics on your desired database, LLM, etc.
{vanna-0.7.5 → vanna-0.7.6}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"

  [project]
  name = "vanna"
- version = "0.7.5"
+ version = "0.7.6"
  authors = [
    { name="Zain Hoda", email="zain@vanna.ai" },
  ]
@@ -33,7 +33,7 @@ bigquery = ["google-cloud-bigquery"]
  snowflake = ["snowflake-connector-python"]
  duckdb = ["duckdb"]
  google = ["google-generativeai", "google-cloud-aiplatform"]
- all = ["psycopg2-binary", "db-dtypes", "PyMySQL", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "qianfan", "mistralai>=1.0.0", "chromadb", "anthropic", "zhipuai", "marqo", "google-generativeai", "google-cloud-aiplatform", "qdrant-client", "fastembed", "ollama", "httpx", "opensearch-py", "opensearch-dsl", "transformers", "pinecone-client", "pymilvus[model]","weaviate-client", "azure-search-documents", "azure-identity", "azure-common", "faiss-cpu", "boto", "boto3", "botocore", "langchain_core", "langchain_postgres", "xinference-client"]
+ all = ["psycopg2-binary", "db-dtypes", "PyMySQL", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "qianfan", "mistralai>=1.0.0", "chromadb", "anthropic", "zhipuai", "marqo", "google-generativeai", "google-cloud-aiplatform", "qdrant-client", "fastembed", "ollama", "httpx", "opensearch-py", "opensearch-dsl", "transformers", "pinecone-client", "pymilvus[model]","weaviate-client", "azure-search-documents", "azure-identity", "azure-common", "faiss-cpu", "boto", "boto3", "botocore", "langchain_core", "langchain_postgres", "langchain-community", "langchain-huggingface", "xinference-client"]
  test = ["tox"]
  chromadb = ["chromadb"]
  openai = ["openai"]
@@ -47,7 +47,7 @@ ollama = ["ollama", "httpx"]
  qdrant = ["qdrant-client", "fastembed"]
  vllm = ["vllm"]
  pinecone = ["pinecone-client", "fastembed"]
- opensearch = ["opensearch-py", "opensearch-dsl"]
+ opensearch = ["opensearch-py", "opensearch-dsl", "langchain-community", "langchain-huggingface"]
  hf = ["transformers"]
  milvus = ["pymilvus[model]"]
  bedrock = ["boto3", "botocore"]
@@ -57,3 +57,4 @@ pgvector = ["langchain-postgres>=0.0.12"]
  faiss-cpu = ["faiss-cpu"]
  faiss-gpu = ["faiss-gpu"]
  xinference-client = ["xinference-client"]
+ oracle = ["oracledb", "chromadb"]
{vanna-0.7.5 → vanna-0.7.6}/src/vanna/base/base.py

@@ -306,7 +306,7 @@ class VannaBase(ABC):

          message_log = [
              self.system_message(
-                 f"You are a helpful data assistant. The user asked the question: '{question}'\n\nThe SQL query for this question was: {sql}\n\nThe following is a pandas DataFrame with the results of the query: \n{df.to_markdown()}\n\n"
+                 f"You are a helpful data assistant. The user asked the question: '{question}'\n\nThe SQL query for this question was: {sql}\n\nThe following is a pandas DataFrame with the results of the query: \n{df.head(25).to_markdown()}\n\n"
              ),
              self.user_message(
                  f"Generate a list of {n_questions} followup questions that the user might ask about this data. Respond with a list of questions, one per line. Do not answer with any explanations -- just the questions. Remember that there should be an unambiguous SQL query that can be generated from the question. Prefer questions that are answerable outside of the context of this conversation. Prefer questions that are slight modifications of the SQL query that was generated that allow digging deeper into the data. Each question will be turned into a button that the user can click to generate a new SQL query so don't use 'example' type questions. Each question must have a one-to-one correspondence with an instantiated SQL query." +
@@ -689,6 +689,9 @@ class VannaBase(ABC):
          return response

      def _extract_python_code(self, markdown_string: str) -> str:
+         # Strip whitespace to avoid indentation errors in LLM-generated code
+         markdown_string = markdown_string.strip()
+
          # Regex pattern to match Python code blocks
          pattern = r"```[\w\s]*python\n([\s\S]*?)```|```([\s\S]*?)```"

@@ -1167,7 +1170,7 @@ class VannaBase(ABC):
          vn.connect_to_oracle(
              user="username",
              password="password",
-             dns="host:port/sid",
+             dsn="host:port/sid",
          )
          ```
          Args:
vanna-0.7.6/src/vanna/deepseek/__init__.py

@@ -0,0 +1 @@
+ from .deepseek_chat import DeepSeekChat
vanna-0.7.6/src/vanna/deepseek/deepseek_chat.py

@@ -0,0 +1,60 @@
+ import os
+
+ from openai import OpenAI
+
+ from ..base import VannaBase
+
+
+
+ # from vanna.chromadb import ChromaDB_VectorStore
+
+ # class DeepSeekVanna(ChromaDB_VectorStore, DeepSeekChat):
+ #     def __init__(self, config=None):
+ #         ChromaDB_VectorStore.__init__(self, config=config)
+ #         DeepSeekChat.__init__(self, config=config)
+
+ # vn = DeepSeekVanna(config={"api_key": "sk-************", "model": "deepseek-chat"})
+
+
+ class DeepSeekChat(VannaBase):
+     def __init__(self, config=None):
+         if config is None:
+             raise ValueError(
+                 "For DeepSeek, config must be provided with an api_key and model"
+             )
+         if "api_key" not in config:
+             raise ValueError("config must contain a DeepSeek api_key")
+
+         if "model" not in config:
+             raise ValueError("config must contain a DeepSeek model")
+
+         api_key = config["api_key"]
+         model = config["model"]
+         self.model = model
+         self.client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com/v1")
+
+     def system_message(self, message: str) -> any:
+         return {"role": "system", "content": message}
+
+     def user_message(self, message: str) -> any:
+         return {"role": "user", "content": message}
+
+     def assistant_message(self, message: str) -> any:
+         return {"role": "assistant", "content": message}
+
+     def generate_sql(self, question: str, **kwargs) -> str:
+         # Use the parent class's generate_sql
+         sql = super().generate_sql(question, **kwargs)
+
+         # Replace "\_" with "_"
+         sql = sql.replace("\\_", "_")
+
+         return sql
+
+     def submit_prompt(self, prompt, **kwargs) -> str:
+         chat_response = self.client.chat.completions.create(
+             model=self.model,
+             messages=prompt,
+         )
+
+         return chat_response.choices[0].message.content
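As the commented-out example at the top of the new module suggests, DeepSeekChat is meant to be combined with a vector store through Vanna's usual mixin pattern. A minimal sketch along those lines (the placeholder API key and the deepseek-chat model name come from the module's own comment and are illustrative, not tested here):

    from vanna.chromadb import ChromaDB_VectorStore
    from vanna.deepseek import DeepSeekChat

    # Compose the DeepSeek LLM with a ChromaDB vector store, as in the module's comment
    class DeepSeekVanna(ChromaDB_VectorStore, DeepSeekChat):
        def __init__(self, config=None):
            ChromaDB_VectorStore.__init__(self, config=config)
            DeepSeekChat.__init__(self, config=config)

    vn = DeepSeekVanna(config={"api_key": "sk-...", "model": "deepseek-chat"})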
{vanna-0.7.5 → vanna-0.7.6}/src/vanna/google/gemini_chat.py

@@ -1,4 +1,5 @@
  import os
+
  from ..base import VannaBase


@@ -30,8 +31,29 @@ class GoogleGeminiChat(VannaBase):
              self.chat_model = genai.GenerativeModel(model_name)
          else:
              # Authenticate using VertexAI
+             import google.auth
+             import vertexai
              from vertexai.generative_models import GenerativeModel
-             self.chat_model = GenerativeModel(model_name)
+
+             json_file_path = config.get("google_credentials")  # Assuming the JSON file path is provided in the config
+
+             if not json_file_path or not os.path.exists(json_file_path):
+                 raise FileNotFoundError(f"JSON credentials file not found at: {json_file_path}")
+
+             try:
+                 # Validate and set the JSON file path for GOOGLE_APPLICATION_CREDENTIALS
+                 os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = json_file_path
+
+                 # Initialize VertexAI with the credentials
+                 credentials, _ = google.auth.default()
+                 vertexai.init(credentials=credentials)
+                 self.chat_model = GenerativeModel(model_name)
+             except google.auth.exceptions.DefaultCredentialsError as e:
+                 raise RuntimeError(f"Default credentials error: {e}")
+             except google.auth.exceptions.TransportError as e:
+                 raise RuntimeError(f"Transport error during authentication: {e}")
+             except Exception as e:
+                 raise RuntimeError(f"Failed to authenticate using JSON file: {e}")

      def system_message(self, message: str) -> any:
          return message
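The practical effect of this change is that VertexAI users now pass a service-account JSON path through the config rather than relying on ambient credentials. A minimal sketch of how that might be wired up (only the "google_credentials" key is confirmed by the diff; the model_name key, file path, and vector-store pairing are illustrative assumptions):

    from vanna.chromadb import ChromaDB_VectorStore
    from vanna.google import GoogleGeminiChat

    class MyVanna(ChromaDB_VectorStore, GoogleGeminiChat):
        def __init__(self, config=None):
            ChromaDB_VectorStore.__init__(self, config=config)
            GoogleGeminiChat.__init__(self, config=config)

    # With no API key in the config, the constructor takes the VertexAI branch above
    # and reads the service-account JSON path from "google_credentials".
    vn = MyVanna(config={
        "model_name": "gemini-1.5-pro",                          # assumed key and model, illustrative
        "google_credentials": "/path/to/service-account.json",   # key confirmed by the diff; path illustrative
    })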
{vanna-0.7.5 → vanna-0.7.6}/src/vanna/ollama/ollama.py

@@ -91,7 +91,7 @@ class Ollama(VannaBase):
                   f"model={self.model},\n"
                   f"options={self.ollama_options},\n"
                   f"keep_alive={self.keep_alive}")
-         self.log(f"Prompt Content:\n{json.dumps(prompt)}")
+         self.log(f"Prompt Content:\n{json.dumps(prompt, ensure_ascii=False)}")
          response_dict = self.ollama_client.chat(model=self.model,
                                                  messages=prompt,
                                                  stream=False,
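The only behavioural change here is in logging: with ensure_ascii=False, non-ASCII prompt content (for example Chinese question text) is logged verbatim instead of as \uXXXX escapes. A quick standalone illustration:

    import json

    prompt = [{"role": "user", "content": "查询销量最高的产品"}]
    print(json.dumps(prompt))                      # escapes to \u67e5\u8be2... and is hard to read in logs
    print(json.dumps(prompt, ensure_ascii=False))  # keeps 查询销量最高的产品 readable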
vanna-0.7.6/src/vanna/opensearch/__init__.py

@@ -0,0 +1,2 @@
+ from .opensearch_vector import OpenSearch_VectorStore
+ from .opensearch_vector_semantic import OpenSearch_Semantic_VectorStore
vanna-0.7.6/src/vanna/opensearch/opensearch_vector_semantic.py

@@ -0,0 +1,175 @@
+ import json
+
+ import pandas as pd
+ from langchain_community.vectorstores import OpenSearchVectorSearch
+
+ from ..base import VannaBase
+ from ..utils import deterministic_uuid
+
+
+ class OpenSearch_Semantic_VectorStore(VannaBase):
+     def __init__(self, config=None):
+         VannaBase.__init__(self, config=config)
+         if config is None:
+             config = {}
+
+         if "embedding_function" in config:
+             self.embedding_function = config.get("embedding_function")
+         else:
+             from langchain_huggingface import HuggingFaceEmbeddings
+             self.embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+
+         self.n_results_sql = config.get("n_results_sql", config.get("n_results", 10))
+         self.n_results_documentation = config.get("n_results_documentation", config.get("n_results", 10))
+         self.n_results_ddl = config.get("n_results_ddl", config.get("n_results", 10))
+
+         self.document_index = config.get("es_document_index", "vanna_document_index")
+         self.ddl_index = config.get("es_ddl_index", "vanna_ddl_index")
+         self.question_sql_index = config.get("es_question_sql_index", "vanna_questions_sql_index")
+
+         self.log(f"OpenSearch_Semantic_VectorStore initialized with document_index: {self.document_index}, ddl_index: {self.ddl_index}, question_sql_index: {self.question_sql_index}")
+
+         es_urls = config.get("es_urls", "https://localhost:9200")
+         ssl = config.get("es_ssl", True)
+         verify_certs = config.get("es_verify_certs", True)
+
+         if "es_user" in config:
+             auth = (config["es_user"], config["es_password"])
+         else:
+             auth = None
+
+         headers = config.get("es_headers", None)
+         timeout = config.get("es_timeout", 60)
+         max_retries = config.get("es_max_retries", 10)
+
+         common_args = {
+             "opensearch_url": es_urls,
+             "embedding_function": self.embedding_function,
+             "engine": "faiss",
+             "http_auth": auth,
+             "use_ssl": ssl,
+             "verify_certs": verify_certs,
+             "timeout": timeout,
+             "max_retries": max_retries,
+             "retry_on_timeout": True,
+             "headers": headers,
+         }
+
+         self.documentation_store = OpenSearchVectorSearch(index_name=self.document_index, **common_args)
+         self.ddl_store = OpenSearchVectorSearch(index_name=self.ddl_index, **common_args)
+         self.sql_store = OpenSearchVectorSearch(index_name=self.question_sql_index, **common_args)
+
+     def add_ddl(self, ddl: str, **kwargs) -> str:
+         _id = deterministic_uuid(ddl) + "-ddl"
+         self.ddl_store.add_texts(texts=[ddl], ids=[_id], **kwargs)
+         return _id
+
+     def add_documentation(self, documentation: str, **kwargs) -> str:
+         _id = deterministic_uuid(documentation) + "-doc"
+         self.documentation_store.add_texts(texts=[documentation], ids=[_id], **kwargs)
+         return _id
+
+     def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
+         question_sql_json = json.dumps(
+             {
+                 "question": question,
+                 "sql": sql,
+             },
+             ensure_ascii=False,
+         )
+
+         _id = deterministic_uuid(question_sql_json) + "-sql"
+         self.sql_store.add_texts(texts=[question_sql_json], ids=[_id], **kwargs)
+         return _id
+
+     def get_related_ddl(self, question: str, **kwargs) -> list:
+         documents = self.ddl_store.similarity_search(query=question, k=self.n_results_ddl)
+         return [document.page_content for document in documents]
+
+     def get_related_documentation(self, question: str, **kwargs) -> list:
+         documents = self.documentation_store.similarity_search(query=question, k=self.n_results_documentation)
+         return [document.page_content for document in documents]
+
+     def get_similar_question_sql(self, question: str, **kwargs) -> list:
+         documents = self.sql_store.similarity_search(query=question, k=self.n_results_sql)
+         return [json.loads(document.page_content) for document in documents]
+
+     def get_training_data(self, **kwargs) -> pd.DataFrame:
+         data = []
+         query = {
+             "query": {
+                 "match_all": {}
+             }
+         }
+
+         indices = [
+             {"index": self.document_index, "type": "documentation"},
+             {"index": self.question_sql_index, "type": "sql"},
+             {"index": self.ddl_index, "type": "ddl"},
+         ]
+
+         # Use documentation_store.client consistently for search on all indices
+         opensearch_client = self.documentation_store.client
+
+         for index_info in indices:
+             index_name = index_info["index"]
+             training_data_type = index_info["type"]
+             scroll = '1m'  # keep scroll context for 1 minute
+             response = opensearch_client.search(
+                 index=index_name,
+                 ignore_unavailable=True,
+                 body=query,
+                 scroll=scroll,
+                 size=1000
+             )
+
+             scroll_id = response.get('_scroll_id')
+
+             while scroll_id:
+                 hits = response['hits']['hits']
+                 if not hits:
+                     break  # No more hits, exit loop
+
+                 for hit in hits:
+                     source = hit['_source']
+                     if training_data_type == "sql":
+                         try:
+                             doc_dict = json.loads(source['text'])
+                             content = doc_dict.get("sql")
+                             question = doc_dict.get("question")
+                         except json.JSONDecodeError as e:
+                             self.log(f"Skipping row with custom_id {hit['_id']} due to JSON parsing error: {e}", "Error")
+                             continue
+                     else:  # documentation or ddl
+                         content = source['text']
+                         question = None
+
+                     data.append({
+                         "id": hit["_id"],
+                         "training_data_type": training_data_type,
+                         "question": question,
+                         "content": content,
+                     })
+
+                 # Get next batch of results, using documentation_store.client.scroll
+                 response = opensearch_client.scroll(scroll_id=scroll_id, scroll=scroll)
+                 scroll_id = response.get('_scroll_id')
+
+         return pd.DataFrame(data)
+
+     def remove_training_data(self, id: str, **kwargs) -> bool:
+         try:
+             if id.endswith("-sql"):
+                 return self.sql_store.delete(ids=[id], **kwargs)
+             elif id.endswith("-ddl"):
+                 return self.ddl_store.delete(ids=[id], **kwargs)
+             elif id.endswith("-doc"):
+                 return self.documentation_store.delete(ids=[id], **kwargs)
+             else:
+                 return False
+         except Exception as e:
+             self.log(f"Error deleting training data: {e}", "Error")
+             return False
+
+     def generate_embedding(self, data: str, **kwargs) -> list[float]:
+         pass
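Like the other vector stores, the new semantic store is intended to be mixed in with an LLM class. A minimal sketch using the config keys read in the constructor above (the OpenSearch endpoint, credentials, and the OpenAI settings are illustrative assumptions, not values from the diff):

    from vanna.openai import OpenAI_Chat
    from vanna.opensearch import OpenSearch_Semantic_VectorStore

    class MyVanna(OpenSearch_Semantic_VectorStore, OpenAI_Chat):
        def __init__(self, config=None):
            OpenSearch_Semantic_VectorStore.__init__(self, config=config)
            OpenAI_Chat.__init__(self, config=config)

    vn = MyVanna(config={
        # OpenSearch connection settings read by the new store (values illustrative)
        "es_urls": "https://localhost:9200",
        "es_user": "admin",              # optional; pair with es_password
        "es_password": "admin",
        "es_verify_certs": False,
        # index names default to vanna_document_index / vanna_ddl_index / vanna_questions_sql_index
        # LLM settings consumed by OpenAI_Chat (illustrative)
        "api_key": "sk-...",
        "model": "gpt-4o-mini",
    })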
vanna-0.7.6/src/vanna/oracle/__init__.py

@@ -0,0 +1 @@
+ from .oracle_vector import Oracle_VectorStore