vanna 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
vanna/base/base.py CHANGED
@@ -437,7 +437,7 @@ class VannaBase(ABC):
437
437
  pass
438
438
 
439
439
  @abstractmethod
440
- def remove_training_data(id: str, **kwargs) -> bool:
440
+ def remove_training_data(self, id: str, **kwargs) -> bool:
441
441
  """
442
442
  Example:
443
443
  ```python
@@ -1276,15 +1276,10 @@ class VannaBase(ABC):
1276
1276
 
1277
1277
  def run_sql_bigquery(sql: str) -> Union[pd.DataFrame, None]:
1278
1278
  if conn:
1279
- try:
1280
- job = conn.query(sql)
1281
- df = job.result().to_dataframe()
1282
- return df
1283
- except GoogleAPIError as error:
1284
- errors = []
1285
- for error in error.errors:
1286
- errors.append(error["message"])
1287
- raise errors
1279
+ job = conn.query(sql)
1280
+ df = job.result().to_dataframe()
1281
+ return df
1282
+
1288
1283
  return None
1289
1284
 
1290
1285
  self.dialect = "BigQuery SQL"
vanna/flask/__init__.py CHANGED
@@ -12,9 +12,9 @@ from flasgger import Swagger
12
12
  from flask import Flask, Response, jsonify, request, send_from_directory
13
13
  from flask_sock import Sock
14
14
 
15
+ from ..base import VannaBase
15
16
  from .assets import css_content, html_content, js_content
16
17
  from .auth import AuthInterface, NoAuth
17
- from ..base import VannaBase
18
18
 
19
19
 
20
20
  class Cache(ABC):
@@ -1211,7 +1211,7 @@ class VannaFlaskApp(VannaFlaskAPI):
1211
1211
  self.config["ask_results_correct"] = ask_results_correct
1212
1212
  self.config["followup_questions"] = followup_questions
1213
1213
  self.config["summarization"] = summarization
1214
- self.config["function_generation"] = function_generation
1214
+ self.config["function_generation"] = function_generation and hasattr(vn, "get_function")
1215
1215
 
1216
1216
  self.index_html_path = index_html_path
1217
1217
  self.assets_folder = assets_folder
vanna/google/__init__.py CHANGED
@@ -1 +1,2 @@
1
- from .gemini_chat import GoogleGeminiChat
1
+ from .bigquery_vector import BigQuery_VectorStore
2
+ from .gemini_chat import GoogleGeminiChat
@@ -0,0 +1,230 @@
1
+ import datetime
2
+ import os
3
+ import uuid
4
+ from typing import List, Optional
5
+
6
+ import pandas as pd
7
+ from google.cloud import bigquery
8
+
9
+ from ..base import VannaBase
10
+
11
+
12
+ class BigQuery_VectorStore(VannaBase):
13
+ def __init__(self, config: dict, **kwargs):
14
+ self.config = config
15
+
16
+ self.n_results_sql = config.get("n_results_sql", config.get("n_results", 10))
17
+ self.n_results_documentation = config.get("n_results_documentation", config.get("n_results", 10))
18
+ self.n_results_ddl = config.get("n_results_ddl", config.get("n_results", 10))
19
+
20
+ if "api_key" in config or os.getenv("GOOGLE_API_KEY"):
21
+ """
22
+ If Google api_key is provided through config
23
+ or set as an environment variable, assign it.
24
+ """
25
+ print("Configuring genai")
26
+ import google.generativeai as genai
27
+
28
+ genai.configure(api_key=config["api_key"])
29
+
30
+ self.genai = genai
31
+ else:
32
+ # Authenticate using VertexAI
33
+ from vertexai.language_models import (
34
+ TextEmbeddingInput,
35
+ TextEmbeddingModel,
36
+ )
37
+
38
+ if self.config.get("project_id"):
39
+ self.project_id = self.config.get("project_id")
40
+ else:
41
+ self.project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
42
+
43
+ if self.project_id is None:
44
+ raise ValueError("Project ID is not set")
45
+
46
+ self.conn = bigquery.Client(project=self.project_id)
47
+
48
+ dataset_name = self.config.get('bigquery_dataset_name', 'vanna_managed')
49
+ self.dataset_id = f"{self.project_id}.{dataset_name}"
50
+ dataset = bigquery.Dataset(self.dataset_id)
51
+
52
+ try:
53
+ self.conn.get_dataset(self.dataset_id) # Make an API request.
54
+ print(f"Dataset {self.dataset_id} already exists")
55
+ except Exception:
56
+ # Dataset does not exist, create it
57
+ dataset.location = "US"
58
+ self.conn.create_dataset(dataset, timeout=30) # Make an API request.
59
+ print(f"Created dataset {self.dataset_id}")
60
+
61
+ # Create a table called training_data in the dataset that contains the columns:
62
+ # id, training_data_type, question, content, embedding, created_at
63
+
64
+ self.table_id = f"{self.dataset_id}.training_data"
65
+ schema = [
66
+ bigquery.SchemaField("id", "STRING", mode="REQUIRED"),
67
+ bigquery.SchemaField("training_data_type", "STRING", mode="REQUIRED"),
68
+ bigquery.SchemaField("question", "STRING", mode="REQUIRED"),
69
+ bigquery.SchemaField("content", "STRING", mode="REQUIRED"),
70
+ bigquery.SchemaField("embedding", "FLOAT64", mode="REPEATED"),
71
+ bigquery.SchemaField("created_at", "TIMESTAMP", mode="REQUIRED"),
72
+ ]
73
+
74
+ table = bigquery.Table(self.table_id, schema=schema)
75
+
76
+ try:
77
+ self.conn.get_table(self.table_id) # Make an API request.
78
+ print(f"Table {self.table_id} already exists")
79
+ except Exception:
80
+ # Table does not exist, create it
81
+ self.conn.create_table(table, timeout=30) # Make an API request.
82
+ print(f"Created table {self.table_id}")
83
+
84
+ # Create VECTOR INDEX IF NOT EXISTS
85
+ # TODO: This requires 5000 rows before it can be created
86
+ # vector_index_query = f"""
87
+ # CREATE VECTOR INDEX IF NOT EXISTS my_index
88
+ # ON `{self.table_id}`(embedding)
89
+ # OPTIONS(
90
+ # distance_type='COSINE',
91
+ # index_type='IVF',
92
+ # ivf_options='{{"num_lists": 1000}}'
93
+ # )
94
+ # """
95
+
96
+ # try:
97
+ # self.conn.query(vector_index_query).result() # Make an API request.
98
+ # print(f"Vector index on {self.table_id} created or already exists")
99
+ # except Exception as e:
100
+ # print(f"Failed to create vector index: {e}")
101
+
102
+ def store_training_data(self, training_data_type: str, question: str, content: str, embedding: List[float], **kwargs) -> str:
103
+ id = str(uuid.uuid4())
104
+ created_at = datetime.datetime.now()
105
+ self.conn.insert_rows_json(self.table_id, [{
106
+ "id": id,
107
+ "training_data_type": training_data_type,
108
+ "question": question,
109
+ "content": content,
110
+ "embedding": embedding,
111
+ "created_at": created_at.isoformat()
112
+ }])
113
+
114
+ return id
115
+
116
+ def fetch_similar_training_data(self, training_data_type: str, question: str, n_results, **kwargs) -> pd.DataFrame:
117
+ question_embedding = self.generate_question_embedding(question)
118
+
119
+ query = f"""
120
+ SELECT
121
+ base.id as id,
122
+ base.question as question,
123
+ base.training_data_type as training_data_type,
124
+ base.content as content,
125
+ distance
126
+ FROM
127
+ VECTOR_SEARCH(
128
+ TABLE `{self.table_id}`,
129
+ 'embedding',
130
+ (SELECT * FROM UNNEST([STRUCT({question_embedding})])),
131
+ top_k => 5,
132
+ distance_type => 'COSINE',
133
+ options => '{{"use_brute_force":true}}'
134
+ )
135
+ WHERE
136
+ base.training_data_type = '{training_data_type}'
137
+ """
138
+
139
+ results = self.conn.query(query).result().to_dataframe()
140
+ return results
141
+
142
+ def generate_question_embedding(self, data: str, **kwargs) -> List[float]:
143
+ result = self.genai.embed_content(
144
+ model="models/text-embedding-004",
145
+ content=data,
146
+ task_type="retrieval_query")
147
+
148
+ if 'embedding' in result:
149
+ return result['embedding']
150
+ else:
151
+ raise ValueError("No embeddings returned")
152
+
153
+ def generate_storage_embedding(self, data: str, **kwargs) -> List[float]:
154
+ result = self.genai.embed_content(
155
+ model="models/text-embedding-004",
156
+ content=data,
157
+ task_type="retrieval_document")
158
+
159
+ if 'embedding' in result:
160
+ return result['embedding']
161
+ else:
162
+ raise ValueError("No embeddings returned")
163
+
164
+ # task = "RETRIEVAL_DOCUMENT"
165
+ # inputs = [TextEmbeddingInput(data, task)]
166
+ # embeddings = self.vertex_embedding_model.get_embeddings(inputs)
167
+
168
+ # if len(embeddings) == 0:
169
+ # raise ValueError("No embeddings returned")
170
+
171
+ # return embeddings[0].values
172
+
173
+ return result
174
+
175
+ def generate_embedding(self, data: str, **kwargs) -> List[float]:
176
+ return self.generate_storage_embedding(data, **kwargs)
177
+
178
+ def get_similar_question_sql(self, question: str, **kwargs) -> list:
179
+ df = self.fetch_similar_training_data(training_data_type="sql", question=question, n_results=self.n_results_sql)
180
+
181
+ # Return a list of dictionaries with only question, sql fields. The content field needs to be renamed to sql
182
+ return df.rename(columns={"content": "sql"})[["question", "sql"]].to_dict(orient="records")
183
+
184
+ def get_related_ddl(self, question: str, **kwargs) -> list:
185
+ df = self.fetch_similar_training_data(training_data_type="ddl", question=question, n_results=self.n_results_ddl)
186
+
187
+ # Return a list of strings of the content
188
+ return df["content"].tolist()
189
+
190
+ def get_related_documentation(self, question: str, **kwargs) -> list:
191
+ df = self.fetch_similar_training_data(training_data_type="documentation", question=question, n_results=self.n_results_documentation)
192
+
193
+ # Return a list of strings of the content
194
+ return df["content"].tolist()
195
+
196
+ def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
197
+ doc = {
198
+ "question": question,
199
+ "sql": sql
200
+ }
201
+
202
+ embedding = self.generate_embedding(str(doc))
203
+
204
+ return self.store_training_data(training_data_type="sql", question=question, content=sql, embedding=embedding)
205
+
206
+ def add_ddl(self, ddl: str, **kwargs) -> str:
207
+ embedding = self.generate_embedding(ddl)
208
+
209
+ return self.store_training_data(training_data_type="ddl", question="", content=ddl, embedding=embedding)
210
+
211
+ def add_documentation(self, documentation: str, **kwargs) -> str:
212
+ embedding = self.generate_embedding(documentation)
213
+
214
+ return self.store_training_data(training_data_type="documentation", question="", content=documentation, embedding=embedding)
215
+
216
+ def get_training_data(self, **kwargs) -> pd.DataFrame:
217
+ query = f"SELECT id, training_data_type, question, content FROM `{self.table_id}`"
218
+
219
+ return self.conn.query(query).result().to_dataframe()
220
+
221
+ def remove_training_data(self, id: str, **kwargs) -> bool:
222
+ query = f"DELETE FROM `{self.table_id}` WHERE id = '{id}'"
223
+
224
+ try:
225
+ self.conn.query(query).result()
226
+ return True
227
+
228
+ except Exception as e:
229
+ print(f"Failed to remove training data: {e}")
230
+ return False
vanna/mistral/mistral.py CHANGED
@@ -1,5 +1,7 @@
1
- from mistralai.client import MistralClient
2
- from mistralai.models.chat_completion import ChatMessage
1
+ import os
2
+
3
+ from mistralai import Mistral as MistralClient
4
+ from mistralai import UserMessage
3
5
 
4
6
  from ..base import VannaBase
5
7
 
@@ -23,13 +25,13 @@ class Mistral(VannaBase):
23
25
  self.model = model
24
26
 
25
27
  def system_message(self, message: str) -> any:
26
- return ChatMessage(role="system", content=message)
28
+ return {"role": "system", "content": message}
27
29
 
28
30
  def user_message(self, message: str) -> any:
29
- return ChatMessage(role="user", content=message)
31
+ return {"role": "user", "content": message}
30
32
 
31
33
  def assistant_message(self, message: str) -> any:
32
- return ChatMessage(role="assistant", content=message)
34
+ return {"role": "assistant", "content": message}
33
35
 
34
36
  def generate_sql(self, question: str, **kwargs) -> str:
35
37
  # Use the super generate_sql
@@ -41,7 +43,7 @@ class Mistral(VannaBase):
41
43
  return sql
42
44
 
43
45
  def submit_prompt(self, prompt, **kwargs) -> str:
44
- chat_response = self.client.chat(
46
+ chat_response = self.client.chat.complete(
45
47
  model=self.model,
46
48
  messages=prompt,
47
49
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vanna
3
- Version: 0.6.5
3
+ Version: 0.6.6
4
4
  Summary: Generate SQL queries from natural language
5
5
  Author-email: Zain Hoda <zain@vanna.ai>
6
6
  Requires-Python: >=3.9
@@ -26,7 +26,7 @@ Requires-Dist: snowflake-connector-python ; extra == "all"
26
26
  Requires-Dist: duckdb ; extra == "all"
27
27
  Requires-Dist: openai ; extra == "all"
28
28
  Requires-Dist: qianfan ; extra == "all"
29
- Requires-Dist: mistralai ; extra == "all"
29
+ Requires-Dist: mistralai>=1.0.0 ; extra == "all"
30
30
  Requires-Dist: chromadb ; extra == "all"
31
31
  Requires-Dist: anthropic ; extra == "all"
32
32
  Requires-Dist: zhipuai ; extra == "all"
@@ -56,7 +56,7 @@ Requires-Dist: google-cloud-aiplatform ; extra == "google"
56
56
  Requires-Dist: transformers ; extra == "hf"
57
57
  Requires-Dist: marqo ; extra == "marqo"
58
58
  Requires-Dist: pymilvus[model] ; extra == "milvus"
59
- Requires-Dist: mistralai ; extra == "mistralai"
59
+ Requires-Dist: mistralai>=1.0.0 ; extra == "mistralai"
60
60
  Requires-Dist: PyMySQL ; extra == "mysql"
61
61
  Requires-Dist: ollama ; extra == "ollama"
62
62
  Requires-Dist: httpx ; extra == "ollama"
@@ -9,16 +9,17 @@ vanna/advanced/__init__.py,sha256=oDj9g1JbrbCfp4WWdlr_bhgdMqNleyHgr6VXX6DcEbo,65
9
9
  vanna/anthropic/__init__.py,sha256=85s_2mAyyPxc0T_0JEvYeAkEKWJwkwqoyUwSC5dw9Gk,43
10
10
  vanna/anthropic/anthropic_chat.py,sha256=7X3x8SYwDY28aGyBnt0YNRMG8YY1p_t-foMfKGj8_Oo,2627
11
11
  vanna/base/__init__.py,sha256=Sl-HM1RRYzAZoSqmL1CZQmF3ZF-byYTCFQP3JZ2A5MU,28
12
- vanna/base/base.py,sha256=3Du70NrXQMn_LOif2YFPRRVKo4wH5-f6eZcLlXEX0X8,71705
12
+ vanna/base/base.py,sha256=DrXaJcMhIjD6BEqLu4JNZaZZ8nTM4SppjdvueJjEcko,71463
13
13
  vanna/bedrock/__init__.py,sha256=hRT2bgJbHEqViLdL-t9hfjSfFdIOkPU2ADBt-B1En-8,46
14
14
  vanna/bedrock/bedrock_converse.py,sha256=Nx5kYm-diAfYmsWAnTP5xnv7V84Og69-AP9b3seIe0E,2869
15
15
  vanna/chromadb/__init__.py,sha256=-iL0nW_g4uM8nWKMuWnNePfN4nb9uk8P3WzGvezOqRg,50
16
16
  vanna/chromadb/chromadb_vector.py,sha256=eKyPck99Y6Jt-BNWojvxLG-zvAERzLSm-3zY-bKXvaA,8792
17
17
  vanna/exceptions/__init__.py,sha256=dJ65xxxZh1lqBeg6nz6Tq_r34jLVmjvBvPO9Q6hFaQ8,685
18
- vanna/flask/__init__.py,sha256=r1ucQupb6wuTcjVVKpkdrg6R38FZe6KQoKw9AtcghDQ,42889
18
+ vanna/flask/__init__.py,sha256=cllLWqJ2SYVdvF4CQ-8cipoOdjgkoe0rChWnawtzMyA,42921
19
19
  vanna/flask/assets.py,sha256=_UoUr57sS0QL2BuTxAOe9k4yy8T7-fp2NpbRSVtW3IM,451769
20
20
  vanna/flask/auth.py,sha256=UpKxh7W5cd43W0LGch0VqhncKwB78L6dtOQkl1JY5T0,1246
21
- vanna/google/__init__.py,sha256=M-dCxCZcKL4bTQyMLj6r6VRs65YNX9Tl2aoPCuqGm-8,41
21
+ vanna/google/__init__.py,sha256=6D8rDBjKJJm_jpVn9b4Vc2NR-R779ed_bnHhWmxCJXE,92
22
+ vanna/google/bigquery_vector.py,sha256=rkP94Xd1lNYjU1x3MDLvqmGSPUYtDfQwvlqVmX44jyM,8839
22
23
  vanna/google/gemini_chat.py,sha256=j1szC2PamMLFrs0Z4lYPS69i017FYICe-mNObNYFBPQ,1576
23
24
  vanna/hf/__init__.py,sha256=vD0bIhfLkA1UsvVSF4MAz3Da8aQunkQo3wlDztmMuj0,19
24
25
  vanna/hf/hf.py,sha256=N8N5g3xvKDBt3dez2r_U0qATxbl2pN8SVLTZK9CSRA0,3020
@@ -27,7 +28,7 @@ vanna/marqo/marqo.py,sha256=W7WTtzWp4RJjZVy6OaXHqncUBIPdI4Q7qH7BRCxZ1_A,5242
27
28
  vanna/milvus/__init__.py,sha256=VBasJG2eTKbJI6CEand7kPLNBrqYrn0QCAhSYVz814s,46
28
29
  vanna/milvus/milvus_vector.py,sha256=Mq0eaSh0UcTYhgh8mTm0fvS6rbfL6tQONVnDZGemWoM,11268
29
30
  vanna/mistral/__init__.py,sha256=70rTY-69Z2ehkkMj84dNMCukPo6AWdflBGvIB_pztS0,29
30
- vanna/mistral/mistral.py,sha256=DAEqAT9SzC91rfMM_S3SuzBZ34MrKHw9qAj6EP2MGVk,1508
31
+ vanna/mistral/mistral.py,sha256=rcdgmUSQniLkah2VL23VGYRa9WXpOy_dZN4S0kc__V8,1494
31
32
  vanna/mock/__init__.py,sha256=nYR2WfcV5NdwpK3V64QGOWHBGc3ESN9uV68JLS76aRw,97
32
33
  vanna/mock/embedding.py,sha256=ggnP7KuPh6dlqeUFtoN8t0J0P7_yRNtn9rIq6h8g8-w,250
33
34
  vanna/mock/llm.py,sha256=WpG9f1pKZftPBHqgIYdARKB2Z9DZhOALYOJWoOjjFEc,518
@@ -53,6 +54,6 @@ vanna/vllm/__init__.py,sha256=aNlUkF9tbURdeXAJ8ytuaaF1gYwcG3ny1MfNl_cwQYg,23
53
54
  vanna/vllm/vllm.py,sha256=oM_aA-1Chyl7T_Qc_yRKlL6oSX1etsijY9zQdjeMGMQ,2827
54
55
  vanna/weaviate/__init__.py,sha256=HL6PAl7ePBAkeG8uln-BmM7IUtWohyTPvDfcPzSGSCg,46
55
56
  vanna/weaviate/weaviate_vector.py,sha256=GEiu4Vd9w-7j10aB-zTxJ8gefqe_F-LUUGvttFs1vlg,7539
56
- vanna-0.6.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
57
- vanna-0.6.5.dist-info/METADATA,sha256=77ggtzQplTTDdsOI0_U4k-t5UwHVmI5O3TSkpm5OXzY,11997
58
- vanna-0.6.5.dist-info/RECORD,,
57
+ vanna-0.6.6.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
58
+ vanna-0.6.6.dist-info/METADATA,sha256=_qy1wVZqQOLplCxZ43KqCIEgL0Wq2X48ekJEACq-0Ng,12011
59
+ vanna-0.6.6.dist-info/RECORD,,
File without changes