vanna 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vanna/__init__.py CHANGED
@@ -1065,7 +1065,7 @@ def train(
1065
1065
  if sql:
1066
1066
  if question is None:
1067
1067
  question = generate_question(sql)
1068
- print("Question generated with sql:", Question, "\nAdding SQL...")
1068
+ print("Question generated with sql:", question, "\nAdding SQL...")
1069
1069
  return add_sql(question=question, sql=sql)
1070
1070
 
1071
1071
  if ddl:
vanna/base/base.py CHANGED
@@ -612,6 +612,65 @@ class VannaBase(ABC):
612
612
 
613
613
  return df_tables
614
614
 
615
def get_training_plan_generic(self, df) -> TrainingPlan:
    """Build a training plan from an information-schema style DataFrame.

    The relevant columns are located by case-insensitive substring match
    on the column labels: "database" or "table_catalog" for the database,
    "table_schema", "table_name", "column_name" and "data_type" for the
    rest. When several labels match, the first one is used.

    One plan item is produced per (database, schema, table) combination.
    Its value is a short sentence followed by a markdown table of that
    table's rows, restricted to the five located columns.

    Args:
        df: DataFrame with one row per column of the described tables.

    Returns:
        A TrainingPlan holding one TrainingPlanItem per table.

    Raises:
        IndexError: if no column label matches one of the expected names.
    """

    def find_column(*needles):
        # First column whose lower-cased label contains any of the needles.
        lowered = df.columns.str.lower()
        mask = lowered.str.contains(needles[0])
        for needle in needles[1:]:
            mask = mask | lowered.str.contains(needle)
        matches = df.columns[mask].to_list()
        if not matches:
            # Same exception type the bare `[0]` lookup used to raise,
            # but with a message that names what is missing.
            raise IndexError(
                "No column matching " + " or ".join(needles) + " found in the DataFrame"
            )
        return matches[0]

    database_column = find_column("database", "table_catalog")
    schema_column = find_column("table_schema")
    table_column = find_column("table_name")
    column_column = find_column("column_name")
    data_type_column = find_column("data_type")

    doc_columns = [
        database_column,
        schema_column,
        table_column,
        column_column,
        data_type_column,
    ]

    plan = TrainingPlan([])

    # Filter with boolean masks rather than string-built `df.query`
    # expressions: values containing quotes and column labels that are not
    # bare identifiers would otherwise break (or alter) the expression.
    # Masks keep the original row index and the same order of first
    # appearance at every nesting level.
    for database in df[database_column].unique().tolist():
        df_database = df[df[database_column] == database]

        for schema in df_database[schema_column].unique().tolist():
            df_schema = df_database[df_database[schema_column] == schema]

            for table in df_schema[table_column].unique().tolist():
                df_columns_filtered_to_table = df_schema[
                    df_schema[table_column] == table
                ]

                doc = f"The following columns are in the {table} table in the {database} database:\n\n"
                doc += df_columns_filtered_to_table[doc_columns].to_markdown()

                plan._plan.append(
                    TrainingPlanItem(
                        item_type=TrainingPlanItem.ITEM_TYPE_IS,
                        item_group=f"{database}.{schema}",
                        item_name=table,
                        item_value=doc,
                    )
                )

    return plan
673
+
615
674
  def get_training_plan_snowflake(
616
675
  self,
617
676
  filter_databases: Union[List[str], None] = None,
@@ -204,7 +204,7 @@ class OpenAI_Chat(VannaBase):
204
204
  print(
205
205
  f"Using engine {self.config['engine']} for {num_tokens} tokens (approx)"
206
206
  )
207
- response = openai.ChatCompletion.create(
207
+ response = openai.chat.completions.create(
208
208
  engine=self.config["engine"],
209
209
  messages=prompt,
210
210
  max_tokens=500,
@@ -215,7 +215,7 @@ class OpenAI_Chat(VannaBase):
215
215
  print(
216
216
  f"Using model {self.config['model']} for {num_tokens} tokens (approx)"
217
217
  )
218
- response = openai.ChatCompletion.create(
218
+ response = openai.chat.completions.create(
219
219
  model=self.config["model"],
220
220
  messages=prompt,
221
221
  max_tokens=500,
@@ -229,7 +229,7 @@ class OpenAI_Chat(VannaBase):
229
229
  model = "gpt-3.5-turbo"
230
230
 
231
231
  print(f"Using model {model} for {num_tokens} tokens (approx)")
232
- response = openai.ChatCompletion.create(
232
+ response = openai.chat.completions.create(
233
233
  model=model, messages=prompt, max_tokens=500, stop=None, temperature=0.7
234
234
  )
235
235
 
@@ -1,6 +1,6 @@
1
1
  from abc import abstractmethod
2
2
 
3
- import openai
3
+ from openai import OpenAI
4
4
 
5
5
  from ..base import VannaBase
6
6
 
@@ -9,29 +9,31 @@ class OpenAI_Embeddings(VannaBase):
9
9
  def __init__(self, config=None):
10
10
  VannaBase.__init__(self, config=config)
11
11
 
12
+ self.client = OpenAI()
13
+
12
14
  if config is None:
13
15
  return
14
16
 
15
17
  if "api_type" in config:
16
- openai.api_type = config["api_type"]
18
+ self.client.api_type = config["api_type"]
17
19
 
18
20
  if "api_base" in config:
19
- openai.api_base = config["api_base"]
21
+ self.client.api_base = config["api_base"]
20
22
 
21
23
  if "api_version" in config:
22
- openai.api_version = config["api_version"]
24
+ self.client.api_version = config["api_version"]
23
25
 
24
26
  if "api_key" in config:
25
- openai.api_key = config["api_key"]
27
+ self.client.api_key = config["api_key"]
26
28
 
27
29
  def generate_embedding(self, data: str, **kwargs) -> list[float]:
28
30
  if self.config is not None and "engine" in self.config:
29
- embedding = openai.Embedding.create(
31
+ embedding = self.client.embeddings.create(
30
32
  engine=self.config["engine"],
31
33
  input=data,
32
34
  )
33
35
  else:
34
- embedding = openai.Embedding.create(
36
+ embedding = self.client.embeddings.create(
35
37
  model="text-embedding-ada-002",
36
38
  input=data,
37
39
  )
vanna/remote.py CHANGED
@@ -197,7 +197,7 @@ class VannaDefault(VannaBase):
197
197
 
198
198
  return status.id
199
199
 
200
- def add_documentation(self, documentation: str, **kwargs) -> str:
200
+ def add_documentation(self, doc: str, **kwargs) -> str:
201
201
  """
202
202
  Adds documentation to the model's training data
203
203
 
File without changes
@@ -0,0 +1,187 @@
1
+ from ..base import VannaBase
2
+ from ..types import (
3
+ QuestionSQLPair,
4
+ StatusWithId,
5
+ StringData,
6
+ DataFrameJSON,
7
+ Status,
8
+ TrainingData,
9
+ Question,
10
+ )
11
+ from io import StringIO
12
+ import pandas as pd
13
+ import requests
14
+ import json
15
+ import dataclasses
16
+
17
class VannaDB_VectorStore(VannaBase):
    """Vector store that delegates storage and retrieval to a Vanna RPC endpoint.

    Every operation is sent as a JSON body of the form
    ``{"method": ..., "params": [...]}`` where each parameter is a dataclass
    converted to a dict. Replies are expected to carry their payload under
    the ``"result"`` key.

    Related training data is cached per question in
    ``self.related_training_data`` so the three ``get_*`` lookups for the
    same question share a single RPC call.
    NOTE(review): nothing in this class evicts or invalidates that cache
    when training data is added or removed -- confirm this is intended.
    NOTE(review): the HTTP calls pass no timeout to ``requests.post``.
    """

    def __init__(self, vanna_model: str, vanna_api_key: str, config=None):
        """Store credentials and resolve the RPC endpoints.

        Args:
            vanna_model: Sent as the ``Vanna-Org`` header on authenticated calls.
            vanna_api_key: Sent as the ``Vanna-Key`` header on authenticated calls.
            config: Optional mapping; ``"endpoint"`` and
                ``"unauthenticated_endpoint"`` override the default URLs.
        """
        VannaBase.__init__(self, config=config)

        self._model = vanna_model
        self._api_key = vanna_api_key

        self._endpoint = (
            "https://ask.vanna.ai/rpc"
            if config is None or "endpoint" not in config
            else config["endpoint"]
        )
        self._unauthenticated_endpoint = (
            "https://ask.vanna.ai/unauthenticated_rpc"
            if config is None or "unauthenticated_endpoint" not in config
            else config["unauthenticated_endpoint"]
        )
        # question -> TrainingData returned by get_related_training_data
        self.related_training_data = {}

    def _post(self, url, headers, method, params):
        """POST one RPC request to `url` and return the decoded JSON reply."""
        payload = {
            "method": method,
            "params": [self._dataclass_to_dict(obj) for obj in params],
        }
        response = requests.post(url, headers=headers, data=json.dumps(payload))
        return response.json()

    def _unauthenticated_rpc_call(self, method, params):
        """Call `method` on the unauthenticated endpoint (no key/org headers)."""
        headers = {
            "Content-Type": "application/json",
        }
        return self._post(self._unauthenticated_endpoint, headers, method, params)

    def _rpc_call(self, method, params):
        """Call `method` on the authenticated endpoint.

        The ``Vanna-Org`` header is this instance's model, except for the
        ``list_orgs`` method, which is always sent with ``demo-tpc-h``.
        """
        org = "demo-tpc-h" if method == "list_orgs" else self._model
        headers = {
            "Content-Type": "application/json",
            "Vanna-Key": self._api_key,
            "Vanna-Org": org,
        }
        return self._post(self._endpoint, headers, method, params)

    def _dataclass_to_dict(self, obj):
        """Convert a dataclass instance into a plain dict for JSON encoding."""
        return dataclasses.asdict(obj)

    def _add_and_get_id(self, method, params, error_message):
        """Run an `add_*` RPC and return the id from its StatusWithId result."""
        d = self._rpc_call(method=method, params=params)

        if "result" not in d:
            raise Exception(error_message, d)

        return StatusWithId(**d["result"]).id

    def _related_training_data_for(self, question):
        """Return cached related training data for `question`, fetching on a miss."""
        if question in self.related_training_data:
            return self.related_training_data[question]
        return self.get_related_training_data_cached(question)

    # The annotation is a string so it is not evaluated at definition time;
    # subscripting the builtin `list` raises TypeError before Python 3.9.
    def generate_embedding(self, data: str, **kwargs) -> "list[float]":
        """Do nothing and return None: embeddings are computed server-side."""
        return None

    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
        """Store a question/SQL pair and return its id.

        An optional ``tag`` keyword overrides the default "Manually Trained".

        Raises:
            Exception: if the reply has no ``"result"`` key.
        """
        tag = kwargs.get("tag", "Manually Trained")
        params = [QuestionSQLPair(question=question, sql=sql, tag=tag)]

        return self._add_and_get_id(
            "add_sql", params, "Error adding question and SQL pair"
        )

    def add_ddl(self, ddl: str, **kwargs) -> str:
        """Store a DDL statement and return its id.

        Raises:
            Exception: if the reply has no ``"result"`` key.
        """
        return self._add_and_get_id(
            "add_ddl", [StringData(data=ddl)], "Error adding DDL"
        )

    def add_documentation(self, doc: str, **kwargs) -> str:
        """Store a documentation string and return its id.

        Raises:
            Exception: if the reply has no ``"result"`` key.
        """
        return self._add_and_get_id(
            "add_documentation", [StringData(data=doc)], "Error adding documentation"
        )

    def get_training_data(self, **kwargs) -> pd.DataFrame:
        """Fetch all training data as a DataFrame.

        Returns None (not an empty frame) when the reply has no ``"result"``.
        """
        d = self._rpc_call(method="get_training_data", params=[])

        if "result" not in d:
            return None

        # The result carries the frame as a JSON string in its `data` field.
        training_data = DataFrameJSON(**d["result"])

        return pd.read_json(StringIO(training_data.data))

    def remove_training_data(self, id: str, **kwargs) -> bool:
        """Remove the training item with the given id.

        Returns:
            The ``success`` flag of the returned status.

        Raises:
            Exception: if the reply has no ``"result"`` key, or if the
                returned status reports failure.
        """
        params = [StringData(data=id)]

        d = self._rpc_call(method="remove_training_data", params=params)

        if "result" not in d:
            # Include the raw reply, as the add_* methods do, to aid debugging.
            raise Exception("Error removing training data", d)

        status = Status(**d["result"])

        if not status.success:
            raise Exception(f"Error removing training data: {status.message}")

        return status.success

    def get_related_training_data_cached(self, question: str) -> TrainingData:
        """Fetch related training data for `question` and store it in the cache.

        Always issues an RPC call; callers consult the cache first.
        Returns None, and caches nothing, when the reply has no ``"result"``.
        """
        params = [Question(question=question)]

        d = self._rpc_call(method="get_related_training_data", params=params)

        if "result" not in d:
            return None

        training_data = TrainingData(**d["result"])

        self.related_training_data[question] = training_data

        return training_data

    def get_similar_question_sql(self, question: str, **kwargs) -> list:
        """Return the `questions` of the related training data, or [] if none."""
        training_data = self._related_training_data_for(question)
        # The fetch returns None on a reply without "result".
        return [] if training_data is None else training_data.questions

    def get_related_ddl(self, question: str, **kwargs) -> list:
        """Return the `ddl` of the related training data, or [] if none."""
        training_data = self._related_training_data_for(question)
        return [] if training_data is None else training_data.ddl

    def get_related_documentation(self, question: str, **kwargs) -> list:
        """Return the `documentation` of the related training data, or [] if none."""
        training_data = self._related_training_data_for(question)
        return [] if training_data is None else training_data.documentation
@@ -0,0 +1,245 @@
1
+ Metadata-Version: 2.1
2
+ Name: vanna
3
+ Version: 0.0.29
4
+ Summary: Generate SQL queries from natural language
5
+ Author-email: Zain Hoda <zain@vanna.ai>
6
+ Requires-Python: >=3.7
7
+ Description-Content-Type: text/markdown
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Dist: requests
12
+ Requires-Dist: tabulate
13
+ Requires-Dist: plotly
14
+ Requires-Dist: pandas
15
+ Requires-Dist: sqlparse
16
+ Requires-Dist: kaleido
17
+ Requires-Dist: psycopg2 ; extra == "all"
18
+ Requires-Dist: db-dtypes ; extra == "all"
19
+ Requires-Dist: google-cloud-bigquery ; extra == "all"
20
+ Requires-Dist: snowflake-connector-python ; extra == "all"
21
+ Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
22
+ Requires-Dist: chromadb ; extra == "chromadb"
23
+ Requires-Dist: openai ; extra == "openai"
24
+ Requires-Dist: psycopg2 ; extra == "postgres"
25
+ Requires-Dist: db-dtypes ; extra == "postgres"
26
+ Requires-Dist: snowflake-connector-python ; extra == "snowflake"
27
+ Requires-Dist: tox ; extra == "test"
28
+ Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna/issues
29
+ Project-URL: Homepage, https://github.com/vanna-ai/vanna
30
+ Provides-Extra: all
31
+ Provides-Extra: bigquery
32
+ Provides-Extra: chromadb
33
+ Provides-Extra: openai
34
+ Provides-Extra: postgres
35
+ Provides-Extra: snowflake
36
+ Provides-Extra: test
37
+
38
+ ![](https://img.vanna.ai/vanna-github.svg)
39
+
40
+ | GitHub | PyPI | Documentation |
41
+ | ------ | ---- | ------------- |
42
+ | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) |
43
+
44
+ # Vanna
45
+ Vanna is an MIT-licensed open-source Python RAG (Retrieval-Augmented Generation) framework for SQL generation and related functionality.
46
+
47
+ https://github.com/vanna-ai/vanna/assets/7146154/1901f47a-515d-4982-af50-f12761a3b2ce
48
+
49
+ ## How Vanna works
50
+ Vanna works in two easy steps - train a RAG "model" on your data, and then ask questions which will return SQL queries that can be set up to automatically run on your database.
51
+
52
+ 1. **Train a RAG "model" on your data**.
53
+ 2. **Ask questions**.
54
+
55
+ ![](img/vanna-readme-diagram.png)
56
+
57
+ If you don't know what RAG is, don't worry -- you don't need to know how this works under the hood to use it. You just need to know that you "train" a model, which stores some metadata, and then use it to "ask" questions.
58
+
59
+ See the [base class](src/vanna/base/base.py) for more details on how this works under the hood.
60
+
61
+ ## User Interfaces
62
+ These are some of the user interfaces that we've built using Vanna. You can use these as-is or as a starting point for your own custom interface.
63
+
64
+ - [Jupyter Notebook](https://github.com/vanna-ai/vanna/blob/main/notebooks/getting-started.ipynb)
65
+ - [vanna-ai/vanna-streamlit](https://github.com/vanna-ai/vanna-streamlit)
66
+ - [vanna-ai/vanna-flask](https://github.com/vanna-ai/vanna-flask)
67
+ - [vanna-ai/vanna-slack](https://github.com/vanna-ai/vanna-slack)
68
+
69
+
70
+ ## Getting started
71
+ See the [documentation](https://vanna.ai/docs/) for specifics on your desired database, LLM, etc.
72
+
73
+ If you want to get a feel for how it works after training, you can try this [Colab notebook](https://colab.research.google.com/github/vanna-ai/vanna/blob/main/notebooks/getting-started.ipynb).
74
+
75
+
76
+ ### Install
77
+ ```bash
78
+ pip install vanna
79
+ ```
80
+
81
+ There are a number of optional packages that can be installed; see the [documentation](https://vanna.ai/docs/) for more details.
82
+
83
+ ### Import
84
+ See the [documentation](https://vanna.ai/docs/) if you're customizing the LLM or vector database.
85
+
86
+ ```python
87
+ import vanna as vn
88
+ ```
89
+
90
+
91
+ ## Training
92
+ You may or may not need to run these `vn.train` commands depending on your use case. See the [documentation](https://vanna.ai/docs/) for more details.
93
+
94
+ These statements are shown to give you a feel for how it works.
95
+
96
+ ### Train with DDL Statements
97
+ DDL statements contain information about the table names, columns, data types, and relationships in your database.
98
+
99
+ ```python
100
+ vn.train(ddl="""
101
+ CREATE TABLE IF NOT EXISTS my-table (
102
+ id INT PRIMARY KEY,
103
+ name VARCHAR(100),
104
+ age INT
105
+ )
106
+ """)
107
+ ```
108
+
109
+ ### Train with Documentation
110
+ Sometimes you may want to add documentation about your business terminology or definitions.
111
+
112
+ ```python
113
+ vn.train(documentation="Our business defines XYZ as ...")
114
+ ```
115
+
116
+ ### Train with SQL
117
+ You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.
118
+
119
+ ```python
120
+ vn.train(sql="SELECT name, age FROM my-table WHERE name = 'John Doe'")
121
+ ```
122
+
123
+
124
+ ## Asking questions
125
+ ```python
126
+ vn.ask("What are the top 10 customers by sales?")
127
+ ```
128
+
129
+ You'll get SQL
130
+ ```sql
131
+ SELECT c.c_name as customer_name,
132
+ sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales
133
+ FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o
134
+ ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c
135
+ ON o.o_custkey = c.c_custkey
136
+ GROUP BY customer_name
137
+ ORDER BY total_sales desc limit 10;
138
+ ```
139
+
140
+ If you've connected to a database, you'll get the table:
141
+ <div>
142
+ <table border="1" class="dataframe">
143
+ <thead>
144
+ <tr style="text-align: right;">
145
+ <th></th>
146
+ <th>CUSTOMER_NAME</th>
147
+ <th>TOTAL_SALES</th>
148
+ </tr>
149
+ </thead>
150
+ <tbody>
151
+ <tr>
152
+ <th>0</th>
153
+ <td>Customer#000143500</td>
154
+ <td>6757566.0218</td>
155
+ </tr>
156
+ <tr>
157
+ <th>1</th>
158
+ <td>Customer#000095257</td>
159
+ <td>6294115.3340</td>
160
+ </tr>
161
+ <tr>
162
+ <th>2</th>
163
+ <td>Customer#000087115</td>
164
+ <td>6184649.5176</td>
165
+ </tr>
166
+ <tr>
167
+ <th>3</th>
168
+ <td>Customer#000131113</td>
169
+ <td>6080943.8305</td>
170
+ </tr>
171
+ <tr>
172
+ <th>4</th>
173
+ <td>Customer#000134380</td>
174
+ <td>6075141.9635</td>
175
+ </tr>
176
+ <tr>
177
+ <th>5</th>
178
+ <td>Customer#000103834</td>
179
+ <td>6059770.3232</td>
180
+ </tr>
181
+ <tr>
182
+ <th>6</th>
183
+ <td>Customer#000069682</td>
184
+ <td>6057779.0348</td>
185
+ </tr>
186
+ <tr>
187
+ <th>7</th>
188
+ <td>Customer#000102022</td>
189
+ <td>6039653.6335</td>
190
+ </tr>
191
+ <tr>
192
+ <th>8</th>
193
+ <td>Customer#000098587</td>
194
+ <td>6027021.5855</td>
195
+ </tr>
196
+ <tr>
197
+ <th>9</th>
198
+ <td>Customer#000064660</td>
199
+ <td>5905659.6159</td>
200
+ </tr>
201
+ </tbody>
202
+ </table>
203
+ </div>
204
+
205
+ You'll also get an automated Plotly chart:
206
+ ![](img/top-10-customers.png)
207
+
208
+ ## RAG vs. Fine-Tuning
209
+ RAG
210
+ - Portable across LLMs
211
+ - Easy to remove training data if any of it becomes obsolete
212
+ - Much cheaper to run than fine-tuning
213
+ - More future-proof -- if a better LLM comes out, you can just swap it out
214
+
215
+ Fine-Tuning
216
+ - Good if you need to minimize tokens in the prompt
217
+ - Slow to get started
218
+ - Expensive to train and run (generally)
219
+
220
+ ## Why Vanna?
221
+
222
+ 1. **High accuracy on complex datasets.**
223
+ - Vanna’s capabilities are tied to the training data you give it
224
+ - More training data means better accuracy for large and complex datasets
225
+ 2. **Secure and private.**
226
+ - Your database contents are never sent to the LLM or the vector database
227
+ - SQL execution happens in your local environment
228
+ 3. **Self learning.**
229
+ - If using via Jupyter, you can choose to "auto-train" it on the queries that were successfully executed
230
+ - If using via other interfaces, you can have the interface prompt the user to provide feedback on the results
231
+ - Correct question to SQL pairs are stored for future reference and make the future results more accurate
232
+ 4. **Supports any SQL database.**
233
+ - The package allows you to connect to any SQL database that you can otherwise connect to with Python
234
+ 5. **Choose your front end.**
235
+ - Most people start in a Jupyter Notebook.
236
+ - Expose to your end users via Slackbot, web app, Streamlit app, or a custom front end.
237
+
238
+ ## Extending Vanna
239
+ Vanna is designed to connect to any database, LLM, and vector database. There's a [VannaBase](src/vanna/base/base.py) abstract base class that defines some basic functionality. The package provides implementations for use with OpenAI and ChromaDB. You can easily extend Vanna to use your own LLM or vector database. See the [documentation](https://vanna.ai/docs/) for more details.
240
+
241
+ ## More resources
242
+ - [Full Documentation](https://vanna.ai/docs/)
243
+ - [Website](https://vanna.ai)
244
+ - [Discord group for support](https://discord.gg/qUZYKHremx)
245
+
@@ -0,0 +1,18 @@
1
+ vanna/__init__.py,sha256=thjmOUgHCboSxIkzQRKw-JvZLLFbnuyM7G5YIzmmmPQ,61545
2
+ vanna/local.py,sha256=U5s8ybCRQhBUizi8I69o3jqOpTeu_6KGYY6DMwZxjG4,313
3
+ vanna/remote.py,sha256=xWlF48eQXuc03NZrDpMQgvrM6dbbfbEjX_FEmQf_b5c,13573
4
+ vanna/utils.py,sha256=Q0H4eugPYg9SVpEoTWgvmuoJZZxOVRhNzrP97E5lyak,1472
5
+ vanna/base/__init__.py,sha256=Sl-HM1RRYzAZoSqmL1CZQmF3ZF-byYTCFQP3JZ2A5MU,28
6
+ vanna/base/base.py,sha256=iMW-0TnqIJE0ojO1UrTdtGCTjTK7bcAlskkmKJp6zUw,30067
7
+ vanna/chromadb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ vanna/chromadb/chromadb_vector.py,sha256=af1n7htIkSnpd7h9906mkKSK9BpvNNQa48_z4FS-_nE,5716
9
+ vanna/exceptions/__init__.py,sha256=N76unE7sjbGGBz6LmCrPQAugFWr9cUFv8ErJxBrCTts,717
10
+ vanna/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ vanna/openai/openai_chat.py,sha256=U6wkXztJnQtABItUMDlBIDN6m3fqD6pMpa9gyQAQx8A,9753
12
+ vanna/openai/openai_embeddings.py,sha256=kPtOqrKQYJnXe6My3pO9BWg-L3KIR1sJVqE3YoW0roA,1139
13
+ vanna/types/__init__.py,sha256=Qhn_YscKtJh7mFPCyCDLa2K8a4ORLMGVnPpTbv9uB2U,4957
14
+ vanna/vannadb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ vanna/vannadb/vannadb_vector.py,sha256=zX_oT66LQSDeqO87I5xdKA87uQRQDl-ZrGOh8BYkUOU,5645
16
+ vanna-0.0.29.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
17
+ vanna-0.0.29.dist-info/METADATA,sha256=j27f5mkOT3N2PibqKp95Ts5wiA08gJk9hu_IDfMsRqI,8501
18
+ vanna-0.0.29.dist-info/RECORD,,
@@ -1,213 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: vanna
3
- Version: 0.0.27
4
- Summary: Generate SQL queries from natural language
5
- Author-email: Zain Hoda <zain@vanna.ai>
6
- Requires-Python: >=3.7
7
- Description-Content-Type: text/markdown
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Dist: requests
12
- Requires-Dist: tabulate
13
- Requires-Dist: plotly
14
- Requires-Dist: pandas
15
- Requires-Dist: sqlparse
16
- Requires-Dist: kaleido
17
- Requires-Dist: psycopg2 ; extra == "all"
18
- Requires-Dist: db-dtypes ; extra == "all"
19
- Requires-Dist: google-cloud-bigquery ; extra == "all"
20
- Requires-Dist: snowflake-connector-python ; extra == "all"
21
- Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
22
- Requires-Dist: chromadb ; extra == "chromadb"
23
- Requires-Dist: openai ; extra == "openai"
24
- Requires-Dist: psycopg2 ; extra == "postgres"
25
- Requires-Dist: db-dtypes ; extra == "postgres"
26
- Requires-Dist: snowflake-connector-python ; extra == "snowflake"
27
- Requires-Dist: tox ; extra == "test"
28
- Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna/issues
29
- Project-URL: Homepage, https://github.com/vanna-ai/vanna
30
- Provides-Extra: all
31
- Provides-Extra: bigquery
32
- Provides-Extra: chromadb
33
- Provides-Extra: openai
34
- Provides-Extra: postgres
35
- Provides-Extra: snowflake
36
- Provides-Extra: test
37
-
38
- ![](https://img.vanna.ai/vanna-github.svg)
39
-
40
- | GitHub | PyPI | Documentation |
41
- | ------ | ---- | ------------- |
42
- | [![GitHub](https://img.shields.io/badge/GitHub-vanna-blue?logo=github)](https://github.com/vanna-ai/vanna) | [![PyPI](https://img.shields.io/pypi/v/vanna?logo=pypi)](https://pypi.org/project/vanna/) | [![Documentation](https://img.shields.io/badge/Documentation-vanna-blue?logo=read-the-docs)](https://vanna.ai/docs/) |
43
-
44
- # Vanna.AI - Personalized AI SQL Agent
45
-
46
- https://github.com/vanna-ai/vanna/assets/7146154/1901f47a-515d-4982-af50-f12761a3b2ce
47
-
48
- ## How Vanna works
49
- Vanna works in two easy steps - train a model on your data, and then ask questions.
50
-
51
- 1. **Train a model on your data**.
52
- 2. **Ask questions**.
53
-
54
- When you ask a question, we utilize a custom model for your dataset to generate SQL, as seen below. Your model performance and accuracy depends on the quality and quantity of training data you use to train your model.
55
- <img width="1725" alt="how-vanna-works" src="https://github.com/vanna-ai/vanna/assets/7146154/5e2e2179-ed7a-4df4-92a2-1c017923a675">
56
-
57
- ## Getting started
58
- You can start by [automatically training Vanna (currently works for Snowflake)](https://vanna.ai/docs/vn-train.html) or add manual training data.
59
-
60
- ### Install Vanna
61
- ```
62
- pip install vanna
63
- ```
64
-
65
- Depending on the database you're using, you can also install the associated database drivers
66
- ```
67
- pip install 'vanna[snowflake]'
68
- ```
69
-
70
- ### Import Vanna
71
- ```python
72
- import vanna as vn
73
- ```
74
-
75
- ### Train with DDL Statements
76
- If you prefer to manually train, you do not need to connect to a database. You can use the train function with other parmaeters like ddl
77
-
78
-
79
- ```python
80
- vn.train(ddl="""
81
- CREATE TABLE IF NOT EXISTS my-table (
82
- id INT PRIMARY KEY,
83
- name VARCHAR(100),
84
- age INT
85
- )
86
- """)
87
- ```
88
-
89
- ### Train with Documentation
90
- Sometimes you may want to add documentation about your business terminology or definitions.
91
-
92
- ```python
93
- vn.train(documentation="Our business defines OTIF score as the percentage of orders that are delivered on time and in full")
94
- ```
95
-
96
- ### Train with SQL
97
- You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.
98
-
99
- ```python
100
- vn.train(sql="SELECT * FROM my-table WHERE name = 'John Doe'")
101
- ```
102
-
103
-
104
- ## Asking questions
105
- ```python
106
- vn.ask("What are the top 10 customers by sales?")
107
- ```
108
-
109
- SELECT c.c_name as customer_name,
110
- sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales
111
- FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o
112
- ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c
113
- ON o.o_custkey = c.c_custkey
114
- GROUP BY customer_name
115
- ORDER BY total_sales desc limit 10;
116
-
117
-
118
-
119
- <div>
120
- <table border="1" class="dataframe">
121
- <thead>
122
- <tr style="text-align: right;">
123
- <th></th>
124
- <th>CUSTOMER_NAME</th>
125
- <th>TOTAL_SALES</th>
126
- </tr>
127
- </thead>
128
- <tbody>
129
- <tr>
130
- <th>0</th>
131
- <td>Customer#000143500</td>
132
- <td>6757566.0218</td>
133
- </tr>
134
- <tr>
135
- <th>1</th>
136
- <td>Customer#000095257</td>
137
- <td>6294115.3340</td>
138
- </tr>
139
- <tr>
140
- <th>2</th>
141
- <td>Customer#000087115</td>
142
- <td>6184649.5176</td>
143
- </tr>
144
- <tr>
145
- <th>3</th>
146
- <td>Customer#000131113</td>
147
- <td>6080943.8305</td>
148
- </tr>
149
- <tr>
150
- <th>4</th>
151
- <td>Customer#000134380</td>
152
- <td>6075141.9635</td>
153
- </tr>
154
- <tr>
155
- <th>5</th>
156
- <td>Customer#000103834</td>
157
- <td>6059770.3232</td>
158
- </tr>
159
- <tr>
160
- <th>6</th>
161
- <td>Customer#000069682</td>
162
- <td>6057779.0348</td>
163
- </tr>
164
- <tr>
165
- <th>7</th>
166
- <td>Customer#000102022</td>
167
- <td>6039653.6335</td>
168
- </tr>
169
- <tr>
170
- <th>8</th>
171
- <td>Customer#000098587</td>
172
- <td>6027021.5855</td>
173
- </tr>
174
- <tr>
175
- <th>9</th>
176
- <td>Customer#000064660</td>
177
- <td>5905659.6159</td>
178
- </tr>
179
- </tbody>
180
- </table>
181
- </div>
182
-
183
- ## Why Vanna?
184
-
185
- 1. **High accuracy on complex datasets.**
186
- - Vanna’s capabilities are tied to the training data you give it
187
- - More training data means better accuracy for large and complex datasets
188
- 2. **Secure and private.**
189
- - Your database contents are never sent to Vanna’s servers
190
- - We only see the bare minimum - schemas & queries.
191
- 3. **Isolated, custom model.**
192
- - You train a custom model specific to your database and your schema.
193
- - Nobody else can use your model or view your model’s training data unless you choose to add members to your model or make it public
194
- - We use a combination of third-party foundational models (OpenAI, Google) and our own LLM.
195
- 4. **Self learning.**
196
- - As you use Vanna more, your model continuously improves as we augment your training data
197
- 5. **Supports many databases.**
198
- - We have out-of-the-box support Snowflake, BigQuery, Postgres
199
- - You can easily make a connector for any [database](https://docs.vanna.ai/databases/)
200
- 6. **Pretrained models.**
201
- - If you’re a data provider you can publish your models for anyone to use
202
- - As part of our roadmap, we are in the process of pre-training models for common datasets (Google Ads, Facebook ads, etc)
203
- 7. **Choose your front end.**
204
- - Start in a Jupyter Notebook.
205
- - Expose to business users via Slackbot, web app, Streamlit app, or Excel plugin.
206
- - Even integrate in your web app for customers.
207
-
208
- ## More resources
209
- - [Full Documentation](https://vanna.ai/docs/)
210
- - [Website](https://vanna.ai)
211
- - [Slack channel for support](https://join.slack.com/t/vanna-ai/shared_invite/zt-1unu0ipog-iE33QCoimQiBDxf2o7h97w)
212
- - [LinkedIn](https://www.linkedin.com/company/vanna-ai/)
213
-
@@ -1,16 +0,0 @@
1
- vanna/__init__.py,sha256=7ynse9XnAHLmDkUXPAVCtmmyDOMBr4wsV3xZewxgadM,61545
2
- vanna/local.py,sha256=U5s8ybCRQhBUizi8I69o3jqOpTeu_6KGYY6DMwZxjG4,313
3
- vanna/remote.py,sha256=aFume63QJhHn_vKdta3cYPKTYp45hGsSSxb7TrsbFSA,13583
4
- vanna/utils.py,sha256=Q0H4eugPYg9SVpEoTWgvmuoJZZxOVRhNzrP97E5lyak,1472
5
- vanna/base/__init__.py,sha256=Sl-HM1RRYzAZoSqmL1CZQmF3ZF-byYTCFQP3JZ2A5MU,28
6
- vanna/base/base.py,sha256=sqMQdnD_S9SGVXrX65ghKSs8g0jZ90Tioaf0Hv26XoQ,27644
7
- vanna/chromadb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- vanna/chromadb/chromadb_vector.py,sha256=af1n7htIkSnpd7h9906mkKSK9BpvNNQa48_z4FS-_nE,5716
9
- vanna/exceptions/__init__.py,sha256=N76unE7sjbGGBz6LmCrPQAugFWr9cUFv8ErJxBrCTts,717
10
- vanna/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- vanna/openai/openai_chat.py,sha256=zdWjrImhc8TbkGwFstOOCWtuiUzw7SO69vVrbQXXk_4,9747
12
- vanna/openai/openai_embeddings.py,sha256=6LwuHh3DTrDNsRhZiDGZ00A900-0o-9a9L6sL3spM-Q,1063
13
- vanna/types/__init__.py,sha256=Qhn_YscKtJh7mFPCyCDLa2K8a4ORLMGVnPpTbv9uB2U,4957
14
- vanna-0.0.27.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
15
- vanna-0.0.27.dist-info/METADATA,sha256=Fo8lh14ISx6McemteZh1sx5ihz_CCMM0Z7u35XUc810,7073
16
- vanna-0.0.27.dist-info/RECORD,,
File without changes