vanna 0.0.6__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vanna-0.0.11/PKG-INFO +156 -0
- vanna-0.0.11/README.md +142 -0
- {vanna-0.0.6 → vanna-0.0.11}/pyproject.toml +2 -2
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna/__init__.py +326 -73
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna/types.py +26 -3
- vanna-0.0.11/src/vanna.egg-info/PKG-INFO +156 -0
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna.egg-info/requires.txt +1 -0
- vanna-0.0.6/PKG-INFO +0 -20
- vanna-0.0.6/README.md +0 -6
- vanna-0.0.6/src/vanna.egg-info/PKG-INFO +0 -20
- {vanna-0.0.6 → vanna-0.0.11}/LICENSE +0 -0
- {vanna-0.0.6 → vanna-0.0.11}/setup.cfg +0 -0
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna.egg-info/SOURCES.txt +0 -0
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna.egg-info/dependency_links.txt +0 -0
- {vanna-0.0.6 → vanna-0.0.11}/src/vanna.egg-info/top_level.txt +0 -0
vanna-0.0.11/PKG-INFO
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: vanna
|
|
3
|
+
Version: 0.0.11
|
|
4
|
+
Summary: Generate SQL queries from natural language
|
|
5
|
+
Author-email: Zain Hoda <zain@vanna.ai>
|
|
6
|
+
Project-URL: Homepage, https://github.com/vanna-ai/vanna-py
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna-py/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
| GitHub | PyPI | Colab | Documentation |
|
|
16
|
+
| ------ | ---- | ----- | ------------- |
|
|
17
|
+
| [](https://github.com/vanna-ai/vanna-py) | [](https://pypi.org/project/vanna/) | [](https://colab.research.google.com/github/vanna-ai/vanna-py/blob/main/notebooks/vn-starter.ipynb) | [](https://docs.vanna.ai) |
|
|
18
|
+
|
|
19
|
+
# Vanna.AI
|
|
20
|
+
|
|
21
|
+
Vanna is a Python-based AI SQL co-pilot. Our initial users are data-savvy data analysts, data scientists, engineers, and similar people that use Vanna to automate writing complex SQL.
|
|
22
|
+
|
|
23
|
+
Vanna can:
|
|
24
|
+
- [Convert natural language to SQL](#natural-language-to-sql)
|
|
25
|
+
- [Run SQL](#run-sql)
|
|
26
|
+
- [Generate Plotly code](#generate-plotly-code)
|
|
27
|
+
- [Run Plotly code](#run-plotly-code)
|
|
28
|
+
- [Get better over time](#improve-your-training-data)
|
|
29
|
+
- Be used in Jupyter Notebooks, Colab, or other Python environments
|
|
30
|
+
- Be used with Snowflake, BigQuery, and other databases
|
|
31
|
+
- Be used with Python UIs, such as [Streamlit](https://github.com/vanna-ai/vanna-streamlit), Dash, and others
|
|
32
|
+
- Be used to make Slack bots
|
|
33
|
+
|
|
34
|
+
## Natural Language to SQL
|
|
35
|
+
```python
|
|
36
|
+
sql = vn.generate_sql(question='Who are the top 10 customers?')
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Output:
|
|
40
|
+
```sql
|
|
41
|
+
SELECT customer_name,
|
|
42
|
+
total_sales
|
|
43
|
+
FROM (SELECT c.c_name as customer_name,
|
|
44
|
+
sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales,
|
|
45
|
+
row_number() OVER (ORDER BY sum(l.l_extendedprice * (1 - l.l_discount)) desc) as rank
|
|
46
|
+
FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o
|
|
47
|
+
ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c
|
|
48
|
+
ON o.o_custkey = c.c_custkey
|
|
49
|
+
GROUP BY customer_name)
|
|
50
|
+
WHERE rank <= 10;
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Run SQL
|
|
54
|
+
This function is provided as a convenience. You can choose to run your SQL however you normally do and use the rest of the downstream functions.
|
|
55
|
+
```python
|
|
56
|
+
df = vn.get_results(cs, database, sql)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Output:
|
|
60
|
+
| customer_name | total_sales |
|
|
61
|
+
| ------------- | ----------- |
|
|
62
|
+
| Customer#000000001 | 68127.72 |
|
|
63
|
+
| Customer#000000002 | 65898.69 |
|
|
64
|
+
...
|
|
65
|
+
|
|
66
|
+
## Generate Plotly Code
|
|
67
|
+
```python
|
|
68
|
+
plotly_code = vn.generate_plotly_code(question=my_question, sql=sql, df=df)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Output:
|
|
72
|
+
```python
|
|
73
|
+
fig = go.Figure(go.Bar(
|
|
74
|
+
x=df['CUSTOMER_NAME'],
|
|
75
|
+
y=df['TOTAL_SALES'],
|
|
76
|
+
marker={'color': df['TOTAL_SALES'], 'colorscale': 'Viridis'},
|
|
77
|
+
text=df['TOTAL_SALES'],
|
|
78
|
+
textposition='auto',
|
|
79
|
+
))
|
|
80
|
+
|
|
81
|
+
fig.update_layout(
|
|
82
|
+
title="Top 10 Customers by Sales",
|
|
83
|
+
xaxis_title="Customer",
|
|
84
|
+
yaxis_title="Total Sales",
|
|
85
|
+
xaxis_tickangle=-45,
|
|
86
|
+
yaxis_tickprefix="$",
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Run Plotly Code
|
|
91
|
+
```python
|
|
92
|
+
fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)
|
|
93
|
+
fig.show()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Output:
|
|
97
|
+

|
|
98
|
+
|
|
99
|
+
## Improve Your Training Data
|
|
100
|
+
```python
|
|
101
|
+
vn.store_sql(
|
|
102
|
+
question=my_question,
|
|
103
|
+
sql=sql,
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## How Vanna Works
|
|
108
|
+
```mermaid
|
|
109
|
+
flowchart LR
|
|
110
|
+
DB[(Known Correct Question-SQL)]
|
|
111
|
+
Try[Try to Use DDL/Documentation]
|
|
112
|
+
SQL(SQL)
|
|
113
|
+
Check{Is the SQL correct?}
|
|
114
|
+
Generate[fa:fa-circle-question Use Examples to Generate]
|
|
115
|
+
DB --> Find
|
|
116
|
+
Question[fa:fa-circle-question Question] --> Find{fa:fa-magnifying-glass Do we have similar questions?}
|
|
117
|
+
Find -- Yes --> Generate
|
|
118
|
+
Find -- No --> Try
|
|
119
|
+
Generate --> SQL
|
|
120
|
+
Try --> SQL
|
|
121
|
+
SQL --> Check
|
|
122
|
+
Check -- Yes --> DB
|
|
123
|
+
Check -- No --> Analyst[fa:fa-glasses Analyst Writes the SQL]
|
|
124
|
+
Analyst -- Adds --> DB
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
# Getting Started
|
|
128
|
+
|
|
129
|
+
## Install Vanna from PyPI and import it:
|
|
130
|
+
```python
|
|
131
|
+
%pip install vanna
|
|
132
|
+
import vanna as vn
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Enter your email to set an API Key
|
|
136
|
+
This will send a one-time code to your email address. Copy and paste the code into the prompt.
|
|
137
|
+
```python
|
|
138
|
+
my_email = '' # Enter your email here
|
|
139
|
+
vn.login(email=my_email)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Add Training Data
|
|
143
|
+
```python
|
|
144
|
+
vn.train(
|
|
145
|
+
question="Which products have the highest sales?",
|
|
146
|
+
sql="...",
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Generate SQL
|
|
151
|
+
```python
|
|
152
|
+
sql = vn.generate_sql(question="Who are the top 10 customers?")
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
# Documentation
|
|
156
|
+
[Full Documentation](https://docs.vanna.ai)
|
vanna-0.0.11/README.md
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
| GitHub | PyPI | Colab | Documentation |
|
|
2
|
+
| ------ | ---- | ----- | ------------- |
|
|
3
|
+
| [](https://github.com/vanna-ai/vanna-py) | [](https://pypi.org/project/vanna/) | [](https://colab.research.google.com/github/vanna-ai/vanna-py/blob/main/notebooks/vn-starter.ipynb) | [](https://docs.vanna.ai) |
|
|
4
|
+
|
|
5
|
+
# Vanna.AI
|
|
6
|
+
|
|
7
|
+
Vanna is a Python-based AI SQL co-pilot. Our initial users are data-savvy data analysts, data scientists, engineers, and similar people that use Vanna to automate writing complex SQL.
|
|
8
|
+
|
|
9
|
+
Vanna can:
|
|
10
|
+
- [Convert natural language to SQL](#natural-language-to-sql)
|
|
11
|
+
- [Run SQL](#run-sql)
|
|
12
|
+
- [Generate Plotly code](#generate-plotly-code)
|
|
13
|
+
- [Run Plotly code](#run-plotly-code)
|
|
14
|
+
- [Get better over time](#improve-your-training-data)
|
|
15
|
+
- Be used in Jupyter Notebooks, Colab, or other Python environments
|
|
16
|
+
- Be used with Snowflake, BigQuery, and other databases
|
|
17
|
+
- Be used with Python UIs, such as [Streamlit](https://github.com/vanna-ai/vanna-streamlit), Dash, and others
|
|
18
|
+
- Be used to make Slack bots
|
|
19
|
+
|
|
20
|
+
## Natural Language to SQL
|
|
21
|
+
```python
|
|
22
|
+
sql = vn.generate_sql(question='Who are the top 10 customers?')
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Output:
|
|
26
|
+
```sql
|
|
27
|
+
SELECT customer_name,
|
|
28
|
+
total_sales
|
|
29
|
+
FROM (SELECT c.c_name as customer_name,
|
|
30
|
+
sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales,
|
|
31
|
+
row_number() OVER (ORDER BY sum(l.l_extendedprice * (1 - l.l_discount)) desc) as rank
|
|
32
|
+
FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o
|
|
33
|
+
ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c
|
|
34
|
+
ON o.o_custkey = c.c_custkey
|
|
35
|
+
GROUP BY customer_name)
|
|
36
|
+
WHERE rank <= 10;
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Run SQL
|
|
40
|
+
This function is provided as a convenience. You can choose to run your SQL however you normally do and use the rest of the downstream functions.
|
|
41
|
+
```python
|
|
42
|
+
df = vn.get_results(cs, database, sql)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Output:
|
|
46
|
+
| customer_name | total_sales |
|
|
47
|
+
| ------------- | ----------- |
|
|
48
|
+
| Customer#000000001 | 68127.72 |
|
|
49
|
+
| Customer#000000002 | 65898.69 |
|
|
50
|
+
...
|
|
51
|
+
|
|
52
|
+
## Generate Plotly Code
|
|
53
|
+
```python
|
|
54
|
+
plotly_code = vn.generate_plotly_code(question=my_question, sql=sql, df=df)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Output:
|
|
58
|
+
```python
|
|
59
|
+
fig = go.Figure(go.Bar(
|
|
60
|
+
x=df['CUSTOMER_NAME'],
|
|
61
|
+
y=df['TOTAL_SALES'],
|
|
62
|
+
marker={'color': df['TOTAL_SALES'], 'colorscale': 'Viridis'},
|
|
63
|
+
text=df['TOTAL_SALES'],
|
|
64
|
+
textposition='auto',
|
|
65
|
+
))
|
|
66
|
+
|
|
67
|
+
fig.update_layout(
|
|
68
|
+
title="Top 10 Customers by Sales",
|
|
69
|
+
xaxis_title="Customer",
|
|
70
|
+
yaxis_title="Total Sales",
|
|
71
|
+
xaxis_tickangle=-45,
|
|
72
|
+
yaxis_tickprefix="$",
|
|
73
|
+
)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Run Plotly Code
|
|
77
|
+
```python
|
|
78
|
+
fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)
|
|
79
|
+
fig.show()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Output:
|
|
83
|
+

|
|
84
|
+
|
|
85
|
+
## Improve Your Training Data
|
|
86
|
+
```python
|
|
87
|
+
vn.store_sql(
|
|
88
|
+
question=my_question,
|
|
89
|
+
sql=sql,
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## How Vanna Works
|
|
94
|
+
```mermaid
|
|
95
|
+
flowchart LR
|
|
96
|
+
DB[(Known Correct Question-SQL)]
|
|
97
|
+
Try[Try to Use DDL/Documentation]
|
|
98
|
+
SQL(SQL)
|
|
99
|
+
Check{Is the SQL correct?}
|
|
100
|
+
Generate[fa:fa-circle-question Use Examples to Generate]
|
|
101
|
+
DB --> Find
|
|
102
|
+
Question[fa:fa-circle-question Question] --> Find{fa:fa-magnifying-glass Do we have similar questions?}
|
|
103
|
+
Find -- Yes --> Generate
|
|
104
|
+
Find -- No --> Try
|
|
105
|
+
Generate --> SQL
|
|
106
|
+
Try --> SQL
|
|
107
|
+
SQL --> Check
|
|
108
|
+
Check -- Yes --> DB
|
|
109
|
+
Check -- No --> Analyst[fa:fa-glasses Analyst Writes the SQL]
|
|
110
|
+
Analyst -- Adds --> DB
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
# Getting Started
|
|
114
|
+
|
|
115
|
+
## Install Vanna from PyPI and import it:
|
|
116
|
+
```python
|
|
117
|
+
%pip install vanna
|
|
118
|
+
import vanna as vn
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Enter your email to set an API Key
|
|
122
|
+
This will send a one-time code to your email address. Copy and paste the code into the prompt.
|
|
123
|
+
```python
|
|
124
|
+
my_email = '' # Enter your email here
|
|
125
|
+
vn.login(email=my_email)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Add Training Data
|
|
129
|
+
```python
|
|
130
|
+
vn.train(
|
|
131
|
+
question="Which products have the highest sales?",
|
|
132
|
+
sql="...",
|
|
133
|
+
)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Generate SQL
|
|
137
|
+
```python
|
|
138
|
+
sql = vn.generate_sql(question="Who are the top 10 customers?")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
# Documentation
|
|
142
|
+
[Full Documentation](https://docs.vanna.ai)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "vanna"
|
|
3
|
-
version = "0.0.6"
|
|
3
|
+
version = "0.0.11"
|
|
4
4
|
authors = [
|
|
5
5
|
{ name="Zain Hoda", email="zain@vanna.ai" },
|
|
6
6
|
]
|
|
@@ -13,7 +13,7 @@ classifiers = [
|
|
|
13
13
|
"Operating System :: OS Independent",
|
|
14
14
|
]
|
|
15
15
|
dependencies = [
|
|
16
|
-
"requests", "tabulate", "plotly"
|
|
16
|
+
"requests", "tabulate", "plotly", "pandas"
|
|
17
17
|
]
|
|
18
18
|
|
|
19
19
|
[project.urls]
|
|
@@ -4,7 +4,6 @@ Vanna.AI is a platform that allows you to ask questions about your data in plain
|
|
|
4
4
|
|
|
5
5
|
# API Reference
|
|
6
6
|
'''
|
|
7
|
-
print("Vanna.AI Imported")
|
|
8
7
|
|
|
9
8
|
import requests
|
|
10
9
|
import pandas as pd
|
|
@@ -13,16 +12,41 @@ import dataclasses
|
|
|
13
12
|
import plotly
|
|
14
13
|
import plotly.express as px
|
|
15
14
|
import plotly.graph_objects as go
|
|
16
|
-
from .types import SQLAnswer, Explanation, QuestionSQLPair, Question, QuestionId, DataResult, PlotlyResult, Status, FullQuestionDocument, QuestionList, QuestionCategory, AccuracyStats, UserEmail, UserOTP, ApiKey, OrganizationList, Organization, NewOrganization
|
|
17
|
-
from typing import List, Dict, Any, Union, Optional
|
|
15
|
+
from .types import SQLAnswer, Explanation, QuestionSQLPair, Question, QuestionId, DataResult, PlotlyResult, Status, FullQuestionDocument, QuestionList, QuestionCategory, AccuracyStats, UserEmail, UserOTP, ApiKey, OrganizationList, Organization, NewOrganization, StringData, QuestionStringList, Visibility, NewOrganizationMember, DataFrameJSON
|
|
16
|
+
from typing import List, Dict, Any, Union, Optional, Callable, Tuple
|
|
17
|
+
import warnings
|
|
18
|
+
import traceback
|
|
18
19
|
|
|
19
|
-
"""Set the API key for Vanna.AI."""
|
|
20
20
|
api_key: Union[str, None] = None # API key for Vanna.AI
|
|
21
|
+
"""
|
|
22
|
+
## Example
|
|
23
|
+
```python
|
|
24
|
+
# Login to Vanna.AI
|
|
25
|
+
vn.login('user@example.com')
|
|
26
|
+
print(vn.api_key)
|
|
27
|
+
|
|
28
|
+
vn.api_key='my_api_key'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
This is the API key for Vanna.AI. You can set it manually if you have it or use [`vn.login(...)`][vanna.login] to login and set it automatically.
|
|
32
|
+
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
sql_to_df: Union[Callable[[str], pd.DataFrame], None] = None # Function to convert SQL to a Pandas DataFrame
|
|
36
|
+
"""
|
|
37
|
+
## Example
|
|
38
|
+
```python
|
|
39
|
+
vn.sql_to_df = lambda sql: pd.read_sql(sql, engine)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Set the SQL to DataFrame function for Vanna.AI. This is used in the [`vn.ask(...)`][vanna.ask] function.
|
|
43
|
+
|
|
44
|
+
"""
|
|
21
45
|
|
|
22
46
|
__org: Union[str, None] = None # Organization name for Vanna.AI
|
|
23
47
|
|
|
24
|
-
_endpoint = "https://
|
|
25
|
-
_unauthenticated_endpoint = "https://
|
|
48
|
+
_endpoint = "https://vanna-rpc-test-x5y3argz6q-uc.a.run.app/rpc"
|
|
49
|
+
_unauthenticated_endpoint = "https://vanna-rpc-test-x5y3argz6q-uc.a.run.app/unauthenticated_rpc"
|
|
26
50
|
|
|
27
51
|
def __unauthenticated_rpc_call(method, params):
|
|
28
52
|
headers = {
|
|
@@ -41,10 +65,10 @@ def __rpc_call(method, params):
|
|
|
41
65
|
global __org
|
|
42
66
|
|
|
43
67
|
if api_key is None:
|
|
44
|
-
raise Exception("API key not set")
|
|
68
|
+
raise Exception("API key not set. Use vn.login(...) to login.")
|
|
45
69
|
|
|
46
70
|
if __org is None and method != "list_orgs":
|
|
47
|
-
raise Exception("
|
|
71
|
+
raise Exception("Datasets not set. Use vn.use_datasets([...]) to set the datasets to use.")
|
|
48
72
|
|
|
49
73
|
if method != "list_orgs":
|
|
50
74
|
headers = {
|
|
@@ -56,7 +80,7 @@ def __rpc_call(method, params):
|
|
|
56
80
|
headers = {
|
|
57
81
|
'Content-Type': 'application/json',
|
|
58
82
|
'Vanna-Key': api_key,
|
|
59
|
-
'Vanna-Org': 'demo-
|
|
83
|
+
'Vanna-Org': 'demo-tpc-h'
|
|
60
84
|
}
|
|
61
85
|
|
|
62
86
|
data = {
|
|
@@ -101,9 +125,9 @@ def login(email: str, otp_code: Union[str, None] = None) -> bool:
|
|
|
101
125
|
if not status.success:
|
|
102
126
|
return False
|
|
103
127
|
|
|
104
|
-
|
|
128
|
+
otp_code = input("Check your email for the code and enter it here: ")
|
|
105
129
|
|
|
106
|
-
params = [UserOTP(email=email, otp=
|
|
130
|
+
params = [UserOTP(email=email, otp=otp_code)]
|
|
107
131
|
|
|
108
132
|
d = __unauthenticated_rpc_call(method="verify_otp", params=params)
|
|
109
133
|
|
|
@@ -120,17 +144,17 @@ def login(email: str, otp_code: Union[str, None] = None) -> bool:
|
|
|
120
144
|
|
|
121
145
|
return True
|
|
122
146
|
|
|
123
|
-
def list_orgs() -> List[str]:
|
|
147
|
+
def list_datasets() -> List[str]:
|
|
124
148
|
"""
|
|
125
149
|
## Example
|
|
126
150
|
```python
|
|
127
|
-
|
|
151
|
+
datasets = vn.list_datasets()
|
|
128
152
|
```
|
|
129
153
|
|
|
130
|
-
List the
|
|
154
|
+
List the datasets that the user is a member of.
|
|
131
155
|
|
|
132
156
|
Returns:
|
|
133
|
-
List[str]: A list of
|
|
157
|
+
List[str]: A list of dataset names.
|
|
134
158
|
"""
|
|
135
159
|
d = __rpc_call(method="list_orgs", params=[])
|
|
136
160
|
|
|
@@ -141,23 +165,23 @@ def list_orgs() -> List[str]:
|
|
|
141
165
|
|
|
142
166
|
return orgs.organizations
|
|
143
167
|
|
|
144
|
-
def create_org(org: str, db_type: str) -> bool:
|
|
168
|
+
def create_dataset(dataset: str, db_type: str) -> bool:
|
|
145
169
|
"""
|
|
146
170
|
## Example
|
|
147
171
|
```python
|
|
148
|
-
vn.
|
|
172
|
+
vn.create_dataset(dataset="my-dataset", db_type="postgres")
|
|
149
173
|
```
|
|
150
174
|
|
|
151
|
-
Create a new
|
|
175
|
+
Create a new dataset.
|
|
152
176
|
|
|
153
177
|
Args:
|
|
154
|
-
|
|
155
|
-
db_type (str): The type of database to use for the
|
|
178
|
+
dataset (str): The name of the dataset to create.
|
|
179
|
+
db_type (str): The type of database to use for the dataset. This can be "Snowflake", "BigQuery", "Postgres", or anything else.
|
|
156
180
|
|
|
157
181
|
Returns:
|
|
158
|
-
bool: True if the
|
|
182
|
+
bool: True if the dataset was created successfully, False otherwise.
|
|
159
183
|
"""
|
|
160
|
-
params = [NewOrganization(org_name=
|
|
184
|
+
params = [NewOrganization(org_name=dataset, db_type=db_type)]
|
|
161
185
|
|
|
162
186
|
d = __rpc_call(method="create_org", params=params)
|
|
163
187
|
|
|
@@ -168,46 +192,115 @@ def create_org(org: str, db_type: str) -> bool:
|
|
|
168
192
|
|
|
169
193
|
return status.success
|
|
170
194
|
|
|
195
|
+
def add_user_to_dataset(dataset: str, email: str, is_admin: bool) -> bool:
|
|
196
|
+
"""
|
|
197
|
+
## Example
|
|
198
|
+
```python
|
|
199
|
+
vn.add_user_to_dataset(dataset="my-dataset", email="user@example.com", is_admin=False)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Add a user to a dataset.
|
|
171
203
|
|
|
172
|
-
|
|
204
|
+
Args:
|
|
205
|
+
dataset (str): The name of the dataset to add the user to.
|
|
206
|
+
email (str): The email address of the user to add.
|
|
207
|
+
is_admin (bool): Whether or not the user should be an admin.
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
bool: True if the user was added successfully, False otherwise.
|
|
211
|
+
"""
|
|
212
|
+
|
|
213
|
+
params = [NewOrganizationMember(org_name=dataset, email=email, is_admin=is_admin)]
|
|
214
|
+
|
|
215
|
+
d = __rpc_call(method="add_user_to_org", params=params)
|
|
216
|
+
|
|
217
|
+
if 'result' not in d:
|
|
218
|
+
return False
|
|
219
|
+
|
|
220
|
+
status = Status(**d['result'])
|
|
221
|
+
|
|
222
|
+
if not status.success:
|
|
223
|
+
print(status.message)
|
|
224
|
+
|
|
225
|
+
return status.success
|
|
226
|
+
|
|
227
|
+
def set_dataset_visibility(visibility: bool) -> bool:
|
|
173
228
|
"""
|
|
174
229
|
## Example
|
|
175
230
|
```python
|
|
176
|
-
vn.
|
|
231
|
+
vn.set_dataset_visibility(visibility=True)
|
|
177
232
|
```
|
|
178
233
|
|
|
179
|
-
Set the
|
|
234
|
+
Set the visibility of the current dataset. If a dataset is visible, anyone can see it. If it is not visible, only members of the dataset can see it.
|
|
180
235
|
|
|
181
236
|
Args:
|
|
182
|
-
|
|
237
|
+
visibility (bool): Whether or not the dataset should be publicly visible.
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
bool: True if the dataset visibility was set successfully, False otherwise.
|
|
183
241
|
"""
|
|
242
|
+
params = [Visibility(visibility=visibility)]
|
|
243
|
+
|
|
244
|
+
d = __rpc_call(method="set_org_visibility", params=params)
|
|
245
|
+
|
|
246
|
+
if 'result' not in d:
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
status = Status(**d['result'])
|
|
250
|
+
|
|
251
|
+
return status.success
|
|
252
|
+
|
|
253
|
+
def _set_org(org: str) -> None:
|
|
184
254
|
global __org
|
|
185
255
|
|
|
186
|
-
my_orgs =
|
|
256
|
+
my_orgs = list_datasets()
|
|
187
257
|
if org not in my_orgs:
|
|
188
258
|
# Check if org exists
|
|
189
259
|
d = __unauthenticated_rpc_call(method="check_org_exists", params=[Organization(name=org, user=None, connection=None)])
|
|
190
260
|
|
|
191
261
|
if 'result' not in d:
|
|
192
|
-
raise Exception("Failed to check if
|
|
262
|
+
raise Exception("Failed to check if dataset exists")
|
|
193
263
|
|
|
194
264
|
status = Status(**d['result'])
|
|
195
265
|
|
|
196
266
|
if status.success:
|
|
197
267
|
raise Exception(f"An organization with the name {org} already exists")
|
|
198
268
|
|
|
199
|
-
create = input(f"Would you like to create
|
|
269
|
+
create = input(f"Would you like to create dataset '{org}'? (y/n): ")
|
|
200
270
|
|
|
201
271
|
if create.lower() == 'y':
|
|
202
272
|
db_type = input("What type of database would you like to use? (Snowflake, BigQuery, Postgres, etc.): ")
|
|
203
|
-
|
|
273
|
+
__org = 'demo-tpc-h'
|
|
274
|
+
if create_dataset(dataset=org, db_type=db_type):
|
|
204
275
|
__org = org
|
|
205
276
|
else:
|
|
206
|
-
|
|
277
|
+
__org = None
|
|
278
|
+
raise Exception("Failed to create dataset")
|
|
207
279
|
else:
|
|
208
280
|
__org = org
|
|
209
281
|
|
|
210
|
-
|
|
282
|
+
|
|
283
|
+
def use_datasets(datasets: List[str]):
|
|
284
|
+
"""
|
|
285
|
+
## Example
|
|
286
|
+
```python
|
|
287
|
+
vn.use_datasets(datasets=["employees", "departments"])
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Set the datasets to use for the Vanna.AI API.
|
|
291
|
+
|
|
292
|
+
Args:
|
|
293
|
+
datasets (List[str]): A list of dataset names.
|
|
294
|
+
|
|
295
|
+
Returns:
|
|
296
|
+
None: This function does not return a value; an Exception is raised if no datasets are provided.
|
|
297
|
+
"""
|
|
298
|
+
if len(datasets) >= 1:
|
|
299
|
+
_set_org(org=datasets[0])
|
|
300
|
+
else:
|
|
301
|
+
raise Exception("No datasets provided")
|
|
302
|
+
|
|
303
|
+
def store_sql(question: str, sql: str, tag: Union[str, None] = "Manually Trained") -> bool:
|
|
211
304
|
"""
|
|
212
305
|
## Example
|
|
213
306
|
```python
|
|
@@ -222,10 +315,12 @@ def store_sql(question: str, sql: str) -> bool:
|
|
|
222
315
|
Args:
|
|
223
316
|
question (str): The question to store.
|
|
224
317
|
sql (str): The SQL query to store.
|
|
318
|
+
tag (Union[str, None]): A tag to associate with the question and SQL query.
|
|
225
319
|
"""
|
|
226
320
|
params = [QuestionSQLPair(
|
|
227
321
|
question=question,
|
|
228
322
|
sql=sql,
|
|
323
|
+
tag=tag
|
|
229
324
|
)]
|
|
230
325
|
|
|
231
326
|
d = __rpc_call(method="store_sql", params=params)
|
|
@@ -237,6 +332,56 @@ def store_sql(question: str, sql: str) -> bool:
|
|
|
237
332
|
|
|
238
333
|
return status.success
|
|
239
334
|
|
|
335
|
+
def store_ddl(ddl: str) -> bool:
|
|
336
|
+
"""
|
|
337
|
+
## Example
|
|
338
|
+
```python
|
|
339
|
+
vn.store_ddl(
|
|
340
|
+
ddl="CREATE TABLE employees (id INT, name VARCHAR(255), salary INT)"
|
|
341
|
+
)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Store a DDL statement in the Vanna.AI database.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
ddl (str): The DDL statement to store.
|
|
348
|
+
"""
|
|
349
|
+
params = [StringData(data=ddl)]
|
|
350
|
+
|
|
351
|
+
d = __rpc_call(method="store_ddl", params=params)
|
|
352
|
+
|
|
353
|
+
if 'result' not in d:
|
|
354
|
+
return False
|
|
355
|
+
|
|
356
|
+
status = Status(**d['result'])
|
|
357
|
+
|
|
358
|
+
return status.success
|
|
359
|
+
|
|
360
|
+
def store_documentation(documentation: str) -> bool:
|
|
361
|
+
"""
|
|
362
|
+
## Example
|
|
363
|
+
```python
|
|
364
|
+
vn.store_documentation(
|
|
365
|
+
documentation="Our organization's definition of sales is the discount price of an item multiplied by the quantity sold."
|
|
366
|
+
)
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
Store a documentation string in the Vanna.AI database.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
documentation (str): The documentation string to store.
|
|
373
|
+
"""
|
|
374
|
+
params = [StringData(data=documentation)]
|
|
375
|
+
|
|
376
|
+
d = __rpc_call(method="store_documentation", params=params)
|
|
377
|
+
|
|
378
|
+
if 'result' not in d:
|
|
379
|
+
return False
|
|
380
|
+
|
|
381
|
+
status = Status(**d['result'])
|
|
382
|
+
|
|
383
|
+
return status.success
|
|
384
|
+
|
|
240
385
|
def train(question: str, sql: str) -> bool:
|
|
241
386
|
"""
|
|
242
387
|
## Example
|
|
@@ -261,7 +406,7 @@ def flag_sql_for_review(question: str, sql: Union[str, None] = None, error_msg:
|
|
|
261
406
|
```python
|
|
262
407
|
vn.flag_sql_for_review(question="What is the average salary of employees?")
|
|
263
408
|
```
|
|
264
|
-
Flag a question and its corresponding SQL query for review. You can
|
|
409
|
+
Flag a question and its corresponding SQL query for review. You can see the tag show up in [`vn.get_all_questions()`][vanna.get_all_questions].
|
|
265
410
|
|
|
266
411
|
Args:
|
|
267
412
|
question (str): The question to flag.
|
|
@@ -287,6 +432,34 @@ def flag_sql_for_review(question: str, sql: Union[str, None] = None, error_msg:
|
|
|
287
432
|
|
|
288
433
|
return status.success
|
|
289
434
|
|
|
435
|
+
# def read_questions_from_github(url: str) -> List[QuestionSQLPair]:
|
|
436
|
+
# """
|
|
437
|
+
# ## Example
|
|
438
|
+
# ```python
|
|
439
|
+
# url = "https://raw.githubusercontent.com/vanna-ai/vanna-ai/main/data/questions.json"
|
|
440
|
+
# questions = vn.read_questions_from_github(url)
|
|
441
|
+
# ```
|
|
442
|
+
# Read questions and SQL queries from a GitHub URL.
|
|
443
|
+
|
|
444
|
+
# Args:
|
|
445
|
+
# url (str): The URL to read from.
|
|
446
|
+
|
|
447
|
+
# Returns:
|
|
448
|
+
# List[QuestionSQLPair]: A list of [`QuestionSQLPair`][vanna.QuestionSQLPair] objects.
|
|
449
|
+
# """
|
|
450
|
+
# response = requests.get(url)
|
|
451
|
+
# data = response.json()
|
|
452
|
+
|
|
453
|
+
# question_sql_pairs = []
|
|
454
|
+
# for item in data:
|
|
455
|
+
# question = item.get('question')
|
|
456
|
+
# sql = item.get('sql')
|
|
457
|
+
# if question and sql:
|
|
458
|
+
# question_sql_pair = QuestionSQLPair(question=question, sql=sql)
|
|
459
|
+
# question_sql_pairs.append(question_sql_pair)
|
|
460
|
+
|
|
461
|
+
# return question_sql_pairs
|
|
462
|
+
|
|
290
463
|
def remove_sql(question: str) -> bool:
|
|
291
464
|
"""
|
|
292
465
|
## Example
|
|
@@ -337,7 +510,65 @@ def generate_sql(question: str) -> str:
|
|
|
337
510
|
|
|
338
511
|
return sql_answer.sql
|
|
339
512
|
|
|
340
|
-
def ask(question: str) -> str:
|
|
513
|
+
def generate_followup_questions(question: str, df: pd.DataFrame) -> List[str]:
|
|
514
|
+
"""
|
|
515
|
+
## Example
|
|
516
|
+
```python
|
|
517
|
+
vn.generate_followup_questions(question="What is the average salary of employees?", df=df)
|
|
518
|
+
# ['What is the average salary of employees in the Sales department?', 'What is the average salary of employees in the Engineering department?', ...]
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
Generate follow-up questions using the Vanna.AI API.
|
|
522
|
+
|
|
523
|
+
Args:
|
|
524
|
+
question (str): The question to generate follow-up questions for.
|
|
525
|
+
df (pd.DataFrame): The DataFrame to generate follow-up questions for.
|
|
526
|
+
|
|
527
|
+
Returns:
|
|
528
|
+
List[str] or None: The follow-up questions, or None if an error occurred.
|
|
529
|
+
"""
|
|
530
|
+
params = [DataResult(
|
|
531
|
+
question=question,
|
|
532
|
+
sql=None,
|
|
533
|
+
table_markdown=df.head().to_markdown(),
|
|
534
|
+
error=None,
|
|
535
|
+
correction_attempts=0,
|
|
536
|
+
)]
|
|
537
|
+
|
|
538
|
+
d = __rpc_call(method="generate_followup_questions", params=params)
|
|
539
|
+
|
|
540
|
+
if 'result' not in d:
|
|
541
|
+
return None
|
|
542
|
+
|
|
543
|
+
# Load the result into a dataclass
|
|
544
|
+
question_string_list = QuestionStringList(**d['result'])
|
|
545
|
+
|
|
546
|
+
return question_string_list.questions
|
|
547
|
+
|
|
548
|
+
def generate_questions() -> List[str]:
|
|
549
|
+
"""
|
|
550
|
+
## Example
|
|
551
|
+
```python
|
|
552
|
+
vn.generate_questions()
|
|
553
|
+
# ['What is the average salary of employees?', 'What is the total salary of employees?', ...]
|
|
554
|
+
```
|
|
555
|
+
|
|
556
|
+
Generate questions using the Vanna.AI API.
|
|
557
|
+
|
|
558
|
+
Returns:
|
|
559
|
+
List[str] or None: The questions, or None if an error occurred.
|
|
560
|
+
"""
|
|
561
|
+
d = __rpc_call(method="generate_questions", params=[])
|
|
562
|
+
|
|
563
|
+
if 'result' not in d:
|
|
564
|
+
return None
|
|
565
|
+
|
|
566
|
+
# Load the result into a dataclass
|
|
567
|
+
question_string_list = QuestionStringList(**d['result'])
|
|
568
|
+
|
|
569
|
+
return question_string_list.questions
|
|
570
|
+
|
|
571
|
+
def ask(question: Union[str, None] = None, print_results: bool = True, auto_train: bool = True) -> Tuple[Union[str, None], Union[pd.DataFrame, None], Union[plotly.graph_objs.Figure, None]]:
|
|
341
572
|
"""
|
|
342
573
|
## Example
|
|
343
574
|
```python
|
|
@@ -345,15 +576,61 @@ def ask(question: str) -> str:
|
|
|
345
576
|
# SELECT AVG(salary) FROM employees
|
|
346
577
|
```
|
|
347
578
|
|
|
348
|
-
Ask a question using the Vanna.AI API. This
|
|
579
|
+
Ask a question using the Vanna.AI API. This generates an SQL query, runs it, and returns the results in a dataframe and a Plotly figure.
|
|
349
580
|
|
|
350
581
|
Args:
|
|
351
|
-
question (str): The question to ask.
|
|
582
|
+
question (str): The question to ask. If None, you will be prompted to enter a question.
|
|
352
583
|
|
|
353
584
|
Returns:
|
|
354
585
|
str or None: The SQL query, or None if an error occurred.
|
|
586
|
+
pd.DataFrame or None: The results of the SQL query, or None if an error occurred.
|
|
587
|
+
plotly.graph_objs.Figure or None: The Plotly figure, or None if an error occurred.
|
|
355
588
|
"""
|
|
356
|
-
|
|
589
|
+
|
|
590
|
+
if question is None:
|
|
591
|
+
question = input("Enter a question: ")
|
|
592
|
+
|
|
593
|
+
try:
|
|
594
|
+
sql = generate_sql(question=question)
|
|
595
|
+
except Exception as e:
|
|
596
|
+
print(e)
|
|
597
|
+
return None, None, None
|
|
598
|
+
|
|
599
|
+
if print_results:
|
|
600
|
+
print(sql)
|
|
601
|
+
|
|
602
|
+
if sql_to_df is None:
|
|
603
|
+
print("If you want to run the SQL query, provide a vn.sql_to_df function.")
|
|
604
|
+
return sql, None, None
|
|
605
|
+
|
|
606
|
+
try:
|
|
607
|
+
df = sql_to_df(sql=sql)
|
|
608
|
+
|
|
609
|
+
if print_results:
|
|
610
|
+
print(df.head().to_markdown())
|
|
611
|
+
|
|
612
|
+
if len(df) > 0 and auto_train:
|
|
613
|
+
store_sql(question=question, sql=sql, tag="SQL Ran")
|
|
614
|
+
|
|
615
|
+
try:
|
|
616
|
+
plotly_code = generate_plotly_code(question=question, sql=sql, df=df)
|
|
617
|
+
fig = get_plotly_figure(plotly_code=plotly_code, df=df)
|
|
618
|
+
if print_results:
|
|
619
|
+
fig.show()
|
|
620
|
+
|
|
621
|
+
return sql, df, fig
|
|
622
|
+
|
|
623
|
+
except Exception as e:
|
|
624
|
+
# Print stack trace
|
|
625
|
+
traceback.print_exc()
|
|
626
|
+
print("Couldn't run plotly code: ", e)
|
|
627
|
+
return sql, df, None
|
|
628
|
+
|
|
629
|
+
except Exception as e:
|
|
630
|
+
print("Couldn't run sql: ", e)
|
|
631
|
+
return sql, None, None
|
|
632
|
+
|
|
633
|
+
|
|
357
634
|
|
|
358
635
|
def generate_plotly_code(question: Union[str, None], sql: Union[str, None], df: pd.DataFrame) -> str:
|
|
359
636
|
"""
|
|
@@ -428,10 +705,7 @@ def get_plotly_figure(plotly_code: str, df: pd.DataFrame, dark_mode: bool = True
|
|
|
428
705
|
|
|
429
706
|
def get_results(cs, default_database: str, sql: str) -> pd.DataFrame:
|
|
430
707
|
"""
|
|
431
|
-
|
|
432
|
-
```python
|
|
433
|
-
df = vn.get_results(cs=cs, default_database="PUBLIC", sql="SELECT * FROM students")
|
|
434
|
-
```
|
|
708
|
+
DEPRECATED. Use `vn.sql_to_df` instead.
|
|
435
709
|
Run the SQL query and return the results as a pandas dataframe. This is just a helper function that does not use the Vanna.AI API.
|
|
436
710
|
|
|
437
711
|
Args:
|
|
@@ -442,6 +716,9 @@ def get_results(cs, default_database: str, sql: str) -> pd.DataFrame:
|
|
|
442
716
|
Returns:
|
|
443
717
|
pd.DataFrame: The results of the SQL query.
|
|
444
718
|
"""
|
|
719
|
+
print("`vn.get_results()` is deprecated. Use `vn.sql_to_df()` instead.")
|
|
720
|
+
warnings.warn("`vn.get_results()` is deprecated. Use `vn.sql_to_df()` instead.")
|
|
721
|
+
|
|
445
722
|
cs.execute(f"USE DATABASE {default_database}")
|
|
446
723
|
|
|
447
724
|
cur = cs.execute(sql)
|
|
@@ -524,55 +801,31 @@ def generate_question(sql: str) -> str:
|
|
|
524
801
|
|
|
525
802
|
return question.question
|
|
526
803
|
|
|
527
|
-
def
|
|
804
|
+
def get_all_questions() -> pd.DataFrame:
|
|
528
805
|
"""
|
|
529
806
|
|
|
530
807
|
## Example
|
|
531
808
|
```python
|
|
532
|
-
questions = vn.
|
|
809
|
+
questions = vn.get_all_questions()
|
|
533
810
|
```
|
|
534
811
|
|
|
535
|
-
Get a list of
|
|
812
|
+
Get a list of questions from the Vanna.AI API.
|
|
536
813
|
|
|
537
814
|
Returns:
|
|
538
|
-
|
|
815
|
+
pd.DataFrame or None: The list of questions, or None if an error occurred.
|
|
539
816
|
|
|
540
817
|
"""
|
|
541
818
|
# params = [Question(question="")]
|
|
542
819
|
params = []
|
|
543
820
|
|
|
544
|
-
d = __rpc_call(method="
|
|
821
|
+
d = __rpc_call(method="get_all_questions", params=params)
|
|
545
822
|
|
|
546
823
|
if 'result' not in d:
|
|
547
824
|
return None
|
|
548
825
|
|
|
549
826
|
# Load the result into a dataclass
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
return flagged_questions
|
|
553
|
-
|
|
554
|
-
def get_accuracy_stats() -> AccuracyStats:
|
|
555
|
-
"""
|
|
556
|
-
|
|
557
|
-
## Example
|
|
558
|
-
```python
|
|
559
|
-
vn.get_accuracy_stats()
|
|
560
|
-
```
|
|
561
|
-
|
|
562
|
-
Get the accuracy statistics from the Vanna.AI API.
|
|
563
|
-
|
|
564
|
-
Returns:
|
|
565
|
-
dict or None: The accuracy statistics, or None if an error occurred.
|
|
566
|
-
|
|
567
|
-
"""
|
|
568
|
-
params = []
|
|
569
|
-
|
|
570
|
-
d = __rpc_call(method="get_accuracy_stats", params=params)
|
|
827
|
+
all_questions = DataFrameJSON(**d['result'])
|
|
571
828
|
|
|
572
|
-
|
|
573
|
-
return None
|
|
829
|
+
df = pd.read_json(all_questions.data)
|
|
574
830
|
|
|
575
|
-
|
|
576
|
-
accuracy_stats = AccuracyStats(**d['result'])
|
|
577
|
-
|
|
578
|
-
return accuracy_stats
|
|
831
|
+
return df
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
-
from typing import List, Dict
|
|
2
|
+
from typing import List, Dict, Union
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
|
|
5
5
|
@dataclass
|
|
@@ -23,6 +23,7 @@ class FullQuestionDocument:
|
|
|
23
23
|
class QuestionSQLPair:
|
|
24
24
|
question: str
|
|
25
25
|
sql: str
|
|
26
|
+
tag: Union[str, None]
|
|
26
27
|
|
|
27
28
|
@dataclass
|
|
28
29
|
class Organization:
|
|
@@ -34,6 +35,14 @@ class Organization:
|
|
|
34
35
|
class OrganizationList:
|
|
35
36
|
organizations: List[str]
|
|
36
37
|
|
|
38
|
+
@dataclass
|
|
39
|
+
class QuestionStringList:
|
|
40
|
+
questions: List[str]
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class Visibility:
|
|
44
|
+
visibility: bool
|
|
45
|
+
|
|
37
46
|
@dataclass
|
|
38
47
|
class UserEmail:
|
|
39
48
|
email: str
|
|
@@ -43,6 +52,12 @@ class NewOrganization:
|
|
|
43
52
|
org_name: str
|
|
44
53
|
db_type: str
|
|
45
54
|
|
|
55
|
+
@dataclass
|
|
56
|
+
class NewOrganizationMember:
|
|
57
|
+
org_name: str
|
|
58
|
+
email: str
|
|
59
|
+
is_admin: bool
|
|
60
|
+
|
|
46
61
|
@dataclass
|
|
47
62
|
class UserOTP:
|
|
48
63
|
email: str
|
|
@@ -68,7 +83,7 @@ class QuestionCategory:
|
|
|
68
83
|
NO_SQL_GENERATED = "No SQL Generated"
|
|
69
84
|
SQL_UNABLE_TO_RUN = "SQL Unable to Run"
|
|
70
85
|
BOOTSTRAP_TRAINING_QUERY = "Bootstrap Training Query"
|
|
71
|
-
|
|
86
|
+
SQL_RAN = "SQL Ran Successfully"
|
|
72
87
|
FLAGGED_FOR_REVIEW = "Flagged for Review"
|
|
73
88
|
REVIEWED_AND_APPROVED = "Reviewed and Approved"
|
|
74
89
|
REVIEWED_AND_REJECTED = "Reviewed and Rejected"
|
|
@@ -140,4 +155,12 @@ class ColumnDefinition:
|
|
|
140
155
|
@dataclass
|
|
141
156
|
class Diagram:
|
|
142
157
|
raw: str
|
|
143
|
-
mermaid_code: str
|
|
158
|
+
mermaid_code: str
|
|
159
|
+
|
|
160
|
+
@dataclass
|
|
161
|
+
class StringData:
|
|
162
|
+
data: str
|
|
163
|
+
|
|
164
|
+
@dataclass
|
|
165
|
+
class DataFrameJSON:
|
|
166
|
+
data: str
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: vanna
|
|
3
|
+
Version: 0.0.11
|
|
4
|
+
Summary: Generate SQL queries from natural language
|
|
5
|
+
Author-email: Zain Hoda <zain@vanna.ai>
|
|
6
|
+
Project-URL: Homepage, https://github.com/vanna-ai/vanna-py
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna-py/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.7
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
|
|
15
|
+
| GitHub | PyPI | Colab | Documentation |
|
|
16
|
+
| ------ | ---- | ----- | ------------- |
|
|
17
|
+
| [GitHub](https://github.com/vanna-ai/vanna-py) | [PyPI](https://pypi.org/project/vanna/) | [Colab](https://colab.research.google.com/github/vanna-ai/vanna-py/blob/main/notebooks/vn-starter.ipynb) | [Documentation](https://docs.vanna.ai) |
|
|
18
|
+
|
|
19
|
+
# Vanna.AI
|
|
20
|
+
|
|
21
|
+
Vanna is a Python-based AI SQL co-pilot. Our initial users are data-savvy data analysts, data scientists, engineers, and similar people who use Vanna to automate writing complex SQL.
|
|
22
|
+
|
|
23
|
+
Vanna can:
|
|
24
|
+
- [Convert natural language to SQL](#natural-language-to-sql)
|
|
25
|
+
- [Run SQL](#run-sql)
|
|
26
|
+
- [Generate Plotly code](#generate-plotly-code)
|
|
27
|
+
- [Run Plotly code](#run-plotly-code)
|
|
28
|
+
- [Get better over time](#improve-your-training-data)
|
|
29
|
+
- Be used in Jupyter Notebooks, Colab, or other Python environments
|
|
30
|
+
- Be used with Snowflake, BigQuery, and other databases
|
|
31
|
+
- Be used with Python UIs, such as [Streamlit](https://github.com/vanna-ai/vanna-streamlit), Dash, and others
|
|
32
|
+
- Be used to make Slack bots
|
|
33
|
+
|
|
34
|
+
## Natural Language to SQL
|
|
35
|
+
```python
|
|
36
|
+
sql = vn.generate_sql(question='Who are the top 10 customers?')
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Output:
|
|
40
|
+
```sql
|
|
41
|
+
SELECT customer_name,
|
|
42
|
+
total_sales
|
|
43
|
+
FROM (SELECT c.c_name as customer_name,
|
|
44
|
+
sum(l.l_extendedprice * (1 - l.l_discount)) as total_sales,
|
|
45
|
+
row_number() OVER (ORDER BY sum(l.l_extendedprice * (1 - l.l_discount)) desc) as rank
|
|
46
|
+
FROM snowflake_sample_data.tpch_sf1.lineitem l join snowflake_sample_data.tpch_sf1.orders o
|
|
47
|
+
ON l.l_orderkey = o.o_orderkey join snowflake_sample_data.tpch_sf1.customer c
|
|
48
|
+
ON o.o_custkey = c.c_custkey
|
|
49
|
+
GROUP BY customer_name)
|
|
50
|
+
WHERE rank <= 10;
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Run SQL
|
|
54
|
+
This function is provided as a convenience. You can choose to run your SQL however you normally do and use the rest of the downstream functions.
|
|
55
|
+
```python
|
|
56
|
+
df = vn.get_results(cs, database, sql)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Output:
|
|
60
|
+
| customer_name | total_sales |
|
|
61
|
+
| ------------- | ----------- |
|
|
62
|
+
| Customer#000000001 | 68127.72 |
|
|
63
|
+
| Customer#000000002 | 65898.69 |
|
|
64
|
+
...
|
|
65
|
+
|
|
66
|
+
## Generate Plotly Code
|
|
67
|
+
```python
|
|
68
|
+
plotly_code = vn.generate_plotly_code(question=my_question, sql=sql, df=df)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Output:
|
|
72
|
+
```python
|
|
73
|
+
fig = go.Figure(go.Bar(
|
|
74
|
+
x=df['CUSTOMER_NAME'],
|
|
75
|
+
y=df['TOTAL_SALES'],
|
|
76
|
+
marker={'color': df['TOTAL_SALES'], 'colorscale': 'Viridis'},
|
|
77
|
+
text=df['TOTAL_SALES'],
|
|
78
|
+
textposition='auto',
|
|
79
|
+
))
|
|
80
|
+
|
|
81
|
+
fig.update_layout(
|
|
82
|
+
title="Top 10 Customers by Sales",
|
|
83
|
+
xaxis_title="Customer",
|
|
84
|
+
yaxis_title="Total Sales",
|
|
85
|
+
xaxis_tickangle=-45,
|
|
86
|
+
yaxis_tickprefix="$",
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Run Plotly Code
|
|
91
|
+
```python
|
|
92
|
+
fig = vn.get_plotly_figure(plotly_code=plotly_code, df=df)
|
|
93
|
+
fig.show()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Output:
|
|
97
|
+

|
|
98
|
+
|
|
99
|
+
## Improve Your Training Data
|
|
100
|
+
```python
|
|
101
|
+
vn.store_sql(
|
|
102
|
+
question=my_question,
|
|
103
|
+
sql=sql,
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## How Vanna Works
|
|
108
|
+
```mermaid
|
|
109
|
+
flowchart LR
|
|
110
|
+
DB[(Known Correct Question-SQL)]
|
|
111
|
+
Try[Try to Use DDL/Documentation]
|
|
112
|
+
SQL(SQL)
|
|
113
|
+
Check{Is the SQL correct?}
|
|
114
|
+
Generate[fa:fa-circle-question Use Examples to Generate]
|
|
115
|
+
DB --> Find
|
|
116
|
+
Question[fa:fa-circle-question Question] --> Find{fa:fa-magnifying-glass Do we have similar questions?}
|
|
117
|
+
Find -- Yes --> Generate
|
|
118
|
+
Find -- No --> Try
|
|
119
|
+
Generate --> SQL
|
|
120
|
+
Try --> SQL
|
|
121
|
+
SQL --> Check
|
|
122
|
+
Check -- Yes --> DB
|
|
123
|
+
Check -- No --> Analyst[fa:fa-glasses Analyst Writes the SQL]
|
|
124
|
+
Analyst -- Adds --> DB
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
# Getting Started
|
|
128
|
+
|
|
129
|
+
## Install Vanna from PyPI and import it:
|
|
130
|
+
```python
|
|
131
|
+
%pip install vanna
|
|
132
|
+
import vanna as vn
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Enter your email to set an API Key
|
|
136
|
+
This will send a one-time code to your email address. Copy and paste the code into the prompt.
|
|
137
|
+
```python
|
|
138
|
+
my_email = '' # Enter your email here
|
|
139
|
+
vn.login(email=my_email)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Add Training Data
|
|
143
|
+
```python
|
|
144
|
+
vn.train(
|
|
145
|
+
question="Which products have the highest sales?",
|
|
146
|
+
sql="...",
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Generate SQL
|
|
151
|
+
```python
|
|
152
|
+
sql = vn.generate_sql(question="Who are the top 10 customers?")
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
# Documentation
|
|
156
|
+
[Full Documentation](https://docs.vanna.ai)
|
vanna-0.0.6/PKG-INFO
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: vanna
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: Generate SQL queries from natural language
|
|
5
|
-
Author-email: Zain Hoda <zain@vanna.ai>
|
|
6
|
-
Project-URL: Homepage, https://github.com/vanna-ai/vanna-py
|
|
7
|
-
Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna-py/issues
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.7
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
|
|
15
|
-
# Vanna.AI
|
|
16
|
-
|
|
17
|
-
Vanna.AI is a tool to help you generate SQL from natural language.
|
|
18
|
-
|
|
19
|
-
# Documentation
|
|
20
|
-
[Full Documentation Reference](https://docs.vanna.ai)
|
vanna-0.0.6/README.md
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: vanna
|
|
3
|
-
Version: 0.0.6
|
|
4
|
-
Summary: Generate SQL queries from natural language
|
|
5
|
-
Author-email: Zain Hoda <zain@vanna.ai>
|
|
6
|
-
Project-URL: Homepage, https://github.com/vanna-ai/vanna-py
|
|
7
|
-
Project-URL: Bug Tracker, https://github.com/vanna-ai/vanna-py/issues
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Requires-Python: >=3.7
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
|
|
15
|
-
# Vanna.AI
|
|
16
|
-
|
|
17
|
-
Vanna.AI is a tool to help you generate SQL from natural language.
|
|
18
|
-
|
|
19
|
-
# Documentation
|
|
20
|
-
[Full Documentation Reference](https://docs.vanna.ai)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|