PyPI - vanna - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

vanna 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{vanna-0.1.0 → vanna-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vanna
-Version: 0.1.0
+Version: 0.2.0
 Summary: Generate SQL queries from natural language
 Author-email: Zain Hoda <zain@vanna.ai>
 Requires-Python: >=3.9
@@ -15,6 +15,7 @@ Requires-Dist: pandas
 Requires-Dist: sqlparse
 Requires-Dist: kaleido
 Requires-Dist: flask
+Requires-Dist: sqlalchemy
 Requires-Dist: psycopg2-binary ; extra == "all"
 Requires-Dist: db-dtypes ; extra == "all"
 Requires-Dist: google-cloud-bigquery ; extra == "all"
@@ -22,6 +23,7 @@ Requires-Dist: snowflake-connector-python ; extra == "all"
 Requires-Dist: duckdb ; extra == "all"
 Requires-Dist: openai ; extra == "all"
 Requires-Dist: mistralai ; extra == "all"
+Requires-Dist: chromadb ; extra == "all"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: chromadb ; extra == "chromadb"
 Requires-Dist: duckdb ; extra == "duckdb"

{vanna-0.1.0 → vanna-0.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 [project]
 name = "vanna"
-version = "0.1.0"
+version = "0.2.0"
 authors = [
   { name="Zain Hoda", email="zain@vanna.ai" },
 ]
@@ -18,7 +18,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "requests", "tabulate", "plotly", "pandas", "sqlparse", "kaleido", "flask"
+    "requests", "tabulate", "plotly", "pandas", "sqlparse", "kaleido", "flask", "sqlalchemy"
 ]
 [project.urls]
@@ -30,7 +30,7 @@ postgres = ["psycopg2-binary", "db-dtypes"]
 bigquery = ["google-cloud-bigquery"]
 snowflake = ["snowflake-connector-python"]
 duckdb = ["duckdb"]
-all = ["psycopg2-binary", "db-dtypes", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "mistralai"]
+all = ["psycopg2-binary", "db-dtypes", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "mistralai", "chromadb"]
 test = ["tox"]
 chromadb = ["chromadb"]
 openai = ["openai"]

{vanna-0.1.0 → vanna-0.2.0}/src/vanna/base/base.py RENAMED Viewed

@@ -72,6 +72,7 @@ class VannaBase(ABC):
     def __init__(self, config=None):
         self.config = config
         self.run_sql_is_set = False
+        self.static_documentation = ""
     def log(self, message: str):
         print(message)
@@ -140,18 +141,35 @@ class VannaBase(ABC):
         else:
             return False
-    def generate_followup_questions(self, question: str, **kwargs) -> str:
-        question_sql_list = self.get_similar_question_sql(question, **kwargs)
-        ddl_list = self.get_related_ddl(question, **kwargs)
-        doc_list = self.get_related_documentation(question, **kwargs)
-        prompt = self.get_followup_questions_prompt(
-            question=question,
-            question_sql_list=question_sql_list,
-            ddl_list=ddl_list,
-            doc_list=doc_list,
-            **kwargs,
-        )
-        llm_response = self.submit_prompt(prompt, **kwargs)
+    def generate_followup_questions(
+        self, question: str, sql: str, df: pd.DataFrame, **kwargs
+    ) -> list:
+        """
+        **Example:**
+        ```python
+        vn.generate_followup_questions("What are the top 10 customers by sales?", df)
+        ```
+        Generate a list of followup questions that you can ask Vanna.AI.
+        Args:
+            question (str): The question that was asked.
+            df (pd.DataFrame): The results of the SQL query.
+        Returns:
+            list: A list of followup questions that you can ask Vanna.AI.
+        """
+        message_log = [
+            self.system_message(
+                f"You are a helpful data assistant. The user asked the question: '{question}'\n\nThe SQL query for this question was: {sql}\n\nThe following is a pandas DataFrame with the results of the query: \n{df.to_markdown()}\n\n"
+            ),
+            self.user_message(
+                "Generate a list of followup questions that the user might ask about this data. Respond with a list of questions, one per line. Do not answer with any explanations -- just the questions. Remember that there should be an unambiguous SQL query that can be generated from the question. Prefer questions that are answerable outside of the context of this conversation. Prefer questions that are slight modifications of the SQL query that was generated that allow digging deeper into the data. Each question will be turned into a button that the user can click to generate a new SQL query so don't use 'example' type questions. Each question must have a one-to-one correspondence with an instantiated SQL query."
+            ),
+        ]
+        llm_response = self.submit_prompt(message_log, **kwargs)
         numbers_removed = re.sub(r"^\d+\.\s*", "", llm_response, flags=re.MULTILINE)
         return numbers_removed.split("\n")
@@ -169,6 +187,36 @@ class VannaBase(ABC):
         return [q["question"] for q in question_sql]
+    def generate_summary(self, question: str, df: pd.DataFrame, **kwargs) -> str:
+        """
+        **Example:**
+        ```python
+        vn.generate_summary("What are the top 10 customers by sales?", df)
+        ```
+        Generate a summary of the results of a SQL query.
+        Args:
+            question (str): The question that was asked.
+            df (pd.DataFrame): The results of the SQL query.
+        Returns:
+            str: The summary of the results of the SQL query.
+        """
+        message_log = [
+            self.system_message(
+                f"You are a helpful data assistant. The user asked the question: '{question}'\n\nThe following is a pandas DataFrame with the results of the query: \n{df.to_markdown()}\n\n"
+            ),
+            self.user_message(
+                "Briefly summarize the data based on the question that was asked. Do not respond with any additional explanation beyond the summary."
+            ),
+        ]
+        summary = self.submit_prompt(message_log, **kwargs)
+        return summary
     # ----------------- Use Any Embeddings API ----------------- #
     @abstractmethod
     def generate_embedding(self, data: str, **kwargs) -> List[float]:
@@ -184,7 +232,7 @@ class VannaBase(ABC):
             question (str): The question to get similar questions and their corresponding SQL statements for.
         Returns:
-            list: A list of similar questions and their corresponding SQL statements.
+            list: A list of similar questions and their corresponding SQL statements.
         """
         pass
@@ -224,7 +272,7 @@ class VannaBase(ABC):
             sql (str): The SQL query to add.
         Returns:
-            str: The ID of the training data that was added.
+            str: The ID of the training data that was added.
         """
         pass
@@ -232,7 +280,7 @@ class VannaBase(ABC):
     def add_ddl(self, ddl: str, **kwargs) -> str:
         """
         This method is used to add a DDL statement to the training data.
         Args:
             ddl (str): The DDL statement to add.
@@ -265,7 +313,7 @@ class VannaBase(ABC):
         This method is used to get all the training data from the retrieval layer.
         Returns:
-            pd.DataFrame: The training data.
+            pd.DataFrame: The training data.
         """
         pass
@@ -321,7 +369,10 @@ class VannaBase(ABC):
         return initial_prompt
     def add_documentation_to_prompt(
-        self, initial_prompt: str, documentation_list: list[str], max_tokens: int = 14000
+        self,
+        initial_prompt: str,
+        documentation_list: list[str],
+        max_tokens: int = 14000,
     ) -> str:
         if len(documentation_list) > 0:
             initial_prompt += f"\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
@@ -389,6 +440,9 @@ class VannaBase(ABC):
             initial_prompt, ddl_list, max_tokens=14000
         )
+        if self.static_documentation != "":
+            doc_list.append(self.static_documentation)
         initial_prompt = self.add_documentation_to_prompt(
             initial_prompt, doc_list, max_tokens=14000
         )
@@ -599,6 +653,7 @@ class VannaBase(ABC):
             return df
+        self.static_documentation = "This is a Snowflake database"
         self.run_sql = run_sql_snowflake
         self.run_sql_is_set = True
@@ -632,6 +687,7 @@ class VannaBase(ABC):
         def run_sql_sqlite(sql: str):
             return pd.read_sql_query(sql, conn)
+        self.static_documentation = "This is a SQLite database"
         self.run_sql = run_sql_sqlite
         self.run_sql_is_set = True
@@ -732,6 +788,11 @@ class VannaBase(ABC):
                     conn.rollback()
                     raise ValidationError(e)
+                except Exception as e:
+                    conn.rollback()
+                    raise e
+        self.static_documentation = "This is a Postgres database"
         self.run_sql_is_set = True
         self.run_sql = run_sql_postgres
@@ -821,6 +882,7 @@ class VannaBase(ABC):
                     raise errors
             return None
+        self.static_documentation = "This is a BigQuery database"
         self.run_sql_is_set = True
         self.run_sql = run_sql_bigquery
@@ -829,7 +891,7 @@ class VannaBase(ABC):
         Connect to a DuckDB database. This is just a helper function to set [`vn.run_sql`][vanna.base.base.VannaBase.run_sql]
         Args:
-            url (str): The URL of the database to connect to.
+            url (str): The URL of the database to connect to. Use :memory: to create an in-memory database. Use md: or motherduck: to use the MotherDuck database.
             init_sql (str, optional): SQL to run when connecting to the database. Defaults to None.
         Returns:
@@ -843,13 +905,15 @@ class VannaBase(ABC):
                 " run command: \npip install vanna[duckdb]"
             )
         # URL of the database to download
-        if url==":memory:" or url=="":
-            path=":memory:"
+        if url == ":memory:" or url == "":
+            path = ":memory:"
         else:
             # Path to save the downloaded database
             print(os.path.exists(url))
             if os.path.exists(url):
-                path=url
+                path = url
+            elif url.startswith("md") or url.startswith("motherduck"):
+                path = url
             else:
                 path = os.path.basename(urlparse(url).path)
                 # Download the database if it doesn't exist
@@ -867,9 +931,57 @@ class VannaBase(ABC):
         def run_sql_duckdb(sql: str):
             return conn.query(sql).to_df()
+        self.static_documentation = "This is a DuckDB database"
         self.run_sql = run_sql_duckdb
         self.run_sql_is_set = True
+    def connect_to_mssql(self, odbc_conn_str: str):
+        """
+        Connect to a Microsoft SQL Server database. This is just a helper function to set [`vn.run_sql`][vanna.base.base.VannaBase.run_sql]
+        Args:
+            odbc_conn_str (str): The ODBC connection string.
+        Returns:
+            None
+        """
+        try:
+            import pyodbc
+        except ImportError:
+            raise DependencyError(
+                "You need to install required dependencies to execute this method,"
+                " run command: pip install pyodbc"
+            )
+        try:
+            import sqlalchemy as sa
+            from sqlalchemy.engine import URL
+        except ImportError:
+            raise DependencyError(
+                "You need to install required dependencies to execute this method,"
+                " run command: pip install sqlalchemy"
+            )
+        connection_url = URL.create(
+            "mssql+pyodbc", query={"odbc_connect": odbc_conn_str}
+        )
+        from sqlalchemy import create_engine
+        engine = create_engine(connection_url)
+        def run_sql_mssql(sql: str):
+            # Execute the SQL statement and return the result as a pandas DataFrame
+            with engine.begin() as conn:
+                df = pd.read_sql_query(sa.text(sql), conn)
+                return df
+            raise Exception("Couldn't run sql")
+        self.static_documentation = "This is a Microsoft SQL Server database"
+        self.run_sql = run_sql_mssql
+        self.run_sql_is_set = True
     def run_sql(self, sql: str, **kwargs) -> pd.DataFrame:
         """
         Example:
@@ -894,7 +1006,7 @@ class VannaBase(ABC):
         question: Union[str, None] = None,
         print_results: bool = True,
         auto_train: bool = True,
-        visualize: bool = True, # if False, will not generate plotly code
+        visualize: bool = True,  # if False, will not generate plotly code
     ) -> Union[
         Tuple[
             Union[str, None],
@@ -975,7 +1087,9 @@ class VannaBase(ABC):
                             display = __import__(
                                 "IPython.display", fromlist=["display"]
                             ).display
-                            Image = __import__("IPython.display", fromlist=["Image"]).Image
+                            Image = __import__(
+                                "IPython.display", fromlist=["Image"]
+                            ).Image
                             img_bytes = fig.to_image(format="png", scale=2)
                             display(Image(img_bytes))
                         except Exception as e:
@@ -1328,4 +1442,3 @@ class VannaBase(ABC):
             fig.update_layout(template="plotly_dark")
         return fig

{vanna-0.1.0 → vanna-0.2.0}/src/vanna/chromadb/chromadb_vector.py RENAMED Viewed

@@ -1,7 +1,6 @@
 import json
-from typing import List
 import uuid
-from abc import abstractmethod
+from typing import List
 import chromadb
 import pandas as pd
@@ -20,13 +19,28 @@ class ChromaDB_VectorStore(VannaBase):
         if config is not None:
             path = config.get("path", ".")
             self.embedding_function = config.get("embedding_function", default_ef)
+            curr_client = config.get("client", "persistent")
+            self.n_results = config.get("n_results", 10)
         else:
             path = "."
             self.embedding_function = default_ef
+            curr_client = "persistent"  # defaults to persistent storage
+            self.n_results = 10  # defaults to 10 documents
+        if curr_client == "persistent":
+            self.chroma_client = chromadb.PersistentClient(
+                path=path, settings=Settings(anonymized_telemetry=False)
+            )
+        elif curr_client == "in-memory":
+            self.chroma_client = chromadb.EphemeralClient(
+                settings=Settings(anonymized_telemetry=False)
+            )
+        elif isinstance(curr_client, chromadb.api.client.Client):
+            # allow providing client directly
+            self.chroma_client = curr_client
+        else:
+            raise ValueError(f"Unsupported client was set in config: {curr_client}")
-        self.chroma_client = chromadb.PersistentClient(
-            path=path, settings=Settings(anonymized_telemetry=False)
-        )
         self.documentation_collection = self.chroma_client.get_or_create_collection(
             name="documentation", embedding_function=self.embedding_function
         )
@@ -196,7 +210,8 @@ class ChromaDB_VectorStore(VannaBase):
             query_results (pd.DataFrame): The dataframe to use.
         Returns:
-            List[str] or None: The extracted documents, or an empty list or single document if an error occurred.
+            List[str] or None: The extracted documents, or an empty list or
+            single document if an error occurred.
         """
         if query_results is None:
             return []
@@ -216,6 +231,7 @@ class ChromaDB_VectorStore(VannaBase):
         return ChromaDB_VectorStore._extract_documents(
             self.sql_collection.query(
                 query_texts=[question],
+                n_results=self.n_results,
             )
         )

vanna-0.1.0/src/vanna/flask.py → vanna-0.2.0/src/vanna/flask/__init__.py RENAMED Viewed

@@ -7,6 +7,8 @@ import flask
 import requests
 from flask import Flask, Response, jsonify, request
+from .assets import css_content, html_content, js_content
 class Cache(ABC):
     @abstractmethod
@@ -92,10 +94,11 @@ class VannaFlaskApp:
         return decorator
-    def __init__(self, vn, cache: Cache = MemoryCache()):
+    def __init__(self, vn, cache: Cache = MemoryCache(), allow_llm_to_see_data=False):
         self.flask_app = Flask(__name__)
         self.vn = vn
         self.cache = cache
+        self.allow_llm_to_see_data = allow_llm_to_see_data
         log = logging.getLogger("werkzeug")
         log.setLevel(logging.ERROR)
@@ -296,23 +299,55 @@ class VannaFlaskApp:
                 return jsonify({"type": "error", "error": str(e)})
         @self.flask_app.route("/api/v0/generate_followup_questions", methods=["GET"])
-        @self.requires_cache(["df", "question"])
-        def generate_followup_questions(id: str, df, question):
-            followup_questions = []
-            # followup_questions = vn.generate_followup_questions(question=question, df=df)
-            # if followup_questions is not None and len(followup_questions) > 5:
-            #     followup_questions = followup_questions[:5]
+        @self.requires_cache(["df", "question", "sql"])
+        def generate_followup_questions(id: str, df, question, sql):
+            if self.allow_llm_to_see_data:
+                followup_questions = vn.generate_followup_questions(
+                    question=question, sql=sql, df=df
+                )
+                if followup_questions is not None and len(followup_questions) > 5:
+                    followup_questions = followup_questions[:5]
-            cache.set(id=id, field="followup_questions", value=followup_questions)
+                cache.set(id=id, field="followup_questions", value=followup_questions)
-            return jsonify(
-                {
-                    "type": "question_list",
-                    "id": id,
-                    "questions": followup_questions,
-                    "header": "Followup Questions can be enabled in a future version if you allow the LLM to 'see' your query results.",
-                }
-            )
+                return jsonify(
+                    {
+                        "type": "question_list",
+                        "id": id,
+                        "questions": followup_questions,
+                        "header": "Here are some potential followup questions:",
+                    }
+                )
+            else:
+                return jsonify(
+                    {
+                        "type": "question_list",
+                        "id": id,
+                        "questions": [],
+                        "header": "Followup Questions can be enabled if you set allow_llm_to_see_data=True",
+                    }
+                )
+        @self.flask_app.route("/api/v0/generate_summary", methods=["GET"])
+        @self.requires_cache(["df", "question"])
+        def generate_summary(id: str, df, question):
+            if self.allow_llm_to_see_data:
+                summary = vn.generate_summary(question=question, df=df)
+                return jsonify(
+                    {
+                        "type": "text",
+                        "id": id,
+                        "text": summary,
+                    }
+                )
+            else:
+                return jsonify(
+                    {
+                        "type": "text",
+                        "id": id,
+                        "text": "Summarization can be enabled if you set allow_llm_to_see_data=True",
+                    }
+                )
         @self.flask_app.route("/api/v0/load_question", methods=["GET"])
         @self.requires_cache(
@@ -352,25 +387,14 @@ class VannaFlaskApp:
         @self.flask_app.route("/assets/<path:filename>")
         def proxy_assets(filename):
-            remote_url = f"https://vanna.ai/assets/{filename}"
-            response = requests.get(remote_url, stream=True)
+            if ".css" in filename:
+                return Response(css_content, mimetype="text/css")
-            # Check if the request to the remote URL was successful
-            if response.status_code == 200:
-                excluded_headers = [
-                    "content-encoding",
-                    "content-length",
-                    "transfer-encoding",
-                    "connection",
-                ]
-                headers = [
-                    (name, value)
-                    for (name, value) in response.raw.headers.items()
-                    if name.lower() not in excluded_headers
-                ]
-                return Response(response.content, response.status_code, headers)
-            else:
-                return "Error fetching file from remote server", response.status_code
+            if ".js" in filename:
+                return Response(js_content, mimetype="text/javascript")
+            # Return 404
+            return "File not found", 404
         # Proxy the /vanna.svg file to the remote server
         @self.flask_app.route("/vanna.svg")
@@ -398,24 +422,7 @@ class VannaFlaskApp:
         @self.flask_app.route("/", defaults={"path": ""})
         @self.flask_app.route("/<path:path>")
         def hello(path: str):
-            return """
-<!doctype html>
-<html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="/vanna.svg" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@350&display=swap" rel="stylesheet">
-    <script src="https://cdn.plot.ly/plotly-latest.min.js" type="text/javascript"></script>
-    <title>Vanna.AI</title>
-    <script type="module" crossorigin src="/assets/index-d29524f4.js"></script>
-    <link rel="stylesheet" href="/assets/index-b1a5a2f1.css">
-  </head>
-  <body class="bg-white dark:bg-slate-900">
-    <div id="app"></div>
-  </body>
-</html>
-"""
+            return html_content
     def run(self):
         try:

vanna 0.1.0__tar.gz → 0.2.0__tar.gz

vanna 0.1.0__tar.gz → 0.2.0__tar.gz