PyPI - vanna - Versions diffs - 0.3.3__tar.gz → 0.4.0__tar.gz - Mend

vanna 0.3.3__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{vanna-0.3.3 → vanna-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vanna
-Version: 0.3.3
+Version: 0.4.0
 Summary: Generate SQL queries from natural language
 Author-email: Zain Hoda <zain@vanna.ai>
 Requires-Python: >=3.9
@@ -28,11 +28,15 @@ Requires-Dist: chromadb ; extra == "all"
 Requires-Dist: anthropic ; extra == "all"
 Requires-Dist: zhipuai ; extra == "all"
 Requires-Dist: marqo ; extra == "all"
+Requires-Dist: google-generativeai ; extra == "all"
+Requires-Dist: google-cloud-aiplatform ; extra == "all"
 Requires-Dist: anthropic ; extra == "anthropic"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: chromadb ; extra == "chromadb"
 Requires-Dist: duckdb ; extra == "duckdb"
 Requires-Dist: google-generativeai ; extra == "gemini"
+Requires-Dist: google-generativeai ; extra == "google"
+Requires-Dist: google-cloud-aiplatform ; extra == "google"
 Requires-Dist: marqo ; extra == "marqo"
 Requires-Dist: mistralai ; extra == "mistralai"
 Requires-Dist: PyMySQL ; extra == "mysql"
@@ -50,6 +54,7 @@ Provides-Extra: bigquery
 Provides-Extra: chromadb
 Provides-Extra: duckdb
 Provides-Extra: gemini
+Provides-Extra: google
 Provides-Extra: marqo
 Provides-Extra: mistralai
 Provides-Extra: mysql

{vanna-0.3.3 → vanna-0.4.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"
 [project]
 name = "vanna"
-version = "0.3.3"
+version = "0.4.0"
 authors = [
   { name="Zain Hoda", email="zain@vanna.ai" },
 ]
@@ -31,7 +31,8 @@ mysql = ["PyMySQL"]
 bigquery = ["google-cloud-bigquery"]
 snowflake = ["snowflake-connector-python"]
 duckdb = ["duckdb"]
-all = ["psycopg2-binary", "db-dtypes", "PyMySQL", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "mistralai", "chromadb", "anthropic", "zhipuai", "marqo"]
+google = ["google-generativeai", "google-cloud-aiplatform"]
+all = ["psycopg2-binary", "db-dtypes", "PyMySQL", "google-cloud-bigquery", "snowflake-connector-python", "duckdb", "openai", "mistralai", "chromadb", "anthropic", "zhipuai", "marqo", "google-generativeai", "google-cloud-aiplatform"]
 test = ["tox"]
 chromadb = ["chromadb"]
 openai = ["openai"]

{vanna-0.3.3 → vanna-0.4.0}/src/vanna/ZhipuAI/ZhipuAI_Chat.py RENAMED Viewed

@@ -40,7 +40,7 @@ class ZhipuAI_Chat(VannaBase):
         initial_prompt: str, ddl_list: List[str], max_tokens: int = 14000
     ) -> str:
         if len(ddl_list) > 0:
-            initial_prompt += f"\nYou may use the following DDL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following DDL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for ddl in ddl_list:
                 if (
@@ -57,7 +57,7 @@ class ZhipuAI_Chat(VannaBase):
         initial_prompt: str, documentation_List: List[str], max_tokens: int = 14000
     ) -> str:
         if len(documentation_List) > 0:
-            initial_prompt += f"\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for documentation in documentation_List:
                 if (
@@ -74,7 +74,7 @@ class ZhipuAI_Chat(VannaBase):
         initial_prompt: str, sql_List: List[str], max_tokens: int = 14000
     ) -> str:
         if len(sql_List) > 0:
-            initial_prompt += f"\nYou may use the following SQL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following SQL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for question in sql_List:
                 if (

{vanna-0.3.3 → vanna-0.4.0}/src/vanna/base/base.py RENAMED Viewed

@@ -124,6 +124,18 @@ class VannaBase(ABC):
         return self.extract_sql(llm_response)
     def extract_sql(self, llm_response: str) -> str:
+        # If the llm_response is not markdown formatted, extract sql by finding select and ; in the response
+        sql = re.search(r"SELECT.*?;", llm_response, re.DOTALL)
+        if sql:
+            self.log(f"Output from LLM: {llm_response} \nExtracted SQL: {sql.group(0)}"
+            )
+            return sql.group(0)
+        # If the llm_response contains a CTE (with clause), extract the sql bewteen WITH and ;
+        sql = re.search(r"WITH.*?;", llm_response, re.DOTALL)
+        if sql:
+            self.log(f"Output from LLM: {llm_response} \nExtracted SQL: {sql.group(0)}")
+            return sql.group(0)
         # If the llm_response contains a markdown code block, with or without the sql tag, extract the sql from it
         sql = re.search(r"```sql\n(.*)```", llm_response, re.DOTALL)
         if sql:
@@ -363,7 +375,7 @@ class VannaBase(ABC):
         self, initial_prompt: str, ddl_list: list[str], max_tokens: int = 14000
     ) -> str:
         if len(ddl_list) > 0:
-            initial_prompt += f"\nYou may use the following DDL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following DDL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for ddl in ddl_list:
                 if (
@@ -382,7 +394,7 @@ class VannaBase(ABC):
         max_tokens: int = 14000,
     ) -> str:
         if len(documentation_list) > 0:
-            initial_prompt += f"\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following documentation as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for documentation in documentation_list:
                 if (
@@ -398,7 +410,7 @@ class VannaBase(ABC):
         self, initial_prompt: str, sql_list: list[str], max_tokens: int = 14000
     ) -> str:
         if len(sql_list) > 0:
-            initial_prompt += f"\nYou may use the following SQL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
+            initial_prompt += "\nYou may use the following SQL statements as a reference for what tables might be available. Use responses to past questions also to guide you:\n\n"
             for question in sql_list:
                 if (
@@ -642,6 +654,7 @@ class VannaBase(ABC):
             password=password,
             account=account,
             database=database,
+            client_session_keep_alive=True
         )
         def run_sql_snowflake(sql: str) -> pd.DataFrame:
@@ -890,6 +903,94 @@ class VannaBase(ABC):
         self.run_sql_is_set = True
         self.run_sql = run_sql_mysql
+    def connect_to_oracle(
+    self,
+    user: str = None,
+    password: str = None,
+    dsn: str = None,
+    ):
+        """
+        Connect to an Oracle db using oracledb package. This is just a helper function to set [`vn.run_sql`][vanna.base.base.VannaBase.run_sql]
+        **Example:**
+        ```python
+        vn.connect_to_oracle(
+        user="username",
+        password="password",
+        dns="host:port/sid",
+        )
+        ```
+        Args:
+            USER (str): Oracle db user name.
+            PASSWORD (str): Oracle db user password.
+            DSN (str): Oracle db host ip - host:port/sid.
+        """
+        try:
+            import oracledb
+        except ImportError:
+            raise DependencyError(
+                "You need to install required dependencies to execute this method,"
+                " run command: \npip install oracledb"
+            )
+        if not dsn:
+            dsn = os.getenv("DSN")
+        if not dsn:
+            raise ImproperlyConfigured("Please set your Oracle dsn which should include host:port/sid")
+        if not user:
+            user = os.getenv("USER")
+        if not user:
+            raise ImproperlyConfigured("Please set your Oracle db user")
+        if not password:
+            password = os.getenv("PASSWORD")
+        if not password:
+            raise ImproperlyConfigured("Please set your Oracle db password")
+        conn = None
+        try:
+            conn = oracledb.connect(
+                user=user,
+                password=password,
+                dsn=dsn,
+                )
+        except oracledb.Error as e:
+            raise ValidationError(e)
+        def run_sql_oracle(sql: str) -> Union[pd.DataFrame, None]:
+            if conn:
+                try:
+                    sql = sql.rstrip()
+                    if sql.endswith(';'): #fix for a known problem with Oracle db where an extra ; will cause an error.
+                        sql = sql[:-1]
+                    cs = conn.cursor()
+                    cs.execute(sql)
+                    results = cs.fetchall()
+                    # Create a pandas dataframe from the results
+                    df = pd.DataFrame(
+                        results, columns=[desc[0] for desc in cs.description]
+                    )
+                    return df
+                except oracledb.Error as e:
+                    conn.rollback()
+                    raise ValidationError(e)
+                except Exception as e:
+                    conn.rollback()
+                    raise e
+        self.run_sql_is_set = True
+        self.run_sql = run_sql_oracle
     def connect_to_bigquery(self, cred_file_path: str = None, project_id: str = None):
         """
@@ -1238,7 +1339,7 @@ class VannaBase(ABC):
         """
         if question and not sql:
-            raise ValidationError(f"Please also provide a SQL query")
+            raise ValidationError("Please also provide a SQL query")
         if documentation:
             print("Adding documentation....")

{vanna-0.3.3 → vanna-0.4.0}/src/vanna/chromadb/chromadb_vector.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import json
-import uuid
 from typing import List
 import chromadb
@@ -16,17 +15,16 @@ default_ef = embedding_functions.DefaultEmbeddingFunction()
 class ChromaDB_VectorStore(VannaBase):
     def __init__(self, config=None):
         VannaBase.__init__(self, config=config)
+        if config is None:
+            config = {}
-        if config is not None:
-            path = config.get("path", ".")
-            self.embedding_function = config.get("embedding_function", default_ef)
-            curr_client = config.get("client", "persistent")
-            self.n_results = config.get("n_results", 10)
-        else:
-            path = "."
-            self.embedding_function = default_ef
-            curr_client = "persistent"  # defaults to persistent storage
-            self.n_results = 10  # defaults to 10 documents
+        path = config.get("path", ".")
+        self.embedding_function = config.get("embedding_function", default_ef)
+        curr_client = config.get("client", "persistent")
+        collection_metadata = config.get("collection_metadata", None)
+        self.n_results_sql = config.get("n_results_sql", config.get("n_results", 10))
+        self.n_results_documentation = config.get("n_results_documentation", config.get("n_results", 10))
+        self.n_results_ddl = config.get("n_results_ddl", config.get("n_results", 10))
         if curr_client == "persistent":
             self.chroma_client = chromadb.PersistentClient(
@@ -43,13 +41,19 @@ class ChromaDB_VectorStore(VannaBase):
             raise ValueError(f"Unsupported client was set in config: {curr_client}")
         self.documentation_collection = self.chroma_client.get_or_create_collection(
-            name="documentation", embedding_function=self.embedding_function
+            name="documentation",
+            embedding_function=self.embedding_function,
+            metadata=collection_metadata,
         )
         self.ddl_collection = self.chroma_client.get_or_create_collection(
-            name="ddl", embedding_function=self.embedding_function
+            name="ddl",
+            embedding_function=self.embedding_function,
+            metadata=collection_metadata,
         )
         self.sql_collection = self.chroma_client.get_or_create_collection(
-            name="sql", embedding_function=self.embedding_function
+            name="sql",
+            embedding_function=self.embedding_function,
+            metadata=collection_metadata,
         )
     def generate_embedding(self, data: str, **kwargs) -> List[float]:
@@ -232,7 +236,7 @@ class ChromaDB_VectorStore(VannaBase):
         return ChromaDB_VectorStore._extract_documents(
             self.sql_collection.query(
                 query_texts=[question],
-                n_results=self.n_results,
+                n_results=self.n_results_sql,
             )
         )
@@ -240,6 +244,7 @@ class ChromaDB_VectorStore(VannaBase):
         return ChromaDB_VectorStore._extract_documents(
             self.ddl_collection.query(
                 query_texts=[question],
+                n_results=self.n_results_ddl,
             )
         )
@@ -247,5 +252,6 @@ class ChromaDB_VectorStore(VannaBase):
         return ChromaDB_VectorStore._extract_documents(
             self.documentation_collection.query(
                 query_texts=[question],
+                n_results=self.n_results_documentation,
             )
         )

vanna 0.3.3__tar.gz → 0.4.0__tar.gz

vanna 0.3.3__tar.gz → 0.4.0__tar.gz