PyPI - vanna - Versions diffs - 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

vanna 0.5.5py3-none-any.whl → 0.6.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

vanna/advanced/__init__.py +26 -0
vanna/base/base.py +25 -24
vanna/flask/__init__.py +124 -10
vanna/flask/assets.py +36 -16
vanna/milvus/__init__.py +1 -0
vanna/milvus/milvus_vector.py +305 -0
vanna/qdrant/qdrant.py +12 -14
vanna/vannadb/vannadb_vector.py +179 -1
vanna/vllm/vllm.py +16 -1
{vanna-0.5.5.dist-info → vanna-0.6.1.dist-info}/METADATA +5 -2
{vanna-0.5.5.dist-info → vanna-0.6.1.dist-info}/RECORD +12 -9
{vanna-0.5.5.dist-info → vanna-0.6.1.dist-info}/WHEEL +0 -0

vanna/milvus/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .milvus_vector import Milvus_VectorStore

vanna/milvus/milvus_vector.py ADDED Viewed

@@ -0,0 +1,305 @@
+import uuid
+from typing import List
+import pandas as pd
+from pymilvus import DataType, MilvusClient, model
+from ..base import VannaBase
+# Setting the URI as a local file, e.g. `./milvus.db`,
+# is the most convenient method, as it automatically utilizes Milvus Lite
+# to store all data in this file.
+#
+# If you have a large amount of data (for example, more than a million docs),
+# we recommend setting up a more performant Milvus server on Docker or Kubernetes.
+# When using this setup, please use the server URI,
+# e.g. `http://localhost:19530`, as your URI.
+# URI used when the config carries no "milvus_client": a local database file.
+DEFAULT_MILVUS_URI = "./milvus.db"
+# DEFAULT_MILVUS_URI = "http://localhost:19530"
+# Value passed as `limit` to each collection query in get_training_data.
+MAX_LIMIT_SIZE = 10_000
+class Milvus_VectorStore(VannaBase):
+    """
+    Vectorstore implementation using Milvus - https://milvus.io/docs/quickstart.md
+    Args:
+        - config (dict, optional): Dictionary of `Milvus_VectorStore config` options. Defaults to `None`.
+            - milvus_client: A `pymilvus.MilvusClient` instance.
+            - embedding_function:
+                A `milvus_model.base.BaseEmbeddingFunction` instance. Defaults to `DefaultEmbeddingFunction()`.
+                For more models, please refer to:
+                https://milvus.io/docs/embeddings.md
+    """
+    def __init__(self, config=None):
+        VannaBase.__init__(self, config=config)
+        if "milvus_client" in config:
+            self.milvus_client = config["milvus_client"]
+        else:
+            self.milvus_client = MilvusClient(uri=DEFAULT_MILVUS_URI)
+        if "embedding_function" in config:
+            self.embedding_function = config.get("embedding_function")
+        else:
+            self.embedding_function = model.DefaultEmbeddingFunction()
+        self._embedding_dim = self.embedding_function.encode_documents(["foo"])[0].shape[0]
+        self._create_collections()
+        self.n_results = config.get("n_results", 10)
+    def _create_collections(self):
+        self._create_sql_collection("vannasql")
+        self._create_ddl_collection("vannaddl")
+        self._create_doc_collection("vannadoc")
+    def generate_embedding(self, data: str, **kwargs) -> List[float]:
+        return self.embedding_function.encode_documents(data).tolist()
+    def _create_sql_collection(self, name: str):
+        if not self.milvus_client.has_collection(collection_name=name):
+            vannasql_schema = MilvusClient.create_schema(
+                auto_id=False,
+                enable_dynamic_field=False,
+            )
+            vannasql_schema.add_field(field_name="id", datatype=DataType.VARCHAR, max_length=65535, is_primary=True)
+            vannasql_schema.add_field(field_name="text", datatype=DataType.VARCHAR, max_length=65535)
+            vannasql_schema.add_field(field_name="sql", datatype=DataType.VARCHAR, max_length=65535)
+            vannasql_schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=self._embedding_dim)
+            vannasql_index_params = self.milvus_client.prepare_index_params()
+            vannasql_index_params.add_index(
+                field_name="vector",
+                index_name="vector",
+                index_type="AUTOINDEX",
+                metric_type="L2",
+            )
+            self.milvus_client.create_collection(
+                collection_name=name,
+                schema=vannasql_schema,
+                index_params=vannasql_index_params,
+                consistency_level="Strong"
+            )
+    def _create_ddl_collection(self, name: str):
+        if not self.milvus_client.has_collection(collection_name=name):
+            vannaddl_schema = MilvusClient.create_schema(
+                auto_id=False,
+                enable_dynamic_field=False,
+            )
+            vannaddl_schema.add_field(field_name="id", datatype=DataType.VARCHAR, max_length=65535, is_primary=True)
+            vannaddl_schema.add_field(field_name="ddl", datatype=DataType.VARCHAR, max_length=65535)
+            vannaddl_schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=self._embedding_dim)
+            vannaddl_index_params = self.milvus_client.prepare_index_params()
+            vannaddl_index_params.add_index(
+                field_name="vector",
+                index_name="vector",
+                index_type="AUTOINDEX",
+                metric_type="L2",
+            )
+            self.milvus_client.create_collection(
+                collection_name=name,
+                schema=vannaddl_schema,
+                index_params=vannaddl_index_params,
+                consistency_level="Strong"
+            )
+    def _create_doc_collection(self, name: str):
+        if not self.milvus_client.has_collection(collection_name=name):
+            vannadoc_schema = MilvusClient.create_schema(
+                auto_id=False,
+                enable_dynamic_field=False,
+            )
+            vannadoc_schema.add_field(field_name="id", datatype=DataType.VARCHAR, max_length=65535, is_primary=True)
+            vannadoc_schema.add_field(field_name="doc", datatype=DataType.VARCHAR, max_length=65535)
+            vannadoc_schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=self._embedding_dim)
+            vannadoc_index_params = self.milvus_client.prepare_index_params()
+            vannadoc_index_params.add_index(
+                field_name="vector",
+                index_name="vector",
+                index_type="AUTOINDEX",
+                metric_type="L2",
+            )
+            self.milvus_client.create_collection(
+                collection_name=name,
+                schema=vannadoc_schema,
+                index_params=vannadoc_index_params,
+                consistency_level="Strong"
+            )
+    def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
+        if len(question) == 0 or len(sql) == 0:
+            raise Exception("pair of question and sql can not be null")
+        _id = str(uuid.uuid4()) + "-sql"
+        embedding = self.embedding_function.encode_documents([question])[0]
+        self.milvus_client.insert(
+            collection_name="vannasql",
+            data={
+                "id": _id,
+                "text": question,
+                "sql": sql,
+                "vector": embedding
+            }
+        )
+        return _id
+    def add_ddl(self, ddl: str, **kwargs) -> str:
+        if len(ddl) == 0:
+            raise Exception("ddl can not be null")
+        _id = str(uuid.uuid4()) + "-ddl"
+        embedding = self.embedding_function.encode_documents([ddl])[0]
+        self.milvus_client.insert(
+            collection_name="vannaddl",
+            data={
+                "id": _id,
+                "ddl": ddl,
+                "vector": embedding
+            }
+        )
+        return _id
+    def add_documentation(self, documentation: str, **kwargs) -> str:
+        if len(documentation) == 0:
+            raise Exception("documentation can not be null")
+        _id = str(uuid.uuid4()) + "-doc"
+        embedding = self.embedding_function.encode_documents([documentation])[0]
+        self.milvus_client.insert(
+            collection_name="vannadoc",
+            data={
+                "id": _id,
+                "doc": documentation,
+                "vector": embedding
+            }
+        )
+        return _id
+    def get_training_data(self, **kwargs) -> pd.DataFrame:
+        sql_data = self.milvus_client.query(
+            collection_name="vannasql",
+            output_fields=["*"],
+            limit=MAX_LIMIT_SIZE,
+        )
+        df = pd.DataFrame()
+        df_sql = pd.DataFrame(
+            {
+                "id": [doc["id"] for doc in sql_data],
+                "question": [doc["text"] for doc in sql_data],
+                "content": [doc["sql"] for doc in sql_data],
+            }
+        )
+        df = pd.concat([df, df_sql])
+        ddl_data = self.milvus_client.query(
+            collection_name="vannaddl",
+            output_fields=["*"],
+            limit=MAX_LIMIT_SIZE,
+        )
+        df_ddl = pd.DataFrame(
+            {
+                "id": [doc["id"] for doc in ddl_data],
+                "question": [None for doc in ddl_data],
+                "content": [doc["ddl"] for doc in ddl_data],
+            }
+        )
+        df = pd.concat([df, df_ddl])
+        doc_data = self.milvus_client.query(
+            collection_name="vannadoc",
+            output_fields=["*"],
+            limit=MAX_LIMIT_SIZE,
+        )
+        df_doc = pd.DataFrame(
+            {
+                "id": [doc["id"] for doc in doc_data],
+                "question": [None for doc in doc_data],
+                "content": [doc["doc"] for doc in doc_data],
+            }
+        )
+        df = pd.concat([df, df_doc])
+        return df
+    def get_similar_question_sql(self, question: str, **kwargs) -> list:
+        search_params = {
+            "metric_type": "L2",
+            "params": {"nprobe": 128},
+        }
+        embeddings = self.embedding_function.encode_queries([question])
+        res = self.milvus_client.search(
+            collection_name="vannasql",
+            anns_field="vector",
+            data=embeddings,
+            limit=self.n_results,
+            output_fields=["text", "sql"],
+            search_params=search_params
+        )
+        res = res[0]
+        list_sql = []
+        for doc in res:
+            dict = {}
+            dict["question"] = doc["entity"]["text"]
+            dict["sql"] = doc["entity"]["sql"]
+            list_sql.append(dict)
+        return list_sql
+    def get_related_ddl(self, question: str, **kwargs) -> list:
+        search_params = {
+            "metric_type": "L2",
+            "params": {"nprobe": 128},
+        }
+        embeddings = self.embedding_function.encode_queries([question])
+        res = self.milvus_client.search(
+            collection_name="vannaddl",
+            anns_field="vector",
+            data=embeddings,
+            limit=self.n_results,
+            output_fields=["ddl"],
+            search_params=search_params
+        )
+        res = res[0]
+        list_ddl = []
+        for doc in res:
+            list_ddl.append(doc["entity"]["ddl"])
+        return list_ddl
+    def get_related_documentation(self, question: str, **kwargs) -> list:
+        search_params = {
+            "metric_type": "L2",
+            "params": {"nprobe": 128},
+        }
+        embeddings = self.embedding_function.encode_queries([question])
+        res = self.milvus_client.search(
+            collection_name="vannadoc",
+            anns_field="vector",
+            data=embeddings,
+            limit=self.n_results,
+            output_fields=["doc"],
+            search_params=search_params
+        )
+        res = res[0]
+        list_doc = []
+        for doc in res:
+            list_doc.append(doc["entity"]["doc"])
+        return list_doc
+    def remove_training_data(self, id: str, **kwargs) -> bool:
+        if id.endswith("-sql"):
+            self.milvus_client.delete(collection_name="vannasql", ids=[id])
+            return True
+        elif id.endswith("-ddl"):
+            self.milvus_client.delete(collection_name="vannaddl", ids=[id])
+            return True
+        elif id.endswith("-doc"):
+            self.milvus_client.delete(collection_name="vannadoc", ids=[id])
+            return True
+        else:
+            return False

vanna/qdrant/qdrant.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import List, Tuple
 import pandas as pd
 from qdrant_client import QdrantClient, grpc, models
+from qdrant_client.http.models.models import UpdateStatus
 from ..base import VannaBase
 from ..utils import deterministic_uuid
@@ -38,16 +39,6 @@ class Qdrant_VectorStore(VannaBase):
         TypeError: If config["client"] is not a `qdrant_client.QdrantClient` instance
     """
-    documentation_collection_name = "documentation"
-    ddl_collection_name = "ddl"
-    sql_collection_name = "sql"
-    id_suffixes = {
-        ddl_collection_name: "ddl",
-        documentation_collection_name: "doc",
-        sql_collection_name: "sql",
-    }
     def __init__(
         self,
         config={},
@@ -79,15 +70,21 @@ class Qdrant_VectorStore(VannaBase):
         self.collection_params = config.get("collection_params", {})
         self.distance_metric = config.get("distance_metric", models.Distance.COSINE)
         self.documentation_collection_name = config.get(
-            "documentation_collection_name", self.documentation_collection_name
+            "documentation_collection_name", "documentation"
         )
         self.ddl_collection_name = config.get(
-            "ddl_collection_name", self.ddl_collection_name
+            "ddl_collection_name", "ddl"
         )
         self.sql_collection_name = config.get(
-            "sql_collection_name", self.sql_collection_name
+            "sql_collection_name", "sql"
         )
+        self.id_suffixes = {
+            self.ddl_collection_name: "ddl",
+            self.documentation_collection_name: "doc",
+            self.sql_collection_name: "sql",
+        }
         self._setup_collections()
     def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
@@ -210,7 +207,8 @@ class Qdrant_VectorStore(VannaBase):
     def remove_training_data(self, id: str, **kwargs) -> bool:
         try:
             id, collection_name = self._parse_point_id(id)
-            self._client.delete(collection_name, points_selector=[id])
+            res = self._client.delete(collection_name, points_selector=[id])
+            return True
         except ValueError:
             return False

vanna/vannadb/vannadb_vector.py CHANGED Viewed

@@ -5,6 +5,7 @@ from io import StringIO
 import pandas as pd
 import requests
+from ..advanced import VannaAdvanced
 from ..base import VannaBase
 from ..types import (
   DataFrameJSON,
@@ -20,7 +21,7 @@ from ..types import (
 from ..utils import sanitize_model_name
-class VannaDB_VectorStore(VannaBase):
+class VannaDB_VectorStore(VannaBase, VannaAdvanced):
     def __init__(self, vanna_model: str, vanna_api_key: str, config=None):
         VannaBase.__init__(self, config=config)
@@ -33,6 +34,12 @@ class VannaDB_VectorStore(VannaBase):
             else config["endpoint"]
         )
         self.related_training_data = {}
+        self._graphql_endpoint = "https://functionrag.com/query"
+        self._graphql_headers = {
+            "Content-Type": "application/json",
+            "API-KEY": self._api_key,
+            "NAMESPACE": self._model,
+        }
     def _rpc_call(self, method, params):
         if method != "list_orgs":
@@ -59,6 +66,177 @@ class VannaDB_VectorStore(VannaBase):
     def _dataclass_to_dict(self, obj):
         return dataclasses.asdict(obj)
+    def get_all_functions(self) -> list:
+        query = """
+            {
+                get_all_sql_functions {
+                    function_name
+                    description
+                    post_processing_code_template
+                    arguments {
+                        name
+                        description
+                        general_type
+                        is_user_editable
+                        available_values
+                    }
+                    sql_template
+                }
+            }
+        """
+        response = requests.post(self._graphql_endpoint, headers=self._graphql_headers, json={'query': query})
+        response_json = response.json()
+        if response.status_code == 200 and 'data' in response_json and 'get_all_sql_functions' in response_json['data']:
+            self.log(response_json['data']['get_all_sql_functions'])
+            resp = response_json['data']['get_all_sql_functions']
+            print(resp)
+            return resp
+        else:
+            raise Exception(f"Query failed to run by returning code of {response.status_code}. {response.text}")
+    def get_function(self, question: str, additional_data: dict = {}) -> dict:
+        query = """
+        query GetFunction($question: String!, $staticFunctionArguments: [StaticFunctionArgument]) {
+            get_and_instantiate_function(question: $question, static_function_arguments: $staticFunctionArguments) {
+                ... on SQLFunction {
+                function_name
+                description
+                post_processing_code_template
+                instantiated_post_processing_code
+                arguments {
+                    name
+                    description
+                    general_type
+                    is_user_editable
+                    instantiated_value
+                    available_values
+                }
+                sql_template
+                instantiated_sql
+            }
+            }
+        }
+        """
+        static_function_arguments = [{"name": key, "value": str(value)} for key, value in additional_data.items()]
+        variables = {"question": question, "staticFunctionArguments": static_function_arguments}
+        response = requests.post(self._graphql_endpoint, headers=self._graphql_headers, json={'query': query, 'variables': variables})
+        response_json = response.json()
+        if response.status_code == 200 and 'data' in response_json and 'get_and_instantiate_function' in response_json['data']:
+            self.log(response_json['data']['get_and_instantiate_function'])
+            resp = response_json['data']['get_and_instantiate_function']
+            print(resp)
+            return resp
+        else:
+            raise Exception(f"Query failed to run by returning code of {response.status_code}. {response.text}")
+    def create_function(self, question: str, sql: str, plotly_code: str, **kwargs) -> dict:
+        query = """
+        mutation CreateFunction($question: String!, $sql: String!, $plotly_code: String!) {
+            generate_and_create_sql_function(question: $question, sql: $sql, post_processing_code: $plotly_code) {
+                function_name
+                description
+                arguments {
+                    name
+                    description
+                    general_type
+                    is_user_editable
+                }
+                sql_template
+                post_processing_code_template
+            }
+        }
+        """
+        variables = {"question": question, "sql": sql, "plotly_code": plotly_code}
+        response = requests.post(self._graphql_endpoint, headers=self._graphql_headers, json={'query': query, 'variables': variables})
+        response_json = response.json()
+        if response.status_code == 200 and 'data' in response_json and response_json['data'] is not None and 'generate_and_create_sql_function' in response_json['data']:
+            resp = response_json['data']['generate_and_create_sql_function']
+            print(resp)
+            return resp
+        else:
+            raise Exception(f"Query failed to run by returning code of {response.status_code}. {response.text}")
+    def update_function(self, old_function_name: str, updated_function: dict) -> bool:
+        """
+        Update an existing SQL function based on the provided parameters.
+        Args:
+            old_function_name (str): The current name of the function to be updated.
+            updated_function (dict): A dictionary containing the updated function details. Expected keys:
+                - 'function_name': The new name of the function.
+                - 'description': The new description of the function.
+                - 'arguments': A list of dictionaries describing the function arguments.
+                - 'sql_template': The new SQL template for the function.
+                - 'post_processing_code_template': The new post-processing code template.
+        Returns:
+            bool: True if the function was successfully updated, False otherwise.
+        """
+        mutation = """
+        mutation UpdateSQLFunction($input: SQLFunctionUpdate!) {
+            update_sql_function(input: $input)
+        }
+        """
+        SQLFunctionUpdate = {
+            'function_name', 'description', 'arguments', 'sql_template', 'post_processing_code_template'
+        }
+        # Define the expected keys for each argument in the arguments list
+        ArgumentKeys = {'name', 'general_type', 'description', 'is_user_editable', 'available_values'}
+        # Function to validate and transform arguments
+        def validate_arguments(args):
+            return [
+                {key: arg[key] for key in arg if key in ArgumentKeys}
+                for arg in args
+            ]
+        # Keep only the keys that conform to the SQLFunctionUpdate GraphQL input type
+        updated_function = {key: value for key, value in updated_function.items() if key in SQLFunctionUpdate}
+        # Special handling for 'arguments' to ensure they conform to the spec
+        if 'arguments' in updated_function:
+            updated_function['arguments'] = validate_arguments(updated_function['arguments'])
+        variables = {
+            "input": {
+                "old_function_name": old_function_name,
+                **updated_function
+            }
+        }
+        print("variables", variables)
+        response = requests.post(self._graphql_endpoint, headers=self._graphql_headers, json={'query': mutation, 'variables': variables})
+        response_json = response.json()
+        if response.status_code == 200 and 'data' in response_json and response_json['data'] is not None and 'update_sql_function' in response_json['data']:
+            return response_json['data']['update_sql_function']
+        else:
+            raise Exception(f"Mutation failed to run by returning code of {response.status_code}. {response.text}")
+    def delete_function(self, function_name: str) -> bool:
+        mutation = """
+        mutation DeleteSQLFunction($function_name: String!) {
+            delete_sql_function(function_name: $function_name)
+        }
+        """
+        variables = {"function_name": function_name}
+        response = requests.post(self._graphql_endpoint, headers=self._graphql_headers, json={'query': mutation, 'variables': variables})
+        response_json = response.json()
+        if response.status_code == 200 and 'data' in response_json and response_json['data'] is not None and 'delete_sql_function' in response_json['data']:
+            return response_json['data']['delete_sql_function']
+        else:
+            raise Exception(f"Mutation failed to run by returning code of {response.status_code}. {response.text}")
     def create_model(self, model: str, **kwargs) -> bool:
         """
         **Example:**

vanna/vllm/vllm.py CHANGED Viewed

@@ -17,6 +17,11 @@ class Vllm(VannaBase):
         else:
             self.model = config["model"]
+        if "auth-key" in config:
+            self.auth_key = config["auth-key"]
+        else:
+            self.auth_key = None
     def system_message(self, message: str) -> any:
         return {"role": "system", "content": message}
@@ -67,7 +72,17 @@ class Vllm(VannaBase):
             "messages": prompt,
         }
-        response = requests.post(url, json=data)
+        if self.auth_key is not None:
+            headers = {
+            'Content-Type': 'application/json',
+            'Authorization': f'Bearer {self.auth_key}'
+            }
+            response = requests.post(url, headers=headers,json=data)
+        else:
+            response = requests.post(url, json=data)
         response_dict = response.json()

{vanna-0.5.5.dist-info → vanna-0.6.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vanna
-Version: 0.5.5
+Version: 0.6.1
 Summary: Generate SQL queries from natural language
 Author-email: Zain Hoda <zain@vanna.ai>
 Requires-Python: >=3.9
@@ -39,16 +39,18 @@ Requires-Dist: opensearch-py ; extra == "all"
 Requires-Dist: opensearch-dsl ; extra == "all"
 Requires-Dist: transformers ; extra == "all"
 Requires-Dist: pinecone-client ; extra == "all"
+Requires-Dist: pymilvus[model] ; extra == "all"
 Requires-Dist: anthropic ; extra == "anthropic"
 Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
 Requires-Dist: chromadb ; extra == "chromadb"
-Requires-Dist: clickhouse_driver ; extra == "clickhouse"
+Requires-Dist: clickhouse_connect ; extra == "clickhouse"
 Requires-Dist: duckdb ; extra == "duckdb"
 Requires-Dist: google-generativeai ; extra == "gemini"
 Requires-Dist: google-generativeai ; extra == "google"
 Requires-Dist: google-cloud-aiplatform ; extra == "google"
 Requires-Dist: transformers ; extra == "hf"
 Requires-Dist: marqo ; extra == "marqo"
+Requires-Dist: pymilvus[model] ; extra == "milvus"
 Requires-Dist: mistralai ; extra == "mistralai"
 Requires-Dist: PyMySQL ; extra == "mysql"
 Requires-Dist: ollama ; extra == "ollama"
@@ -78,6 +80,7 @@ Provides-Extra: gemini
 Provides-Extra: google
 Provides-Extra: hf
 Provides-Extra: marqo
+Provides-Extra: milvus
 Provides-Extra: mistralai
 Provides-Extra: mysql
 Provides-Extra: ollama

vanna 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

vanna 0.5.5py3-none-any.whl → 0.6.1py3-none-any.whl