PyPI - swarmauri_vectorstore_qdrant - Versions diffs - 0.6.0.dev154__py3-none-any.whl - Mend

swarmauri_vectorstore_qdrant 0.6.0.dev154__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

swarmauri_vectorstore_qdrant/CloudQdrantVectorStore.py ADDED Viewed

@@ -0,0 +1,262 @@
+from typing import List, Union, Literal
+from pydantic import PrivateAttr, Field, ConfigDict
+from qdrant_client import QdrantClient
+from qdrant_client.models import (
+    PointStruct,
+    VectorParams,
+    Distance,
+)
+from swarmauri_standard.documents.Document import Document
+from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
+from swarmauri_standard.distances.CosineDistance import CosineDistance
+from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
+from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
+    VectorStoreRetrieveMixin,
+)
+from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
+    VectorStoreSaveLoadMixin,
+)
+from swarmauri_base.vector_stores.VectorStoreCloudMixin import (
+    VectorStoreCloudMixin,
+)
+from swarmauri_core.ComponentBase import ComponentBase
+@ComponentBase.register_type(VectorStoreBase, "CloudQdrantVectorStore")
+class CloudQdrantVectorStore(
+    VectorStoreSaveLoadMixin,
+    VectorStoreRetrieveMixin,
+    VectorStoreCloudMixin,
+    VectorStoreBase,
+):
+    """
+    CloudQdrantVectorStore is a concrete implementation that integrates functionality
+    for saving, loading, storing, and retrieving vector documents, leveraging Qdrant as the backend.
+    """
+    type: Literal["CloudQdrantVectorStore"] = "CloudQdrantVectorStore"
+    # allow arbitary types in the model config
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    # Use PrivateAttr to make _embedder and _distance private
+    _embedder: Doc2VecEmbedding = PrivateAttr()
+    _distance: CosineDistance = PrivateAttr()
+    client: Union[QdrantClient, None] = Field(default=None, init=False)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)
+        self._distance = CosineDistance()
+    def connect(self) -> None:
+        """
+        Connects to the Qdrant cloud vector store using the provided credentials.
+        """
+        if self.client is None:
+            self.client = QdrantClient(
+                api_key=self.api_key,
+                url=self.url,
+            )
+        # TODO  may need optimization two loops may not be necessary
+        # Check if the collection exists
+        existing_collections = self.client.get_collections().collections
+        collection_names = [collection.name for collection in existing_collections]
+        if self.collection_name not in collection_names:
+            # Ensure the collection exists with the desired configuration
+            self.client.recreate_collection(
+                collection_name=self.collection_name,
+                vectors_config=VectorParams(
+                    size=self.vector_size, distance=Distance.COSINE
+                ),
+            )
+    def disconnect(self) -> None:
+        """
+        Disconnects from the Qdrant cloud vector store.
+        """
+        if self.client is not None:
+            self.client = None
+    def add_document(self, document: Document) -> None:
+        """
+        Add a single document to the document store.
+        Parameters:
+            document (Document): The document to be added to the store.
+        """
+        embedding = None
+        if not document.embedding:
+            self._embedder.fit([document.content])  # Fit only once
+            embedding = (
+                self._embedder.transform([document.content])[0].to_numpy().tolist()
+            )
+        else:
+            embedding = document.embedding
+        payload = {
+            "content": document.content,
+            "metadata": document.metadata,
+        }
+        doc = PointStruct(id=document.id, vector=embedding, payload=payload)
+        self.client.upsert(
+            collection_name=self.collection_name,
+            points=[doc],
+        )
+    def add_documents(self, documents: List[Document]) -> None:
+        """
+        Add multiple documents to the document store in a batch operation.
+        Parameters:
+            documents (List[Document]): A list of documents to be added to the store.
+        """
+        points = [
+            PointStruct(
+                id=doc.id,
+                vector=doc.embedding
+                or self._embedder.fit_transform([doc.content])[0].to_numpy().tolist(),
+                payload={"content": doc.content, "metadata": doc.metadata},
+            )
+            for doc in documents
+        ]
+        self.client.upsert(self.collection_name, points=points)
+    def get_document(self, id: str) -> Union[Document, None]:
+        """
+        Retrieve a single document by its identifier.
+        Parameters:
+            id (str): The unique identifier of the document to retrieve.
+        Returns:
+            Union[Document, None]: The requested document if found; otherwise, None.
+        """
+        response = self.client.retrieve(
+            collection_name=self.collection_name,
+            ids=[id],
+        )
+        if response:
+            payload = response[0].payload
+            return Document(
+                id=id, content=payload["content"], metadata=payload["metadata"]
+            )
+        return None
+    def get_all_documents(self) -> List[Document]:
+        """
+        Retrieve all documents stored in the document store.
+        Returns:
+            List[Document]: A list of all documents in the store.
+        """
+        response = self.client.scroll(
+            collection_name=self.collection_name,
+        )
+        return [
+            Document(
+                id=doc.id,
+                content=doc.payload["content"],
+                metadata=doc.payload["metadata"],
+            )
+            for doc in response[0]
+        ]
+    def delete_document(self, id: str) -> None:
+        """
+        Delete a document from the document store by its identifier.
+        Parameters:
+            id (str): The unique identifier of the document to delete.
+        """
+        self.client.delete(self.collection_name, points_selector=[id])
+    def update_document(self, id: str, updated_document: Document) -> None:
+        """
+        Update a document in the document store.
+        Parameters:
+            id (str): The unique identifier of the document to update.
+            updated_document (Document): The updated document instance.
+        """
+        # Precompute the embedding outside the update process
+        if not updated_document.embedding:
+            # Transform without refitting to avoid vocabulary issues
+            document_vector = self._embedder.transform([updated_document.content])[0]
+        else:
+            document_vector = updated_document.embedding
+        document_vector = document_vector.to_numpy().tolist()
+        self.client.upsert(
+            self.collection_name,
+            points=[
+                PointStruct(
+                    id=id,
+                    vector=document_vector,
+                    payload={
+                        "content": updated_document.content,
+                        "metadata": updated_document.metadata,
+                    },
+                )
+            ],
+        )
+    def clear_documents(self) -> None:
+        """
+        Deletes all documents from the vector store
+        """
+        self.client.delete_collection(self.collection_name)
+    def document_count(self) -> int:
+        """
+        Returns the number of documents in the store.
+        """
+        response = self.client.scroll(
+            collection_name=self.collection_name,
+        )
+        return len(response)
+    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
+        """
+        Retrieve the top_k most relevant documents based on the given query.
+        For the purpose of this example, this method performs a basic search.
+        Args:
+            query (str): The query string used for document retrieval.
+            top_k (int): The number of top relevant documents to retrieve.
+        Returns:
+            List[Document]: A list of the top_k most relevant documents.
+        """
+        query_vector = self._embedder.infer_vector(query).value
+        results = self.client.search(
+            collection_name=self.collection_name, query_vector=query_vector, limit=top_k
+        )
+        return [
+            Document(
+                id=res.id,
+                content=res.payload["content"],
+                metadata=res.payload["metadata"],
+            )
+            for res in results
+        ]
+    # Override the model_dump_json method
+    def model_dump_json(self, *args, **kwargs) -> str:
+        # Call the disconnect method before serialization
+        self.disconnect()
+        # Now proceed with the usual JSON serialization
+        return super().model_dump_json(*args, **kwargs)

swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py ADDED Viewed

@@ -0,0 +1,258 @@
+from typing import List, Union, Literal
+from pydantic import Field, PrivateAttr, ConfigDict
+from qdrant_client import QdrantClient
+from qdrant_client.models import (
+    PointStruct,
+    VectorParams,
+    Distance,
+)
+from swarmauri_standard.documents.Document import Document
+from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
+from swarmauri_standard.distances.CosineDistance import CosineDistance
+from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
+from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
+    VectorStoreRetrieveMixin,
+)
+from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
+    VectorStoreSaveLoadMixin,
+)
+from swarmauri_base.vector_stores.VectorStorePersistentMixin import (
+    VectorStorePersistentMixin,
+)
+from swarmauri_core.ComponentBase import ComponentBase
+@ComponentBase.register_type(VectorStoreBase, "PersistentQdrantVectorStore")
+class PersistentQdrantVectorStore(
+    VectorStoreSaveLoadMixin,
+    VectorStoreRetrieveMixin,
+    VectorStorePersistentMixin,
+    VectorStoreBase,
+):
+    """
+    PersistentQdrantVectorStore is a concrete implementation that integrates functionality
+    for saving, loading, storing, and retrieving vector documents, leveraging a locally
+    hosted Qdrant instance as the backend.
+    """
+    type: Literal["PersistentQdrantVectorStore"] = "PersistentQdrantVectorStore"
+    # allow arbitary types in the model config
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    # Use PrivateAttr to make _embedder and _distance private
+    _embedder: Doc2VecEmbedding = PrivateAttr()
+    _distance: CosineDistance = PrivateAttr()
+    client: Union[QdrantClient, None] = Field(default=None, init=False)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)
+        self._distance = CosineDistance()
+    def connect(self) -> None:
+        """
+        Connects to the Qdrant vector store using the provided URL.
+        """
+        if self.client is None:
+            self.client = QdrantClient(path=self.path)
+        # Check if the collection exists
+        existing_collections = self.client.get_collections().collections
+        collection_names = [collection.name for collection in existing_collections]
+        if self.collection_name not in collection_names:
+            # Ensure the collection exists with the desired configuration
+            self.client.recreate_collection(
+                collection_name=self.collection_name,
+                vectors_config=VectorParams(
+                    size=self.vector_size, distance=Distance.COSINE
+                ),
+            )
+    def disconnect(self) -> None:
+        """
+        Disconnects from the Qdrant vector store.
+        """
+        if self.client is not None:
+            self.client = None
+    def add_document(self, document: Document) -> None:
+        """
+        Add a single document to the document store.
+        Parameters:
+            document (Document): The document to be added to the store.
+        """
+        embedding = None
+        if not document.embedding:
+            self._embedder.fit([document.content])  # Fit only once
+            embedding = (
+                self._embedder.transform([document.content])[0].to_numpy().tolist()
+            )
+        else:
+            embedding = document.embedding
+        payload = {
+            "content": document.content,
+            "metadata": document.metadata,
+        }
+        doc = PointStruct(id=document.id, vector=embedding, payload=payload)
+        self.client.upsert(
+            collection_name=self.collection_name,
+            points=[doc],
+        )
+    def add_documents(self, documents: List[Document]) -> None:
+        """
+        Add multiple documents to the document store in a batch operation.
+        Parameters:
+            documents (List[Document]): A list of documents to be added to the store.
+        """
+        points = [
+            PointStruct(
+                id=doc.id,
+                vector=doc.embedding
+                or self._embedder.fit_transform([doc.content])[0].to_numpy().tolist(),
+                payload={"content": doc.content, "metadata": doc.metadata},
+            )
+            for doc in documents
+        ]
+        self.client.upsert(self.collection_name, points=points)
+    def get_document(self, id: str) -> Union[Document, None]:
+        """
+        Retrieve a single document by its identifier.
+        Parameters:
+            id (str): The unique identifier of the document to retrieve.
+        Returns:
+            Union[Document, None]: The requested document if found; otherwise, None.
+        """
+        response = self.client.retrieve(
+            collection_name=self.collection_name,
+            ids=[id],
+        )
+        if response:
+            payload = response[0].payload
+            return Document(
+                id=id, content=payload["content"], metadata=payload["metadata"]
+            )
+        return None
+    def get_all_documents(self) -> List[Document]:
+        """
+        Retrieve all documents stored in the document store.
+        Returns:
+            List[Document]: A list of all documents in the store.
+        """
+        response = self.client.scroll(
+            collection_name=self.collection_name,
+        )
+        return [
+            Document(
+                id=doc.id,
+                content=doc.payload["content"],
+                metadata=doc.payload["metadata"],
+            )
+            for doc in response[0]
+        ]
+    def delete_document(self, id: str) -> None:
+        """
+        Delete a document from the document store by its identifier.
+        Parameters:
+            id (str): The unique identifier of the document to delete.
+        """
+        self.client.delete(self.collection_name, points_selector=[id])
+    def update_document(self, id: str, updated_document: Document) -> None:
+        """
+        Update a document in the document store.
+        Parameters:
+            id (str): The unique identifier of the document to update.
+            updated_document (Document): The updated document instance.
+        """
+        # Precompute the embedding outside the update process
+        if not updated_document.embedding:
+            # Transform without refitting to avoid vocabulary issues
+            document_vector = self._embedder.transform([updated_document.content])[0]
+        else:
+            document_vector = updated_document.embedding
+        document_vector = document_vector.to_numpy().tolist()
+        self.client.upsert(
+            self.collection_name,
+            points=[
+                PointStruct(
+                    id=id,
+                    vector=document_vector,
+                    payload={
+                        "content": updated_document.content,
+                        "metadata": updated_document.metadata,
+                    },
+                )
+            ],
+        )
+    def clear_documents(self) -> None:
+        """
+        Deletes all documents from the vector store.
+        """
+        self.client.delete_collection(self.collection_name)
+    def document_count(self) -> int:
+        """
+        Returns the number of documents in the store.
+        """
+        response = self.client.scroll(
+            collection_name=self.collection_name,
+        )
+        return len(response)
+    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
+        """
+        Retrieve the top_k most relevant documents based on the given query.
+        For the purpose of this example, this method performs a basic search.
+        Args:
+            query (str): The query string used for document retrieval.
+            top_k (int): The number of top relevant documents to retrieve.
+        Returns:
+            List[Document]: A list of the top_k most relevant documents.
+        """
+        query_vector = self._embedder.infer_vector(query).value
+        results = self.client.search(
+            collection_name=self.collection_name, query_vector=query_vector, limit=top_k
+        )
+        return [
+            Document(
+                id=res.id,
+                content=res.payload["content"],
+                metadata=res.payload["metadata"],
+            )
+            for res in results
+        ]
+    # Override the model_dump_json method
+    def model_dump_json(self, *args, **kwargs) -> str:
+        # Call the disconnect method before serialization
+        self.disconnect()
+        # Now proceed with the usual JSON serialization
+        return super().model_dump_json(*args, **kwargs)

swarmauri_vectorstore_qdrant/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+from .PersistentQdrantVectorStore import PersistentQdrantVectorStore
+from .CloudQdrantVectorStore import CloudQdrantVectorStore
+# Package version string exposed at import time.
+# NOTE(review): the wheel's METADATA lists version 0.6.0.dev154, which differs
+# from the value below - confirm which one is authoritative.
+__version__ = "0.6.0.dev26"
+# Long-form, Markdown-formatted description listing the bundled components.
+__long_desc__ = """
+# Swarmauri Qdrant Based Components
+Components Included:
+- PersistentQdrantVectorStore
+- CloudQdrantVectorStore
+Visit us at: https://swarmauri.com
+Follow us at: https://github.com/swarmauri
+Star us at: https://github.com/swarmauri/swarmauri-sdk
+"""

swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,21 @@
+Metadata-Version: 2.3
+Name: swarmauri_vectorstore_qdrant
+Version: 0.6.0.dev154
+Summary: Swarmauri Persistent Qdrant Vector Store
+License: Apache-2.0
+Author: Jacob Stewart
+Author-email: jacob@swarmauri.com
+Requires-Python: >=3.10,<3.13
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: qdrant-client (>=1.12.0,<2.0.0)
+Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
+Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
+Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
+Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
+Description-Content-Type: text/markdown
+# Swarmauri Example Community Package

swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+swarmauri_vectorstore_qdrant/__init__.py,sha256=-odIzXQs7OSpOPeaP2m0F0XD8xCwBSpPcAvs60pUf2s,449
+swarmauri_vectorstore_qdrant/CloudQdrantVectorStore.py,sha256=eV7Sv2VrPaHJo7qB2Fown8FIE9tQy6muxt41ocqXtCM,9013
+swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py,sha256=Et3yjg68mrwwhQy6fuRhb7BFfFYjrZsTkmZpNtZfLMg,8923
+swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/entry_points.txt,sha256=-XY2dvS5pIlDrAmYOjBLzDVerkBgXijW7ftHRIc9pDY,238
+swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/METADATA,sha256=r3MpbFLFbLvsdsHcPyOiJ14KseB3juPpZvRLbu5bNfM,867
+swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/RECORD,,

swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: poetry-core 2.0.1
+Root-Is-Purelib: true
+Tag: py3-none-any

swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,4 @@
+[swarmauri.vector_stores]
+CloudQdrantVectorStore=swarmauri_vectorstore_qdrant.CloudQdrantVectorStore:CloudQdrantVectorStore
+PersistentQdrantVectorStore=swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore:PersistentQdrantVectorStore