swarmauri_vectorstore_qdrant 0.6.0.dev154__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,262 @@
1
+ from typing import List, Union, Literal
2
+
3
+ from pydantic import PrivateAttr, Field, ConfigDict
4
+
5
+ from qdrant_client import QdrantClient
6
+ from qdrant_client.models import (
7
+ PointStruct,
8
+ VectorParams,
9
+ Distance,
10
+ )
11
+
12
+ from swarmauri_standard.documents.Document import Document
13
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
14
+ from swarmauri_standard.distances.CosineDistance import CosineDistance
15
+
16
+ from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
17
+ from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
18
+ VectorStoreRetrieveMixin,
19
+ )
20
+ from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
21
+ VectorStoreSaveLoadMixin,
22
+ )
23
+ from swarmauri_base.vector_stores.VectorStoreCloudMixin import (
24
+ VectorStoreCloudMixin,
25
+ )
26
+ from swarmauri_core.ComponentBase import ComponentBase
27
+
28
+
29
@ComponentBase.register_type(VectorStoreBase, "CloudQdrantVectorStore")
class CloudQdrantVectorStore(
    VectorStoreSaveLoadMixin,
    VectorStoreRetrieveMixin,
    VectorStoreCloudMixin,
    VectorStoreBase,
):
    """
    Vector store backed by a remote (cloud) Qdrant deployment.

    Each document is stored as one Qdrant point whose payload carries the
    document ``content`` and ``metadata``. Documents that arrive without an
    embedding are embedded with a Doc2Vec model sized to ``vector_size``; the
    collection is created with cosine distance.

    ``api_key``, ``url``, ``collection_name`` and ``vector_size`` are read from
    the instance. They are presumably declared by the mixins / base class,
    which are not visible in this module -- confirm against those classes.

    ``connect()`` must be called before any method that touches the store.
    """

    type: Literal["CloudQdrantVectorStore"] = "CloudQdrantVectorStore"

    # Allow arbitrary (non-pydantic) types such as QdrantClient as field types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Private attributes: excluded from validation and serialization.
    _embedder: Doc2VecEmbedding = PrivateAttr()
    _distance: CosineDistance = PrivateAttr()
    # Live Qdrant client; None while disconnected.
    client: Union[QdrantClient, None] = Field(default=None, init=False)

    def __init__(self, **kwargs):
        """
        Build the store and its embedder.

        Parameters:
            **kwargs: Field values forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)
        self._distance = CosineDistance()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_collection(self) -> None:
        """Create the target collection if the server does not have it yet."""
        if self.client.collection_exists(self.collection_name):
            return
        # The collection is known to be missing here, so a plain create is
        # enough; the deprecated recreate_collection would first issue a
        # pointless delete.
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.vector_size, distance=Distance.COSINE
            ),
        )

    def _as_float_list(self, vector) -> List[float]:
        """
        Convert an embedding into the plain list of floats sent to Qdrant.

        Objects exposing ``to_numpy()`` (as the embedder's vectors do in this
        module) are converted through it; anything else is treated as an
        iterable of numbers.
        """
        if hasattr(vector, "to_numpy"):
            return vector.to_numpy().tolist()
        return list(vector)

    def _to_point(self, point_id, document: Document, vector) -> PointStruct:
        """Build the Qdrant point that stores ``document`` under ``point_id``."""
        return PointStruct(
            id=point_id,
            vector=self._as_float_list(vector),
            payload={
                "content": document.content,
                "metadata": document.metadata,
            },
        )

    # ------------------------------------------------------------------
    # Connection management
    # ------------------------------------------------------------------
    def connect(self) -> None:
        """
        Connect to the Qdrant cloud vector store using ``api_key`` and ``url``,
        then make sure the configured collection exists.

        Calling it again on a connected store reuses the existing client and
        only re-checks the collection.
        """
        if self.client is None:
            self.client = QdrantClient(
                api_key=self.api_key,
                url=self.url,
            )

        self._ensure_collection()

    def disconnect(self) -> None:
        """
        Disconnect from the Qdrant cloud vector store.

        The client is closed explicitly so its underlying connections are
        released right away instead of whenever the object is garbage
        collected. Does nothing when already disconnected.
        """
        if self.client is None:
            return
        try:
            self.client.close()
        finally:
            # Always drop the reference, even if closing raised.
            self.client = None

    # ------------------------------------------------------------------
    # Document operations
    # ------------------------------------------------------------------
    def add_document(self, document: Document) -> None:
        """
        Add a single document to the document store.

        If the document has no embedding, the embedder is fitted on its
        content and the resulting vector is stored.

        Parameters:
            document (Document): The document to be added to the store.
        """
        if document.embedding:
            vector = document.embedding
        else:
            self._embedder.fit([document.content])
            vector = self._embedder.transform([document.content])[0]

        self.client.upsert(
            collection_name=self.collection_name,
            points=[self._to_point(document.id, document, vector)],
        )

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the document store in a batch operation.

        Documents lacking an embedding are embedded together with a single
        ``fit_transform`` call, rather than retraining the model once per
        document. An empty list is a no-op.

        Parameters:
            documents (List[Document]): A list of documents to be added to the store.
        """
        if not documents:
            return

        pending = [doc for doc in documents if not doc.embedding]
        # Assumes fit_transform returns one vector per input, in input order
        # -- TODO confirm against Doc2VecEmbedding.
        fitted = iter(
            self._embedder.fit_transform([doc.content for doc in pending])
            if pending
            else []
        )

        points = []
        for doc in documents:
            vector = doc.embedding if doc.embedding else next(fitted)
            points.append(self._to_point(doc.id, doc, vector))

        self.client.upsert(collection_name=self.collection_name, points=points)

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Retrieve a single document by its identifier.

        Parameters:
            id (str): The unique identifier of the document to retrieve.

        Returns:
            Union[Document, None]: The requested document if found; otherwise, None.
        """
        records = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
        )
        if not records:
            return None

        payload = records[0].payload
        return Document(
            id=id, content=payload["content"], metadata=payload["metadata"]
        )

    def get_all_documents(self) -> List[Document]:
        """
        Retrieve all documents stored in the document store.

        ``scroll`` returns one page at a time, so every page is followed until
        the server reports no further offset.

        Returns:
            List[Document]: A list of all documents in the store.
        """
        page_size = 256
        documents: List[Document] = []
        offset = None

        while True:
            records, offset = self.client.scroll(
                collection_name=self.collection_name,
                limit=page_size,
                offset=offset,
            )
            documents.extend(
                Document(
                    id=record.id,
                    content=record.payload["content"],
                    metadata=record.payload["metadata"],
                )
                for record in records
            )
            # A None offset marks the last page.
            if offset is None:
                return documents

    def delete_document(self, id: str) -> None:
        """
        Delete a document from the document store by its identifier.

        Parameters:
            id (str): The unique identifier of the document to delete.
        """
        self.client.delete(
            collection_name=self.collection_name, points_selector=[id]
        )

    def update_document(self, id: str, updated_document: Document) -> None:
        """
        Update a document in the document store.

        The point stored under ``id`` is overwritten (upsert). When the
        updated document has no embedding, its content is transformed with the
        already fitted embedder; the model is not refitted here.

        Parameters:
            id (str): The unique identifier of the document to update.
            updated_document (Document): The updated document instance.
        """
        if updated_document.embedding:
            vector = updated_document.embedding
        else:
            # Transform without refitting to avoid vocabulary issues.
            vector = self._embedder.transform([updated_document.content])[0]

        self.client.upsert(
            collection_name=self.collection_name,
            points=[self._to_point(id, updated_document, vector)],
        )

    def clear_documents(self) -> None:
        """
        Deletes all documents from the vector store.

        The collection is dropped and immediately recreated empty, so the
        store stays usable without another ``connect()`` call.
        """
        self.client.delete_collection(self.collection_name)
        self._ensure_collection()

    def document_count(self) -> int:
        """
        Returns the number of documents in the store.

        Uses an exact server-side count instead of measuring a scroll result,
        which is a ``(records, next_offset)`` pair and therefore always has
        length 2.
        """
        result = self.client.count(
            collection_name=self.collection_name,
            exact=True,
        )
        return result.count

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Retrieve the top_k most relevant documents based on the given query.

        The query text is embedded with the store's embedder and matched
        against the collection's vectors.

        Parameters:
            query (str): The query string used for document retrieval.
            top_k (int): The number of top relevant documents to retrieve.

        Returns:
            List[Document]: A list of the top_k most relevant documents.
        """
        query_vector = self._embedder.infer_vector(query).value
        # query_points is the supported replacement for the deprecated search().
        response = self.client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=top_k,
        )

        return [
            Document(
                id=hit.id,
                content=hit.payload["content"],
                metadata=hit.payload["metadata"],
            )
            for hit in response.points
        ]

    def model_dump_json(self, *args, **kwargs) -> str:
        """
        Serialize the store to JSON.

        The store is disconnected first so that no live client object is part
        of the serialized state; callers must ``connect()`` again afterwards
        to keep using the store.
        """
        self.disconnect()

        return super().model_dump_json(*args, **kwargs)
@@ -0,0 +1,258 @@
1
+ from typing import List, Union, Literal
2
+ from pydantic import Field, PrivateAttr, ConfigDict
3
+
4
+ from qdrant_client import QdrantClient
5
+ from qdrant_client.models import (
6
+ PointStruct,
7
+ VectorParams,
8
+ Distance,
9
+ )
10
+
11
+ from swarmauri_standard.documents.Document import Document
12
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
13
+ from swarmauri_standard.distances.CosineDistance import CosineDistance
14
+
15
+ from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
16
+ from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
17
+ VectorStoreRetrieveMixin,
18
+ )
19
+ from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
20
+ VectorStoreSaveLoadMixin,
21
+ )
22
+ from swarmauri_base.vector_stores.VectorStorePersistentMixin import (
23
+ VectorStorePersistentMixin,
24
+ )
25
+ from swarmauri_core.ComponentBase import ComponentBase
26
+
27
+
28
@ComponentBase.register_type(VectorStoreBase, "PersistentQdrantVectorStore")
class PersistentQdrantVectorStore(
    VectorStoreSaveLoadMixin,
    VectorStoreRetrieveMixin,
    VectorStorePersistentMixin,
    VectorStoreBase,
):
    """
    Vector store backed by a local, on-disk Qdrant instance.

    Each document is stored as one Qdrant point whose payload carries the
    document ``content`` and ``metadata``. Documents that arrive without an
    embedding are embedded with a Doc2Vec model sized to ``vector_size``; the
    collection is created with cosine distance.

    ``path``, ``collection_name`` and ``vector_size`` are read from the
    instance. They are presumably declared by the mixins / base class, which
    are not visible in this module -- confirm against those classes.

    ``connect()`` must be called before any method that touches the store.
    """

    type: Literal["PersistentQdrantVectorStore"] = "PersistentQdrantVectorStore"

    # Allow arbitrary (non-pydantic) types such as QdrantClient as field types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Private attributes: excluded from validation and serialization.
    _embedder: Doc2VecEmbedding = PrivateAttr()
    _distance: CosineDistance = PrivateAttr()
    # Live Qdrant client; None while disconnected.
    client: Union[QdrantClient, None] = Field(default=None, init=False)

    def __init__(self, **kwargs):
        """
        Build the store and its embedder.

        Parameters:
            **kwargs: Field values forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)
        self._distance = CosineDistance()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _ensure_collection(self) -> None:
        """Create the target collection if the storage does not have it yet."""
        if self.client.collection_exists(self.collection_name):
            return
        # The collection is known to be missing here, so a plain create is
        # enough; the deprecated recreate_collection would first issue a
        # pointless delete.
        self.client.create_collection(
            collection_name=self.collection_name,
            vectors_config=VectorParams(
                size=self.vector_size, distance=Distance.COSINE
            ),
        )

    def _as_float_list(self, vector) -> List[float]:
        """
        Convert an embedding into the plain list of floats sent to Qdrant.

        Objects exposing ``to_numpy()`` (as the embedder's vectors do in this
        module) are converted through it; anything else is treated as an
        iterable of numbers.
        """
        if hasattr(vector, "to_numpy"):
            return vector.to_numpy().tolist()
        return list(vector)

    def _to_point(self, point_id, document: Document, vector) -> PointStruct:
        """Build the Qdrant point that stores ``document`` under ``point_id``."""
        return PointStruct(
            id=point_id,
            vector=self._as_float_list(vector),
            payload={
                "content": document.content,
                "metadata": document.metadata,
            },
        )

    # ------------------------------------------------------------------
    # Connection management
    # ------------------------------------------------------------------
    def connect(self) -> None:
        """
        Open the local Qdrant storage located at ``path``, then make sure the
        configured collection exists.

        Calling it again on a connected store reuses the existing client and
        only re-checks the collection.
        """
        if self.client is None:
            self.client = QdrantClient(path=self.path)

        self._ensure_collection()

    def disconnect(self) -> None:
        """
        Disconnects from the Qdrant vector store.

        The client is closed explicitly rather than just dereferenced.
        NOTE(review): local-mode clients presumably hold a lock on the storage
        folder until closed, which would block reopening the same ``path`` --
        confirm against qdrant-client. Does nothing when already disconnected.
        """
        if self.client is None:
            return
        try:
            self.client.close()
        finally:
            # Always drop the reference, even if closing raised.
            self.client = None

    # ------------------------------------------------------------------
    # Document operations
    # ------------------------------------------------------------------
    def add_document(self, document: Document) -> None:
        """
        Add a single document to the document store.

        If the document has no embedding, the embedder is fitted on its
        content and the resulting vector is stored.

        Parameters:
            document (Document): The document to be added to the store.
        """
        if document.embedding:
            vector = document.embedding
        else:
            self._embedder.fit([document.content])
            vector = self._embedder.transform([document.content])[0]

        self.client.upsert(
            collection_name=self.collection_name,
            points=[self._to_point(document.id, document, vector)],
        )

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the document store in a batch operation.

        Documents lacking an embedding are embedded together with a single
        ``fit_transform`` call, rather than retraining the model once per
        document. An empty list is a no-op.

        Parameters:
            documents (List[Document]): A list of documents to be added to the store.
        """
        if not documents:
            return

        pending = [doc for doc in documents if not doc.embedding]
        # Assumes fit_transform returns one vector per input, in input order
        # -- TODO confirm against Doc2VecEmbedding.
        fitted = iter(
            self._embedder.fit_transform([doc.content for doc in pending])
            if pending
            else []
        )

        points = []
        for doc in documents:
            vector = doc.embedding if doc.embedding else next(fitted)
            points.append(self._to_point(doc.id, doc, vector))

        self.client.upsert(collection_name=self.collection_name, points=points)

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Retrieve a single document by its identifier.

        Parameters:
            id (str): The unique identifier of the document to retrieve.

        Returns:
            Union[Document, None]: The requested document if found; otherwise, None.
        """
        records = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[id],
        )
        if not records:
            return None

        payload = records[0].payload
        return Document(
            id=id, content=payload["content"], metadata=payload["metadata"]
        )

    def get_all_documents(self) -> List[Document]:
        """
        Retrieve all documents stored in the document store.

        ``scroll`` returns one page at a time, so every page is followed until
        the client reports no further offset.

        Returns:
            List[Document]: A list of all documents in the store.
        """
        page_size = 256
        documents: List[Document] = []
        offset = None

        while True:
            records, offset = self.client.scroll(
                collection_name=self.collection_name,
                limit=page_size,
                offset=offset,
            )
            documents.extend(
                Document(
                    id=record.id,
                    content=record.payload["content"],
                    metadata=record.payload["metadata"],
                )
                for record in records
            )
            # A None offset marks the last page.
            if offset is None:
                return documents

    def delete_document(self, id: str) -> None:
        """
        Delete a document from the document store by its identifier.

        Parameters:
            id (str): The unique identifier of the document to delete.
        """
        self.client.delete(
            collection_name=self.collection_name, points_selector=[id]
        )

    def update_document(self, id: str, updated_document: Document) -> None:
        """
        Update a document in the document store.

        The point stored under ``id`` is overwritten (upsert). When the
        updated document has no embedding, its content is transformed with the
        already fitted embedder; the model is not refitted here.

        Parameters:
            id (str): The unique identifier of the document to update.
            updated_document (Document): The updated document instance.
        """
        if updated_document.embedding:
            vector = updated_document.embedding
        else:
            # Transform without refitting to avoid vocabulary issues.
            vector = self._embedder.transform([updated_document.content])[0]

        self.client.upsert(
            collection_name=self.collection_name,
            points=[self._to_point(id, updated_document, vector)],
        )

    def clear_documents(self) -> None:
        """
        Deletes all documents from the vector store.

        The collection is dropped and immediately recreated empty, so the
        store stays usable without another ``connect()`` call.
        """
        self.client.delete_collection(self.collection_name)
        self._ensure_collection()

    def document_count(self) -> int:
        """
        Returns the number of documents in the store.

        Uses an exact count instead of measuring a scroll result, which is a
        ``(records, next_offset)`` pair and therefore always has length 2.
        """
        result = self.client.count(
            collection_name=self.collection_name,
            exact=True,
        )
        return result.count

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Retrieve the top_k most relevant documents based on the given query.

        The query text is embedded with the store's embedder and matched
        against the collection's vectors.

        Parameters:
            query (str): The query string used for document retrieval.
            top_k (int): The number of top relevant documents to retrieve.

        Returns:
            List[Document]: A list of the top_k most relevant documents.
        """
        query_vector = self._embedder.infer_vector(query).value
        # query_points is the supported replacement for the deprecated search().
        response = self.client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=top_k,
        )

        return [
            Document(
                id=hit.id,
                content=hit.payload["content"],
                metadata=hit.payload["metadata"],
            )
            for hit in response.points
        ]

    def model_dump_json(self, *args, **kwargs) -> str:
        """
        Serialize the store to JSON.

        The store is disconnected first so that no live client object is part
        of the serialized state; callers must ``connect()`` again afterwards
        to keep using the store.
        """
        self.disconnect()

        return super().model_dump_json(*args, **kwargs)
@@ -0,0 +1,18 @@
1
+ from .PersistentQdrantVectorStore import PersistentQdrantVectorStore
2
+ from .CloudQdrantVectorStore import CloudQdrantVectorStore
3
+
4
# Resolve the version from the installed distribution metadata so it cannot
# drift from the published package version. The imports are aliased with a
# leading underscore so they are not re-exported by ``from package import *``.
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _version

try:
    __version__ = _version("swarmauri_vectorstore_qdrant")
except _PackageNotFoundError:
    # Running from a source tree that is not installed: fall back to the
    # version this file was released with.
    __version__ = "0.6.0.dev154"

__long_desc__ = """

# Swarmauri Qdrant Based Components

Components Included:
- PersistentQdrantVectorStore
- CloudQdrantVectorStore


Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_vectorstore_qdrant
3
+ Version: 0.6.0.dev154
4
+ Summary: Swarmauri Persistent Qdrant Vector Store
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: qdrant-client (>=1.12.0,<2.0.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
18
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Swarmauri Qdrant Vector Store
@@ -0,0 +1,7 @@
1
+ swarmauri_vectorstore_qdrant/__init__.py,sha256=-odIzXQs7OSpOPeaP2m0F0XD8xCwBSpPcAvs60pUf2s,449
2
+ swarmauri_vectorstore_qdrant/CloudQdrantVectorStore.py,sha256=eV7Sv2VrPaHJo7qB2Fown8FIE9tQy6muxt41ocqXtCM,9013
3
+ swarmauri_vectorstore_qdrant/PersistentQdrantVectorStore.py,sha256=Et3yjg68mrwwhQy6fuRhb7BFfFYjrZsTkmZpNtZfLMg,8923
4
+ swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/entry_points.txt,sha256=-XY2dvS5pIlDrAmYOjBLzDVerkBgXijW7ftHRIc9pDY,238
5
+ swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/METADATA,sha256=r3MpbFLFbLvsdsHcPyOiJ14KseB3juPpZvRLbu5bNfM,867
6
+ swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
7
+ swarmauri_vectorstore_qdrant-0.6.0.dev154.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.0.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,4 @@
1
+ [swarmauri.vector_stores]
2
+ CloudQdrantVectorStore=swarmauri_vectorstore_qdrant.CloudQdrantVectorStore:CloudQdrantVectorStore
3
+ PersistentQdrantVectorStore=swarmauri_vectorstore_qdrant.PersistentQdrantVectorStore:PersistentQdrantVectorStore
4
+