swarmauri_vectorstore_cloudweaviate 0.6.0.dev154__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/PKG-INFO +21 -0
- swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/README.md +1 -0
- swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/pyproject.toml +58 -0
- swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/swarmauri_vectorstore_cloudweaviate/CloudWeaviateVectorStore.py +232 -0
- swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/swarmauri_vectorstore_cloudweaviate/__init__.py +12 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: swarmauri_vectorstore_cloudweaviate
|
|
3
|
+
Version: 0.6.0.dev154
|
|
4
|
+
Summary: Swarmauri Weaviate Vector Store
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Jacob Stewart
|
|
7
|
+
Author-email: jacob@swarmauri.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
|
15
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
|
16
|
+
Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
|
|
17
|
+
Requires-Dist: weaviate-client (>=4.9.2,<5.0.0)
|
|
18
|
+
Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# Swarmauri CloudWeaviate Vector Store
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Swarmauri CloudWeaviate Vector Store
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "swarmauri_vectorstore_cloudweaviate"
|
|
3
|
+
version = "0.6.0.dev154"
|
|
4
|
+
description = "Swarmauri Weaviate Vector Store"
|
|
5
|
+
authors = ["Jacob Stewart <jacob@swarmauri.com>"]
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
repository = "http://github.com/swarmauri/swarmauri-sdk"
|
|
9
|
+
classifiers = [
|
|
10
|
+
"License :: OSI Approved :: Apache Software License",
|
|
11
|
+
"Programming Language :: Python :: 3.10",
|
|
12
|
+
"Programming Language :: Python :: 3.11",
|
|
13
|
+
"Programming Language :: Python :: 3.12"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.poetry.dependencies]
python = ">=3.10,<3.13"

# Swarmauri
swarmauri_core = {version = "^0.6.0.dev154"}
swarmauri_base = {version = "^0.6.0.dev154"}
# CloudWeaviateVectorStore.py imports Document and Vector from swarmauri_standard
# directly, so it is declared here instead of relying on a transitive install.
swarmauri_standard = {version = "^0.6.0.dev154"}
swarmauri_embedding_doc2vec = {version = "^0.6.0.dev154"}

# Dependencies
weaviate-client = "^4.9.2"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
[tool.poetry.group.dev.dependencies]
|
|
29
|
+
flake8 = "^7.0"
|
|
30
|
+
pytest = "^8.0"
|
|
31
|
+
pytest-asyncio = ">=0.24.0"
|
|
32
|
+
pytest-xdist = "^3.6.1"
|
|
33
|
+
pytest-json-report = "^1.5.0"
|
|
34
|
+
python-dotenv = "*"
|
|
35
|
+
requests = "^2.32.3"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["poetry-core>=1.0.0"]
|
|
39
|
+
build-backend = "poetry.core.masonry.api"
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
norecursedirs = ["combined", "scripts"]
|
|
43
|
+
|
|
44
|
+
markers = [
|
|
45
|
+
"test: standard test",
|
|
46
|
+
"unit: Unit tests",
|
|
47
|
+
"integration: Integration tests",
|
|
48
|
+
"acceptance: Acceptance tests",
|
|
49
|
+
"experimental: Experimental tests"
|
|
50
|
+
]
|
|
51
|
+
log_cli = true
|
|
52
|
+
log_cli_level = "INFO"
|
|
53
|
+
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
|
|
54
|
+
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
55
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
56
|
+
|
|
57
|
+
[tool.poetry.plugins."swarmauri.vector_stores"]
|
|
58
|
+
CloudWeaviateVectorStore = "swarmauri_vectorstore_cloudweaviate.CloudWeaviateVectorStore:CloudWeaviateVectorStore"
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
from typing import List, Union, Literal, Optional
|
|
2
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
3
|
+
from pydantic import PrivateAttr
|
|
4
|
+
import uuid as ud
|
|
5
|
+
import weaviate
|
|
6
|
+
from weaviate.classes.init import Auth
|
|
7
|
+
from weaviate.util import generate_uuid5
|
|
8
|
+
from weaviate.classes.query import MetadataQuery
|
|
9
|
+
|
|
10
|
+
from swarmauri_standard.documents.Document import Document
|
|
11
|
+
from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
|
|
12
|
+
from swarmauri_standard.vectors.Vector import Vector
|
|
13
|
+
|
|
14
|
+
from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
|
|
15
|
+
from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
|
|
16
|
+
VectorStoreRetrieveMixin,
|
|
17
|
+
)
|
|
18
|
+
from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
|
|
19
|
+
VectorStoreSaveLoadMixin,
|
|
20
|
+
)
|
|
21
|
+
from swarmauri_base.vector_stores.VectorStoreCloudMixin import VectorStoreCloudMixin
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@ComponentBase.register_type(VectorStoreBase, "CloudWeaviateVectorStore")
class CloudWeaviateVectorStore(
    VectorStoreSaveLoadMixin,
    VectorStoreRetrieveMixin,
    VectorStoreBase,
    VectorStoreCloudMixin,
):
    """
    Vector store backed by a collection hosted on Weaviate Cloud.

    Document text is embedded locally with ``Doc2VecEmbedding`` and written to
    the collection named by ``self.collection_name``. The connection is not
    opened on construction: call :meth:`connect` before using any of the
    document methods and :meth:`disconnect` (or :meth:`close`) when finished.

    ``url``, ``api_key``, ``collection_name`` and ``vector_size`` are read from
    ``self`` but not declared here; presumably they are fields supplied by the
    base classes -- verify against ``VectorStoreCloudMixin``.

    NOTE(review): object UUIDs are derived with ``uuid5`` from a namespace that
    is generated randomly for every instance, so an id written through one
    instance will not resolve through another. Confirm this is intended.
    """

    type: Literal["CloudWeaviateVectorStore"] = "CloudWeaviateVectorStore"

    # Private attributes
    # Live client returned by weaviate.connect_to_weaviate_cloud; None while disconnected.
    _client: Optional[weaviate.WeaviateClient] = PrivateAttr(default=None)
    # Local embedder; assigned in __init__ once vector_size is known.
    _embedder: Optional[Doc2VecEmbedding] = PrivateAttr(default=None)
    # Namespace used to map document ids onto deterministic Weaviate UUIDs.
    _namespace_uuid: ud.UUID = PrivateAttr(default_factory=ud.uuid4)

    def __init__(self, **data):
        """
        Build the store and its local embedder.

        No connection to Weaviate is opened here; use :meth:`connect`.

        :param data: Field values forwarded to the parent initializer
        """
        super().__init__(**data)

        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)

    def connect(self, **kwargs):
        """
        Initialize the Weaviate client.

        Does nothing when a client is already present.

        :param kwargs: Only ``headers`` is used; it is forwarded to the
            Weaviate connection call (defaults to an empty dict)
        """
        if self._client is None:
            self._client = weaviate.connect_to_weaviate_cloud(
                cluster_url=self.url,
                auth_credentials=Auth.api_key(self.api_key),
                headers=kwargs.get("headers", {}),
            )

    def disconnect(self) -> None:
        """
        Disconnect from the Weaviate cloud vector store.

        Closes the client created by :meth:`connect`, if any, and forgets it so
        that a later :meth:`connect` opens a fresh connection. Calling this
        while already disconnected is a no-op.
        """
        client = self._client
        if client is None:
            return
        try:
            client.close()
        finally:
            # Drop the reference even if close() fails, otherwise connect()
            # would keep handing back a client that can no longer be used.
            self._client = None

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the vector store.

        The document's own embedding is used when it is truthy; otherwise one
        is computed from the document content with the local embedder.

        :param document: Document to add
        :raises Exception: Any error raised while embedding or inserting is
            printed and then re-raised unchanged
        """
        try:
            collection = self._client.collections.get(self.collection_name)

            # Generate or use existing embedding
            embedding = (
                document.embedding
                or self._embedder.fit_transform([document.content])[0]
            )

            data_object = {
                "content": document.content,
                "metadata": document.metadata,
            }

            # Derive the object UUID from the document id when there is one,
            # otherwise from the stored properties.
            uuid = (
                str(ud.uuid5(self._namespace_uuid, document.id))
                if document.id
                else generate_uuid5(data_object)
            )

            collection.data.insert(
                properties=data_object,
                vector=embedding.value,
                uuid=uuid,
            )

            print(f"Document '{document.id}' added to Weaviate.")
        except Exception as e:
            print(f"Error adding document '{document.id}': {e}")
            raise

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the vector store.

        Documents are inserted one at a time through :meth:`add_document`; the
        first failure stops the loop, so earlier documents stay inserted.

        :param documents: List of documents to add
        :raises Exception: The error raised by the failing insertion
        """
        try:
            for document in documents:
                self.add_document(document)

            print(f"{len(documents)} documents added to Weaviate.")
        except Exception as e:
            print(f"Error adding documents: {e}")
            raise

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Retrieve a document by its ID.

        Errors are printed and reported as ``None`` rather than raised. The
        returned document carries no embedding.

        :param id: Document ID
        :return: Document object or None if not found
        """
        try:
            collection = self._client.collections.get(self.collection_name)

            result = collection.query.fetch_object_by_id(
                ud.uuid5(self._namespace_uuid, id)
            )

            if result:
                return Document(
                    id=id,
                    content=result.properties["content"],
                    metadata=result.properties["metadata"],
                )
            return None
        except Exception as e:
            print(f"Error retrieving document '{id}': {e}")
            return None

    def get_all_documents(self) -> List[Document]:
        """
        Retrieve all documents from the vector store.

        Errors are printed and reported as an empty list rather than raised.
        The returned documents are built without an explicit ``id``.

        :return: List of Document objects
        """
        try:
            collection = self._client.collections.get(self.collection_name)
            documents = [
                Document(
                    content=item.properties["content"],
                    metadata=item.properties["metadata"],
                    # item.vector is a mapping; the first stored vector is used.
                    embedding=Vector(value=list(item.vector.values())[0]),
                )
                for item in collection.iterator(include_vector=True)
            ]
            return documents
        except Exception as e:
            print(f"Error retrieving all documents: {e}")
            return []

    def delete_document(self, id: str) -> None:
        """
        Delete a document by its ID.

        :param id: Document ID
        :raises Exception: Any error raised by the delete call is printed and
            then re-raised unchanged
        """
        try:
            collection = self._client.collections.get(self.collection_name)
            collection.data.delete_by_id(ud.uuid5(self._namespace_uuid, id))
            print(f"Document '{id}' has been deleted from Weaviate.")
        except Exception as e:
            print(f"Error deleting document '{id}': {e}")
            raise

    def update_document(self, id: str, document: Document) -> None:
        """
        Update an existing document.

        Implemented as a delete followed by an add, so the two steps are not
        atomic. The replacement is stored under a UUID derived from
        ``document.id``, not from ``id``.

        :param id: Document ID
        :param document: Document object with updated data
        """
        self.delete_document(id)
        self.add_document(document)

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Retrieve the top_k most relevant documents based on the given query.

        Errors are printed and reported as an empty list rather than raised.
        The returned documents are built without an explicit ``id``.

        :param query: Query string
        :param top_k: Number of top similar documents to retrieve
        :return: List of Document objects
        """
        try:
            collection = self._client.collections.get(self.collection_name)
            query_vector = self._embedder.infer_vector(query)
            response = collection.query.near_vector(
                near_vector=query_vector.value,
                limit=top_k,
                return_metadata=MetadataQuery(distance=True),
            )

            documents = [
                Document(
                    content=res.properties["content"],
                    metadata=res.properties["metadata"],
                )
                for res in response.objects
            ]
            return documents
        except Exception as e:
            print(f"Error retrieving documents for query '{query}': {e}")
            return []

    def close(self):
        """
        Close the connection to the Weaviate server.

        Equivalent to :meth:`disconnect`; the client reference is cleared so
        that :meth:`connect` can be called again afterwards.
        """
        self.disconnect()

    def model_dump_json(self, *args, **kwargs) -> str:
        """
        Serialize the store to JSON after disconnecting from Weaviate.

        The connection is closed first, so :meth:`connect` must be called
        again before the store is used after serialization.

        :return: JSON string produced by the parent implementation
        """
        # Call the disconnect method before serialization
        self.disconnect()

        # Now proceed with the usual JSON serialization
        return super().model_dump_json(*args, **kwargs)

    def __del__(self):
        """Release the connection on garbage collection, on a best-effort basis."""
        try:
            self.close()
        except Exception:
            # The instance may be only partially constructed (e.g. the parent
            # initializer raised), or the interpreter may be shutting down;
            # a finalizer must not raise in either case.
            pass
|
swarmauri_vectorstore_cloudweaviate-0.6.0.dev154/swarmauri_vectorstore_cloudweaviate/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Swarmauri CloudWeaviate vector store plugin package."""

from importlib.metadata import PackageNotFoundError, version

from .CloudWeaviateVectorStore import CloudWeaviateVectorStore

__all__ = ["CloudWeaviateVectorStore"]

try:
    # Read the version from the installed distribution so it cannot drift from
    # the packaging metadata, as the previous hard-coded string had.
    __version__ = version("swarmauri_vectorstore_cloudweaviate")
except PackageNotFoundError:
    # Distribution metadata is unavailable, e.g. when run from a source checkout.
    __version__ = "0.0.0"

__long_desc__ = """

# Swarmauri CloudWeaviate Vector Store Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|