swarmauri_vectorstore_cloudweaviate 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_vectorstore_cloudweaviate
3
+ Version: 0.6.0.dev154
4
+ Summary: Swarmauri Weaviate Vector Store
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
15
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
17
+ Requires-Dist: weaviate-client (>=4.9.2,<5.0.0)
18
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Swarmauri CloudWeaviate Vector Store
@@ -0,0 +1 @@
1
+ # Swarmauri CloudWeaviate Vector Store
@@ -0,0 +1,58 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_vectorstore_cloudweaviate"
3
+ version = "0.6.0.dev154"
4
+ description = "Swarmauri Weaviate Vector Store"
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "http://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
[tool.poetry.dependencies]
python = ">=3.10,<3.13"

# Swarmauri
swarmauri_core = {version = "^0.6.0.dev154"}
swarmauri_base = {version = "^0.6.0.dev154"}
# Imported directly by CloudWeaviateVectorStore (Document, Vector), so it is
# declared explicitly instead of relying on a transitive install.
swarmauri_standard = {version = "^0.6.0.dev154"}
swarmauri_embedding_doc2vec = {version = "^0.6.0.dev154"}

# Dependencies
weaviate-client = "^4.9.2"
26
+
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ flake8 = "^7.0"
30
+ pytest = "^8.0"
31
+ pytest-asyncio = ">=0.24.0"
32
+ pytest-xdist = "^3.6.1"
33
+ pytest-json-report = "^1.5.0"
34
+ python-dotenv = "*"
35
+ requests = "^2.32.3"
36
+
37
+ [build-system]
38
+ requires = ["poetry-core>=1.0.0"]
39
+ build-backend = "poetry.core.masonry.api"
40
+
41
+ [tool.pytest.ini_options]
42
+ norecursedirs = ["combined", "scripts"]
43
+
44
+ markers = [
45
+ "test: standard test",
46
+ "unit: Unit tests",
47
+ "integration: Integration tests",
48
+ "acceptance: Acceptance tests",
49
+ "experimental: Experimental tests"
50
+ ]
51
+ log_cli = true
52
+ log_cli_level = "INFO"
53
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
54
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
55
+ asyncio_default_fixture_loop_scope = "function"
56
+
57
+ [tool.poetry.plugins."swarmauri.vector_stores"]
58
+ CloudWeaviateVectorStore = "swarmauri_vectorstore_cloudweaviate.CloudWeaviateVectorStore:CloudWeaviateVectorStore"
@@ -0,0 +1,232 @@
1
+ from typing import List, Union, Literal, Optional
2
+ from swarmauri_core.ComponentBase import ComponentBase
3
+ from pydantic import PrivateAttr
4
+ import uuid as ud
5
+ import weaviate
6
+ from weaviate.classes.init import Auth
7
+ from weaviate.util import generate_uuid5
8
+ from weaviate.classes.query import MetadataQuery
9
+
10
+ from swarmauri_standard.documents.Document import Document
11
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
12
+ from swarmauri_standard.vectors.Vector import Vector
13
+
14
+ from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
15
+ from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
16
+ VectorStoreRetrieveMixin,
17
+ )
18
+ from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
19
+ VectorStoreSaveLoadMixin,
20
+ )
21
+ from swarmauri_base.vector_stores.VectorStoreCloudMixin import VectorStoreCloudMixin
22
+
23
+
24
@ComponentBase.register_type(VectorStoreBase, "CloudWeaviateVectorStore")
class CloudWeaviateVectorStore(
    VectorStoreSaveLoadMixin,
    VectorStoreRetrieveMixin,
    VectorStoreBase,
    VectorStoreCloudMixin,
):
    """
    Vector store backed by a Weaviate Cloud collection.

    Documents are embedded with a ``Doc2VecEmbedding`` and stored in the
    collection named by ``self.collection_name``. ``connect()`` must be called
    before any method that talks to Weaviate.
    """

    type: Literal["CloudWeaviateVectorStore"] = "CloudWeaviateVectorStore"

    # Private attributes
    # ``connect_to_weaviate_cloud`` returns the v4 ``WeaviateClient``.
    _client: Optional[weaviate.WeaviateClient] = PrivateAttr(default=None)
    _embedder: Optional[Doc2VecEmbedding] = PrivateAttr(default=None)
    # NOTE(review): the namespace is random per instance, so IDs derived from it
    # in add_document/get_document/delete_document only match within the same
    # instance. Confirm whether a stable namespace is wanted.
    _namespace_uuid: ud.UUID = PrivateAttr(default_factory=ud.uuid4)

    def __init__(self, **data):
        """
        Build the store and its embedder; no network connection is opened here.

        :param data: Field values forwarded to the base classes
        """
        super().__init__(**data)

        # Only the embedder is created eagerly; call connect() to reach Weaviate.
        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)

    def connect(self, **kwargs):
        """
        Initialize the Weaviate client if it is not already connected.

        :param kwargs: Optional ``headers`` dict passed through to the client
        """
        if self._client is None:
            self._client = weaviate.connect_to_weaviate_cloud(
                cluster_url=self.url,
                auth_credentials=Auth.api_key(self.api_key),
                headers=kwargs.get("headers", {}),
            )

    def disconnect(self) -> None:
        """
        Disconnect from the Weaviate cloud vector store.

        Closes the client, if any, and drops the reference so that a later
        ``connect()`` opens a fresh connection.
        """
        if self._client is not None:
            try:
                self._client.close()
            finally:
                # Drop the reference even if close() fails, so a dead client
                # is never reused.
                self._client = None

    def _collection(self):
        """Return the Weaviate collection handle for ``self.collection_name``."""
        return self._client.collections.get(self.collection_name)

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the vector store.

        :param document: Document to add
        :raises Exception: Re-raises any error from embedding or insertion
        """
        try:
            collection = self._collection()

            # Generate or use existing embedding
            embedding = (
                document.embedding
                or self._embedder.fit_transform([document.content])[0]
            )

            data_object = {
                "content": document.content,
                "metadata": document.metadata,
            }

            # Derive the object UUID from the document ID when there is one,
            # otherwise from the object's content.
            uuid = (
                str(ud.uuid5(self._namespace_uuid, document.id))
                if document.id
                else generate_uuid5(data_object)
            )

            collection.data.insert(
                properties=data_object,
                vector=embedding.value,
                uuid=uuid,
            )

            print(f"Document '{document.id}' added to Weaviate.")
        except Exception as e:
            print(f"Error adding document '{document.id}': {e}")
            raise

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add multiple documents to the vector store, one at a time.

        :param documents: List of documents to add
        :raises Exception: Re-raises the first error from ``add_document``
        """
        try:
            for document in documents:
                self.add_document(document)

            print(f"{len(documents)} documents added to Weaviate.")
        except Exception as e:
            print(f"Error adding documents: {e}")
            raise

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Retrieve a document by its ID.

        :param id: Document ID
        :return: Document object, or None if not found or on error
        """
        try:
            result = self._collection().query.fetch_object_by_id(
                ud.uuid5(self._namespace_uuid, id)
            )

            if result:
                return Document(
                    id=id,
                    content=result.properties["content"],
                    metadata=result.properties["metadata"],
                )
            return None
        except Exception as e:
            print(f"Error retrieving document '{id}': {e}")
            return None

    def get_all_documents(self) -> List[Document]:
        """
        Retrieve all documents from the vector store.

        :return: List of Document objects, or an empty list on error
        """
        try:
            collection = self._collection()
            return [
                Document(
                    content=item.properties["content"],
                    metadata=item.properties["metadata"],
                    # item.vector is a mapping; use its first stored vector.
                    embedding=Vector(value=list(item.vector.values())[0]),
                )
                for item in collection.iterator(include_vector=True)
            ]
        except Exception as e:
            print(f"Error retrieving all documents: {e}")
            return []

    def delete_document(self, id: str) -> None:
        """
        Delete a document by its ID.

        :param id: Document ID
        :raises Exception: Re-raises any error from the deletion
        """
        try:
            self._collection().data.delete_by_id(
                ud.uuid5(self._namespace_uuid, id)
            )
            print(f"Document '{id}' has been deleted from Weaviate.")
        except Exception as e:
            print(f"Error deleting document '{id}': {e}")
            raise

    def update_document(self, id: str, document: Document) -> None:
        """
        Update an existing document by deleting it and adding the new one.

        :param id: ID of the document to replace
        :param document: Document object with updated data
        """
        self.delete_document(id)
        self.add_document(document)

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Retrieve the top_k most relevant documents based on the given query.

        :param query: Query string
        :param top_k: Number of top similar documents to retrieve
        :return: List of Document objects, or an empty list on error
        """
        try:
            query_vector = self._embedder.infer_vector(query)
            response = self._collection().query.near_vector(
                near_vector=query_vector.value,
                limit=top_k,
                return_metadata=MetadataQuery(distance=True),
            )

            return [
                Document(
                    content=res.properties["content"],
                    metadata=res.properties["metadata"],
                )
                for res in response.objects
            ]
        except Exception as e:
            print(f"Error retrieving documents for query '{query}': {e}")
            return []

    def close(self):
        """
        Close the connection to the Weaviate server.

        Delegates to ``disconnect()`` so the closed client is also discarded
        and ``connect()`` can be called again afterwards.
        """
        self.disconnect()

    def model_dump_json(self, *args, **kwargs) -> str:
        """
        Serialize the store to JSON, disconnecting from Weaviate first.

        :return: JSON string from the base ``model_dump_json``
        """
        # Call the disconnect method before serialization
        self.disconnect()

        # Now proceed with the usual JSON serialization
        return super().model_dump_json(*args, **kwargs)

    def __del__(self):
        """Best-effort cleanup of the client when the object is collected."""
        try:
            self.close()
        except Exception:
            # Finalizers must not raise: the instance may be only partially
            # constructed, or the interpreter may be shutting down.
            pass
@@ -0,0 +1,12 @@
1
+ from .CloudWeaviateVectorStore import CloudWeaviateVectorStore
2
+
3
# Read the version from the installed distribution metadata so it cannot
# drift from the packaged version; fall back to the release literal when the
# package is used without being installed (e.g. from a source checkout).
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
from importlib.metadata import version as _version

try:
    __version__ = _version("swarmauri_vectorstore_cloudweaviate")
except _PackageNotFoundError:
    __version__ = "0.6.0.dev154"

# Long description shown for this plugin.
__long_desc__ = """

# Swarmauri CloudWeaviate Vector Store Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""