swarmauri_vectorstore_persistentchromadb 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_vectorstore_persistentchromadb
3
+ Version: 0.6.0.dev154
4
+ Summary: A Persistent ChromaDB based Vector Store
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: chromadb (>=0.5.17,<0.6.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
18
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Swarmauri Example Community Package
@@ -0,0 +1 @@
1
+ # Swarmauri Example Community Package
@@ -0,0 +1,58 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_vectorstore_persistentchromadb"
3
+ version = "0.6.0.dev154"
4
+ description = "A Persistent ChromaDB based Vector Store"
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "http://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+ swarmauri_core = {version = "^0.6.0.dev154"}
21
+ swarmauri_base = {version = "^0.6.0.dev154"}
22
+ swarmauri_embedding_doc2vec = {version = "^0.6.0.dev154"}
23
+
24
+
25
+ # Dependencies
26
+ chromadb = "^0.5.17"
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ flake8 = "^7.0"
30
+ pytest = "^8.0"
31
+ pytest-asyncio = ">=0.24.0"
32
+ pytest-xdist = "^3.6.1"
33
+ pytest-json-report = "^1.5.0"
34
+ python-dotenv = "*"
35
+ requests = "^2.32.3"
36
+
37
+ [build-system]
38
+ requires = ["poetry-core>=1.0.0"]
39
+ build-backend = "poetry.core.masonry.api"
40
+
41
+ [tool.pytest.ini_options]
42
+ norecursedirs = ["combined", "scripts"]
43
+
44
+ markers = [
45
+ "test: standard test",
46
+ "unit: Unit tests",
47
+ "integration: Integration tests",
48
+ "acceptance: Acceptance tests",
49
+ "experimental: Experimental tests"
50
+ ]
51
+ log_cli = true
52
+ log_cli_level = "INFO"
53
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
54
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
55
+ asyncio_default_fixture_loop_scope = "function"
56
+
57
+ [tool.poetry.plugins."swarmauri.vector_stores"]
58
+ PersistentChromaDBVectorStore = "swarmauri_vectorstore_persistentchromadb.PersistentChromaDBVectorStore:PersistentChromaDBVectorStore"
@@ -0,0 +1,188 @@
1
+ import logging
2
+ import chromadb
3
+
4
+ from typing import List, Union, Literal
5
+
6
+ from swarmauri_standard.documents.concrete.Document import Document
7
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
8
+ from swarmauri.distances.concrete.CosineDistance import CosineDistance
9
+
10
+ from swarmauri.vector_stores.base.VectorStoreBase import VectorStoreBase
11
+ from swarmauri.vector_stores.base.VectorStoreRetrieveMixin import (
12
+ VectorStoreRetrieveMixin,
13
+ )
14
+ from swarmauri.vector_stores.base.VectorStoreSaveLoadMixin import (
15
+ VectorStoreSaveLoadMixin,
16
+ )
17
+ from swarmauri.vector_stores.base.VectorStorePersistentMixin import (
18
+ VectorStorePersistentMixin,
19
+ )
20
+ from swarmauri_core.ComponentBase import ComponentBase
21
+
22
+
23
@ComponentBase.register_type(VectorStoreBase, "PersistentChromaDBVectorStore")
class PersistentChromaDBVectorStore(
    VectorStoreSaveLoadMixin,
    VectorStoreRetrieveMixin,
    VectorStorePersistentMixin,
    VectorStoreBase,
):
    """
    Vector store that keeps documents and their embeddings in a ChromaDB
    collection.

    Documents that do not already carry an embedding are embedded with a
    ``Doc2VecEmbedding`` model. ``connect()`` must be called before any
    document operation: it is the only place that populates ``self.client``
    and ``self.collection``.

    NOTE(review): ``self.path``, ``self.collection_name`` and
    ``self.vector_size`` are read here but declared elsewhere (presumably on
    ``VectorStorePersistentMixin``) -- confirm against the mixin.
    """

    type: Literal["PersistentChromaDBVectorStore"] = "PersistentChromaDBVectorStore"

    def __init__(self, **kwargs):
        """
        Initialize the PersistentChromaDBVectorStore.

        Args:
            **kwargs: keyword arguments forwarded unchanged to the base
                classes' initializer.
        """
        super().__init__(**kwargs)

        self._embedder = Doc2VecEmbedding(vector_size=self.vector_size)
        self._distance = CosineDistance()

    @staticmethod
    def _vector_to_list(vector) -> List[float]:
        """
        Convert an embedding into the plain list of floats ChromaDB expects.

        Accepts whatever the rest of this class may hand over: an object
        exposing ``to_numpy()`` or ``value`` (as the embedder's vectors do),
        a numpy-like array exposing ``tolist()``, or an ordinary sequence.
        """
        if hasattr(vector, "to_numpy"):
            return vector.to_numpy().tolist()
        if hasattr(vector, "value"):
            return list(vector.value)
        if hasattr(vector, "tolist"):
            return vector.tolist()
        return list(vector)

    def connect(self) -> None:
        """
        Establish a connection to ChromaDB and get or create the collection.

        When ``self.path`` is set, an on-disk ``PersistentClient`` rooted at
        that path is used so the data survives the process. Without a path
        the in-memory client is used, which was the previous behaviour in
        every case.
        """
        if self.path:
            self.client = chromadb.PersistentClient(path=self.path)
        else:
            self.client = chromadb.Client()

        self.collection = self.client.get_or_create_collection(
            name=self.collection_name
        )
        logging.info(
            "Connected to ChromaDB at %s, collection: %s",
            self.path,
            self.collection_name,
        )

    def disconnect(self) -> None:
        """
        Close the connection to ChromaDB.

        Only drops the references to the client and the collection; no
        explicit close call is made on the client.
        """
        if self.client:
            self.client = None
            self.collection = None

    def add_document(self, document: Document) -> None:
        """
        Add a single document to the collection.

        Args:
            document: the document to store. If it has no embedding, the
                embedder is fitted on its content and the resulting vector
                is used; otherwise the supplied embedding is stored.
        """
        if document.embedding:
            vector = document.embedding
        else:
            self._embedder.fit([document.content])
            vector = self._embedder.transform([document.content])[0]

        self.collection.add(
            ids=[document.id],
            documents=[document.content],
            embeddings=[self._vector_to_list(vector)],
            # Empty metadata is omitted rather than sent as ``{}``.
            metadatas=[document.metadata] if document.metadata else None,
        )

    def add_documents(self, documents: List[Document]) -> None:
        """
        Add several documents to the collection in one call.

        Args:
            documents: the documents to store. An empty list is a no-op.
                Documents that already carry an embedding keep it; the
                embedder is fitted once on the contents of the remaining
                documents before their vectors are inferred.
        """
        if not documents:
            return

        ids = [doc.id for doc in documents]
        texts = [doc.content for doc in documents]

        # Fit once on the whole batch instead of once per document.
        texts_to_embed = [doc.content for doc in documents if not doc.embedding]
        if texts_to_embed:
            self._embedder.fit(texts_to_embed)

        embeddings = [
            self._vector_to_list(
                doc.embedding
                if doc.embedding
                else self._embedder.infer_vector(doc.content)
            )
            for doc in documents
        ]

        # Per-document: empty metadata becomes None so a batch mixing
        # documents with and without metadata can still be stored.
        metadatas = [doc.metadata or None for doc in documents]
        if any(meta is not None for meta in metadatas):
            self.collection.add(
                ids=ids, documents=texts, embeddings=embeddings, metadatas=metadatas
            )
        else:
            self.collection.add(ids=ids, documents=texts, embeddings=embeddings)

    def get_document(self, doc_id: str) -> Union[Document, None]:
        """
        Fetch one document by id.

        Args:
            doc_id: id of the document to look up.

        Returns:
            The matching document (with ``{}`` as metadata when none was
            stored), or ``None`` when the collection has no such id.
        """
        results = self.collection.get(ids=[doc_id])
        # Check for a hit first: on a miss every result list is empty, so
        # indexing into them would raise instead of returning None.
        if not results["ids"]:
            return None

        return Document(
            id=results["ids"][0],
            content=results["documents"][0],
            metadata=results["metadatas"][0] or {},
        )

    def get_all_documents(self) -> List[Document]:
        """
        Return every document stored in the collection.

        Missing metadata is returned as ``{}``, matching ``get_document``
        and ``retrieve``.
        """
        results = self.collection.get()
        return [
            Document(id=doc_id, content=content, metadata=metadata or {})
            for doc_id, content, metadata in zip(
                results["ids"], results["documents"], results["metadatas"]
            )
        ]

    def delete_document(self, doc_id: str) -> None:
        """
        Delete the document with the given id from the collection.

        Args:
            doc_id: id of the document to remove.
        """
        self.collection.delete(ids=[doc_id])

    def update_document(self, doc_id: str, updated_document: Document) -> None:
        """
        Replace the stored document ``doc_id`` with ``updated_document``.

        The embedding is resolved before anything is deleted. When the
        document has no embedding, one is computed without refitting the
        embedder. The resolved embedding is written back onto
        ``updated_document`` as a plain list, then the old entry is deleted
        and the new one added.

        Args:
            doc_id: id of the entry to replace.
            updated_document: the replacement document; its ``embedding``
                attribute is overwritten with the list form of the vector.
        """
        if updated_document.embedding:
            vector = updated_document.embedding
        else:
            # Transform without refitting to avoid vocabulary issues.
            vector = self._embedder.transform([updated_document.content])[0]

        # The helper also accepts an embedding that is already a plain list
        # (e.g. a document object that went through this method before).
        updated_document.embedding = self._vector_to_list(vector)

        self.delete_document(doc_id)
        self.add_document(updated_document)

    def clear_documents(self) -> None:
        """
        Remove every document from the collection.

        Does nothing when the collection is already empty.
        """
        # ``include=[]`` asks for ids only, so no contents, metadata or
        # embeddings are loaded just to be thrown away.
        doc_ids = self.collection.get(include=[])["ids"]
        if doc_ids:
            self.collection.delete(ids=doc_ids)

    def document_count(self) -> int:
        """Return the number of documents stored in the collection."""
        return self.collection.count()

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Return the documents nearest to ``query``.

        Args:
            query: text to embed and search with.
            top_k: maximum number of documents to return.

        Returns:
            The matched documents in the order ChromaDB returns them;
            missing metadata is returned as ``{}``.
        """
        query_embedding = self._vector_to_list(self._embedder.infer_vector(query))

        results = self.collection.query(
            query_embeddings=[query_embedding], n_results=top_k
        )

        # Results are grouped per query embedding; only one was sent.
        return [
            Document(id=doc_id, content=content, metadata=metadata or {})
            for doc_id, content, metadata in zip(
                results["ids"][0], results["documents"][0], results["metadatas"][0]
            )
        ]

    # Override the model_dump_json method
    def model_dump_json(self, *args, **kwargs) -> str:
        """
        Serialize the store to JSON.

        Side effect: ``disconnect()`` is called first, so ``self.client``
        and ``self.collection`` are ``None`` afterwards and ``connect()``
        must be called again before the store is used.
        """
        # Call the disconnect method before serialization
        self.disconnect()

        # Now proceed with the usual JSON serialization
        return super().model_dump_json(*args, **kwargs)
@@ -0,0 +1,14 @@
1
from importlib.metadata import PackageNotFoundError, version

from .PersistentChromaDBVectorStore import PersistentChromaDBVectorStore

# Read the version from the installed distribution so it cannot drift from
# the packaging metadata (the previous hard-coded "0.6.0.dev26" did not match
# the released 0.6.0.dev154).
try:
    __version__ = version("swarmauri_vectorstore_persistentchromadb")
except PackageNotFoundError:
    # Distribution metadata is unavailable, e.g. when running from a source
    # checkout that was never installed.
    __version__ = "0.6.0.dev154"

__long_desc__ = """

# Swarmauri PersistentChromaDB Plugin

This repository includes an PersistentChromaDB of a Swarmauri Plugin.

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""