swarmauri_vectorstore_doc2vec 0.6.0.dev154__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_vectorstore_doc2vec/Doc2VecVectorStore.py +78 -0
- swarmauri_vectorstore_doc2vec/__init__.py +12 -0
- swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/METADATA +20 -0
- swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/RECORD +6 -0
- swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/WHEEL +4 -0
- swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from typing import List, Union, Literal
|
|
2
|
+
|
|
3
|
+
from swarmauri_standard.documents.Document import Document
|
|
4
|
+
from swarmauri_standard.distances.CosineDistance import CosineDistance
|
|
5
|
+
from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
|
|
6
|
+
from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
|
|
7
|
+
VectorStoreRetrieveMixin,
|
|
8
|
+
)
|
|
9
|
+
from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
|
|
10
|
+
VectorStoreSaveLoadMixin,
|
|
11
|
+
)
|
|
12
|
+
from swarmauri_embedding_doc2vec.Doc2VecEmbedding import (
|
|
13
|
+
Doc2VecEmbedding,
|
|
14
|
+
)
|
|
15
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@ComponentBase.register_type(VectorStoreBase, "Doc2VecVectorStore")
class Doc2VecVectorStore(
    VectorStoreSaveLoadMixin, VectorStoreRetrieveMixin, VectorStoreBase
):
    """Document store that ranks documents by cosine distance of Doc2Vec vectors.

    Documents are kept in ``self.documents``. Every mutation that changes the
    stored corpus refits the ``Doc2VecEmbedding`` on the content of *all*
    stored documents, so the embedder is always fitted on the same corpus
    that ``retrieve`` later transforms.
    """

    # Discriminator value used when the component is registered/serialized.
    type: Literal["Doc2VecVectorStore"] = "Doc2VecVectorStore"

    def __init__(self, **kwargs):
        """Initialise the base store and create the embedder and distance metric.

        Args:
            **kwargs: Forwarded unchanged to the parent initialiser.
        """
        super().__init__(**kwargs)
        self._embedder = Doc2VecEmbedding()
        self._distance = CosineDistance()

    def _refit_embedder(self) -> None:
        """Refit the embedder on the content of every stored document.

        Nothing is done when the store is empty: there is no corpus to fit on,
        and ``retrieve`` returns early for an empty store anyway.
        """
        # NOTE(review): this assumes Doc2VecEmbedding.fit rebuilds its model
        # from the corpus it is handed rather than extending it - confirm
        # against the embedding implementation.
        corpus = [doc.content for doc in self.documents]
        if corpus:
            self._embedder.fit(corpus)

    def add_document(self, document: Document) -> None:
        """Store one document and refit the embedder on the whole corpus.

        Args:
            document: The document to add.
        """
        self.documents.append(document)
        # Fit on all stored documents, not just the new one, to stay
        # consistent with delete_document/update_document.
        self._refit_embedder()

    def add_documents(self, documents: List[Document]) -> None:
        """Store several documents and refit the embedder on the whole corpus.

        Args:
            documents: The documents to add. An empty list is a no-op.
        """
        if not documents:
            return
        self.documents.extend(documents)
        self._refit_embedder()

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """Return up to ``top_k`` stored documents closest to ``query``.

        Args:
            query: Free-text query to embed.
            top_k: Maximum number of documents to return.

        Returns:
            Documents ordered from smallest to largest cosine distance. An
            empty list is returned when the store is empty, when ``top_k`` is
            not positive, or when the inferred query vector is all zeros.
        """
        # Nothing to rank; also avoids querying an embedder that may never
        # have been fitted, and the surprising "all but the last k" slice a
        # negative top_k would otherwise produce.
        if not self.documents or top_k <= 0:
            return []

        query_vector = self._embedder.infer_vector(query)

        # If the query vector is all-zero, return an empty list
        if all(v == 0.0 for v in query_vector.value):
            print("Query contains only OOV words.")
            return []

        # Transform the stored documents into vectors
        document_vectors = self._embedder.transform(
            [doc.content for doc in self.documents]
        )

        # Calculate cosine distances between the query vector and document vectors
        distances = self._distance.distances(query_vector, document_vectors)

        # Indices of the top_k closest documents (smallest distance first)
        ranked_indices = sorted(range(len(distances)), key=lambda i: distances[i])
        return [self.documents[i] for i in ranked_indices[:top_k]]

    def get_document(self, id: str) -> Union[Document, None]:
        """Return the first stored document whose ``id`` matches, else ``None``.

        Args:
            id: Identifier of the document to look up.
        """
        return next((doc for doc in self.documents if doc.id == id), None)

    def get_all_documents(self) -> List[Document]:
        """Return the list of stored documents (the store's own list, not a copy)."""
        return self.documents

    def delete_document(self, id: str) -> None:
        """Remove every document with the given ``id`` and refit the embedder.

        Args:
            id: Identifier of the document(s) to remove. Unknown ids are a
                no-op and do not trigger a refit.
        """
        remaining = [doc for doc in self.documents if doc.id != id]
        if len(remaining) == len(self.documents):
            return  # nothing removed, the corpus is unchanged
        self.documents = remaining
        self._refit_embedder()

    def update_document(self, id: str, updated_document: Document) -> None:
        """Replace the first document matching ``id`` and refit the embedder.

        Args:
            id: Identifier of the document to replace.
            updated_document: The document stored in its place. Unknown ids
                are a no-op and do not trigger a refit.
        """
        for index, document in enumerate(self.documents):
            if document.id == id:
                self.documents[index] = updated_document
                self._refit_embedder()
                return
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version

from .Doc2VecVectorStore import Doc2VecVectorStore

# Read the version from the installed distribution's metadata so it cannot
# drift from the released package version, as a hard-coded string did.
try:
    __version__ = version("swarmauri_vectorstore_doc2vec")
except PackageNotFoundError:
    # Package is being used without installed metadata (e.g. from a source
    # tree); fall back to the version this release was published as.
    __version__ = "0.6.0.dev154"

__long_desc__ = """

# Swarmauri Doc2Vec VectorStore Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: swarmauri_vectorstore_doc2vec
|
|
3
|
+
Version: 0.6.0.dev154
|
|
4
|
+
Summary: A Doc2Vec based Vector Store and Doc2Vec Based Embedding Model.
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Jacob Stewart
|
|
7
|
+
Author-email: jacob@swarmauri.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
|
15
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
|
16
|
+
Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
|
|
17
|
+
Project-URL: Repository, https://github.com/swarmauri/swarmauri-sdk
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Swarmauri Doc2Vec VectorStore Plugin
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
swarmauri_vectorstore_doc2vec/__init__.py,sha256=lMwpN8881a1Zumf8QCYZIR1CcCQKDECD_PwZqTDzFGk,291
|
|
2
|
+
swarmauri_vectorstore_doc2vec/Doc2VecVectorStore.py,sha256=gfGe5axKHOJOyk3rOaRl4Omvs5Cg-prqVs05bOipjLo,3014
|
|
3
|
+
swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/entry_points.txt,sha256=3e0jR1vegYY6F7fGR-wa12zbIZVOna1VbTqWbMFci-4,95
|
|
4
|
+
swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/METADATA,sha256=45EPB4jyrwinWK656olAc1Ds7Oh1RptFlvtqktDcA7Q,833
|
|
5
|
+
swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
|
6
|
+
swarmauri_vectorstore_doc2vec-0.6.0.dev154.dist-info/RECORD,,
|