swarmauri_vectorstore_doc2vec 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_vectorstore_doc2vec
3
+ Version: 0.6.0.dev154
4
+ Summary: A Doc2Vec based Vector Store and Doc2Vec Based Embedding Model.
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
15
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_embedding_doc2vec (>=0.6.0.dev154,<0.7.0)
17
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Swarmauri Doc2Vec VectorStore Plugin
@@ -0,0 +1 @@
1
+ # Swarmauri Doc2Vec VectorStore Plugin
@@ -0,0 +1,55 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_vectorstore_doc2vec"
3
+ version = "0.6.0.dev154"
4
+ description = "A Doc2Vec based Vector Store and Doc2Vec Based Embedding Model."
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "http://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+
21
+ swarmauri_core = {version = "^0.6.0.dev154"}
22
+ swarmauri_base = {version = "^0.6.0.dev154"}
23
+ swarmauri_embedding_doc2vec = {version = "^0.6.0.dev154"}
24
+
25
+ [tool.poetry.group.dev.dependencies]
26
+ flake8 = "^7.0"
27
+ pytest = "^8.0"
28
+ pytest-asyncio = ">=0.24.0"
29
+ pytest-xdist = "^3.6.1"
30
+ pytest-json-report = "^1.5.0"
31
+ python-dotenv = "*"
32
+ requests = "^2.32.3"
33
+
34
+ [build-system]
35
+ requires = ["poetry-core>=1.0.0"]
36
+ build-backend = "poetry.core.masonry.api"
37
+
38
+ [tool.pytest.ini_options]
39
+ norecursedirs = ["combined", "scripts"]
40
+
41
+ markers = [
42
+ "test: standard test",
43
+ "unit: Unit tests",
44
+ "integration: Integration tests",
45
+ "acceptance: Acceptance tests",
46
+ "experimental: Experimental tests"
47
+ ]
48
+ log_cli = true
49
+ log_cli_level = "INFO"
50
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
51
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
52
+ asyncio_default_fixture_loop_scope = "function"
53
+
54
+ [tool.poetry.plugins."swarmauri.vector_stores"]
55
+ Doc2VecVectorStore = "swarmauri_vectorstore_doc2vec:Doc2VecVectorStore"
@@ -0,0 +1,78 @@
1
+ from typing import List, Union, Literal
2
+
3
+ from swarmauri_standard.documents.Document import Document
4
+ from swarmauri_standard.distances.CosineDistance import CosineDistance
5
+ from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
6
+ from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import (
7
+ VectorStoreRetrieveMixin,
8
+ )
9
+ from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import (
10
+ VectorStoreSaveLoadMixin,
11
+ )
12
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import (
13
+ Doc2VecEmbedding,
14
+ )
15
+ from swarmauri_core.ComponentBase import ComponentBase
16
+
17
+
18
@ComponentBase.register_type(VectorStoreBase, "Doc2VecVectorStore")
class Doc2VecVectorStore(
    VectorStoreSaveLoadMixin, VectorStoreRetrieveMixin, VectorStoreBase
):
    """Vector store that ranks stored documents with Doc2Vec embeddings.

    Documents live in ``self.documents``; that attribute is not declared in
    this class (presumably it is provided by ``VectorStoreBase`` -- confirm).
    A ``Doc2VecEmbedding`` is fitted on document contents, and a
    ``CosineDistance`` orders the stored documents against a query in
    ``retrieve``.
    """

    type: Literal["Doc2VecVectorStore"] = "Doc2VecVectorStore"

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the base classes and create the helpers."""
        super().__init__(**kwargs)
        self._embedder = Doc2VecEmbedding()
        self._distance = CosineDistance()

    def add_document(self, document: Document) -> None:
        """Fit the embedder on ``document``'s content, then store it."""
        self._embedder.fit([document.content])
        self.documents.append(document)

    def add_documents(self, documents: List[Document]) -> None:
        """Fit the embedder on the new contents, then store the documents.

        The fit runs before the store is extended, in the same order as
        ``add_document``, so a failing fit does not leave documents in the
        store that the embedder never saw.
        """
        # Materialize once so a one-shot iterable is not exhausted by the fit.
        documents = list(documents)
        if not documents:
            # Nothing to add; avoid fitting on an empty batch.
            return
        self._embedder.fit([doc.content for doc in documents])
        self.documents.extend(documents)

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """Return up to ``top_k`` stored documents closest to ``query``.

        Args:
            query: Text to embed and compare against the stored documents.
            top_k: Maximum number of documents to return. Values <= 0 yield
                an empty list.

        Returns:
            Stored documents ordered by ascending cosine distance to the
            query, or an empty list when the store is empty, ``top_k`` is not
            positive, or the inferred query vector is all zeros.
        """
        # A negative slice bound would count from the end of the ranking and
        # return the wrong number of documents; an empty store has nothing to
        # rank. Both cases short-circuit before any embedding work.
        if top_k <= 0 or not self.documents:
            return []

        query_vector = self._embedder.infer_vector(query)

        # If the query vector is all-zero, return an empty list
        if all(v == 0.0 for v in query_vector.value):
            print("Query contains only OOV words.")
            return []

        # Transform the stored documents into vectors
        document_vectors = self._embedder.transform(
            [doc.content for doc in self.documents]
        )

        # Calculate cosine distances between the query vector and document vectors
        distances = self._distance.distances(query_vector, document_vectors)

        # Get the indices of the top_k closest documents
        ranked_indices = sorted(range(len(distances)), key=lambda i: distances[i])

        return [self.documents[i] for i in ranked_indices[:top_k]]

    def get_document(self, id: str) -> Union[Document, None]:
        """Return the first stored document whose ``id`` matches, else ``None``."""
        return next((doc for doc in self.documents if doc.id == id), None)

    def get_all_documents(self) -> List[Document]:
        """Return the list of stored documents (the list itself, not a copy)."""
        return self.documents

    def delete_document(self, id: str) -> None:
        """Remove every document with the given ``id`` and refit the embedder.

        When no document matches, the store and the embedder are left
        untouched.
        """
        remaining = [doc for doc in self.documents if doc.id != id]
        if len(remaining) == len(self.documents):
            # Nothing was removed, so there is nothing to refit.
            return

        self.documents = remaining
        # NOTE(review): fitting is skipped once the store is empty, on the
        # assumption that fitting on an empty corpus is not meaningful --
        # confirm against Doc2VecEmbedding.fit.
        if remaining:
            self._embedder.fit([doc.content for doc in remaining])

    def update_document(self, id: str, updated_document: Document) -> None:
        """Replace the first document matching ``id`` and refit the embedder.

        When no document matches, the store and the embedder are left
        untouched.
        """
        for index, document in enumerate(self.documents):
            if document.id == id:
                self.documents[index] = updated_document
                break
        else:
            # No match: the corpus is unchanged, so skip the refit.
            return

        self._embedder.fit([doc.content for doc in self.documents])
@@ -0,0 +1,12 @@
1
+ from .Doc2VecVectorStore import Doc2VecVectorStore
2
+
3
from importlib.metadata import PackageNotFoundError, version

# Resolve the version from the installed distribution's metadata so this
# attribute cannot drift from the version declared in pyproject.toml.
try:
    __version__ = version("swarmauri_vectorstore_doc2vec")
except PackageNotFoundError:
    # Not installed as a distribution (e.g. running from a source checkout):
    # fall back to the version this file was released with.
    __version__ = "0.6.0.dev154"

# Long-form package description (Markdown).
__long_desc__ = """

# Swarmauri Doc2Vec VectorStore Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""