swarmauri_vectorstore_redis 0.6.0.dev154__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ from typing import List
2
+ from redisearch import Client, Query
3
+ from swarmauri_core.documents.IDocument import IDocument
4
+ from swarmauri_standard.document_stores.ConcreteDocument import (
5
+ ConcreteDocument,
6
+ )
7
+ from swarmauri_base.retrievers.DocumentRetrieverBase import DocumentRetrieverBase
8
+ from swarmauri_core.ComponentBase import ComponentBase
9
+
10
@ComponentBase.register_type(DocumentRetrieverBase, "RedisDocumentRetriever")
class RedisDocumentRetriever(DocumentRetrieverBase):
    """
    A document retriever that fetches documents from a Redis store.

    Searches are executed through a RediSearch ``Client`` bound to a single
    index. The client is created on first access of ``redis_client`` rather
    than in the constructor.
    """

    def __init__(self, redis_idx_name, redis_host, redis_port):
        """
        Initializes a new instance of RedisDocumentRetriever.

        Args:
            redis_idx_name: Name of the search index handed to the client.
            redis_host: Host handed to the client.
            redis_port: Port handed to the client.
        """
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm whether DocumentRetrieverBase needs super().__init__().
        self._redis_client = None
        self._redis_idx_name = redis_idx_name
        self._redis_host = redis_host
        self._redis_port = redis_port

    @property
    def redis_client(self):
        """Lazily create the search client on first access and return it."""
        if self._redis_client is None:
            # Read the underscore-prefixed attributes: those are the only
            # names assigned in __init__, so the un-prefixed spellings would
            # raise AttributeError.
            self._redis_client = Client(
                self._redis_idx_name,
                host=self._redis_host,
                port=self._redis_port,
            )
        return self._redis_client

    def retrieve(self, query: str, top_k: int = 5) -> List[IDocument]:
        """
        Retrieve the most relevant documents based on the given query.

        Args:
            query (str): The query string used for document retrieval.
            top_k (int, optional): The number of top relevant documents to
                retrieve. Defaults to 5.

        Returns:
            List[IDocument]: One document per search hit, at most ``top_k``.
        """
        # paging(0, top_k) limits the result window to the first top_k hits.
        query_result = self.redis_client.search(Query(query).paging(0, top_k))

        return [
            ConcreteDocument(
                doc_id=doc.id,
                # Note: Adjust 'text' based on actual Redis document schema
                content=doc.text,
                # Including full document fields and values in metadata
                metadata=doc.__dict__,
            )
            for doc in query_result.docs
        ]
@@ -0,0 +1,238 @@
1
+ import json
2
+ from typing import List, Union, Literal, Optional
3
+ from pydantic import PrivateAttr
4
+
5
+ import numpy as np
6
+ import redis
7
+ from redis.commands.search.field import VectorField, TextField
8
+ from redis.commands.search.indexDefinition import IndexDefinition, IndexType
9
+
10
+ from swarmauri_standard.vectors.Vector import Vector
11
+ from swarmauri_standard.documents.concrete.Document import Document
12
+ from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
13
+ from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
14
+ from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import VectorStoreRetrieveMixin
15
+ from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import VectorStoreSaveLoadMixin
16
+ from swarmauri_core.ComponentBase import ComponentBase
17
+
18
@ComponentBase.register_type(VectorStoreBase, "RedisVectorStore")
class RedisVectorStore(VectorStoreSaveLoadMixin, VectorStoreRetrieveMixin, VectorStoreBase):
    """
    Vector store that keeps documents and their embeddings in Redis hashes.

    Each document is written to the hash ``doc:<id>`` with three fields:
    ``content`` (text), ``metadata`` (JSON string) and ``embedding``
    (raw float32 bytes). A search index over the ``doc:`` prefix is created
    at construction time if it does not exist yet.

    NOTE(review): ``retrieve`` ranks documents client-side after loading every
    stored hash; the search index created in ``__init__`` is not queried by
    any method of this class.
    """

    type: Literal["RedisVectorStore"] = "RedisVectorStore"
    index_name: str = "documents_index"
    embedding_dimension: int = 8000  # Default embedding dimension

    # Private attributes
    _embedder: Doc2VecEmbedding = PrivateAttr()
    _redis_client: Optional[redis.Redis] = PrivateAttr(default=None)

    # Configuration attributes with default values
    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_password: Optional[str] = None

    def __init__(self, **kwargs):
        """
        Build the embedder, connect to Redis and make sure the index exists.

        Args:
            **kwargs: Field values forwarded to the base-class initializer
                (e.g. ``redis_host``, ``redis_port``, ``index_name``).

        Raises:
            Exception: Whatever ``connect`` or index creation raises.
        """
        super().__init__(**kwargs)
        self._embedder = Doc2VecEmbedding(vector_size=self.embedding_dimension)

        # Initialize Redis client using class attributes
        self.connect()

        # Setup Redis Search index
        self._ensure_index()

    def _ensure_index(self) -> None:
        """Create the search index over ``doc:`` hashes unless it already exists."""
        index = self._redis_client.ft(self.index_name)
        try:
            index.info()
        except redis.exceptions.ResponseError:
            # Only a server-side error reply is taken to mean "index missing";
            # connection problems and other failures propagate unchanged
            # instead of being masked by an index-creation attempt.
            print(f"Index '{self.index_name}' does not exist. Creating index...")
            schema = (
                TextField("content"),
                VectorField(
                    "embedding",
                    "FLAT",
                    {
                        "TYPE": "FLOAT32",
                        "DIM": self.embedding_dimension,
                        "DISTANCE_METRIC": "COSINE",
                    },
                ),
            )
            definition = IndexDefinition(
                prefix=["doc:"],
                index_type=IndexType.HASH,
            )
            index.create_index(fields=schema, definition=definition)
            print(f"Index '{self.index_name}' created successfully.")
        else:
            print(f"Index '{self.index_name}' exists.")

    def connect(self) -> None:
        """
        Establishes a connection to the Redis server using class attributes.

        Raises:
            Exception: Re-raised after logging if the client cannot be
                created or the ``ping`` fails.
        """
        try:
            self._redis_client = redis.Redis(
                host=self.redis_host,
                port=self.redis_port,
                password=self.redis_password,
                decode_responses=False,  # For binary data
            )
            # Test the connection
            self._redis_client.ping()
            print("Connected to Redis successfully.")
        except Exception as e:
            print(f"Failed to connect to Redis: {e}")
            raise

    def disconnect(self) -> None:
        """
        Disconnects from the Redis server.

        Does nothing when no client is currently held.
        """
        if self._redis_client:
            self._redis_client.close()
            self._redis_client = None
            print("Disconnected from Redis.")

    def _doc_key(self, document_id: str) -> str:
        """Return the Redis key under which ``document_id`` is stored."""
        return f"doc:{document_id}"

    def _build_hash_mapping(self, document: Document) -> dict:
        """Embed ``document`` and return the hash fields to store for it."""
        # NOTE(review): fit_transform is called with this one document only;
        # confirm that embeddings fitted separately per document are
        # comparable with the query vector produced in retrieve().
        embedding = self._embedder.fit_transform([document.content])[0]
        if isinstance(embedding, Vector):
            embedding = embedding.value
        return {
            "content": document.content,
            # Store metadata as JSON
            "metadata": json.dumps(document.metadata),
            # Convert embedding values to bytes
            "embedding": np.array(embedding, dtype=np.float32).tobytes(),
        }

    def _document_from_hash(self, document_id: str, data: dict) -> Document:
        """Rebuild a ``Document`` from the raw (bytes-keyed) hash ``data``."""
        metadata = json.loads(data.get(b"metadata", b"{}").decode("utf-8"))
        content = data.get(b"content", b"").decode("utf-8")

        embedding_bytes = data.get(b"embedding")
        if embedding_bytes:
            embedding = Vector(
                value=np.frombuffer(embedding_bytes, dtype=np.float32).tolist()
            )
        else:
            embedding = None

        return Document(
            id=document_id,
            content=content,
            metadata=metadata,
            embedding=embedding,
        )

    def add_document(self, document: Document) -> None:
        """
        Embed ``document`` and write it to its Redis hash.

        Args:
            document: The document to store; an existing hash with the same
                id has its fields overwritten.
        """
        self._redis_client.hset(
            self._doc_key(document.id),
            mapping=self._build_hash_mapping(document),
        )

    def add_documents(self, documents: List[Document]) -> None:
        """
        Embed and store several documents in one pipeline round trip.

        Documents whose ``content`` is empty are skipped.

        Args:
            documents: The documents to store.
        """
        pipeline = self._redis_client.pipeline()
        for doc in documents:
            if not doc.content:
                continue
            # The metadata is serialized as-is; wrapping it in a set literal
            # (as an earlier revision did) fails for dict metadata because
            # dicts are unhashable.
            pipeline.hset(self._doc_key(doc.id), mapping=self._build_hash_mapping(doc))
        pipeline.execute()

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Fetch a single document by id.

        Args:
            id: The document id (without the ``doc:`` key prefix).

        Returns:
            The stored document, or ``None`` when the hash is missing/empty.
        """
        data = self._redis_client.hgetall(self._doc_key(id))
        if not data:
            return None
        return self._document_from_hash(id, data)

    def get_all_documents(self) -> List[Document]:
        """
        Load every document stored under the ``doc:`` key prefix.

        Returns:
            All documents found by scanning ``doc:*``; keys whose hash comes
            back empty are skipped.
        """
        documents = []
        cursor = 0
        while True:
            cursor, keys = self._redis_client.scan(
                cursor=cursor, match="doc:*", count=1000
            )
            for key in keys:
                data = self._redis_client.hgetall(key)
                if not data:
                    continue
                # Strip only the leading prefix so that ids which themselves
                # contain "doc:" are returned intact.
                doc_id = key.decode("utf-8").removeprefix("doc:")
                documents.append(self._document_from_hash(doc_id, data))
            # A returned cursor of 0 marks the end of the scan.
            if cursor == 0:
                break
        return documents

    def delete_document(self, id: str) -> None:
        """
        Delete the hash that stores document ``id``.

        Args:
            id: The document id (without the ``doc:`` key prefix).
        """
        self._redis_client.delete(self._doc_key(id))

    def update_document(self, document: Document) -> None:
        """
        Replace an existing document.

        Args:
            document: The new version; its id must already be stored.

        Raises:
            ValueError: If no hash exists for ``document.id``.
        """
        doc_key = self._doc_key(document.id)
        if not self._redis_client.exists(doc_key):
            raise ValueError(f"Document with id {document.id} does not exist.")
        # Update the document by re-adding it
        self.add_documents([document])

    def cosine_similarity(self, vec1, vec2):
        """
        Return the cosine similarity of two vectors.

        Args:
            vec1: First vector (any sequence accepted by NumPy).
            vec2: Second vector, same length as ``vec1``.

        Returns:
            The cosine of the angle between the vectors, or ``0`` when either
            vector has zero norm.
        """
        norm_vec1 = np.linalg.norm(vec1)
        norm_vec2 = np.linalg.norm(vec2)
        if norm_vec1 == 0 or norm_vec2 == 0:
            return 0
        return np.dot(vec1, vec2) / (norm_vec1 * norm_vec2)

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Return the stored documents most similar to ``query``.

        Every stored document is loaded and scored against the query vector
        with ``cosine_similarity``; documents without an embedding are
        ignored.

        Args:
            query: The query text to embed.
            top_k: Maximum number of documents to return. Defaults to 5.

        Returns:
            Up to ``top_k`` documents ordered by descending similarity.
        """
        query_vector = self._embedder.infer_vector(query)

        similarities = [
            (doc, self.cosine_similarity(query_vector.value, doc.embedding.value))
            for doc in self.get_all_documents()
            if doc.embedding is not None
        ]
        similarities.sort(key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in similarities[:top_k]]

    class Config:
        extra = 'allow'
@@ -0,0 +1,12 @@
1
+ from .RedisVectorStore import RedisVectorStore
2
+
3
# Must match the distribution version published in the package metadata.
__version__ = "0.6.0.dev154"
# Long-form description of the plugin (Markdown).
__long_desc__ = """

# Swarmauri Redis VectorStore Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_vectorstore_redis
3
+ Version: 0.6.0.dev154
4
+ Summary: Swarmauri Redis Vector Store
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: redis (>=4.0,<5.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Swarmauri Redis Vector Store
@@ -0,0 +1,7 @@
1
+ swarmauri_vectorstore_redis/__init__.py,sha256=kEer1rPuqj3_CAoRTjOr9-nBY7Dz6n__w_kXSddD6u4,285
2
+ swarmauri_vectorstore_redis/RedisDocumentRetriever.py,sha256=5x62kZ2a7H8qoHzOttNnpg3WlKoyL6FyCuR8PlEm_SU,2234
3
+ swarmauri_vectorstore_redis/RedisVectorStore.py,sha256=cmckLjTsg1qxvqojom3VZW41tIg8bOlkWPt4l1Ufsug,9121
4
+ swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/entry_points.txt,sha256=HSN4TqGUlAgs7ovkyK0DCa0k4MoQkF2irfPAPjIw9kU,227
5
+ swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/METADATA,sha256=djVzFMQXuV4whFaQwpCTeFSMKKeqrm9qmbXPgDl0hFM,774
6
+ swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
7
+ swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.0.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,6 @@
1
+ [swarmauri.retrievers]
2
+ RedisDocumentRetriever=swarmauri_vectorstore_redis.RedisDocumentRetriever:RedisDocumentRetriever
3
+
4
+ [swarmauri.vector_stores]
5
+ RedisVectorStore=swarmauri_vectorstore_redis.RedisVectorStore:RedisVectorStore
6
+