swarmauri_vectorstore_redis 0.6.0.dev154__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_vectorstore_redis/RedisDocumentRetriever.py +59 -0
- swarmauri_vectorstore_redis/RedisVectorStore.py +238 -0
- swarmauri_vectorstore_redis/__init__.py +12 -0
- swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/METADATA +20 -0
- swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/RECORD +7 -0
- swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/WHEEL +4 -0
- swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
from typing import List
|
2
|
+
from redisearch import Client, Query
|
3
|
+
from swarmauri_core.documents.IDocument import IDocument
|
4
|
+
from swarmauri_standard.document_stores.ConcreteDocument import (
|
5
|
+
ConcreteDocument,
|
6
|
+
)
|
7
|
+
from swarmauri_base.retrievers.DocumentRetrieverBase import DocumentRetrieverBase
|
8
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
9
|
+
|
10
|
+
@ComponentBase.register_type(DocumentRetrieverBase, "RedisDocumentRetriever")
class RedisDocumentRetriever(DocumentRetrieverBase):
    """
    A document retriever that fetches documents from a Redis store.

    The RediSearch client is created lazily on first access of
    ``redis_client`` so that constructing the retriever performs no I/O.
    """

    def __init__(self, redis_idx_name, redis_host, redis_port):
        """
        Initializes a new instance of RedisDocumentRetriever.

        Args:
            redis_idx_name: Name of the search index, passed to ``Client``
                as its first argument.
            redis_host: Host passed to ``Client`` as ``host``.
            redis_port: Port passed to ``Client`` as ``port``.
        """
        # NOTE(review): the base-class initializer is not invoked here;
        # confirm DocumentRetrieverBase does not require it.
        self._redis_client = None
        self._redis_idx_name = redis_idx_name
        self._redis_host = redis_host
        self._redis_port = redis_port

    @property
    def redis_client(self):
        """Lazily create the search client on first use and return it."""
        if self._redis_client is None:
            # Read the underscore-prefixed attributes set in __init__; the
            # un-prefixed names are never assigned on this class.
            self._redis_client = Client(
                self._redis_idx_name,
                host=self._redis_host,
                port=self._redis_port,
            )
        return self._redis_client

    def retrieve(self, query: str, top_k: int = 5) -> List[IDocument]:
        """
        Retrieve the most relevant documents based on the given query.

        Args:
            query (str): The query string used for document retrieval.
            top_k (int, optional): The number of top relevant documents to
                retrieve. Defaults to 5.

        Returns:
            List[IDocument]: At most ``top_k`` documents built from the
            search results, in the order the search returned them.
        """
        # Page the search so that only the first ``top_k`` hits are returned.
        query_result = self.redis_client.search(Query(query).paging(0, top_k))

        return [
            ConcreteDocument(
                doc_id=doc.id,
                # Note: adjust 'text' based on the actual Redis document schema.
                content=doc.text,
                # All fields of the search hit are exposed as metadata.
                metadata=doc.__dict__,
            )
            for doc in query_result.docs
        ]
|
@@ -0,0 +1,238 @@
|
|
1
|
+
import json
|
2
|
+
from typing import List, Union, Literal, Optional
|
3
|
+
from pydantic import PrivateAttr
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
import redis
|
7
|
+
from redis.commands.search.field import VectorField, TextField
|
8
|
+
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
|
9
|
+
|
10
|
+
from swarmauri_standard.vectors.Vector import Vector
|
11
|
+
from swarmauri_standard.documents.concrete.Document import Document
|
12
|
+
from swarmauri_embedding_doc2vec.Doc2VecEmbedding import Doc2VecEmbedding
|
13
|
+
from swarmauri_base.vector_stores.VectorStoreBase import VectorStoreBase
|
14
|
+
from swarmauri_base.vector_stores.VectorStoreRetrieveMixin import VectorStoreRetrieveMixin
|
15
|
+
from swarmauri_base.vector_stores.VectorStoreSaveLoadMixin import VectorStoreSaveLoadMixin
|
16
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
17
|
+
|
18
|
+
@ComponentBase.register_type(VectorStoreBase, "RedisVectorStore")
class RedisVectorStore(VectorStoreSaveLoadMixin, VectorStoreRetrieveMixin, VectorStoreBase):
    """
    Vector store that keeps documents and their embeddings in Redis hashes.

    Each document is written to the key ``doc:<id>`` as a hash with three
    fields: ``content`` (text), ``metadata`` (JSON) and ``embedding``
    (raw float32 bytes). On construction the store connects to Redis and
    creates the search index ``index_name`` over the ``doc:`` prefix if it
    is not already present.
    """

    type: Literal["RedisVectorStore"] = "RedisVectorStore"
    index_name: str = "documents_index"
    embedding_dimension: int = 8000  # Default embedding dimension

    # Private attributes
    _embedder: Doc2VecEmbedding = PrivateAttr()
    _redis_client: Optional[redis.Redis] = PrivateAttr(default=None)

    # Configuration attributes with default values
    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_password: Optional[str] = None

    def __init__(self, **kwargs):
        """
        Build the embedder, connect to Redis and make sure the index exists.

        Args:
            **kwargs: Field values forwarded to the base-class initializer
                (e.g. ``redis_host``, ``redis_port``, ``index_name``).

        Raises:
            Exception: Whatever ``connect`` re-raises when Redis is
                unreachable.
        """
        super().__init__(**kwargs)
        self._embedder = Doc2VecEmbedding(vector_size=self.embedding_dimension)

        # Initialize Redis client using class attributes
        self.connect()

        # Setup Redis Search index
        self._ensure_index()

    def _ensure_index(self) -> None:
        """Create the search index over ``doc:`` hashes unless it already exists."""
        search = self._redis_client.ft(self.index_name)
        try:
            search.info()
        except redis.exceptions.ResponseError:
            # Only a server-side error reply is taken to mean "index missing";
            # connection failures and other errors propagate instead of being
            # mistaken for a missing index.
            # NOTE(review): assumes FT.INFO on an unknown index surfaces as
            # ResponseError in the pinned redis-py version; confirm.
            print(f"Index '{self.index_name}' does not exist. Creating index...")
            schema = (
                TextField("content"),
                VectorField(
                    "embedding",
                    "FLAT",
                    {
                        "TYPE": "FLOAT32",
                        "DIM": self.embedding_dimension,
                        "DISTANCE_METRIC": "COSINE",
                    },
                ),
            )
            definition = IndexDefinition(
                prefix=["doc:"],
                index_type=IndexType.HASH,
            )
            search.create_index(fields=schema, definition=definition)
            print(f"Index '{self.index_name}' created successfully.")
        else:
            print(f"Index '{self.index_name}' exists.")

    def connect(self) -> None:
        """
        Establishes a connection to the Redis server using class attributes.

        Raises:
            Exception: Re-raises any error from creating the client or from
                the initial ``ping``.
        """
        try:
            self._redis_client = redis.Redis(
                host=self.redis_host,
                port=self.redis_port,
                password=self.redis_password,
                decode_responses=False,  # For binary data
            )
            # Test the connection
            self._redis_client.ping()
            print("Connected to Redis successfully.")
        except Exception as e:
            print(f"Failed to connect to Redis: {e}")
            raise

    def disconnect(self) -> None:
        """
        Disconnects from the Redis server.

        Does nothing when no client is currently held.
        """
        if self._redis_client:
            self._redis_client.close()
            self._redis_client = None
            print("Disconnected from Redis.")

    def _doc_key(self, document_id: str) -> str:
        """Return the Redis key under which ``document_id`` is stored."""
        return f"doc:{document_id}"

    def _queue_document(self, pipeline, doc: Document) -> None:
        """Embed ``doc`` and queue the HSET that stores it on ``pipeline``."""
        # NOTE(review): fit_transform is called per document, as in the
        # original code; confirm that refitting the embedder on every write
        # is intended.
        embedding = self._embedder.fit_transform([doc.content])[0]
        if isinstance(embedding, Vector):
            embedding = embedding.value

        pipeline.hset(
            self._doc_key(doc.id),
            mapping={
                "content": doc.content,
                "metadata": json.dumps(doc.metadata),  # Store metadata as JSON
                # Embedding values are stored as raw float32 bytes.
                "embedding": np.array(embedding, dtype=np.float32).tobytes(),
            },
        )

    def _document_from_hash(self, doc_id: str, data: dict) -> Document:
        """Rebuild a ``Document`` from the raw (bytes-keyed) hash ``data``."""
        metadata = json.loads(data.get(b"metadata", b"{}").decode("utf-8"))
        content = data.get(b"content", b"").decode("utf-8")

        embedding_bytes = data.get(b"embedding")
        if embedding_bytes:
            embedding = Vector(
                value=np.frombuffer(embedding_bytes, dtype=np.float32).tolist()
            )
        else:
            embedding = None

        return Document(
            id=doc_id,
            content=content,
            metadata=metadata,
            embedding=embedding,
        )

    def add_document(self, document: Document) -> None:
        """
        Embed a single document and write it to Redis.

        Args:
            document (Document): The document to store under ``doc:<id>``.
        """
        pipeline = self._redis_client.pipeline()
        self._queue_document(pipeline, document)
        pipeline.execute()

    def add_documents(self, documents: List[Document]) -> None:
        """
        Embed several documents and write them to Redis in one pipeline.

        Documents with empty content are skipped.

        Args:
            documents (List[Document]): The documents to store.
        """
        pipeline = self._redis_client.pipeline()
        for doc in documents:
            if not doc.content:
                continue
            self._queue_document(pipeline, doc)
        pipeline.execute()

    def get_document(self, id: str) -> Union[Document, None]:
        """
        Fetch one document by id.

        Args:
            id (str): The document id (without the ``doc:`` prefix).

        Returns:
            Union[Document, None]: The stored document, or ``None`` when the
            hash is empty or missing.
        """
        data = self._redis_client.hgetall(self._doc_key(id))
        if not data:
            return None
        return self._document_from_hash(id, data)

    def get_all_documents(self) -> List[Document]:
        """
        Load every document stored under the ``doc:`` prefix.

        Returns:
            List[Document]: All documents whose hash is non-empty.
        """
        documents = []
        # scan_iter drives the SCAN cursor loop itself.
        for key in self._redis_client.scan_iter(match="doc:*", count=1000):
            data = self._redis_client.hgetall(key)
            if not data:
                continue
            # Split once so ids that themselves contain "doc:" stay intact.
            doc_id = key.decode("utf-8").split("doc:", 1)[1]
            documents.append(self._document_from_hash(doc_id, data))
        return documents

    def delete_document(self, id: str) -> None:
        """
        Delete the document stored under ``doc:<id>``.

        Args:
            id (str): The document id (without the ``doc:`` prefix).
        """
        self._redis_client.delete(self._doc_key(id))

    def update_document(self, document: Document) -> None:
        """
        Replace an existing document.

        Args:
            document (Document): The new version; its id must already exist.

        Raises:
            ValueError: If no key exists for ``document.id``.
        """
        doc_key = self._doc_key(document.id)
        if not self._redis_client.exists(doc_key):
            raise ValueError(f"Document with id {document.id} does not exist.")
        # Update the document by re-adding it
        self.add_documents([document])

    def cosine_similarity(self, vec1, vec2):
        """
        Compute the cosine similarity of two vectors.

        Args:
            vec1: First vector (any sequence accepted by ``np.dot``).
            vec2: Second vector of the same length.

        Returns:
            The cosine similarity, or ``0`` when either vector has zero norm.
        """
        norm_vec1 = np.linalg.norm(vec1)
        norm_vec2 = np.linalg.norm(vec2)
        if norm_vec1 == 0 or norm_vec2 == 0:
            return 0
        return np.dot(vec1, vec2) / (norm_vec1 * norm_vec2)

    def retrieve(self, query: str, top_k: int = 5) -> List[Document]:
        """
        Return the stored documents most similar to ``query``.

        Every stored document is loaded and ranked in Python by cosine
        similarity against the query embedding; documents without an
        embedding are ignored.

        Args:
            query (str): The query text.
            top_k (int, optional): Maximum number of documents to return.
                Defaults to 5.

        Returns:
            List[Document]: Up to ``top_k`` documents, most similar first.
        """
        query_vector = self._embedder.infer_vector(query)

        similarities = [
            (doc, self.cosine_similarity(query_vector.value, doc.embedding.value))
            for doc in self.get_all_documents()
            if doc.embedding is not None
        ]
        similarities.sort(key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in similarities[:top_k]]

    class Config:
        extra = 'allow'
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from .RedisVectorStore import RedisVectorStore
|
2
|
+
|
3
|
+
from importlib.metadata import PackageNotFoundError, version

try:
    # Read the version from the installed distribution so it cannot drift
    # from the packaging metadata (it was previously pinned to a stale
    # "0.6.0.dev26" literal).
    __version__ = version("swarmauri_vectorstore_redis")
except PackageNotFoundError:
    # Not installed as a distribution (e.g. imported from a source tree):
    # fall back to the release this file shipped with.
    __version__ = "0.6.0.dev154"

# Long description text exposed by the package.
__long_desc__ = """

# Swarmauri Redis VectorStore Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: swarmauri_vectorstore_redis
|
3
|
+
Version: 0.6.0.dev154
|
4
|
+
Summary: Swarmauri Redis Vector Store
|
5
|
+
License: Apache-2.0
|
6
|
+
Author: Jacob Stewart
|
7
|
+
Author-email: jacob@swarmauri.com
|
8
|
+
Requires-Python: >=3.10,<3.13
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
14
|
+
Requires-Dist: redis (>=4.0,<5.0)
|
15
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
16
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
17
|
+
Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
|
18
|
+
Description-Content-Type: text/markdown
|
19
|
+
|
20
|
+
# Swarmauri Redis Vector Store
|
@@ -0,0 +1,7 @@
|
|
1
|
+
swarmauri_vectorstore_redis/__init__.py,sha256=kEer1rPuqj3_CAoRTjOr9-nBY7Dz6n__w_kXSddD6u4,285
|
2
|
+
swarmauri_vectorstore_redis/RedisDocumentRetriever.py,sha256=5x62kZ2a7H8qoHzOttNnpg3WlKoyL6FyCuR8PlEm_SU,2234
|
3
|
+
swarmauri_vectorstore_redis/RedisVectorStore.py,sha256=cmckLjTsg1qxvqojom3VZW41tIg8bOlkWPt4l1Ufsug,9121
|
4
|
+
swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/entry_points.txt,sha256=HSN4TqGUlAgs7ovkyK0DCa0k4MoQkF2irfPAPjIw9kU,227
|
5
|
+
swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/METADATA,sha256=djVzFMQXuV4whFaQwpCTeFSMKKeqrm9qmbXPgDl0hFM,774
|
6
|
+
swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
7
|
+
swarmauri_vectorstore_redis-0.6.0.dev154.dist-info/RECORD,,
|