vector-inspector 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/core/connections/base_connection.py +86 -1
- vector_inspector/core/connections/chroma_connection.py +23 -3
- vector_inspector/core/connections/pgvector_connection.py +1100 -0
- vector_inspector/core/connections/pinecone_connection.py +24 -4
- vector_inspector/core/connections/qdrant_connection.py +224 -189
- vector_inspector/core/embedding_providers/provider_factory.py +33 -38
- vector_inspector/core/embedding_utils.py +2 -2
- vector_inspector/services/backup_restore_service.py +41 -33
- vector_inspector/ui/components/connection_manager_panel.py +96 -77
- vector_inspector/ui/components/profile_manager_panel.py +315 -121
- vector_inspector/ui/dialogs/embedding_config_dialog.py +79 -58
- vector_inspector/ui/main_window.py +22 -0
- vector_inspector/ui/views/connection_view.py +215 -116
- vector_inspector/ui/views/info_panel.py +6 -6
- vector_inspector/ui/views/metadata_view.py +466 -187
- {vector_inspector-0.3.4.dist-info → vector_inspector-0.3.6.dist-info}/METADATA +7 -6
- {vector_inspector-0.3.4.dist-info → vector_inspector-0.3.6.dist-info}/RECORD +19 -18
- {vector_inspector-0.3.4.dist-info → vector_inspector-0.3.6.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.4.dist-info → vector_inspector-0.3.6.dist-info}/entry_points.txt +0 -0
|
@@ -4,31 +4,41 @@ from typing import Optional, List, Dict, Any
|
|
|
4
4
|
import uuid
|
|
5
5
|
from qdrant_client import QdrantClient
|
|
6
6
|
from qdrant_client.models import (
|
|
7
|
-
Distance,
|
|
8
|
-
|
|
7
|
+
Distance,
|
|
8
|
+
VectorParams,
|
|
9
|
+
PointStruct,
|
|
10
|
+
Filter,
|
|
11
|
+
FieldCondition,
|
|
12
|
+
MatchValue,
|
|
13
|
+
MatchText,
|
|
14
|
+
MatchAny,
|
|
15
|
+
MatchExcept,
|
|
16
|
+
Range,
|
|
9
17
|
)
|
|
10
18
|
|
|
11
|
-
from .base_connection import VectorDBConnection
|
|
19
|
+
from vector_inspector.core.connections.base_connection import VectorDBConnection
|
|
12
20
|
from vector_inspector.core.logging import log_info, log_error, log_debug
|
|
13
21
|
from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
|
|
14
|
-
from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import
|
|
22
|
+
from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import (
|
|
23
|
+
resolve_embedding_model,
|
|
24
|
+
)
|
|
15
25
|
|
|
16
26
|
|
|
17
27
|
class QdrantConnection(VectorDBConnection):
|
|
18
28
|
"""Manages connection to Qdrant and provides query interface."""
|
|
19
|
-
|
|
29
|
+
|
|
20
30
|
def __init__(
|
|
21
|
-
self,
|
|
22
|
-
path: Optional[str] = None,
|
|
31
|
+
self,
|
|
32
|
+
path: Optional[str] = None,
|
|
23
33
|
url: Optional[str] = None,
|
|
24
|
-
host: Optional[str] = None,
|
|
34
|
+
host: Optional[str] = None,
|
|
25
35
|
port: Optional[int] = None,
|
|
26
36
|
api_key: Optional[str] = None,
|
|
27
|
-
prefer_grpc: bool = False
|
|
37
|
+
prefer_grpc: bool = False,
|
|
28
38
|
):
|
|
29
39
|
"""
|
|
30
40
|
Initialize Qdrant connection.
|
|
31
|
-
|
|
41
|
+
|
|
32
42
|
Args:
|
|
33
43
|
path: Path for local/embedded client
|
|
34
44
|
url: Full URL for remote client (e.g., "http://localhost:6333")
|
|
@@ -44,21 +54,21 @@ class QdrantConnection(VectorDBConnection):
|
|
|
44
54
|
self.api_key = api_key
|
|
45
55
|
self.prefer_grpc = prefer_grpc
|
|
46
56
|
self._client: Optional[QdrantClient] = None
|
|
47
|
-
|
|
57
|
+
|
|
48
58
|
def connect(self) -> bool:
|
|
49
59
|
"""
|
|
50
60
|
Establish connection to Qdrant.
|
|
51
|
-
|
|
61
|
+
|
|
52
62
|
Returns:
|
|
53
63
|
True if connection successful, False otherwise
|
|
54
64
|
"""
|
|
55
65
|
try:
|
|
56
66
|
# Common parameters for stability
|
|
57
67
|
common_params = {
|
|
58
|
-
|
|
59
|
-
|
|
68
|
+
"check_compatibility": False,
|
|
69
|
+
"timeout": 300, # 5 minutes timeout for long operations
|
|
60
70
|
}
|
|
61
|
-
|
|
71
|
+
|
|
62
72
|
if self.path:
|
|
63
73
|
# Local/embedded mode
|
|
64
74
|
self._client = QdrantClient(path=self.path, **common_params)
|
|
@@ -68,7 +78,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
68
78
|
url=self.url,
|
|
69
79
|
api_key=self.api_key,
|
|
70
80
|
prefer_grpc=self.prefer_grpc,
|
|
71
|
-
**common_params
|
|
81
|
+
**common_params,
|
|
72
82
|
)
|
|
73
83
|
elif self.host:
|
|
74
84
|
# Host and port provided
|
|
@@ -77,22 +87,22 @@ class QdrantConnection(VectorDBConnection):
|
|
|
77
87
|
port=self.port,
|
|
78
88
|
api_key=self.api_key,
|
|
79
89
|
prefer_grpc=self.prefer_grpc,
|
|
80
|
-
**common_params
|
|
90
|
+
**common_params,
|
|
81
91
|
)
|
|
82
92
|
else:
|
|
83
93
|
# Default to in-memory client
|
|
84
94
|
self._client = QdrantClient(":memory:", **common_params)
|
|
85
|
-
|
|
95
|
+
|
|
86
96
|
# Test connection
|
|
87
97
|
self._client.get_collections()
|
|
88
98
|
return True
|
|
89
99
|
except Exception as e:
|
|
90
100
|
log_error("Connection failed: %s", e)
|
|
91
101
|
return False
|
|
92
|
-
|
|
102
|
+
|
|
93
103
|
def _to_uuid(self, id_str: str) -> uuid.UUID:
|
|
94
104
|
"""Convert a string ID to a valid UUID.
|
|
95
|
-
|
|
105
|
+
|
|
96
106
|
If the string is already a valid UUID, return it.
|
|
97
107
|
Otherwise, generate a deterministic UUID from the string.
|
|
98
108
|
"""
|
|
@@ -101,18 +111,18 @@ class QdrantConnection(VectorDBConnection):
|
|
|
101
111
|
except (ValueError, AttributeError):
|
|
102
112
|
# Generate deterministic UUID from string
|
|
103
113
|
return uuid.uuid5(uuid.NAMESPACE_DNS, id_str)
|
|
104
|
-
|
|
114
|
+
|
|
105
115
|
def disconnect(self):
|
|
106
116
|
"""Close connection to Qdrant."""
|
|
107
117
|
if self._client:
|
|
108
118
|
self._client.close()
|
|
109
119
|
self._client = None
|
|
110
|
-
|
|
120
|
+
|
|
111
121
|
@property
|
|
112
122
|
def is_connected(self) -> bool:
|
|
113
123
|
"""Check if connected to Qdrant."""
|
|
114
124
|
return self._client is not None
|
|
115
|
-
|
|
125
|
+
|
|
116
126
|
def count_collection(self, name: str) -> int:
|
|
117
127
|
"""Count the number of items in a collection."""
|
|
118
128
|
if not self._client:
|
|
@@ -122,23 +132,23 @@ class QdrantConnection(VectorDBConnection):
|
|
|
122
132
|
return getattr(res, "count", 0) or 0
|
|
123
133
|
except Exception:
|
|
124
134
|
return 0
|
|
125
|
-
|
|
135
|
+
|
|
126
136
|
def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
|
|
127
137
|
"""
|
|
128
138
|
Get items by IDs (implementation for compatibility).
|
|
129
|
-
|
|
139
|
+
|
|
130
140
|
Note: This is a simplified implementation that retrieves items by scrolling
|
|
131
141
|
and filtering. For production use, consider using get_all_items with filters.
|
|
132
142
|
"""
|
|
133
143
|
if not self._client:
|
|
134
144
|
return {"documents": [], "metadatas": []}
|
|
135
|
-
|
|
145
|
+
|
|
136
146
|
try:
|
|
137
147
|
# Retrieve by scrolling and filtering
|
|
138
148
|
all_items = self.get_all_items(name, limit=1000)
|
|
139
149
|
if not all_items:
|
|
140
150
|
return {"documents": [], "metadatas": []}
|
|
141
|
-
|
|
151
|
+
|
|
142
152
|
# Filter by requested IDs
|
|
143
153
|
documents = []
|
|
144
154
|
metadatas = []
|
|
@@ -146,16 +156,16 @@ class QdrantConnection(VectorDBConnection):
|
|
|
146
156
|
if item_id in ids:
|
|
147
157
|
documents.append(all_items["documents"][i])
|
|
148
158
|
metadatas.append(all_items["metadatas"][i])
|
|
149
|
-
|
|
159
|
+
|
|
150
160
|
return {"documents": documents, "metadatas": metadatas}
|
|
151
161
|
except Exception as e:
|
|
152
162
|
log_error("Failed to get items: %s", e)
|
|
153
163
|
return {"documents": [], "metadatas": []}
|
|
154
|
-
|
|
164
|
+
|
|
155
165
|
def list_collections(self) -> List[str]:
|
|
156
166
|
"""
|
|
157
167
|
Get list of all collections.
|
|
158
|
-
|
|
168
|
+
|
|
159
169
|
Returns:
|
|
160
170
|
List of collection names
|
|
161
171
|
"""
|
|
@@ -167,91 +177,88 @@ class QdrantConnection(VectorDBConnection):
|
|
|
167
177
|
except Exception as e:
|
|
168
178
|
log_error("Failed to list collections: %s", e)
|
|
169
179
|
return []
|
|
170
|
-
|
|
180
|
+
|
|
171
181
|
def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
|
|
172
182
|
"""
|
|
173
183
|
Get collection metadata and statistics.
|
|
174
|
-
|
|
184
|
+
|
|
175
185
|
Args:
|
|
176
186
|
name: Collection name
|
|
177
|
-
|
|
187
|
+
|
|
178
188
|
Returns:
|
|
179
189
|
Dictionary with collection info
|
|
180
190
|
"""
|
|
181
191
|
if not self._client:
|
|
182
192
|
return None
|
|
183
|
-
|
|
193
|
+
|
|
184
194
|
try:
|
|
185
195
|
# Get collection info
|
|
186
196
|
collection_info = self._client.get_collection(name)
|
|
187
|
-
|
|
197
|
+
|
|
188
198
|
# Get a sample point to determine metadata fields
|
|
189
199
|
sample = self._client.scroll(
|
|
190
|
-
collection_name=name,
|
|
191
|
-
limit=1,
|
|
192
|
-
with_payload=True,
|
|
193
|
-
with_vectors=False
|
|
200
|
+
collection_name=name, limit=1, with_payload=True, with_vectors=False
|
|
194
201
|
)
|
|
195
|
-
|
|
202
|
+
|
|
196
203
|
metadata_fields = []
|
|
197
204
|
if sample[0] and len(sample[0]) > 0:
|
|
198
205
|
point = sample[0][0]
|
|
199
206
|
if point.payload:
|
|
200
207
|
# Extract metadata fields, excluding 'document' if present
|
|
201
|
-
metadata_fields = [k for k in point.payload.keys() if k !=
|
|
202
|
-
|
|
208
|
+
metadata_fields = [k for k in point.payload.keys() if k != "document"]
|
|
209
|
+
|
|
203
210
|
# Extract vector configuration
|
|
204
211
|
vector_dimension = "Unknown"
|
|
205
212
|
distance_metric = "Unknown"
|
|
206
213
|
config_details = {}
|
|
207
|
-
|
|
214
|
+
|
|
208
215
|
if collection_info.config:
|
|
209
216
|
# Get vector parameters
|
|
210
|
-
if hasattr(collection_info.config,
|
|
217
|
+
if hasattr(collection_info.config, "params"):
|
|
211
218
|
params = collection_info.config.params
|
|
212
|
-
if hasattr(params,
|
|
219
|
+
if hasattr(params, "vectors"):
|
|
213
220
|
vectors = params.vectors
|
|
214
221
|
# Handle both dict and object access
|
|
215
222
|
if isinstance(vectors, dict):
|
|
216
223
|
# Named vectors
|
|
217
224
|
first_vector = next(iter(vectors.values()), None)
|
|
218
225
|
if first_vector:
|
|
219
|
-
vector_dimension = getattr(first_vector,
|
|
220
|
-
distance = getattr(first_vector,
|
|
226
|
+
vector_dimension = getattr(first_vector, "size", "Unknown")
|
|
227
|
+
distance = getattr(first_vector, "distance", None)
|
|
221
228
|
else:
|
|
222
229
|
# Single vector config
|
|
223
|
-
vector_dimension = getattr(vectors,
|
|
224
|
-
distance = getattr(vectors,
|
|
225
|
-
|
|
230
|
+
vector_dimension = getattr(vectors, "size", "Unknown")
|
|
231
|
+
distance = getattr(vectors, "distance", None)
|
|
232
|
+
|
|
226
233
|
# Map distance enum to readable name
|
|
227
234
|
if distance:
|
|
228
235
|
distance_str = str(distance)
|
|
229
|
-
if
|
|
236
|
+
if "COSINE" in distance_str.upper():
|
|
230
237
|
distance_metric = "Cosine"
|
|
231
|
-
elif
|
|
238
|
+
elif "EUCLID" in distance_str.upper():
|
|
232
239
|
distance_metric = "Euclidean"
|
|
233
|
-
elif
|
|
240
|
+
elif "DOT" in distance_str.upper():
|
|
234
241
|
distance_metric = "Dot Product"
|
|
235
|
-
elif
|
|
242
|
+
elif "MANHATTAN" in distance_str.upper():
|
|
236
243
|
distance_metric = "Manhattan"
|
|
237
244
|
else:
|
|
238
245
|
distance_metric = distance_str
|
|
239
|
-
|
|
246
|
+
|
|
240
247
|
# Get HNSW config if available
|
|
241
|
-
if hasattr(collection_info.config,
|
|
248
|
+
if hasattr(collection_info.config, "hnsw_config"):
|
|
242
249
|
hnsw = collection_info.config.hnsw_config
|
|
243
|
-
config_details[
|
|
244
|
-
|
|
245
|
-
|
|
250
|
+
config_details["hnsw_config"] = {
|
|
251
|
+
"m": getattr(hnsw, "m", None),
|
|
252
|
+
"ef_construct": getattr(hnsw, "ef_construct", None),
|
|
246
253
|
}
|
|
247
|
-
|
|
254
|
+
|
|
248
255
|
# Get optimizer config if available
|
|
249
|
-
if hasattr(collection_info.config,
|
|
256
|
+
if hasattr(collection_info.config, "optimizer_config"):
|
|
250
257
|
opt = collection_info.config.optimizer_config
|
|
251
|
-
config_details[
|
|
252
|
-
|
|
258
|
+
config_details["optimizer_config"] = {
|
|
259
|
+
"indexing_threshold": getattr(opt, "indexing_threshold", None),
|
|
253
260
|
}
|
|
254
|
-
|
|
261
|
+
|
|
255
262
|
result = {
|
|
256
263
|
"name": name,
|
|
257
264
|
"count": collection_info.points_count,
|
|
@@ -259,35 +266,36 @@ class QdrantConnection(VectorDBConnection):
|
|
|
259
266
|
"vector_dimension": vector_dimension,
|
|
260
267
|
"distance_metric": distance_metric,
|
|
261
268
|
}
|
|
262
|
-
|
|
269
|
+
|
|
263
270
|
# Check for embedding model metadata (if collection creator stored it)
|
|
264
|
-
if hasattr(collection_info.config,
|
|
271
|
+
if hasattr(collection_info.config, "metadata") and collection_info.config.metadata:
|
|
265
272
|
metadata = collection_info.config.metadata
|
|
266
|
-
if
|
|
267
|
-
result[
|
|
268
|
-
if
|
|
269
|
-
result[
|
|
270
|
-
|
|
273
|
+
if "embedding_model" in metadata:
|
|
274
|
+
result["embedding_model"] = metadata["embedding_model"]
|
|
275
|
+
if "embedding_model_type" in metadata:
|
|
276
|
+
result["embedding_model_type"] = metadata["embedding_model_type"]
|
|
277
|
+
|
|
271
278
|
if config_details:
|
|
272
|
-
result[
|
|
273
|
-
|
|
279
|
+
result["config"] = config_details
|
|
280
|
+
|
|
274
281
|
return result
|
|
275
|
-
|
|
282
|
+
|
|
276
283
|
except Exception as e:
|
|
277
284
|
log_error("Failed to get collection info: %s", e)
|
|
278
285
|
return None
|
|
279
|
-
|
|
286
|
+
|
|
280
287
|
def _get_embedding_model_for_collection(self, collection_name: str):
|
|
281
288
|
"""Delegate embedding-model selection to helper resolver."""
|
|
282
289
|
try:
|
|
283
290
|
return resolve_embedding_model(self, collection_name)
|
|
284
291
|
except Exception as e:
|
|
285
292
|
log_error("Failed to resolve embedding model for %s: %s", collection_name, e)
|
|
286
|
-
from
|
|
293
|
+
from vector_inspector.core.embedding_utils import DEFAULT_MODEL, load_embedding_model
|
|
294
|
+
|
|
287
295
|
model_name, model_type = DEFAULT_MODEL
|
|
288
296
|
model = load_embedding_model(model_name, model_type)
|
|
289
297
|
return (model, model_name, model_type)
|
|
290
|
-
|
|
298
|
+
|
|
291
299
|
def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
|
|
292
300
|
"""Delegate filter construction to helper module."""
|
|
293
301
|
try:
|
|
@@ -295,7 +303,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
295
303
|
except Exception as e:
|
|
296
304
|
log_error("Failed to build filter: %s", e)
|
|
297
305
|
return None
|
|
298
|
-
|
|
306
|
+
|
|
299
307
|
def query_collection(
|
|
300
308
|
self,
|
|
301
309
|
collection_name: str,
|
|
@@ -307,7 +315,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
307
315
|
) -> Optional[Dict[str, Any]]:
|
|
308
316
|
"""
|
|
309
317
|
Query a collection for similar vectors.
|
|
310
|
-
|
|
318
|
+
|
|
311
319
|
Args:
|
|
312
320
|
collection_name: Name of collection to query
|
|
313
321
|
query_texts: Text queries (Qdrant will embed automatically)
|
|
@@ -315,44 +323,48 @@ class QdrantConnection(VectorDBConnection):
|
|
|
315
323
|
n_results: Number of results to return
|
|
316
324
|
where: Metadata filter
|
|
317
325
|
where_document: Document content filter (limited support)
|
|
318
|
-
|
|
326
|
+
|
|
319
327
|
Returns:
|
|
320
328
|
Query results or None if failed
|
|
321
329
|
"""
|
|
322
330
|
if not self._client:
|
|
323
331
|
return None
|
|
324
|
-
|
|
332
|
+
|
|
325
333
|
if not query_texts and not query_embeddings:
|
|
326
334
|
log_error("Either query_texts or query_embeddings required")
|
|
327
335
|
return None
|
|
328
|
-
|
|
336
|
+
|
|
329
337
|
try:
|
|
330
338
|
# Build filter
|
|
331
339
|
qdrant_filter = self._build_qdrant_filter(where)
|
|
332
|
-
|
|
340
|
+
|
|
333
341
|
# Perform search for each query
|
|
334
342
|
all_results = {
|
|
335
343
|
"ids": [],
|
|
336
344
|
"distances": [],
|
|
337
345
|
"documents": [],
|
|
338
346
|
"metadatas": [],
|
|
339
|
-
"embeddings": []
|
|
347
|
+
"embeddings": [],
|
|
340
348
|
}
|
|
341
|
-
|
|
349
|
+
|
|
342
350
|
# Use query_texts if provided (Qdrant handles embedding)
|
|
343
351
|
queries = query_texts if query_texts else []
|
|
344
|
-
|
|
352
|
+
|
|
345
353
|
# If embeddings provided instead, use them
|
|
346
354
|
if query_embeddings and not query_texts:
|
|
347
355
|
queries = query_embeddings
|
|
348
|
-
|
|
356
|
+
|
|
349
357
|
for query in queries:
|
|
350
358
|
# Embed text queries if needed
|
|
351
359
|
if isinstance(query, str):
|
|
352
360
|
# Generate embeddings for text query using appropriate model for this collection
|
|
353
361
|
try:
|
|
354
|
-
model, model_name, model_type = self._get_embedding_model_for_collection(
|
|
355
|
-
|
|
362
|
+
model, model_name, model_type = self._get_embedding_model_for_collection(
|
|
363
|
+
collection_name
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
from vector_inspector.core.embedding_utils import encode_text
|
|
367
|
+
|
|
356
368
|
query_vector = encode_text(query, model, model_type)
|
|
357
369
|
except Exception as e:
|
|
358
370
|
log_error("Failed to embed query text: %s", e)
|
|
@@ -374,40 +386,40 @@ class QdrantConnection(VectorDBConnection):
|
|
|
374
386
|
except Exception as e:
|
|
375
387
|
log_error("Query failed: %s", e)
|
|
376
388
|
continue
|
|
377
|
-
|
|
389
|
+
|
|
378
390
|
# Transform results to standard format
|
|
379
391
|
ids = []
|
|
380
392
|
distances = []
|
|
381
393
|
documents = []
|
|
382
394
|
metadatas = []
|
|
383
395
|
embeddings = []
|
|
384
|
-
|
|
396
|
+
|
|
385
397
|
for result in search_results:
|
|
386
398
|
ids.append(str(result.id))
|
|
387
399
|
distances.append(result.score)
|
|
388
|
-
|
|
400
|
+
|
|
389
401
|
# Extract document and metadata from payload
|
|
390
402
|
payload = result.payload or {}
|
|
391
|
-
documents.append(payload.get(
|
|
392
|
-
|
|
403
|
+
documents.append(payload.get("document", ""))
|
|
404
|
+
|
|
393
405
|
# Metadata is everything except 'document'
|
|
394
|
-
metadata = {k: v for k, v in payload.items() if k !=
|
|
406
|
+
metadata = {k: v for k, v in payload.items() if k != "document"}
|
|
395
407
|
metadatas.append(metadata)
|
|
396
|
-
|
|
408
|
+
|
|
397
409
|
# Extract embedding
|
|
398
410
|
embeddings.append(result.vector if result.vector else [])
|
|
399
|
-
|
|
411
|
+
|
|
400
412
|
all_results["ids"].append(ids)
|
|
401
413
|
all_results["distances"].append(distances)
|
|
402
414
|
all_results["documents"].append(documents)
|
|
403
415
|
all_results["metadatas"].append(metadatas)
|
|
404
416
|
all_results["embeddings"].append(embeddings)
|
|
405
|
-
|
|
417
|
+
|
|
406
418
|
return all_results
|
|
407
419
|
except Exception as e:
|
|
408
420
|
log_error("Query failed: %s", e)
|
|
409
421
|
return None
|
|
410
|
-
|
|
422
|
+
|
|
411
423
|
def get_all_items(
|
|
412
424
|
self,
|
|
413
425
|
collection_name: str,
|
|
@@ -417,23 +429,23 @@ class QdrantConnection(VectorDBConnection):
|
|
|
417
429
|
) -> Optional[Dict[str, Any]]:
|
|
418
430
|
"""
|
|
419
431
|
Get all items from a collection.
|
|
420
|
-
|
|
432
|
+
|
|
421
433
|
Args:
|
|
422
434
|
collection_name: Name of collection
|
|
423
435
|
limit: Maximum number of items to return
|
|
424
436
|
offset: Number of items to skip
|
|
425
437
|
where: Metadata filter
|
|
426
|
-
|
|
438
|
+
|
|
427
439
|
Returns:
|
|
428
440
|
Collection items or None if failed
|
|
429
441
|
"""
|
|
430
442
|
if not self._client:
|
|
431
443
|
return None
|
|
432
|
-
|
|
444
|
+
|
|
433
445
|
try:
|
|
434
446
|
# Build filter
|
|
435
447
|
qdrant_filter = self._build_qdrant_filter(where)
|
|
436
|
-
|
|
448
|
+
|
|
437
449
|
# Use scroll to retrieve items
|
|
438
450
|
points, next_offset = self._client.scroll(
|
|
439
451
|
collection_name=collection_name,
|
|
@@ -441,42 +453,42 @@ class QdrantConnection(VectorDBConnection):
|
|
|
441
453
|
limit=limit,
|
|
442
454
|
offset=offset,
|
|
443
455
|
with_payload=True,
|
|
444
|
-
with_vectors=True
|
|
456
|
+
with_vectors=True,
|
|
445
457
|
)
|
|
446
|
-
|
|
458
|
+
|
|
447
459
|
# Transform to standard format
|
|
448
460
|
ids = []
|
|
449
461
|
documents = []
|
|
450
462
|
metadatas = []
|
|
451
463
|
embeddings = []
|
|
452
|
-
|
|
464
|
+
|
|
453
465
|
for point in points:
|
|
454
466
|
ids.append(str(point.id))
|
|
455
|
-
|
|
467
|
+
|
|
456
468
|
payload = point.payload or {}
|
|
457
|
-
documents.append(payload.get(
|
|
458
|
-
|
|
469
|
+
documents.append(payload.get("document", ""))
|
|
470
|
+
|
|
459
471
|
# Metadata is everything except 'document'
|
|
460
|
-
metadata = {k: v for k, v in payload.items() if k !=
|
|
472
|
+
metadata = {k: v for k, v in payload.items() if k != "document"}
|
|
461
473
|
metadatas.append(metadata)
|
|
462
|
-
|
|
474
|
+
|
|
463
475
|
# Extract embedding
|
|
464
476
|
if isinstance(point.vector, dict):
|
|
465
477
|
# Named vectors - use the first one
|
|
466
478
|
embeddings.append(list(point.vector.values())[0] if point.vector else [])
|
|
467
479
|
else:
|
|
468
480
|
embeddings.append(point.vector if point.vector else [])
|
|
469
|
-
|
|
481
|
+
|
|
470
482
|
return {
|
|
471
483
|
"ids": ids,
|
|
472
484
|
"documents": documents,
|
|
473
485
|
"metadatas": metadatas,
|
|
474
|
-
"embeddings": embeddings
|
|
486
|
+
"embeddings": embeddings,
|
|
475
487
|
}
|
|
476
488
|
except Exception as e:
|
|
477
489
|
log_error("Failed to get items: %s", e)
|
|
478
490
|
return None
|
|
479
|
-
|
|
491
|
+
|
|
480
492
|
def add_items(
|
|
481
493
|
self,
|
|
482
494
|
collection_name: str,
|
|
@@ -487,29 +499,39 @@ class QdrantConnection(VectorDBConnection):
|
|
|
487
499
|
) -> bool:
|
|
488
500
|
"""
|
|
489
501
|
Add items to a collection.
|
|
490
|
-
|
|
502
|
+
|
|
491
503
|
Args:
|
|
492
504
|
collection_name: Name of collection
|
|
493
505
|
documents: Document texts
|
|
494
506
|
metadatas: Metadata for each document
|
|
495
507
|
ids: IDs for each document (will generate UUIDs if not provided)
|
|
496
508
|
embeddings: Pre-computed embeddings (required for Qdrant)
|
|
497
|
-
|
|
509
|
+
|
|
498
510
|
Returns:
|
|
499
511
|
True if successful, False otherwise
|
|
500
512
|
"""
|
|
501
513
|
if not self._client:
|
|
502
514
|
return False
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
515
|
+
|
|
516
|
+
# If embeddings not provided, compute using model resolution helper
|
|
517
|
+
if not embeddings and documents:
|
|
518
|
+
try:
|
|
519
|
+
embeddings = self.compute_embeddings_for_documents(
|
|
520
|
+
collection_name,
|
|
521
|
+
documents,
|
|
522
|
+
getattr(self, "path", None)
|
|
523
|
+
or getattr(self, "url", None)
|
|
524
|
+
or getattr(self, "host", None),
|
|
525
|
+
)
|
|
526
|
+
except Exception as e:
|
|
527
|
+
log_error("Embeddings are required for Qdrant and computing them failed: %s", e)
|
|
528
|
+
return False
|
|
529
|
+
|
|
508
530
|
try:
|
|
509
531
|
# Generate IDs if not provided
|
|
510
532
|
if not ids:
|
|
511
533
|
ids = [str(uuid.uuid4()) for _ in documents]
|
|
512
|
-
|
|
534
|
+
|
|
513
535
|
# Build points
|
|
514
536
|
points = []
|
|
515
537
|
for i, (doc_id, document, embedding) in enumerate(zip(ids, documents, embeddings)):
|
|
@@ -517,27 +539,20 @@ class QdrantConnection(VectorDBConnection):
|
|
|
517
539
|
payload = {"document": document}
|
|
518
540
|
if metadatas and i < len(metadatas):
|
|
519
541
|
payload.update(metadatas[i])
|
|
520
|
-
|
|
542
|
+
|
|
521
543
|
# Convert string ID to UUID
|
|
522
544
|
point_id = self._to_uuid(doc_id)
|
|
523
|
-
|
|
524
|
-
point = PointStruct(
|
|
525
|
-
id=point_id,
|
|
526
|
-
vector=embedding,
|
|
527
|
-
payload=payload
|
|
528
|
-
)
|
|
545
|
+
|
|
546
|
+
point = PointStruct(id=point_id, vector=embedding, payload=payload)
|
|
529
547
|
points.append(point)
|
|
530
|
-
|
|
548
|
+
|
|
531
549
|
# Upsert points
|
|
532
|
-
self._client.upsert(
|
|
533
|
-
collection_name=collection_name,
|
|
534
|
-
points=points
|
|
535
|
-
)
|
|
550
|
+
self._client.upsert(collection_name=collection_name, points=points)
|
|
536
551
|
return True
|
|
537
552
|
except Exception as e:
|
|
538
553
|
log_error("Failed to add items: %s", e)
|
|
539
554
|
return False
|
|
540
|
-
|
|
555
|
+
|
|
541
556
|
def update_items(
|
|
542
557
|
self,
|
|
543
558
|
collection_name: str,
|
|
@@ -548,20 +563,20 @@ class QdrantConnection(VectorDBConnection):
|
|
|
548
563
|
) -> bool:
|
|
549
564
|
"""
|
|
550
565
|
Update items in a collection.
|
|
551
|
-
|
|
566
|
+
|
|
552
567
|
Args:
|
|
553
568
|
collection_name: Name of collection
|
|
554
569
|
ids: IDs of items to update
|
|
555
570
|
documents: New document texts
|
|
556
571
|
metadatas: New metadata
|
|
557
572
|
embeddings: New embeddings
|
|
558
|
-
|
|
573
|
+
|
|
559
574
|
Returns:
|
|
560
575
|
True if successful, False otherwise
|
|
561
576
|
"""
|
|
562
577
|
if not self._client:
|
|
563
578
|
return False
|
|
564
|
-
|
|
579
|
+
|
|
565
580
|
try:
|
|
566
581
|
# For Qdrant, we need to retrieve existing points, update them, and upsert
|
|
567
582
|
for i, point_id in enumerate(ids):
|
|
@@ -570,40 +585,56 @@ class QdrantConnection(VectorDBConnection):
|
|
|
570
585
|
collection_name=collection_name,
|
|
571
586
|
ids=[point_id],
|
|
572
587
|
with_payload=True,
|
|
573
|
-
with_vectors=True
|
|
588
|
+
with_vectors=True,
|
|
574
589
|
)
|
|
575
|
-
|
|
590
|
+
|
|
576
591
|
if not existing:
|
|
577
592
|
continue
|
|
578
|
-
|
|
593
|
+
|
|
579
594
|
point = existing[0]
|
|
580
595
|
payload = point.payload or {}
|
|
581
596
|
vector = point.vector
|
|
582
|
-
|
|
597
|
+
|
|
583
598
|
# Update fields as provided
|
|
584
599
|
if documents and i < len(documents):
|
|
585
|
-
payload[
|
|
586
|
-
|
|
600
|
+
payload["document"] = documents[i]
|
|
601
|
+
|
|
587
602
|
if metadatas and i < len(metadatas):
|
|
588
603
|
# Update metadata, keeping 'document' field
|
|
589
|
-
doc = payload.get(
|
|
604
|
+
doc = payload.get("document", "")
|
|
590
605
|
payload = metadatas[i].copy()
|
|
591
|
-
payload[
|
|
592
|
-
|
|
606
|
+
payload["document"] = doc
|
|
607
|
+
|
|
608
|
+
# If embeddings provided use them; otherwise compute for updated documents
|
|
593
609
|
if embeddings and i < len(embeddings):
|
|
594
610
|
vector = embeddings[i]
|
|
595
|
-
|
|
611
|
+
elif documents and i < len(documents) and documents[i]:
|
|
612
|
+
try:
|
|
613
|
+
# Compute single embedding for this document
|
|
614
|
+
computed = self.compute_embeddings_for_documents(
|
|
615
|
+
collection_name,
|
|
616
|
+
[documents[i]],
|
|
617
|
+
getattr(self, "path", None)
|
|
618
|
+
or getattr(self, "url", None)
|
|
619
|
+
or getattr(self, "host", None),
|
|
620
|
+
)
|
|
621
|
+
vector = computed[0] if computed else vector
|
|
622
|
+
except Exception as e:
|
|
623
|
+
log_error("Failed to compute embedding for Qdrant update: %s", e)
|
|
624
|
+
# leave existing vector unchanged
|
|
625
|
+
pass
|
|
626
|
+
|
|
596
627
|
# Upsert updated point
|
|
597
628
|
self._client.upsert(
|
|
598
629
|
collection_name=collection_name,
|
|
599
|
-
points=[PointStruct(id=point_id, vector=vector, payload=payload)]
|
|
630
|
+
points=[PointStruct(id=point_id, vector=vector, payload=payload)],
|
|
600
631
|
)
|
|
601
|
-
|
|
632
|
+
|
|
602
633
|
return True
|
|
603
634
|
except Exception as e:
|
|
604
635
|
log_error("Failed to update items: %s", e)
|
|
605
636
|
return False
|
|
606
|
-
|
|
637
|
+
|
|
607
638
|
def delete_items(
|
|
608
639
|
self,
|
|
609
640
|
collection_name: str,
|
|
@@ -612,78 +643,69 @@ class QdrantConnection(VectorDBConnection):
|
|
|
612
643
|
) -> bool:
|
|
613
644
|
"""
|
|
614
645
|
Delete items from a collection.
|
|
615
|
-
|
|
646
|
+
|
|
616
647
|
Args:
|
|
617
648
|
collection_name: Name of collection
|
|
618
649
|
ids: IDs of items to delete
|
|
619
650
|
where: Metadata filter for items to delete
|
|
620
|
-
|
|
651
|
+
|
|
621
652
|
Returns:
|
|
622
653
|
True if successful, False otherwise
|
|
623
654
|
"""
|
|
624
655
|
if not self._client:
|
|
625
656
|
return False
|
|
626
|
-
|
|
657
|
+
|
|
627
658
|
try:
|
|
628
659
|
if ids:
|
|
629
660
|
# Delete by IDs
|
|
630
|
-
self._client.delete(
|
|
631
|
-
collection_name=collection_name,
|
|
632
|
-
points_selector=ids
|
|
633
|
-
)
|
|
661
|
+
self._client.delete(collection_name=collection_name, points_selector=ids)
|
|
634
662
|
elif where:
|
|
635
663
|
# Delete by filter
|
|
636
664
|
qdrant_filter = self._build_qdrant_filter(where)
|
|
637
665
|
if qdrant_filter:
|
|
638
666
|
self._client.delete(
|
|
639
|
-
collection_name=collection_name,
|
|
640
|
-
points_selector=qdrant_filter
|
|
667
|
+
collection_name=collection_name, points_selector=qdrant_filter
|
|
641
668
|
)
|
|
642
669
|
return True
|
|
643
670
|
except Exception as e:
|
|
644
671
|
log_error("Failed to delete items: %s", e)
|
|
645
672
|
return False
|
|
646
|
-
|
|
673
|
+
|
|
647
674
|
def delete_collection(self, name: str) -> bool:
|
|
648
675
|
"""
|
|
649
676
|
Delete an entire collection.
|
|
650
|
-
|
|
677
|
+
|
|
651
678
|
Args:
|
|
652
679
|
name: Collection name
|
|
653
|
-
|
|
680
|
+
|
|
654
681
|
Returns:
|
|
655
682
|
True if successful, False otherwise
|
|
656
683
|
"""
|
|
657
684
|
if not self._client:
|
|
658
685
|
return False
|
|
659
|
-
|
|
686
|
+
|
|
660
687
|
try:
|
|
661
688
|
self._client.delete_collection(collection_name=name)
|
|
662
689
|
return True
|
|
663
690
|
except Exception as e:
|
|
664
691
|
log_error("Failed to delete collection: %s", e)
|
|
665
692
|
return False
|
|
666
|
-
|
|
667
|
-
def create_collection(
|
|
668
|
-
self,
|
|
669
|
-
name: str,
|
|
670
|
-
vector_size: int,
|
|
671
|
-
distance: str = "Cosine"
|
|
672
|
-
) -> bool:
|
|
693
|
+
|
|
694
|
+
def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
|
|
673
695
|
"""
|
|
674
696
|
Create a new collection.
|
|
675
|
-
|
|
697
|
+
|
|
676
698
|
Args:
|
|
677
699
|
name: Collection name
|
|
678
700
|
vector_size: Dimension of vectors
|
|
679
701
|
distance: Distance metric ("Cosine", "Euclid", "Dot")
|
|
680
|
-
|
|
702
|
+
|
|
681
703
|
Returns:
|
|
682
704
|
True if successful, False otherwise
|
|
683
705
|
"""
|
|
684
706
|
if not self._client:
|
|
685
707
|
return False
|
|
686
|
-
|
|
708
|
+
|
|
687
709
|
try:
|
|
688
710
|
# Map distance string to Qdrant Distance enum
|
|
689
711
|
distance_map = {
|
|
@@ -692,15 +714,12 @@ class QdrantConnection(VectorDBConnection):
|
|
|
692
714
|
"Euclidean": Distance.EUCLID,
|
|
693
715
|
"Dot": Distance.DOT,
|
|
694
716
|
}
|
|
695
|
-
|
|
717
|
+
|
|
696
718
|
qdrant_distance = distance_map.get(distance, Distance.COSINE)
|
|
697
|
-
|
|
719
|
+
|
|
698
720
|
self._client.create_collection(
|
|
699
721
|
collection_name=name,
|
|
700
|
-
vectors_config=VectorParams(
|
|
701
|
-
size=vector_size,
|
|
702
|
-
distance=qdrant_distance
|
|
703
|
-
)
|
|
722
|
+
vectors_config=VectorParams(size=vector_size, distance=qdrant_distance),
|
|
704
723
|
)
|
|
705
724
|
return True
|
|
706
725
|
except Exception as e:
|
|
@@ -739,10 +758,16 @@ class QdrantConnection(VectorDBConnection):
|
|
|
739
758
|
distance = coll_info.get("distance_metric") or coll_info.get("distance")
|
|
740
759
|
distance = distance or "Cosine"
|
|
741
760
|
|
|
742
|
-
log_info(
|
|
743
|
-
|
|
761
|
+
log_info(
|
|
762
|
+
"Preparing restore: collection=%s, vector_size=%s, distance=%s",
|
|
763
|
+
metadata.get("collection_name"),
|
|
764
|
+
vector_size,
|
|
765
|
+
distance,
|
|
766
|
+
)
|
|
744
767
|
|
|
745
|
-
if not self.create_collection(
|
|
768
|
+
if not self.create_collection(
|
|
769
|
+
metadata.get("collection_name"), int(vector_size), distance
|
|
770
|
+
):
|
|
746
771
|
log_error("Failed to create collection %s", metadata.get("collection_name"))
|
|
747
772
|
return False
|
|
748
773
|
|
|
@@ -757,19 +782,30 @@ class QdrantConnection(VectorDBConnection):
|
|
|
757
782
|
# leave conversion to normalize_embeddings later
|
|
758
783
|
continue
|
|
759
784
|
if len(emb) != int(vector_size):
|
|
760
|
-
log_error(
|
|
761
|
-
|
|
785
|
+
log_error(
|
|
786
|
+
"Embedding at index %d has length %d but expected %d",
|
|
787
|
+
i,
|
|
788
|
+
len(emb),
|
|
789
|
+
int(vector_size),
|
|
790
|
+
)
|
|
762
791
|
return False
|
|
763
792
|
|
|
764
793
|
# If embeddings missing or empty, try to generate using connection utilities
|
|
765
794
|
if not embeddings_present:
|
|
766
795
|
try:
|
|
767
|
-
model, model_name, model_type = self._get_embedding_model_for_collection(
|
|
768
|
-
|
|
796
|
+
model, model_name, model_type = self._get_embedding_model_for_collection(
|
|
797
|
+
metadata.get("collection_name")
|
|
798
|
+
)
|
|
799
|
+
from vector_inspector.core.embedding_utils import encode_documents
|
|
800
|
+
|
|
769
801
|
documents = data.get("documents", []) if data else []
|
|
770
802
|
if documents:
|
|
771
803
|
data["embeddings"] = encode_documents(documents, model, model_type)
|
|
772
|
-
log_info(
|
|
804
|
+
log_info(
|
|
805
|
+
"Generated %d embeddings using model %s",
|
|
806
|
+
len(data.get("embeddings")),
|
|
807
|
+
model_name,
|
|
808
|
+
)
|
|
773
809
|
except Exception as e:
|
|
774
810
|
log_error("Failed to generate embeddings during prepare_restore: %s", e)
|
|
775
811
|
return False
|
|
@@ -817,4 +853,3 @@ class QdrantConnection(VectorDBConnection):
|
|
|
817
853
|
{"name": "contains", "server_side": True},
|
|
818
854
|
{"name": "not contains", "server_side": True},
|
|
819
855
|
]
|
|
820
|
-
|