vector-inspector 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,31 +4,41 @@ from typing import Optional, List, Dict, Any
4
4
  import uuid
5
5
  from qdrant_client import QdrantClient
6
6
  from qdrant_client.models import (
7
- Distance, VectorParams, PointStruct,
8
- Filter, FieldCondition, MatchValue, MatchText, MatchAny, MatchExcept, Range
7
+ Distance,
8
+ VectorParams,
9
+ PointStruct,
10
+ Filter,
11
+ FieldCondition,
12
+ MatchValue,
13
+ MatchText,
14
+ MatchAny,
15
+ MatchExcept,
16
+ Range,
9
17
  )
10
18
 
11
- from .base_connection import VectorDBConnection
19
+ from vector_inspector.core.connections.base_connection import VectorDBConnection
12
20
  from vector_inspector.core.logging import log_info, log_error, log_debug
13
21
  from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
14
- from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import resolve_embedding_model
22
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import (
23
+ resolve_embedding_model,
24
+ )
15
25
 
16
26
 
17
27
  class QdrantConnection(VectorDBConnection):
18
28
  """Manages connection to Qdrant and provides query interface."""
19
-
29
+
20
30
  def __init__(
21
- self,
22
- path: Optional[str] = None,
31
+ self,
32
+ path: Optional[str] = None,
23
33
  url: Optional[str] = None,
24
- host: Optional[str] = None,
34
+ host: Optional[str] = None,
25
35
  port: Optional[int] = None,
26
36
  api_key: Optional[str] = None,
27
- prefer_grpc: bool = False
37
+ prefer_grpc: bool = False,
28
38
  ):
29
39
  """
30
40
  Initialize Qdrant connection.
31
-
41
+
32
42
  Args:
33
43
  path: Path for local/embedded client
34
44
  url: Full URL for remote client (e.g., "http://localhost:6333")
@@ -44,21 +54,21 @@ class QdrantConnection(VectorDBConnection):
44
54
  self.api_key = api_key
45
55
  self.prefer_grpc = prefer_grpc
46
56
  self._client: Optional[QdrantClient] = None
47
-
57
+
48
58
  def connect(self) -> bool:
49
59
  """
50
60
  Establish connection to Qdrant.
51
-
61
+
52
62
  Returns:
53
63
  True if connection successful, False otherwise
54
64
  """
55
65
  try:
56
66
  # Common parameters for stability
57
67
  common_params = {
58
- 'check_compatibility': False,
59
- 'timeout': 300, # 5 minutes timeout for long operations
68
+ "check_compatibility": False,
69
+ "timeout": 300, # 5 minutes timeout for long operations
60
70
  }
61
-
71
+
62
72
  if self.path:
63
73
  # Local/embedded mode
64
74
  self._client = QdrantClient(path=self.path, **common_params)
@@ -68,7 +78,7 @@ class QdrantConnection(VectorDBConnection):
68
78
  url=self.url,
69
79
  api_key=self.api_key,
70
80
  prefer_grpc=self.prefer_grpc,
71
- **common_params
81
+ **common_params,
72
82
  )
73
83
  elif self.host:
74
84
  # Host and port provided
@@ -77,22 +87,22 @@ class QdrantConnection(VectorDBConnection):
77
87
  port=self.port,
78
88
  api_key=self.api_key,
79
89
  prefer_grpc=self.prefer_grpc,
80
- **common_params
90
+ **common_params,
81
91
  )
82
92
  else:
83
93
  # Default to in-memory client
84
94
  self._client = QdrantClient(":memory:", **common_params)
85
-
95
+
86
96
  # Test connection
87
97
  self._client.get_collections()
88
98
  return True
89
99
  except Exception as e:
90
100
  log_error("Connection failed: %s", e)
91
101
  return False
92
-
102
+
93
103
  def _to_uuid(self, id_str: str) -> uuid.UUID:
94
104
  """Convert a string ID to a valid UUID.
95
-
105
+
96
106
  If the string is already a valid UUID, return it.
97
107
  Otherwise, generate a deterministic UUID from the string.
98
108
  """
@@ -101,18 +111,18 @@ class QdrantConnection(VectorDBConnection):
101
111
  except (ValueError, AttributeError):
102
112
  # Generate deterministic UUID from string
103
113
  return uuid.uuid5(uuid.NAMESPACE_DNS, id_str)
104
-
114
+
105
115
  def disconnect(self):
106
116
  """Close connection to Qdrant."""
107
117
  if self._client:
108
118
  self._client.close()
109
119
  self._client = None
110
-
120
+
111
121
  @property
112
122
  def is_connected(self) -> bool:
113
123
  """Check if connected to Qdrant."""
114
124
  return self._client is not None
115
-
125
+
116
126
  def count_collection(self, name: str) -> int:
117
127
  """Count the number of items in a collection."""
118
128
  if not self._client:
@@ -122,23 +132,23 @@ class QdrantConnection(VectorDBConnection):
122
132
  return getattr(res, "count", 0) or 0
123
133
  except Exception:
124
134
  return 0
125
-
135
+
126
136
  def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
127
137
  """
128
138
  Get items by IDs (implementation for compatibility).
129
-
139
+
130
140
  Note: This is a simplified implementation that retrieves items by scrolling
131
141
  and filtering. For production use, consider using get_all_items with filters.
132
142
  """
133
143
  if not self._client:
134
144
  return {"documents": [], "metadatas": []}
135
-
145
+
136
146
  try:
137
147
  # Retrieve by scrolling and filtering
138
148
  all_items = self.get_all_items(name, limit=1000)
139
149
  if not all_items:
140
150
  return {"documents": [], "metadatas": []}
141
-
151
+
142
152
  # Filter by requested IDs
143
153
  documents = []
144
154
  metadatas = []
@@ -146,16 +156,16 @@ class QdrantConnection(VectorDBConnection):
146
156
  if item_id in ids:
147
157
  documents.append(all_items["documents"][i])
148
158
  metadatas.append(all_items["metadatas"][i])
149
-
159
+
150
160
  return {"documents": documents, "metadatas": metadatas}
151
161
  except Exception as e:
152
162
  log_error("Failed to get items: %s", e)
153
163
  return {"documents": [], "metadatas": []}
154
-
164
+
155
165
  def list_collections(self) -> List[str]:
156
166
  """
157
167
  Get list of all collections.
158
-
168
+
159
169
  Returns:
160
170
  List of collection names
161
171
  """
@@ -167,91 +177,88 @@ class QdrantConnection(VectorDBConnection):
167
177
  except Exception as e:
168
178
  log_error("Failed to list collections: %s", e)
169
179
  return []
170
-
180
+
171
181
  def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
172
182
  """
173
183
  Get collection metadata and statistics.
174
-
184
+
175
185
  Args:
176
186
  name: Collection name
177
-
187
+
178
188
  Returns:
179
189
  Dictionary with collection info
180
190
  """
181
191
  if not self._client:
182
192
  return None
183
-
193
+
184
194
  try:
185
195
  # Get collection info
186
196
  collection_info = self._client.get_collection(name)
187
-
197
+
188
198
  # Get a sample point to determine metadata fields
189
199
  sample = self._client.scroll(
190
- collection_name=name,
191
- limit=1,
192
- with_payload=True,
193
- with_vectors=False
200
+ collection_name=name, limit=1, with_payload=True, with_vectors=False
194
201
  )
195
-
202
+
196
203
  metadata_fields = []
197
204
  if sample[0] and len(sample[0]) > 0:
198
205
  point = sample[0][0]
199
206
  if point.payload:
200
207
  # Extract metadata fields, excluding 'document' if present
201
- metadata_fields = [k for k in point.payload.keys() if k != 'document']
202
-
208
+ metadata_fields = [k for k in point.payload.keys() if k != "document"]
209
+
203
210
  # Extract vector configuration
204
211
  vector_dimension = "Unknown"
205
212
  distance_metric = "Unknown"
206
213
  config_details = {}
207
-
214
+
208
215
  if collection_info.config:
209
216
  # Get vector parameters
210
- if hasattr(collection_info.config, 'params'):
217
+ if hasattr(collection_info.config, "params"):
211
218
  params = collection_info.config.params
212
- if hasattr(params, 'vectors'):
219
+ if hasattr(params, "vectors"):
213
220
  vectors = params.vectors
214
221
  # Handle both dict and object access
215
222
  if isinstance(vectors, dict):
216
223
  # Named vectors
217
224
  first_vector = next(iter(vectors.values()), None)
218
225
  if first_vector:
219
- vector_dimension = getattr(first_vector, 'size', 'Unknown')
220
- distance = getattr(first_vector, 'distance', None)
226
+ vector_dimension = getattr(first_vector, "size", "Unknown")
227
+ distance = getattr(first_vector, "distance", None)
221
228
  else:
222
229
  # Single vector config
223
- vector_dimension = getattr(vectors, 'size', 'Unknown')
224
- distance = getattr(vectors, 'distance', None)
225
-
230
+ vector_dimension = getattr(vectors, "size", "Unknown")
231
+ distance = getattr(vectors, "distance", None)
232
+
226
233
  # Map distance enum to readable name
227
234
  if distance:
228
235
  distance_str = str(distance)
229
- if 'COSINE' in distance_str.upper():
236
+ if "COSINE" in distance_str.upper():
230
237
  distance_metric = "Cosine"
231
- elif 'EUCLID' in distance_str.upper():
238
+ elif "EUCLID" in distance_str.upper():
232
239
  distance_metric = "Euclidean"
233
- elif 'DOT' in distance_str.upper():
240
+ elif "DOT" in distance_str.upper():
234
241
  distance_metric = "Dot Product"
235
- elif 'MANHATTAN' in distance_str.upper():
242
+ elif "MANHATTAN" in distance_str.upper():
236
243
  distance_metric = "Manhattan"
237
244
  else:
238
245
  distance_metric = distance_str
239
-
246
+
240
247
  # Get HNSW config if available
241
- if hasattr(collection_info.config, 'hnsw_config'):
248
+ if hasattr(collection_info.config, "hnsw_config"):
242
249
  hnsw = collection_info.config.hnsw_config
243
- config_details['hnsw_config'] = {
244
- 'm': getattr(hnsw, 'm', None),
245
- 'ef_construct': getattr(hnsw, 'ef_construct', None),
250
+ config_details["hnsw_config"] = {
251
+ "m": getattr(hnsw, "m", None),
252
+ "ef_construct": getattr(hnsw, "ef_construct", None),
246
253
  }
247
-
254
+
248
255
  # Get optimizer config if available
249
- if hasattr(collection_info.config, 'optimizer_config'):
256
+ if hasattr(collection_info.config, "optimizer_config"):
250
257
  opt = collection_info.config.optimizer_config
251
- config_details['optimizer_config'] = {
252
- 'indexing_threshold': getattr(opt, 'indexing_threshold', None),
258
+ config_details["optimizer_config"] = {
259
+ "indexing_threshold": getattr(opt, "indexing_threshold", None),
253
260
  }
254
-
261
+
255
262
  result = {
256
263
  "name": name,
257
264
  "count": collection_info.points_count,
@@ -259,35 +266,36 @@ class QdrantConnection(VectorDBConnection):
259
266
  "vector_dimension": vector_dimension,
260
267
  "distance_metric": distance_metric,
261
268
  }
262
-
269
+
263
270
  # Check for embedding model metadata (if collection creator stored it)
264
- if hasattr(collection_info.config, 'metadata') and collection_info.config.metadata:
271
+ if hasattr(collection_info.config, "metadata") and collection_info.config.metadata:
265
272
  metadata = collection_info.config.metadata
266
- if 'embedding_model' in metadata:
267
- result['embedding_model'] = metadata['embedding_model']
268
- if 'embedding_model_type' in metadata:
269
- result['embedding_model_type'] = metadata['embedding_model_type']
270
-
273
+ if "embedding_model" in metadata:
274
+ result["embedding_model"] = metadata["embedding_model"]
275
+ if "embedding_model_type" in metadata:
276
+ result["embedding_model_type"] = metadata["embedding_model_type"]
277
+
271
278
  if config_details:
272
- result['config'] = config_details
273
-
279
+ result["config"] = config_details
280
+
274
281
  return result
275
-
282
+
276
283
  except Exception as e:
277
284
  log_error("Failed to get collection info: %s", e)
278
285
  return None
279
-
286
+
280
287
  def _get_embedding_model_for_collection(self, collection_name: str):
281
288
  """Delegate embedding-model selection to helper resolver."""
282
289
  try:
283
290
  return resolve_embedding_model(self, collection_name)
284
291
  except Exception as e:
285
292
  log_error("Failed to resolve embedding model for %s: %s", collection_name, e)
286
- from ..embedding_utils import DEFAULT_MODEL, load_embedding_model
293
+ from vector_inspector.core.embedding_utils import DEFAULT_MODEL, load_embedding_model
294
+
287
295
  model_name, model_type = DEFAULT_MODEL
288
296
  model = load_embedding_model(model_name, model_type)
289
297
  return (model, model_name, model_type)
290
-
298
+
291
299
  def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
292
300
  """Delegate filter construction to helper module."""
293
301
  try:
@@ -295,7 +303,7 @@ class QdrantConnection(VectorDBConnection):
295
303
  except Exception as e:
296
304
  log_error("Failed to build filter: %s", e)
297
305
  return None
298
-
306
+
299
307
  def query_collection(
300
308
  self,
301
309
  collection_name: str,
@@ -307,7 +315,7 @@ class QdrantConnection(VectorDBConnection):
307
315
  ) -> Optional[Dict[str, Any]]:
308
316
  """
309
317
  Query a collection for similar vectors.
310
-
318
+
311
319
  Args:
312
320
  collection_name: Name of collection to query
313
321
  query_texts: Text queries (Qdrant will embed automatically)
@@ -315,44 +323,48 @@ class QdrantConnection(VectorDBConnection):
315
323
  n_results: Number of results to return
316
324
  where: Metadata filter
317
325
  where_document: Document content filter (limited support)
318
-
326
+
319
327
  Returns:
320
328
  Query results or None if failed
321
329
  """
322
330
  if not self._client:
323
331
  return None
324
-
332
+
325
333
  if not query_texts and not query_embeddings:
326
334
  log_error("Either query_texts or query_embeddings required")
327
335
  return None
328
-
336
+
329
337
  try:
330
338
  # Build filter
331
339
  qdrant_filter = self._build_qdrant_filter(where)
332
-
340
+
333
341
  # Perform search for each query
334
342
  all_results = {
335
343
  "ids": [],
336
344
  "distances": [],
337
345
  "documents": [],
338
346
  "metadatas": [],
339
- "embeddings": []
347
+ "embeddings": [],
340
348
  }
341
-
349
+
342
350
  # Use query_texts if provided (Qdrant handles embedding)
343
351
  queries = query_texts if query_texts else []
344
-
352
+
345
353
  # If embeddings provided instead, use them
346
354
  if query_embeddings and not query_texts:
347
355
  queries = query_embeddings
348
-
356
+
349
357
  for query in queries:
350
358
  # Embed text queries if needed
351
359
  if isinstance(query, str):
352
360
  # Generate embeddings for text query using appropriate model for this collection
353
361
  try:
354
- model, model_name, model_type = self._get_embedding_model_for_collection(collection_name)
355
- from ..embedding_utils import encode_text
362
+ model, model_name, model_type = self._get_embedding_model_for_collection(
363
+ collection_name
364
+ )
365
+
366
+ from vector_inspector.core.embedding_utils import encode_text
367
+
356
368
  query_vector = encode_text(query, model, model_type)
357
369
  except Exception as e:
358
370
  log_error("Failed to embed query text: %s", e)
@@ -374,40 +386,40 @@ class QdrantConnection(VectorDBConnection):
374
386
  except Exception as e:
375
387
  log_error("Query failed: %s", e)
376
388
  continue
377
-
389
+
378
390
  # Transform results to standard format
379
391
  ids = []
380
392
  distances = []
381
393
  documents = []
382
394
  metadatas = []
383
395
  embeddings = []
384
-
396
+
385
397
  for result in search_results:
386
398
  ids.append(str(result.id))
387
399
  distances.append(result.score)
388
-
400
+
389
401
  # Extract document and metadata from payload
390
402
  payload = result.payload or {}
391
- documents.append(payload.get('document', ''))
392
-
403
+ documents.append(payload.get("document", ""))
404
+
393
405
  # Metadata is everything except 'document'
394
- metadata = {k: v for k, v in payload.items() if k != 'document'}
406
+ metadata = {k: v for k, v in payload.items() if k != "document"}
395
407
  metadatas.append(metadata)
396
-
408
+
397
409
  # Extract embedding
398
410
  embeddings.append(result.vector if result.vector else [])
399
-
411
+
400
412
  all_results["ids"].append(ids)
401
413
  all_results["distances"].append(distances)
402
414
  all_results["documents"].append(documents)
403
415
  all_results["metadatas"].append(metadatas)
404
416
  all_results["embeddings"].append(embeddings)
405
-
417
+
406
418
  return all_results
407
419
  except Exception as e:
408
420
  log_error("Query failed: %s", e)
409
421
  return None
410
-
422
+
411
423
  def get_all_items(
412
424
  self,
413
425
  collection_name: str,
@@ -417,23 +429,23 @@ class QdrantConnection(VectorDBConnection):
417
429
  ) -> Optional[Dict[str, Any]]:
418
430
  """
419
431
  Get all items from a collection.
420
-
432
+
421
433
  Args:
422
434
  collection_name: Name of collection
423
435
  limit: Maximum number of items to return
424
436
  offset: Number of items to skip
425
437
  where: Metadata filter
426
-
438
+
427
439
  Returns:
428
440
  Collection items or None if failed
429
441
  """
430
442
  if not self._client:
431
443
  return None
432
-
444
+
433
445
  try:
434
446
  # Build filter
435
447
  qdrant_filter = self._build_qdrant_filter(where)
436
-
448
+
437
449
  # Use scroll to retrieve items
438
450
  points, next_offset = self._client.scroll(
439
451
  collection_name=collection_name,
@@ -441,42 +453,42 @@ class QdrantConnection(VectorDBConnection):
441
453
  limit=limit,
442
454
  offset=offset,
443
455
  with_payload=True,
444
- with_vectors=True
456
+ with_vectors=True,
445
457
  )
446
-
458
+
447
459
  # Transform to standard format
448
460
  ids = []
449
461
  documents = []
450
462
  metadatas = []
451
463
  embeddings = []
452
-
464
+
453
465
  for point in points:
454
466
  ids.append(str(point.id))
455
-
467
+
456
468
  payload = point.payload or {}
457
- documents.append(payload.get('document', ''))
458
-
469
+ documents.append(payload.get("document", ""))
470
+
459
471
  # Metadata is everything except 'document'
460
- metadata = {k: v for k, v in payload.items() if k != 'document'}
472
+ metadata = {k: v for k, v in payload.items() if k != "document"}
461
473
  metadatas.append(metadata)
462
-
474
+
463
475
  # Extract embedding
464
476
  if isinstance(point.vector, dict):
465
477
  # Named vectors - use the first one
466
478
  embeddings.append(list(point.vector.values())[0] if point.vector else [])
467
479
  else:
468
480
  embeddings.append(point.vector if point.vector else [])
469
-
481
+
470
482
  return {
471
483
  "ids": ids,
472
484
  "documents": documents,
473
485
  "metadatas": metadatas,
474
- "embeddings": embeddings
486
+ "embeddings": embeddings,
475
487
  }
476
488
  except Exception as e:
477
489
  log_error("Failed to get items: %s", e)
478
490
  return None
479
-
491
+
480
492
  def add_items(
481
493
  self,
482
494
  collection_name: str,
@@ -487,29 +499,39 @@ class QdrantConnection(VectorDBConnection):
487
499
  ) -> bool:
488
500
  """
489
501
  Add items to a collection.
490
-
502
+
491
503
  Args:
492
504
  collection_name: Name of collection
493
505
  documents: Document texts
494
506
  metadatas: Metadata for each document
495
507
  ids: IDs for each document (will generate UUIDs if not provided)
496
508
  embeddings: Pre-computed embeddings (required for Qdrant)
497
-
509
+
498
510
  Returns:
499
511
  True if successful, False otherwise
500
512
  """
501
513
  if not self._client:
502
514
  return False
503
-
504
- if not embeddings:
505
- log_error("Embeddings are required for Qdrant")
506
- return False
507
-
515
+
516
+ # If embeddings not provided, compute using model resolution helper
517
+ if not embeddings and documents:
518
+ try:
519
+ embeddings = self.compute_embeddings_for_documents(
520
+ collection_name,
521
+ documents,
522
+ getattr(self, "path", None)
523
+ or getattr(self, "url", None)
524
+ or getattr(self, "host", None),
525
+ )
526
+ except Exception as e:
527
+ log_error("Embeddings are required for Qdrant and computing them failed: %s", e)
528
+ return False
529
+
508
530
  try:
509
531
  # Generate IDs if not provided
510
532
  if not ids:
511
533
  ids = [str(uuid.uuid4()) for _ in documents]
512
-
534
+
513
535
  # Build points
514
536
  points = []
515
537
  for i, (doc_id, document, embedding) in enumerate(zip(ids, documents, embeddings)):
@@ -517,27 +539,20 @@ class QdrantConnection(VectorDBConnection):
517
539
  payload = {"document": document}
518
540
  if metadatas and i < len(metadatas):
519
541
  payload.update(metadatas[i])
520
-
542
+
521
543
  # Convert string ID to UUID
522
544
  point_id = self._to_uuid(doc_id)
523
-
524
- point = PointStruct(
525
- id=point_id,
526
- vector=embedding,
527
- payload=payload
528
- )
545
+
546
+ point = PointStruct(id=point_id, vector=embedding, payload=payload)
529
547
  points.append(point)
530
-
548
+
531
549
  # Upsert points
532
- self._client.upsert(
533
- collection_name=collection_name,
534
- points=points
535
- )
550
+ self._client.upsert(collection_name=collection_name, points=points)
536
551
  return True
537
552
  except Exception as e:
538
553
  log_error("Failed to add items: %s", e)
539
554
  return False
540
-
555
+
541
556
  def update_items(
542
557
  self,
543
558
  collection_name: str,
@@ -548,20 +563,20 @@ class QdrantConnection(VectorDBConnection):
548
563
  ) -> bool:
549
564
  """
550
565
  Update items in a collection.
551
-
566
+
552
567
  Args:
553
568
  collection_name: Name of collection
554
569
  ids: IDs of items to update
555
570
  documents: New document texts
556
571
  metadatas: New metadata
557
572
  embeddings: New embeddings
558
-
573
+
559
574
  Returns:
560
575
  True if successful, False otherwise
561
576
  """
562
577
  if not self._client:
563
578
  return False
564
-
579
+
565
580
  try:
566
581
  # For Qdrant, we need to retrieve existing points, update them, and upsert
567
582
  for i, point_id in enumerate(ids):
@@ -570,40 +585,56 @@ class QdrantConnection(VectorDBConnection):
570
585
  collection_name=collection_name,
571
586
  ids=[point_id],
572
587
  with_payload=True,
573
- with_vectors=True
588
+ with_vectors=True,
574
589
  )
575
-
590
+
576
591
  if not existing:
577
592
  continue
578
-
593
+
579
594
  point = existing[0]
580
595
  payload = point.payload or {}
581
596
  vector = point.vector
582
-
597
+
583
598
  # Update fields as provided
584
599
  if documents and i < len(documents):
585
- payload['document'] = documents[i]
586
-
600
+ payload["document"] = documents[i]
601
+
587
602
  if metadatas and i < len(metadatas):
588
603
  # Update metadata, keeping 'document' field
589
- doc = payload.get('document', '')
604
+ doc = payload.get("document", "")
590
605
  payload = metadatas[i].copy()
591
- payload['document'] = doc
592
-
606
+ payload["document"] = doc
607
+
608
+ # If embeddings provided use them; otherwise compute for updated documents
593
609
  if embeddings and i < len(embeddings):
594
610
  vector = embeddings[i]
595
-
611
+ elif documents and i < len(documents) and documents[i]:
612
+ try:
613
+ # Compute single embedding for this document
614
+ computed = self.compute_embeddings_for_documents(
615
+ collection_name,
616
+ [documents[i]],
617
+ getattr(self, "path", None)
618
+ or getattr(self, "url", None)
619
+ or getattr(self, "host", None),
620
+ )
621
+ vector = computed[0] if computed else vector
622
+ except Exception as e:
623
+ log_error("Failed to compute embedding for Qdrant update: %s", e)
624
+ # leave existing vector unchanged
625
+ pass
626
+
596
627
  # Upsert updated point
597
628
  self._client.upsert(
598
629
  collection_name=collection_name,
599
- points=[PointStruct(id=point_id, vector=vector, payload=payload)]
630
+ points=[PointStruct(id=point_id, vector=vector, payload=payload)],
600
631
  )
601
-
632
+
602
633
  return True
603
634
  except Exception as e:
604
635
  log_error("Failed to update items: %s", e)
605
636
  return False
606
-
637
+
607
638
  def delete_items(
608
639
  self,
609
640
  collection_name: str,
@@ -612,78 +643,69 @@ class QdrantConnection(VectorDBConnection):
612
643
  ) -> bool:
613
644
  """
614
645
  Delete items from a collection.
615
-
646
+
616
647
  Args:
617
648
  collection_name: Name of collection
618
649
  ids: IDs of items to delete
619
650
  where: Metadata filter for items to delete
620
-
651
+
621
652
  Returns:
622
653
  True if successful, False otherwise
623
654
  """
624
655
  if not self._client:
625
656
  return False
626
-
657
+
627
658
  try:
628
659
  if ids:
629
660
  # Delete by IDs
630
- self._client.delete(
631
- collection_name=collection_name,
632
- points_selector=ids
633
- )
661
+ self._client.delete(collection_name=collection_name, points_selector=ids)
634
662
  elif where:
635
663
  # Delete by filter
636
664
  qdrant_filter = self._build_qdrant_filter(where)
637
665
  if qdrant_filter:
638
666
  self._client.delete(
639
- collection_name=collection_name,
640
- points_selector=qdrant_filter
667
+ collection_name=collection_name, points_selector=qdrant_filter
641
668
  )
642
669
  return True
643
670
  except Exception as e:
644
671
  log_error("Failed to delete items: %s", e)
645
672
  return False
646
-
673
+
647
674
  def delete_collection(self, name: str) -> bool:
648
675
  """
649
676
  Delete an entire collection.
650
-
677
+
651
678
  Args:
652
679
  name: Collection name
653
-
680
+
654
681
  Returns:
655
682
  True if successful, False otherwise
656
683
  """
657
684
  if not self._client:
658
685
  return False
659
-
686
+
660
687
  try:
661
688
  self._client.delete_collection(collection_name=name)
662
689
  return True
663
690
  except Exception as e:
664
691
  log_error("Failed to delete collection: %s", e)
665
692
  return False
666
-
667
- def create_collection(
668
- self,
669
- name: str,
670
- vector_size: int,
671
- distance: str = "Cosine"
672
- ) -> bool:
693
+
694
+ def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
673
695
  """
674
696
  Create a new collection.
675
-
697
+
676
698
  Args:
677
699
  name: Collection name
678
700
  vector_size: Dimension of vectors
679
701
  distance: Distance metric ("Cosine", "Euclid", "Dot")
680
-
702
+
681
703
  Returns:
682
704
  True if successful, False otherwise
683
705
  """
684
706
  if not self._client:
685
707
  return False
686
-
708
+
687
709
  try:
688
710
  # Map distance string to Qdrant Distance enum
689
711
  distance_map = {
@@ -692,15 +714,12 @@ class QdrantConnection(VectorDBConnection):
692
714
  "Euclidean": Distance.EUCLID,
693
715
  "Dot": Distance.DOT,
694
716
  }
695
-
717
+
696
718
  qdrant_distance = distance_map.get(distance, Distance.COSINE)
697
-
719
+
698
720
  self._client.create_collection(
699
721
  collection_name=name,
700
- vectors_config=VectorParams(
701
- size=vector_size,
702
- distance=qdrant_distance
703
- )
722
+ vectors_config=VectorParams(size=vector_size, distance=qdrant_distance),
704
723
  )
705
724
  return True
706
725
  except Exception as e:
@@ -739,10 +758,16 @@ class QdrantConnection(VectorDBConnection):
739
758
  distance = coll_info.get("distance_metric") or coll_info.get("distance")
740
759
  distance = distance or "Cosine"
741
760
 
742
- log_info("Preparing restore: collection=%s, vector_size=%s, distance=%s",
743
- metadata.get("collection_name"), vector_size, distance)
761
+ log_info(
762
+ "Preparing restore: collection=%s, vector_size=%s, distance=%s",
763
+ metadata.get("collection_name"),
764
+ vector_size,
765
+ distance,
766
+ )
744
767
 
745
- if not self.create_collection(metadata.get("collection_name"), int(vector_size), distance):
768
+ if not self.create_collection(
769
+ metadata.get("collection_name"), int(vector_size), distance
770
+ ):
746
771
  log_error("Failed to create collection %s", metadata.get("collection_name"))
747
772
  return False
748
773
 
@@ -757,19 +782,30 @@ class QdrantConnection(VectorDBConnection):
757
782
  # leave conversion to normalize_embeddings later
758
783
  continue
759
784
  if len(emb) != int(vector_size):
760
- log_error("Embedding at index %d has length %d but expected %d",
761
- i, len(emb), int(vector_size))
785
+ log_error(
786
+ "Embedding at index %d has length %d but expected %d",
787
+ i,
788
+ len(emb),
789
+ int(vector_size),
790
+ )
762
791
  return False
763
792
 
764
793
  # If embeddings missing or empty, try to generate using connection utilities
765
794
  if not embeddings_present:
766
795
  try:
767
- model, model_name, model_type = self._get_embedding_model_for_collection(metadata.get("collection_name"))
768
- from ..embedding_utils import encode_documents
796
+ model, model_name, model_type = self._get_embedding_model_for_collection(
797
+ metadata.get("collection_name")
798
+ )
799
+ from vector_inspector.core.embedding_utils import encode_documents
800
+
769
801
  documents = data.get("documents", []) if data else []
770
802
  if documents:
771
803
  data["embeddings"] = encode_documents(documents, model, model_type)
772
- log_info("Generated %d embeddings using model %s", len(data.get("embeddings")), model_name)
804
+ log_info(
805
+ "Generated %d embeddings using model %s",
806
+ len(data.get("embeddings")),
807
+ model_name,
808
+ )
773
809
  except Exception as e:
774
810
  log_error("Failed to generate embeddings during prepare_restore: %s", e)
775
811
  return False
@@ -817,4 +853,3 @@ class QdrantConnection(VectorDBConnection):
817
853
  {"name": "contains", "server_side": True},
818
854
  {"name": "not contains", "server_side": True},
819
855
  ]
820
-