vector-inspector 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. vector_inspector/core/connection_manager.py +55 -49
  2. vector_inspector/core/connections/base_connection.py +41 -41
  3. vector_inspector/core/connections/chroma_connection.py +110 -86
  4. vector_inspector/core/connections/pinecone_connection.py +168 -182
  5. vector_inspector/core/connections/qdrant_connection.py +109 -126
  6. vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
  7. vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
  8. vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
  9. vector_inspector/core/connections/template_connection.py +55 -65
  10. vector_inspector/core/embedding_utils.py +32 -32
  11. vector_inspector/core/logging.py +27 -0
  12. vector_inspector/core/model_registry.py +4 -3
  13. vector_inspector/main.py +6 -2
  14. vector_inspector/services/backup_helpers.py +63 -0
  15. vector_inspector/services/backup_restore_service.py +73 -152
  16. vector_inspector/services/credential_service.py +33 -40
  17. vector_inspector/services/import_export_service.py +70 -67
  18. vector_inspector/services/profile_service.py +92 -94
  19. vector_inspector/services/settings_service.py +68 -48
  20. vector_inspector/services/visualization_service.py +40 -39
  21. vector_inspector/ui/components/splash_window.py +57 -0
  22. vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
  23. vector_inspector/ui/main_window.py +200 -146
  24. vector_inspector/ui/views/info_panel.py +208 -127
  25. vector_inspector/ui/views/metadata_view.py +8 -7
  26. vector_inspector/ui/views/search_view.py +97 -75
  27. vector_inspector/ui/views/visualization_view.py +140 -97
  28. vector_inspector/utils/version.py +5 -0
  29. {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/METADATA +9 -2
  30. {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/RECORD +32 -25
  31. {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/WHEEL +0 -0
  32. {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -9,6 +9,9 @@ from qdrant_client.models import (
9
9
  )
10
10
 
11
11
  from .base_connection import VectorDBConnection
12
+ from vector_inspector.core.logging import log_info, log_error, log_debug
13
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
14
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import resolve_embedding_model
12
15
 
13
16
 
14
17
  class QdrantConnection(VectorDBConnection):
@@ -84,7 +87,7 @@ class QdrantConnection(VectorDBConnection):
84
87
  self._client.get_collections()
85
88
  return True
86
89
  except Exception as e:
87
- print(f"Connection failed: {e}")
90
+ log_error("Connection failed: %s", e)
88
91
  return False
89
92
 
90
93
  def _to_uuid(self, id_str: str) -> uuid.UUID:
@@ -146,7 +149,7 @@ class QdrantConnection(VectorDBConnection):
146
149
 
147
150
  return {"documents": documents, "metadatas": metadatas}
148
151
  except Exception as e:
149
- print(f"Failed to get items: {e}")
152
+ log_error("Failed to get items: %s", e)
150
153
  return {"documents": [], "metadatas": []}
151
154
 
152
155
  def list_collections(self) -> List[str]:
@@ -162,7 +165,7 @@ class QdrantConnection(VectorDBConnection):
162
165
  collections = self._client.get_collections()
163
166
  return [col.name for col in collections.collections]
164
167
  except Exception as e:
165
- print(f"Failed to list collections: {e}")
168
+ log_error("Failed to list collections: %s", e)
166
169
  return []
167
170
 
168
171
  def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
@@ -271,108 +274,26 @@ class QdrantConnection(VectorDBConnection):
271
274
  return result
272
275
 
273
276
  except Exception as e:
274
- print(f"Failed to get collection info: {e}")
277
+ log_error("Failed to get collection info: %s", e)
275
278
  return None
276
279
 
277
280
  def _get_embedding_model_for_collection(self, collection_name: str):
278
- """Get the appropriate embedding model for a collection based on stored metadata, settings, or dimension."""
279
- from ..embedding_utils import get_model_for_dimension, load_embedding_model, DEFAULT_MODEL
280
-
281
- # Get collection info to determine vector dimension and check metadata
282
- collection_info = self.get_collection_info(collection_name)
283
- if not collection_info:
284
- # Default if we can't determine
285
- print(f"Warning: Could not determine collection info for {collection_name}, using default")
286
- model_name, model_type = DEFAULT_MODEL
287
- model = load_embedding_model(model_name, model_type)
288
- return (model, model_name, model_type)
289
-
290
- # Priority 1: Check if collection metadata has embedding model info (most reliable)
291
- if 'embedding_model' in collection_info:
292
- model_name = collection_info['embedding_model']
293
- model_type = collection_info.get('embedding_model_type', 'sentence-transformer')
294
- print(f"Using stored embedding model '{model_name}' ({model_type}) for collection '{collection_name}'")
295
- model = load_embedding_model(model_name, model_type)
296
- return (model, model_name, model_type)
297
-
298
- # Priority 2: Check user settings for manual override (skip in connection class)
299
- # Settings lookup is done in the UI layer where connection_id is available
300
-
301
- # Priority 3: Fall back to dimension-based guessing (least reliable)
302
- vector_dim = collection_info.get("vector_dimension")
303
- if not vector_dim or vector_dim == "Unknown":
304
- print(f"Warning: No vector dimension in collection info, using default")
281
+ """Delegate embedding-model selection to helper resolver."""
282
+ try:
283
+ return resolve_embedding_model(self, collection_name)
284
+ except Exception as e:
285
+ log_error("Failed to resolve embedding model for %s: %s", collection_name, e)
286
+ from ..embedding_utils import DEFAULT_MODEL, load_embedding_model
305
287
  model_name, model_type = DEFAULT_MODEL
306
288
  model = load_embedding_model(model_name, model_type)
307
289
  return (model, model_name, model_type)
308
-
309
- # Get the appropriate model for this dimension
310
- model_name, model_type = get_model_for_dimension(vector_dim)
311
- model = load_embedding_model(model_name, model_type)
312
-
313
- print(f"⚠️ Guessing {model_type} model '{model_name}' based on dimension {vector_dim} for '{collection_name}'")
314
- print(f" To specify the correct model, use Settings > Configure Collection Embedding Models")
315
- return (model, model_name, model_type)
316
290
 
317
291
  def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
318
- """
319
- Build Qdrant filter from ChromaDB-style where clause.
320
-
321
- Args:
322
- where: ChromaDB-style filter dictionary
323
-
324
- Returns:
325
- Qdrant Filter object or None
326
- """
327
- if not where:
328
- return None
329
-
292
+ """Delegate filter construction to helper module."""
330
293
  try:
331
- must_conditions = []
332
- must_not_conditions = []
333
-
334
- for key, value in where.items():
335
- if isinstance(value, dict):
336
- # Handle operators like $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin, $contains, $not_contains
337
- for op, val in value.items():
338
- if op == "$eq":
339
- must_conditions.append(FieldCondition(key=key, match=MatchValue(value=val)))
340
- elif op == "$ne":
341
- # Use must_not for not-equal
342
- must_not_conditions.append(FieldCondition(key=key, match=MatchValue(value=val)))
343
- elif op == "$in":
344
- # Use MatchAny for IN operator (available since v1.1.0)
345
- must_conditions.append(FieldCondition(key=key, match=MatchAny(any=val)))
346
- elif op == "$nin":
347
- # Use MatchExcept for NOT IN operator (available since v1.2.0)
348
- must_conditions.append(FieldCondition(key=key, match=MatchExcept(**{"except": val})))
349
- elif op == "$contains":
350
- # Text matching in Qdrant (uses full-text index if available)
351
- must_conditions.append(FieldCondition(key=key, match=MatchText(text=str(val))))
352
- elif op == "$not_contains":
353
- # Negative text matching using must_not
354
- must_not_conditions.append(FieldCondition(key=key, match=MatchText(text=str(val))))
355
- elif op in ["$gt", "$gte", "$lt", "$lte"]:
356
- range_args = {}
357
- if op == "$gt":
358
- range_args["gt"] = val
359
- elif op == "$gte":
360
- range_args["gte"] = val
361
- elif op == "$lt":
362
- range_args["lt"] = val
363
- elif op == "$lte":
364
- range_args["lte"] = val
365
- must_conditions.append(FieldCondition(key=key, range=Range(**range_args)))
366
- else:
367
- # Direct equality match
368
- must_conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
369
-
370
- if must_conditions or must_not_conditions:
371
- return Filter(must=must_conditions if must_conditions else None,
372
- must_not=must_not_conditions if must_not_conditions else None)
373
- return None
294
+ return build_filter(where)
374
295
  except Exception as e:
375
- print(f"Failed to build filter: {e}")
296
+ log_error("Failed to build filter: %s", e)
376
297
  return None
377
298
 
378
299
  def query_collection(
@@ -402,7 +323,7 @@ class QdrantConnection(VectorDBConnection):
402
323
  return None
403
324
 
404
325
  if not query_texts and not query_embeddings:
405
- print("Either query_texts or query_embeddings required")
326
+ log_error("Either query_texts or query_embeddings required")
406
327
  return None
407
328
 
408
329
  try:
@@ -434,7 +355,7 @@ class QdrantConnection(VectorDBConnection):
434
355
  from ..embedding_utils import encode_text
435
356
  query_vector = encode_text(query, model, model_type)
436
357
  except Exception as e:
437
- print(f"Failed to embed query text: {e}")
358
+ log_error("Failed to embed query text: %s", e)
438
359
  continue
439
360
  else:
440
361
  query_vector = query
@@ -451,7 +372,7 @@ class QdrantConnection(VectorDBConnection):
451
372
  )
452
373
  search_results = getattr(res, "points", res)
453
374
  except Exception as e:
454
- print(f"Query failed: {e}")
375
+ log_error("Query failed: %s", e)
455
376
  continue
456
377
 
457
378
  # Transform results to standard format
@@ -484,7 +405,7 @@ class QdrantConnection(VectorDBConnection):
484
405
 
485
406
  return all_results
486
407
  except Exception as e:
487
- print(f"Query failed: {e}")
408
+ log_error("Query failed: %s", e)
488
409
  return None
489
410
 
490
411
  def get_all_items(
@@ -553,7 +474,7 @@ class QdrantConnection(VectorDBConnection):
553
474
  "embeddings": embeddings
554
475
  }
555
476
  except Exception as e:
556
- print(f"Failed to get items: {e}")
477
+ log_error("Failed to get items: %s", e)
557
478
  return None
558
479
 
559
480
  def add_items(
@@ -581,7 +502,7 @@ class QdrantConnection(VectorDBConnection):
581
502
  return False
582
503
 
583
504
  if not embeddings:
584
- print("Embeddings are required for Qdrant")
505
+ log_error("Embeddings are required for Qdrant")
585
506
  return False
586
507
 
587
508
  try:
@@ -614,7 +535,7 @@ class QdrantConnection(VectorDBConnection):
614
535
  )
615
536
  return True
616
537
  except Exception as e:
617
- print(f"Failed to add items: {e}")
538
+ log_error("Failed to add items: %s", e)
618
539
  return False
619
540
 
620
541
  def update_items(
@@ -680,7 +601,7 @@ class QdrantConnection(VectorDBConnection):
680
601
 
681
602
  return True
682
603
  except Exception as e:
683
- print(f"Failed to update items: {e}")
604
+ log_error("Failed to update items: %s", e)
684
605
  return False
685
606
 
686
607
  def delete_items(
@@ -720,7 +641,7 @@ class QdrantConnection(VectorDBConnection):
720
641
  )
721
642
  return True
722
643
  except Exception as e:
723
- print(f"Failed to delete items: {e}")
644
+ log_error("Failed to delete items: %s", e)
724
645
  return False
725
646
 
726
647
  def delete_collection(self, name: str) -> bool:
@@ -740,7 +661,7 @@ class QdrantConnection(VectorDBConnection):
740
661
  self._client.delete_collection(collection_name=name)
741
662
  return True
742
663
  except Exception as e:
743
- print(f"Failed to delete collection: {e}")
664
+ log_error("Failed to delete collection: %s", e)
744
665
  return False
745
666
 
746
667
  def create_collection(
@@ -783,21 +704,91 @@ class QdrantConnection(VectorDBConnection):
783
704
  )
784
705
  return True
785
706
  except Exception as e:
786
- print(f"Failed to create collection: {e}")
707
+ log_error(f"Failed to create collection: {e}")
787
708
  return False
788
-
789
- def get_connection_info(self) -> Dict[str, Any]:
790
- """
791
- Get information about the current connection.
792
-
793
- Returns:
794
- Dictionary with connection details
709
+
710
def prepare_restore(self, metadata: Dict[str, Any], data: Dict[str, Any]) -> bool:
    """Provider-specific hook invoked before restoring data.

    Resolves the vector size and distance metric for the target collection,
    validates any supplied embeddings, creates the collection, stringifies
    the ids and, when no embeddings were supplied, generates them from the
    documents.

    Args:
        metadata: Backup metadata. Reads ``collection_name`` and the optional
            ``collection_info`` dict (``vector_dimension`` / ``vector_size``,
            ``distance_metric`` / ``distance``). ``collection_info`` may get a
            ``vector_dimension`` entry written back.
        data: Backup payload. Reads ``embeddings``, ``ids`` and ``documents``;
            ``ids`` and ``embeddings`` may be replaced in place.

    Returns:
        True on success, False on fatal failure (the reason is logged).
    """

    def _positive_int(value: Any) -> Optional[int]:
        # Collection info can carry a placeholder such as "Unknown" instead
        # of a number; treat anything that is not a positive int as absent.
        try:
            number = int(value)
        except (TypeError, ValueError):
            return None
        return number if number > 0 else None

    try:
        metadata = metadata or {}
        data = data if data is not None else {}

        collection_name = metadata.get("collection_name")
        if not collection_name:
            log_error("Cannot determine collection name for Qdrant restore")
            return False

        coll_info = metadata.get("collection_info")

        # Prefer `vector_dimension` but fall back to older `vector_size` for compatibility
        vector_size = None
        if coll_info:
            vector_size = (
                _positive_int(coll_info.get("vector_dimension"))
                or _positive_int(coll_info.get("vector_size"))
            )

        embeddings = data.get("embeddings")
        # Explicit None/len test so array-like embeddings do not trip on bool().
        embeddings_present = embeddings is not None and len(embeddings) > 0
        if vector_size is None and embeddings_present:
            first = embeddings[0]
            if isinstance(first, (list, tuple)):
                vector_size = len(first) or None

        if vector_size is None:
            log_error("Cannot determine vector size for Qdrant collection during restore")
            return False

        # Validate dimensionality BEFORE creating the collection so a bad
        # backup does not leave an empty collection behind.
        if embeddings_present:
            for i, emb in enumerate(embeddings):
                if not isinstance(emb, (list, tuple)):
                    # leave conversion to normalize_embeddings later
                    continue
                if len(emb) != vector_size:
                    log_error("Embedding at index %d has length %d but expected %d",
                              i, len(emb), vector_size)
                    return False

        # Determine distance metric (try several known keys)
        distance = None
        if coll_info:
            distance = coll_info.get("distance_metric") or coll_info.get("distance")
        distance = distance or "Cosine"

        log_info("Preparing restore: collection=%s, vector_size=%s, distance=%s",
                 collection_name, vector_size, distance)

        if not self.create_collection(collection_name, vector_size, distance):
            log_error("Failed to create collection %s", collection_name)
            return False

        # Ensure IDs are strings — actual insertion will convert to UUIDs
        ids = data.get("ids")
        if ids:
            data["ids"] = [str(i) for i in ids]

        # If embeddings missing or empty, try to generate them. The model is
        # only resolved when there is something to encode.
        if not embeddings_present:
            documents = data.get("documents") or []
            if documents:
                try:
                    model, model_name, model_type = self._get_embedding_model_for_collection(collection_name)
                    from ..embedding_utils import encode_documents
                    data["embeddings"] = encode_documents(documents, model, model_type)
                    log_info("Generated %d embeddings using model %s", len(data["embeddings"]), model_name)
                except Exception as e:
                    log_error("Failed to generate embeddings during prepare_restore: %s", e)
                    return False

        # Normalize coll_info key for downstream code expectations; also
        # replaces an unusable placeholder with the resolved size.
        if coll_info and _positive_int(coll_info.get("vector_dimension")) is None:
            coll_info["vector_dimension"] = vector_size

        return True
    except Exception as e:
        log_error("prepare_restore failed: %s", e)
        return False
785
+
786
+ def get_connection_info(self) -> Dict[str, Any]:
787
+ """Get information about the current connection."""
788
+ info: Dict[str, Any] = {
797
789
  "provider": "Qdrant",
798
790
  "connected": self.is_connected,
799
791
  }
800
-
801
792
  if self.path:
802
793
  info["mode"] = "local"
803
794
  info["path"] = self.path
@@ -810,18 +801,10 @@ class QdrantConnection(VectorDBConnection):
810
801
  info["port"] = self.port
811
802
  else:
812
803
  info["mode"] = "memory"
813
-
814
804
  return info
815
-
805
+
816
806
  def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
817
- """
818
- Get filter operators supported by Qdrant.
819
-
820
- Qdrant has richer filtering capabilities than ChromaDB.
821
-
822
- Returns:
823
- List of operator dictionaries
824
- """
807
+ """Get filter operators supported by Qdrant."""
825
808
  return [
826
809
  {"name": "=", "server_side": True},
827
810
  {"name": "!=", "server_side": True},
@@ -831,7 +814,7 @@ class QdrantConnection(VectorDBConnection):
831
814
  {"name": "<=", "server_side": True},
832
815
  {"name": "in", "server_side": True},
833
816
  {"name": "not in", "server_side": True},
834
- # Qdrant supports text matching server-side
835
817
  {"name": "contains", "server_side": True},
836
818
  {"name": "not contains", "server_side": True},
837
819
  ]
820
+
@@ -0,0 +1,4 @@
1
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
2
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import resolve_embedding_model
3
+
4
+ __all__ = ["build_filter", "resolve_embedding_model"]
@@ -0,0 +1,35 @@
1
+ from typing import Tuple
2
+
3
+ from vector_inspector.core.embedding_utils import get_model_for_dimension, load_embedding_model, DEFAULT_MODEL
4
+
5
+
6
def resolve_embedding_model(connection, collection_name: str) -> Tuple[object, str, str]:
    """Resolve an embedding model for a collection.

    Selection order:
      1. The ``embedding_model`` (and optional ``embedding_model_type``)
         recorded in the collection info, when it is non-empty.
      2. A model guessed from ``vector_dimension``.
      3. ``DEFAULT_MODEL`` when the collection info or its dimension is
         missing or ``'Unknown'``.

    Args:
        connection: Object exposing ``get_collection_info(collection_name)``.
        collection_name: Name of the collection to resolve a model for.

    Returns:
        A ``(model, model_name, model_type)`` tuple.
    """
    collection_info = connection.get_collection_info(collection_name) or {}

    # Priority 1: explicit metadata on collection. A key that is present but
    # empty is treated as absent rather than passed to the loader.
    model_name = collection_info.get('embedding_model')
    if model_name:
        model_type = collection_info.get('embedding_model_type') or 'sentence-transformer'
    else:
        # Priority 3: guess by vector dimension. (There is no priority 2
        # here: this function consults no user settings.)
        vector_dim = collection_info.get('vector_dimension')
        if not vector_dim or vector_dim == 'Unknown':
            model_name, model_type = DEFAULT_MODEL
        else:
            model_name, model_type = get_model_for_dimension(vector_dim)

    model = load_embedding_model(model_name, model_type)
    return (model, model_name, model_type)
@@ -0,0 +1,51 @@
1
+ from typing import Optional, Dict, Any, List
2
+ from qdrant_client.models import Filter, FieldCondition, MatchValue, MatchText, MatchAny, MatchExcept, Range
3
+
4
+
5
def build_filter(where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
    """Build a Qdrant `Filter` from a Chroma-style `where` dict.

    Each entry maps a field name either to a plain value (equality match) or
    to a dict of operators: ``$eq``, ``$ne``, ``$in``, ``$nin``,
    ``$contains``, ``$not_contains``, ``$gt``, ``$gte``, ``$lt``, ``$lte``.
    Operators outside that set are skipped.

    Args:
        where: Chroma-style filter dictionary, or None/empty for no filter.

    Returns:
        A `Filter`, or None when `where` is empty, yields no conditions, or
        could not be converted (the failure is logged).
    """
    if not where:
        return None

    # Chroma range operator -> keyword argument of qdrant's Range model.
    range_keywords = {"$gt": "gt", "$gte": "gte", "$lt": "lt", "$lte": "lte"}

    try:
        must_conditions: List[FieldCondition] = []
        must_not_conditions: List[FieldCondition] = []

        for key, value in where.items():
            if not isinstance(value, dict):
                # Bare value: direct equality match.
                must_conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))
                continue

            for op, val in value.items():
                if op == "$eq":
                    must_conditions.append(FieldCondition(key=key, match=MatchValue(value=val)))
                elif op == "$ne":
                    # Not-equal is expressed as an equality under must_not.
                    must_not_conditions.append(FieldCondition(key=key, match=MatchValue(value=val)))
                elif op == "$in":
                    must_conditions.append(FieldCondition(key=key, match=MatchAny(any=val)))
                elif op == "$nin":
                    # `except` is a Python keyword, hence the dict unpacking.
                    must_conditions.append(FieldCondition(key=key, match=MatchExcept(**{"except": val})))
                elif op == "$contains":
                    must_conditions.append(FieldCondition(key=key, match=MatchText(text=str(val))))
                elif op == "$not_contains":
                    must_not_conditions.append(FieldCondition(key=key, match=MatchText(text=str(val))))
                elif op in range_keywords:
                    must_conditions.append(
                        FieldCondition(key=key, range=Range(**{range_keywords[op]: val}))
                    )

        if must_conditions or must_not_conditions:
            return Filter(must=must_conditions or None,
                          must_not=must_not_conditions or None)
        return None
    except Exception as e:
        # Still best-effort (returns None), but no longer silent: a dropped
        # filter means the query runs unfiltered, so the reason is logged.
        from vector_inspector.core.logging import log_error
        log_error("Failed to build filter: %s", e)
        return None