vector-inspector 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. vector_inspector/core/cache_manager.py +159 -0
  2. vector_inspector/core/connection_manager.py +277 -0
  3. vector_inspector/core/connections/chroma_connection.py +90 -5
  4. vector_inspector/core/connections/qdrant_connection.py +62 -8
  5. vector_inspector/core/embedding_utils.py +140 -0
  6. vector_inspector/services/backup_restore_service.py +3 -29
  7. vector_inspector/services/credential_service.py +130 -0
  8. vector_inspector/services/filter_service.py +1 -1
  9. vector_inspector/services/profile_service.py +409 -0
  10. vector_inspector/services/settings_service.py +20 -1
  11. vector_inspector/services/visualization_service.py +11 -7
  12. vector_inspector/ui/components/connection_manager_panel.py +320 -0
  13. vector_inspector/ui/components/profile_manager_panel.py +518 -0
  14. vector_inspector/ui/dialogs/__init__.py +5 -0
  15. vector_inspector/ui/dialogs/cross_db_migration.py +364 -0
  16. vector_inspector/ui/dialogs/embedding_config_dialog.py +176 -0
  17. vector_inspector/ui/main_window.py +429 -181
  18. vector_inspector/ui/views/connection_view.py +43 -8
  19. vector_inspector/ui/views/info_panel.py +226 -80
  20. vector_inspector/ui/views/metadata_view.py +136 -28
  21. vector_inspector/ui/views/search_view.py +43 -3
  22. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/METADATA +5 -3
  23. vector_inspector-0.2.7.dist-info/RECORD +45 -0
  24. vector_inspector-0.2.5.dist-info/RECORD +0 -35
  25. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/WHEEL +0 -0
  26. {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/entry_points.txt +0 -0
@@ -50,16 +50,22 @@ class QdrantConnection(VectorDBConnection):
             True if connection successful, False otherwise
         """
         try:
+            # Common parameters for stability
+            common_params = {
+                'check_compatibility': False,
+                'timeout': 300,  # 5 minutes timeout for long operations
+            }
+
             if self.path:
                 # Local/embedded mode
-                self._client = QdrantClient(path=self.path, check_compatibility=False)
+                self._client = QdrantClient(path=self.path, **common_params)
             elif self.url:
                 # Full URL provided
                 self._client = QdrantClient(
                     url=self.url,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-                    check_compatibility=False
+                    **common_params
                 )
             elif self.host:
                 # Host and port provided
@@ -68,11 +74,11 @@ class QdrantConnection(VectorDBConnection):
                     port=self.port,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-                    check_compatibility=False
+                    **common_params
                 )
             else:
                 # Default to in-memory client
-                self._client = QdrantClient(":memory:", check_compatibility=False)
+                self._client = QdrantClient(":memory:", **common_params)
 
             # Test connection
             self._client.get_collections()
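Note: the connect() change above funnels every construction path through the same common_params, so each QdrantClient now gets a 300-second timeout and skips the client/server compatibility check. Roughly, the URL branch expands to the following sketch (the connection values here are illustrative, not taken from the package):

    client = QdrantClient(
        url="http://localhost:6333",    # illustrative value
        api_key=None,
        prefer_grpc=False,
        check_compatibility=False,      # skip the client/server version check
        timeout=300,                    # seconds; allows long scroll/migration calls
    )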
@@ -251,6 +257,14 @@ class QdrantConnection(VectorDBConnection):
                 "distance_metric": distance_metric,
             }
 
+            # Check for embedding model metadata (if collection creator stored it)
+            if hasattr(collection_info.config, 'metadata') and collection_info.config.metadata:
+                metadata = collection_info.config.metadata
+                if 'embedding_model' in metadata:
+                    result['embedding_model'] = metadata['embedding_model']
+                if 'embedding_model_type' in metadata:
+                    result['embedding_model_type'] = metadata['embedding_model_type']
+
             if config_details:
                 result['config'] = config_details
 
@@ -260,6 +274,46 @@ class QdrantConnection(VectorDBConnection):
             print(f"Failed to get collection info: {e}")
             return None
 
+    def _get_embedding_model_for_collection(self, collection_name: str):
+        """Get the appropriate embedding model for a collection based on stored metadata, settings, or dimension."""
+        from ..embedding_utils import get_model_for_dimension, load_embedding_model, DEFAULT_MODEL
+
+        # Get collection info to determine vector dimension and check metadata
+        collection_info = self.get_collection_info(collection_name)
+        if not collection_info:
+            # Default if we can't determine
+            print(f"Warning: Could not determine collection info for {collection_name}, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 1: Check if collection metadata has embedding model info (most reliable)
+        if 'embedding_model' in collection_info:
+            model_name = collection_info['embedding_model']
+            model_type = collection_info.get('embedding_model_type', 'sentence-transformer')
+            print(f"Using stored embedding model '{model_name}' ({model_type}) for collection '{collection_name}'")
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 2: Check user settings for manual override (skip in connection class)
+        # Settings lookup is done in the UI layer where connection_id is available
+
+        # Priority 3: Fall back to dimension-based guessing (least reliable)
+        vector_dim = collection_info.get("vector_dimension")
+        if not vector_dim or vector_dim == "Unknown":
+            print(f"Warning: No vector dimension in collection info, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Get the appropriate model for this dimension
+        model_name, model_type = get_model_for_dimension(vector_dim)
+        model = load_embedding_model(model_name, model_type)
+
+        print(f"⚠️ Guessing {model_type} model '{model_name}' based on dimension {vector_dim} for '{collection_name}'")
+        print(f"   To specify the correct model, use Settings > Configure Collection Embedding Models")
+        return (model, model_name, model_type)
+
     def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
         """
         Build Qdrant filter from ChromaDB-style where clause.
@@ -374,11 +428,11 @@ class QdrantConnection(VectorDBConnection):
         for query in queries:
             # Embed text queries if needed
             if isinstance(query, str):
-                # Generate embeddings for text query
+                # Generate embeddings for text query using appropriate model for this collection
                 try:
-                    from sentence_transformers import SentenceTransformer
-                    model = SentenceTransformer("all-MiniLM-L6-v2")
-                    query_vector = model.encode(query).tolist()
+                    model, model_name, model_type = self._get_embedding_model_for_collection(collection_name)
+                    from ..embedding_utils import encode_text
+                    query_vector = encode_text(query, model, model_type)
                 except Exception as e:
                     print(f"Failed to embed query text: {e}")
                     continue
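Note: previously every text query was encoded with a hard-coded all-MiniLM-L6-v2 (384 dimensions), which mismatches collections built with other models; the new path resolves the model per collection. A rough sketch of the flow, assuming a connected QdrantConnection `conn` and a hypothetical 768-dimensional collection named "docs":

    model, model_name, model_type = conn._get_embedding_model_for_collection("docs")
    # e.g. ("all-mpnet-base-v2", "sentence-transformer") via the dimension table below
    query_vector = encode_text("how do I reset a password?", model, model_type)  # length matches the collection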
@@ -0,0 +1,140 @@ vector_inspector/core/embedding_utils.py (new file)
+"""Utilities for managing embedding models and vector dimensions."""
+
+from typing import Optional, Union, Tuple
+from sentence_transformers import SentenceTransformer
+
+
+# Mapping of vector dimensions to appropriate models
+# Format: dimension -> list of (model_name, model_type, description)
+# Listed in order of preference for ambiguous cases
+DIMENSION_TO_MODEL = {
+    384: [
+        ("all-MiniLM-L6-v2", "sentence-transformer", "Fast text embeddings"),
+    ],
+    512: [
+        ("openai/clip-vit-base-patch32", "clip", "Multi-modal (text + images)"),
+        ("paraphrase-albert-small-v2", "sentence-transformer", "Text-only paraphrase"),
+    ],
+    768: [
+        ("all-mpnet-base-v2", "sentence-transformer", "High quality text embeddings"),
+    ],
+    1024: [
+        ("all-roberta-large-v1", "sentence-transformer", "Large text embeddings"),
+    ],
+    1536: [
+        ("gtr-t5-large", "sentence-transformer", "Very large text embeddings"),
+    ],
+}
+
+# Default model to use when dimension is unknown or not mapped
+DEFAULT_MODEL = ("all-MiniLM-L6-v2", "sentence-transformer")
+
+
+def get_model_for_dimension(dimension: int, prefer_multimodal: bool = True) -> Tuple[str, str]:
+    """
+    Get the appropriate embedding model name and type for a given vector dimension.
+
+    Args:
+        dimension: The vector dimension size
+        prefer_multimodal: If True and multiple models exist for this dimension,
+            prefer multi-modal (CLIP) over text-only models
+
+    Returns:
+        Tuple of (model_name, model_type) where model_type is "sentence-transformer" or "clip"
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        models = DIMENSION_TO_MODEL[dimension]
+        if len(models) == 1:
+            return (models[0][0], models[0][1])
+
+        # Multiple models available - apply preference
+        if prefer_multimodal:
+            # Prefer CLIP/multimodal
+            for model_name, model_type, desc in models:
+                if model_type == "clip":
+                    return (model_name, model_type)
+
+        # Default to first option
+        return (models[0][0], models[0][1])
+
+    # Find the closest dimension if exact match not found
+    closest_dim = min(DIMENSION_TO_MODEL.keys(), key=lambda x: abs(x - dimension))
+    models = DIMENSION_TO_MODEL[closest_dim]
+    return (models[0][0], models[0][1])
+
+
+def get_available_models_for_dimension(dimension: int) -> list:
+    """
+    Get all available model options for a given dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        List of tuples: [(model_name, model_type, description), ...]
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        return DIMENSION_TO_MODEL[dimension]
+    return []
+
+
+def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
+    """
+    Load an embedding model (sentence-transformer or CLIP).
+
+    Args:
+        model_name: Name of the model to load
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Loaded model (SentenceTransformer or CLIP model)
+    """
+    if model_type == "clip":
+        from transformers import CLIPModel, CLIPProcessor
+        model = CLIPModel.from_pretrained(model_name)
+        processor = CLIPProcessor.from_pretrained(model_name)
+        return (model, processor)
+    else:
+        return SentenceTransformer(model_name)
+
+
+def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
+    """
+    Encode text using the appropriate model.
+
+    Args:
+        text: Text to encode
+        model: The loaded model (SentenceTransformer or (CLIPModel, CLIPProcessor) tuple)
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Embedding vector as a list
+    """
+    if model_type == "clip":
+        import torch
+        clip_model, processor = model
+        inputs = processor(text=[text], return_tensors="pt", padding=True)
+        with torch.no_grad():
+            text_features = clip_model.get_text_features(**inputs)
+        # Normalize the features (CLIP embeddings are typically normalized)
+        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+        return text_features[0].cpu().numpy().tolist()
+    else:
+        # sentence-transformer
+        embedding = model.encode(text)
+        return embedding.tolist()
+
+
+def get_embedding_model_for_dimension(dimension: int) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
+    """
+    Get a loaded embedding model for a specific dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        Tuple of (loaded_model, model_name, model_type)
+    """
+    model_name, model_type = get_model_for_dimension(dimension)
+    model = load_embedding_model(model_name, model_type)
+    return (model, model_name, model_type)
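For orientation, a minimal usage sketch of the new vector_inspector/core/embedding_utils.py module shown above (768 is just an example dimension):

    from vector_inspector.core.embedding_utils import (
        get_model_for_dimension, load_embedding_model, encode_text,
    )

    model_name, model_type = get_model_for_dimension(768)    # -> ("all-mpnet-base-v2", "sentence-transformer")
    model = load_embedding_model(model_name, model_type)
    vector = encode_text("example query", model, model_type)  # plain Python list of floats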
@@ -169,36 +169,10 @@ class BackupRestoreService:
             print(f"Failed to generate embeddings: {e}")
             return False
 
-        # Convert IDs to Qdrant-compatible format (integers or UUIDs)
-        # Store original IDs in metadata
+        # Keep IDs as strings - Qdrant's _to_uuid method handles conversion
+        # Just ensure all IDs are strings
         original_ids = data.get("ids", [])
-        qdrant_ids = []
-        metadatas = data.get("metadatas", [])
-
-        for i, orig_id in enumerate(original_ids):
-            # Try to convert to integer, otherwise use index
-            try:
-                # If it's like "doc_123", extract the number
-                if isinstance(orig_id, str) and "_" in orig_id:
-                    qdrant_id = int(orig_id.split("_")[-1])
-                else:
-                    qdrant_id = int(orig_id)
-            except (ValueError, AttributeError):
-                # Use index as ID if can't convert
-                qdrant_id = i
-
-            qdrant_ids.append(qdrant_id)
-
-            # Store original ID in metadata
-            if i < len(metadatas):
-                if metadatas[i] is None:
-                    metadatas[i] = {}
-                metadatas[i]["original_id"] = orig_id
-            else:
-                metadatas.append({"original_id": orig_id})
-
-        data["ids"] = qdrant_ids
-        data["metadatas"] = metadatas
+        data["ids"] = [str(id_val) for id_val in original_ids]
 
         # Add items to collection
        success = connection.add_items(
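Note: Qdrant point IDs must be unsigned integers or UUIDs, which is why the old code coerced string IDs to integers and stashed the originals in metadata. The new code keeps IDs as strings and relies on the connection layer to map them; the referenced _to_uuid helper is not part of this diff, but the usual approach is a deterministic UUID along these lines (a sketch, not the package's implementation):

    import uuid

    def _to_uuid(id_str: str) -> str:
        # uuid5 is deterministic: the same string always maps to the same UUID
        return str(uuid.uuid5(uuid.NAMESPACE_URL, id_str))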
@@ -0,0 +1,130 @@ vector_inspector/services/credential_service.py (new file)
+"""Service for secure credential storage using system keychains."""
+
+from typing import Optional
+import json
+
+
+class CredentialService:
+    """Handles secure storage and retrieval of credentials using system keychains.
+
+    Falls back to in-memory storage if keyring is not available (not recommended for production).
+    """
+
+    SERVICE_NAME = "vector-inspector"
+
+    def __init__(self):
+        """Initialize credential service with keyring if available."""
+        self._use_keyring = False
+        self._memory_store = {}  # Fallback in-memory storage
+
+        try:
+            import keyring
+            self._keyring = keyring
+            self._use_keyring = True
+        except ImportError:
+            print("Warning: keyring module not available. Credentials will not be persisted securely.")
+            self._keyring = None
+
+    def store_credentials(self, profile_id: str, credentials: dict) -> bool:
+        """
+        Store credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+            credentials: Dictionary of credential data (api_key, password, etc.)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+            credential_json = json.dumps(credentials)
+
+            if self._use_keyring:
+                self._keyring.set_password(
+                    self.SERVICE_NAME,
+                    credential_key,
+                    credential_json
+                )
+            else:
+                # Fallback to in-memory (not persistent)
+                self._memory_store[credential_key] = credential_json
+
+            return True
+        except Exception as e:
+            print(f"Failed to store credentials: {e}")
+            return False
+
+    def get_credentials(self, profile_id: str) -> Optional[dict]:
+        """
+        Retrieve credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            Dictionary of credential data, or None if not found
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                credential_json = self._keyring.get_password(
+                    self.SERVICE_NAME,
+                    credential_key
+                )
+            else:
+                # Fallback to in-memory
+                credential_json = self._memory_store.get(credential_key)
+
+            if credential_json:
+                return json.loads(credential_json)
+            return None
+        except Exception as e:
+            print(f"Failed to retrieve credentials: {e}")
+            return None
+
+    def delete_credentials(self, profile_id: str) -> bool:
+        """
+        Delete stored credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                try:
+                    self._keyring.delete_password(
+                        self.SERVICE_NAME,
+                        credential_key
+                    )
+                except self._keyring.errors.PasswordDeleteError:
+                    # Credential doesn't exist, that's okay
+                    pass
+            else:
+                # Fallback to in-memory
+                self._memory_store.pop(credential_key, None)
+
+            return True
+        except Exception as e:
+            print(f"Failed to delete credentials: {e}")
+            return False
+
+    def is_keyring_available(self) -> bool:
+        """Check if system keyring is available."""
+        return self._use_keyring
+
+    def clear_all_credentials(self):
+        """Clear all stored credentials. Use with caution!"""
+        if not self._use_keyring:
+            self._memory_store.clear()
+        else:
+            # For keyring, we'd need to track all profile IDs
+            # This is typically not needed, but can be implemented if required
+            print("Warning: clear_all_credentials not implemented for keyring backend")
+
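A minimal usage sketch of the new CredentialService (the profile ID and secret are placeholders); with keyring installed the secret lands in the OS keychain, otherwise it stays in process memory only:

    from vector_inspector.services.credential_service import CredentialService

    service = CredentialService()
    service.store_credentials("my-profile", {"api_key": "..."})
    print(service.get_credentials("my-profile"))   # {'api_key': '...'}
    service.delete_credentials("my-profile")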
@@ -66,7 +66,7 @@ def apply_client_side_filters(data: Dict[str, Any], filters: List[Dict[str, Any]
         "metadatas": [metadatas[i] for i in keep_indices if i < len(metadatas)],
     }
 
-    if embeddings:
+    if embeddings is not None and len(embeddings) > 0:
         filtered_data["embeddings"] = [embeddings[i] for i in keep_indices if i < len(embeddings)]
 
     return filtered_data
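Note on the one-line filter_service.py fix: embeddings often come back as a NumPy array, and truth-testing a multi-element array raises rather than evaluating to False, so the explicit None/length check is needed. For example:

    import numpy as np

    embeddings = np.zeros((2, 384))
    # bool(embeddings)  -> ValueError: truth value of an array with more than one element is ambiguous
    if embeddings is not None and len(embeddings) > 0:
        print("embeddings present")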