vector-inspector 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. vector_inspector/core/connection_manager.py +55 -49
  2. vector_inspector/core/connections/base_connection.py +41 -41
  3. vector_inspector/core/connections/chroma_connection.py +110 -86
  4. vector_inspector/core/connections/pinecone_connection.py +168 -182
  5. vector_inspector/core/connections/qdrant_connection.py +109 -126
  6. vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
  7. vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
  8. vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
  9. vector_inspector/core/connections/template_connection.py +55 -65
  10. vector_inspector/core/embedding_utils.py +32 -32
  11. vector_inspector/core/logging.py +27 -0
  12. vector_inspector/core/model_registry.py +4 -3
  13. vector_inspector/main.py +6 -2
  14. vector_inspector/services/backup_helpers.py +63 -0
  15. vector_inspector/services/backup_restore_service.py +73 -152
  16. vector_inspector/services/credential_service.py +33 -40
  17. vector_inspector/services/import_export_service.py +70 -67
  18. vector_inspector/services/profile_service.py +92 -94
  19. vector_inspector/services/settings_service.py +68 -48
  20. vector_inspector/services/visualization_service.py +40 -39
  21. vector_inspector/ui/components/splash_window.py +57 -0
  22. vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
  23. vector_inspector/ui/main_window.py +200 -146
  24. vector_inspector/ui/views/info_panel.py +208 -127
  25. vector_inspector/ui/views/metadata_view.py +8 -7
  26. vector_inspector/ui/views/search_view.py +97 -75
  27. vector_inspector/ui/views/visualization_view.py +140 -97
  28. vector_inspector/utils/version.py +5 -0
  29. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.3.dist-info}/METADATA +3 -1
  30. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.3.dist-info}/RECORD +32 -25
  31. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.3.dist-info}/WHEEL +0 -0
  32. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -9,11 +9,12 @@ from chromadb.api.models.Collection import Collection
9
9
  from chromadb import Documents, EmbeddingFunction, Embeddings
10
10
 
11
11
  from .base_connection import VectorDBConnection
12
+ from vector_inspector.core.logging import log_info, log_error
12
13
 
13
14
 
14
15
  class DimensionAwareEmbeddingFunction(EmbeddingFunction):
15
16
  """Embedding function that selects model based on collection's expected dimension."""
16
-
17
+
17
18
  def __init__(self, expected_dimension: int):
18
19
  """Initialize with expected dimension (model loaded lazily on first use)."""
19
20
  self.expected_dimension = expected_dimension
@@ -21,22 +22,31 @@ class DimensionAwareEmbeddingFunction(EmbeddingFunction):
21
22
  self.model_name = None
22
23
  self.model_type = None
23
24
  self._initialized = False
24
-
25
+
25
26
  def _ensure_model_loaded(self):
26
27
  """Lazy load the embedding model on first use."""
27
28
  if self._initialized:
28
29
  return
29
-
30
+
30
31
  from ..embedding_utils import get_embedding_model_for_dimension
31
- print(f"[ChromaDB] Loading embedding model for {self.expected_dimension}d vectors...")
32
- self.model, self.model_name, self.model_type = get_embedding_model_for_dimension(self.expected_dimension)
33
- print(f"[ChromaDB] Using {self.model_type} model '{self.model_name}' for {self.expected_dimension}d embeddings")
32
+
33
+ log_info("[ChromaDB] Loading embedding model for %dd vectors...", self.expected_dimension)
34
+ self.model, self.model_name, self.model_type = get_embedding_model_for_dimension(
35
+ self.expected_dimension
36
+ )
37
+ log_info(
38
+ "[ChromaDB] Using %s model '%s' for %dd embeddings",
39
+ self.model_type,
40
+ self.model_name,
41
+ self.expected_dimension,
42
+ )
34
43
  self._initialized = True
35
-
44
+
36
45
  def __call__(self, input: Documents) -> Embeddings:
37
46
  """Embed documents using the dimension-appropriate model."""
38
47
  self._ensure_model_loaded()
39
48
  from ..embedding_utils import encode_text
49
+
40
50
  embeddings = []
41
51
  for text in input:
42
52
  embedding = encode_text(text, self.model, self.model_type)
@@ -46,11 +56,13 @@ class DimensionAwareEmbeddingFunction(EmbeddingFunction):
46
56
 
47
57
  class ChromaDBConnection(VectorDBConnection):
48
58
  """Manages connection to ChromaDB and provides query interface."""
49
-
50
- def __init__(self, path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None):
59
+
60
+ def __init__(
61
+ self, path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None
62
+ ):
51
63
  """
52
64
  Initialize ChromaDB connection.
53
-
65
+
54
66
  Args:
55
67
  path: Path for persistent client (local storage)
56
68
  host: Host for HTTP client
@@ -61,11 +73,11 @@ class ChromaDBConnection(VectorDBConnection):
61
73
  self.port = port
62
74
  self._client: Optional[ClientAPI] = None
63
75
  self._current_collection: Optional[Collection] = None
64
-
76
+
65
77
  def connect(self) -> bool:
66
78
  """
67
79
  Establish connection to ChromaDB.
68
-
80
+
69
81
  Returns:
70
82
  True if connection successful, False otherwise
71
83
  """
@@ -83,7 +95,7 @@ class ChromaDBConnection(VectorDBConnection):
83
95
  self._client = chromadb.Client()
84
96
  return True
85
97
  except Exception as e:
86
- print(f"Connection failed: {e}")
98
+ log_error("Connection failed: %s", e)
87
99
  return False
88
100
 
89
101
  def _resolve_path(self, input_path: str) -> str:
@@ -97,21 +109,21 @@ class ChromaDBConnection(VectorDBConnection):
97
109
  return str((parent / input_path).resolve())
98
110
  # Fallback to CWD if project root not found
99
111
  return str(Path(input_path).resolve())
100
-
112
+
101
113
  def disconnect(self):
102
114
  """Close connection to ChromaDB."""
103
115
  self._client = None
104
116
  self._current_collection = None
105
-
117
+
106
118
  @property
107
119
  def is_connected(self) -> bool:
108
120
  """Check if connected to ChromaDB."""
109
121
  return self._client is not None
110
-
122
+
111
123
  def list_collections(self) -> List[str]:
112
124
  """
113
125
  Get list of all collections.
114
-
126
+
115
127
  Returns:
116
128
  List of collection names
117
129
  """
@@ -121,9 +133,9 @@ class ChromaDBConnection(VectorDBConnection):
121
133
  collections = self._client.list_collections()
122
134
  return [col.name for col in collections]
123
135
  except Exception as e:
124
- print(f"Failed to list collections: {e}")
136
+ log_info("Failed to list collections: %s", e)
125
137
  return []
126
-
138
+
127
139
  def _get_collection_basic(self, name: str) -> Optional[Collection]:
128
140
  """Get collection without custom embedding function (for info lookup)."""
129
141
  if not self._client:
@@ -132,14 +144,14 @@ class ChromaDBConnection(VectorDBConnection):
132
144
  return self._client.get_collection(name=name)
133
145
  except Exception as e:
134
146
  return None
135
-
147
+
136
148
  def _get_embedding_function_for_collection(self, name: str) -> Optional[EmbeddingFunction]:
137
149
  """Get the appropriate embedding function for a collection based on its dimension."""
138
150
  # Get basic collection to check dimension
139
151
  basic_col = self._get_collection_basic(name)
140
152
  if not basic_col:
141
153
  return None
142
-
154
+
143
155
  try:
144
156
  # Get a sample to determine vector dimension
145
157
  sample = basic_col.get(limit=1, include=["embeddings"])
@@ -150,22 +162,28 @@ class ChromaDBConnection(VectorDBConnection):
150
162
  # Check if embedding exists and has content
151
163
  if first_embedding is not None and len(first_embedding) > 0:
152
164
  vector_dim = len(first_embedding)
153
- print(f"[ChromaDB] Collection '{name}' has {vector_dim}d vectors")
165
+ log_info("[ChromaDB] Collection '%s' has %dd vectors", name, vector_dim)
154
166
  return DimensionAwareEmbeddingFunction(vector_dim)
155
167
  except Exception as e:
156
- print(f"[ChromaDB] Failed to determine embedding function: {e}")
157
168
  import traceback
158
- traceback.print_exc()
159
-
169
+
170
+ log_error(
171
+ "[ChromaDB] Failed to determine embedding function: %s\n%s",
172
+ e,
173
+ traceback.format_exc(),
174
+ )
175
+
160
176
  return None
161
-
162
- def get_collection(self, name: str, embedding_function: Optional[EmbeddingFunction] = None) -> Optional[Collection]:
177
+
178
+ def get_collection(
179
+ self, name: str, embedding_function: Optional[EmbeddingFunction] = None
180
+ ) -> Optional[Collection]:
163
181
  """Get a collection (without overriding existing embedding function).
164
-
182
+
165
183
  Args:
166
184
  name: Collection name
167
185
  embedding_function: Optional custom embedding function (ignored if collection exists)
168
-
186
+
169
187
  Returns:
170
188
  Collection object or None if failed
171
189
  """
@@ -177,38 +195,40 @@ class ChromaDBConnection(VectorDBConnection):
177
195
  self._current_collection = self._client.get_collection(name=name)
178
196
  return self._current_collection
179
197
  except Exception as e:
180
- print(f"Failed to get collection: {e}")
198
+ log_error("Failed to get collection: %s", e)
181
199
  return None
182
-
200
+
183
201
  def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
184
202
  """
185
203
  Get collection metadata and statistics.
186
-
204
+
187
205
  Args:
188
206
  name: Collection name
189
-
207
+
190
208
  Returns:
191
209
  Dictionary with collection info
192
210
  """
193
211
  collection = self._get_collection_basic(name)
194
212
  if not collection:
195
213
  return None
196
-
214
+
197
215
  try:
198
216
  count = collection.count()
199
217
  # Get a sample to determine metadata fields and vector dimensions
200
218
  sample = collection.get(limit=1, include=["metadatas", "embeddings"])
201
219
  metadata_fields = []
202
220
  vector_dimension = "Unknown"
203
-
221
+
204
222
  if sample and sample["metadatas"]:
205
- metadata_fields = list(sample["metadatas"][0].keys()) if sample["metadatas"][0] else []
206
-
223
+ metadata_fields = (
224
+ list(sample["metadatas"][0].keys()) if sample["metadatas"][0] else []
225
+ )
226
+
207
227
  # Determine vector dimensions from embeddings
208
228
  embeddings = sample.get("embeddings") if sample else None
209
229
  if embeddings is not None and len(embeddings) > 0 and embeddings[0] is not None:
210
230
  vector_dimension = len(embeddings[0])
211
-
231
+
212
232
  # ChromaDB uses cosine distance by default (or can be configured)
213
233
  # Try to get metadata from collection if available
214
234
  distance_metric = "Cosine (default)"
@@ -230,7 +250,7 @@ class ChromaDBConnection(VectorDBConnection):
230
250
  embedding_model = col_metadata["embedding_model"]
231
251
  except:
232
252
  pass # Use default if unable to determine
233
-
253
+
234
254
  result = {
235
255
  "name": name,
236
256
  "count": count,
@@ -238,15 +258,15 @@ class ChromaDBConnection(VectorDBConnection):
238
258
  "vector_dimension": vector_dimension,
239
259
  "distance_metric": distance_metric,
240
260
  }
241
-
261
+
242
262
  if embedding_model:
243
263
  result["embedding_model"] = embedding_model
244
-
264
+
245
265
  return result
246
266
  except Exception as e:
247
- print(f"Failed to get collection info: {e}")
267
+ log_error("Failed to get collection info: %s", e)
248
268
  return None
249
-
269
+
250
270
  def query_collection(
251
271
  self,
252
272
  collection_name: str,
@@ -258,7 +278,7 @@ class ChromaDBConnection(VectorDBConnection):
258
278
  ) -> Optional[Dict[str, Any]]:
259
279
  """
260
280
  Query a collection for similar vectors.
261
-
281
+
262
282
  Args:
263
283
  collection_name: Name of collection to query
264
284
  query_texts: Text queries to embed and search
@@ -266,26 +286,28 @@ class ChromaDBConnection(VectorDBConnection):
266
286
  n_results: Number of results to return
267
287
  where: Metadata filter
268
288
  where_document: Document content filter
269
-
289
+
270
290
  Returns:
271
291
  Query results or None if failed
272
292
  """
273
- print(f"[ChromaDB] query_collection called for '{collection_name}'")
293
+ log_info("[ChromaDB] query_collection called for '%s'", collection_name)
274
294
  collection = self.get_collection(collection_name)
275
295
  if not collection:
276
- print(f"[ChromaDB] Failed to get collection '{collection_name}'")
296
+ log_error("[ChromaDB] Failed to get collection '%s'", collection_name)
277
297
  return None
278
-
298
+
279
299
  # If query_texts provided, we need to manually embed them with dimension-aware model
280
300
  if query_texts and not query_embeddings:
281
301
  embedding_function = self._get_embedding_function_for_collection(collection_name)
282
302
  if embedding_function:
283
- print(f"[ChromaDB] Manually embedding query texts with dimension-aware model")
303
+ log_info("[ChromaDB] Manually embedding query texts with dimension-aware model")
284
304
  query_embeddings = embedding_function(query_texts)
285
305
  query_texts = None # Use embeddings instead of texts
286
306
  else:
287
- print(f"[ChromaDB] Warning: Could not determine embedding function, using collection's default")
288
-
307
+ log_info(
308
+ "[ChromaDB] Warning: Could not determine embedding function, using collection's default"
309
+ )
310
+
289
311
  try:
290
312
  results = collection.query(
291
313
  query_texts=query_texts,
@@ -293,15 +315,15 @@ class ChromaDBConnection(VectorDBConnection):
293
315
  n_results=n_results,
294
316
  where=where,
295
317
  where_document=where_document, # type: ignore
296
- include=["metadatas", "documents", "distances", "embeddings"]
318
+ include=["metadatas", "documents", "distances", "embeddings"],
297
319
  )
298
320
  return cast(Dict[str, Any], results)
299
321
  except Exception as e:
300
- print(f"Query failed: {e}")
301
322
  import traceback
302
- traceback.print_exc()
323
+
324
+ log_error("Query failed: %s\n%s", e, traceback.format_exc())
303
325
  return None
304
-
326
+
305
327
  def get_all_items(
306
328
  self,
307
329
  collection_name: str,
@@ -311,32 +333,32 @@ class ChromaDBConnection(VectorDBConnection):
311
333
  ) -> Optional[Dict[str, Any]]:
312
334
  """
313
335
  Get all items from a collection.
314
-
336
+
315
337
  Args:
316
338
  collection_name: Name of collection
317
339
  limit: Maximum number of items to return
318
340
  offset: Number of items to skip
319
341
  where: Metadata filter
320
-
342
+
321
343
  Returns:
322
344
  Collection items or None if failed
323
345
  """
324
346
  collection = self.get_collection(collection_name)
325
347
  if not collection:
326
348
  return None
327
-
349
+
328
350
  try:
329
351
  results = collection.get(
330
352
  limit=limit,
331
353
  offset=offset,
332
354
  where=where,
333
- include=["metadatas", "documents", "embeddings"]
355
+ include=["metadatas", "documents", "embeddings"],
334
356
  )
335
357
  return cast(Dict[str, Any], results)
336
358
  except Exception as e:
337
- print(f"Failed to get items: {e}")
359
+ log_error("Failed to get items: %s", e)
338
360
  return None
339
-
361
+
340
362
  def add_items(
341
363
  self,
342
364
  collection_name: str,
@@ -347,33 +369,33 @@ class ChromaDBConnection(VectorDBConnection):
347
369
  ) -> bool:
348
370
  """
349
371
  Add items to a collection.
350
-
372
+
351
373
  Args:
352
374
  collection_name: Name of collection
353
375
  documents: Document texts
354
376
  metadatas: Metadata for each document
355
377
  ids: IDs for each document
356
378
  embeddings: Pre-computed embeddings
357
-
379
+
358
380
  Returns:
359
381
  True if successful, False otherwise
360
382
  """
361
383
  collection = self.get_collection(collection_name)
362
384
  if not collection:
363
385
  return False
364
-
386
+
365
387
  try:
366
388
  collection.add(
367
389
  documents=documents,
368
390
  metadatas=metadatas, # type: ignore
369
391
  ids=ids, # type: ignore
370
- embeddings=embeddings # type: ignore
392
+ embeddings=embeddings, # type: ignore
371
393
  )
372
394
  return True
373
395
  except Exception as e:
374
- print(f"Failed to add items: {e}")
396
+ log_error("Failed to add items: %s", e)
375
397
  return False
376
-
398
+
377
399
  def update_items(
378
400
  self,
379
401
  collection_name: str,
@@ -384,33 +406,33 @@ class ChromaDBConnection(VectorDBConnection):
384
406
  ) -> bool:
385
407
  """
386
408
  Update items in a collection.
387
-
409
+
388
410
  Args:
389
411
  collection_name: Name of collection
390
412
  ids: IDs of items to update
391
413
  documents: New document texts
392
414
  metadatas: New metadata
393
415
  embeddings: New embeddings
394
-
416
+
395
417
  Returns:
396
418
  True if successful, False otherwise
397
419
  """
398
420
  collection = self.get_collection(collection_name)
399
421
  if not collection:
400
422
  return False
401
-
423
+
402
424
  try:
403
425
  collection.update(
404
426
  ids=ids,
405
427
  documents=documents,
406
428
  metadatas=metadatas, # type: ignore
407
- embeddings=embeddings # type: ignore
429
+ embeddings=embeddings, # type: ignore
408
430
  )
409
431
  return True
410
432
  except Exception as e:
411
- print(f"Failed to update items: {e}")
433
+ log_error("Failed to update items: %s", e)
412
434
  return False
413
-
435
+
414
436
  def delete_items(
415
437
  self,
416
438
  collection_name: str,
@@ -419,46 +441,46 @@ class ChromaDBConnection(VectorDBConnection):
419
441
  ) -> bool:
420
442
  """
421
443
  Delete items from a collection.
422
-
444
+
423
445
  Args:
424
446
  collection_name: Name of collection
425
447
  ids: IDs of items to delete
426
448
  where: Metadata filter for items to delete
427
-
449
+
428
450
  Returns:
429
451
  True if successful, False otherwise
430
452
  """
431
453
  collection = self.get_collection(collection_name)
432
454
  if not collection:
433
455
  return False
434
-
456
+
435
457
  try:
436
458
  collection.delete(ids=ids, where=where)
437
459
  return True
438
460
  except Exception as e:
439
- print(f"Failed to delete items: {e}")
461
+ log_error("Failed to delete items: %s", e)
440
462
  return False
441
-
463
+
442
464
  def delete_collection(self, name: str) -> bool:
443
465
  """
444
466
  Delete an entire collection.
445
-
467
+
446
468
  Args:
447
469
  name: Collection name
448
-
470
+
449
471
  Returns:
450
472
  True if successful, False otherwise
451
473
  """
452
474
  if not self._client:
453
475
  return False
454
-
476
+
455
477
  try:
456
478
  self._client.delete_collection(name=name)
457
479
  if self._current_collection and self._current_collection.name == name:
458
480
  self._current_collection = None
459
481
  return True
460
482
  except Exception as e:
461
- print(f"Failed to delete collection: {e}")
483
+ log_error("Failed to delete collection: %s", e)
462
484
  return False
463
485
 
464
486
  # Implement base connection uniform APIs
@@ -489,7 +511,7 @@ class ChromaDBConnection(VectorDBConnection):
489
511
  col = self.get_collection(name)
490
512
  return col is not None
491
513
  except Exception as e:
492
- print(f"Failed to create collection: {e}")
514
+ log_error("Failed to create collection: %s", e)
493
515
  return False
494
516
 
495
517
  def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
@@ -497,7 +519,9 @@ class ChromaDBConnection(VectorDBConnection):
497
519
  col = self.get_collection(name)
498
520
  if not col:
499
521
  raise RuntimeError("Collection not available")
500
- return cast(Dict[str, Any], col.get(ids=ids, include=["metadatas", "documents", "embeddings"]))
522
+ return cast(
523
+ Dict[str, Any], col.get(ids=ids, include=["metadatas", "documents", "embeddings"])
524
+ )
501
525
 
502
526
  def count_collection(self, name: str) -> int:
503
527
  """Count items in a collection."""
@@ -508,11 +532,11 @@ class ChromaDBConnection(VectorDBConnection):
508
532
  return col.count()
509
533
  except Exception:
510
534
  return 0
511
-
535
+
512
536
  def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
513
537
  """
514
538
  Get filter operators supported by ChromaDB.
515
-
539
+
516
540
  Returns:
517
541
  List of operator dictionaries
518
542
  """