vector-inspector 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. vector_inspector/__init__.py +1 -1
  2. vector_inspector/core/connection_manager.py +91 -19
  3. vector_inspector/core/connections/base_connection.py +43 -43
  4. vector_inspector/core/connections/chroma_connection.py +1 -1
  5. vector_inspector/core/connections/pgvector_connection.py +11 -171
  6. vector_inspector/core/connections/pinecone_connection.py +596 -99
  7. vector_inspector/core/connections/qdrant_connection.py +35 -44
  8. vector_inspector/core/embedding_utils.py +14 -5
  9. vector_inspector/core/logging.py +3 -1
  10. vector_inspector/main.py +42 -15
  11. vector_inspector/services/backup_restore_service.py +228 -15
  12. vector_inspector/services/settings_service.py +71 -19
  13. vector_inspector/ui/components/backup_restore_dialog.py +215 -101
  14. vector_inspector/ui/components/connection_manager_panel.py +155 -14
  15. vector_inspector/ui/dialogs/cross_db_migration.py +126 -99
  16. vector_inspector/ui/dialogs/settings_dialog.py +13 -6
  17. vector_inspector/ui/loading_screen.py +169 -0
  18. vector_inspector/ui/main_window.py +44 -19
  19. vector_inspector/ui/services/dialog_service.py +1 -0
  20. vector_inspector/ui/views/collection_browser.py +36 -34
  21. vector_inspector/ui/views/connection_view.py +7 -1
  22. vector_inspector/ui/views/info_panel.py +118 -52
  23. vector_inspector/ui/views/metadata_view.py +30 -31
  24. vector_inspector/ui/views/search_view.py +20 -19
  25. vector_inspector/ui/views/visualization_view.py +18 -15
  26. {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/METADATA +17 -4
  27. {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/RECORD +30 -28
  28. vector_inspector-0.3.12.dist-info/licenses/LICENSE +1 -0
  29. {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/WHEEL +0 -0
  30. {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/entry_points.txt +0 -0
@@ -1,27 +1,22 @@
1
1
  """Qdrant connection manager."""
2
2
 
3
- from typing import Optional, List, Dict, Any
4
3
  import uuid
4
+ from typing import Any, Optional
5
+
5
6
  from qdrant_client import QdrantClient
6
7
  from qdrant_client.models import (
7
8
  Distance,
8
- VectorParams,
9
- PointStruct,
10
9
  Filter,
11
- FieldCondition,
12
- MatchValue,
13
- MatchText,
14
- MatchAny,
15
- MatchExcept,
16
- Range,
10
+ PointStruct,
11
+ VectorParams,
17
12
  )
18
13
 
19
14
  from vector_inspector.core.connections.base_connection import VectorDBConnection
20
- from vector_inspector.core.logging import log_info, log_error, log_debug
21
- from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
22
15
  from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import (
23
16
  resolve_embedding_model,
24
17
  )
18
+ from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
19
+ from vector_inspector.core.logging import log_error, log_info
25
20
 
26
21
 
27
22
  class QdrantConnection(VectorDBConnection):
@@ -133,7 +128,7 @@ class QdrantConnection(VectorDBConnection):
133
128
  except Exception:
134
129
  return 0
135
130
 
136
- def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
131
+ def get_items(self, name: str, ids: list[str]) -> dict[str, Any]:
137
132
  """
138
133
  Get items by IDs (implementation for compatibility).
139
134
 
@@ -162,7 +157,7 @@ class QdrantConnection(VectorDBConnection):
162
157
  log_error("Failed to get items: %s", e)
163
158
  return {"documents": [], "metadatas": []}
164
159
 
165
- def list_collections(self) -> List[str]:
160
+ def list_collections(self) -> list[str]:
166
161
  """
167
162
  Get list of all collections.
168
163
 
@@ -178,7 +173,7 @@ class QdrantConnection(VectorDBConnection):
178
173
  log_error("Failed to list collections: %s", e)
179
174
  return []
180
175
 
181
- def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
176
+ def get_collection_info(self, name: str) -> Optional[dict[str, Any]]:
182
177
  """
183
178
  Get collection metadata and statistics.
184
179
 
@@ -296,7 +291,7 @@ class QdrantConnection(VectorDBConnection):
296
291
  model = load_embedding_model(model_name, model_type)
297
292
  return (model, model_name, model_type)
298
293
 
299
- def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
294
+ def _build_qdrant_filter(self, where: Optional[dict[str, Any]] = None) -> Optional[Filter]:
300
295
  """Delegate filter construction to helper module."""
301
296
  try:
302
297
  return build_filter(where)
@@ -307,12 +302,12 @@ class QdrantConnection(VectorDBConnection):
307
302
  def query_collection(
308
303
  self,
309
304
  collection_name: str,
310
- query_texts: Optional[List[str]] = None,
311
- query_embeddings: Optional[List[List[float]]] = None,
305
+ query_texts: Optional[list[str]] = None,
306
+ query_embeddings: Optional[list[list[float]]] = None,
312
307
  n_results: int = 10,
313
- where: Optional[Dict[str, Any]] = None,
314
- where_document: Optional[Dict[str, Any]] = None,
315
- ) -> Optional[Dict[str, Any]]:
308
+ where: Optional[dict[str, Any]] = None,
309
+ where_document: Optional[dict[str, Any]] = None,
310
+ ) -> Optional[dict[str, Any]]:
316
311
  """
317
312
  Query a collection for similar vectors.
318
313
 
@@ -357,9 +352,9 @@ class QdrantConnection(VectorDBConnection):
357
352
  for query in queries:
358
353
  # Embed text queries if needed
359
354
  if isinstance(query, str):
360
- # Generate embeddings for text query using appropriate model for this collection
355
+ # Generate embeddings for text query using inherited method
361
356
  try:
362
- model, model_name, model_type = self._get_embedding_model_for_collection(
357
+ model, model_name, model_type = self.load_embedding_model_for_collection(
363
358
  collection_name
364
359
  )
365
360
 
@@ -425,8 +420,8 @@ class QdrantConnection(VectorDBConnection):
425
420
  collection_name: str,
426
421
  limit: Optional[int] = None,
427
422
  offset: Optional[int] = None,
428
- where: Optional[Dict[str, Any]] = None,
429
- ) -> Optional[Dict[str, Any]]:
423
+ where: Optional[dict[str, Any]] = None,
424
+ ) -> Optional[dict[str, Any]]:
430
425
  """
431
426
  Get all items from a collection.
432
427
 
@@ -492,10 +487,10 @@ class QdrantConnection(VectorDBConnection):
492
487
  def add_items(
493
488
  self,
494
489
  collection_name: str,
495
- documents: List[str],
496
- metadatas: Optional[List[Dict[str, Any]]] = None,
497
- ids: Optional[List[str]] = None,
498
- embeddings: Optional[List[List[float]]] = None,
490
+ documents: list[str],
491
+ metadatas: Optional[list[dict[str, Any]]] = None,
492
+ ids: Optional[list[str]] = None,
493
+ embeddings: Optional[list[list[float]]] = None,
499
494
  ) -> bool:
500
495
  """
501
496
  Add items to a collection.
@@ -532,9 +527,7 @@ class QdrantConnection(VectorDBConnection):
532
527
  embeddings = self.compute_embeddings_for_documents(
533
528
  collection_name,
534
529
  documents,
535
- getattr(self, "path", None)
536
- or getattr(self, "url", None)
537
- or getattr(self, "host", None),
530
+ getattr(self, "profile_name", None),
538
531
  )
539
532
  except Exception as e:
540
533
  log_error("Embeddings are required for Qdrant and computing them failed: %s", e)
@@ -569,10 +562,10 @@ class QdrantConnection(VectorDBConnection):
569
562
  def update_items(
570
563
  self,
571
564
  collection_name: str,
572
- ids: List[str],
573
- documents: Optional[List[str]] = None,
574
- metadatas: Optional[List[Dict[str, Any]]] = None,
575
- embeddings: Optional[List[List[float]]] = None,
565
+ ids: list[str],
566
+ documents: Optional[list[str]] = None,
567
+ metadatas: Optional[list[dict[str, Any]]] = None,
568
+ embeddings: Optional[list[list[float]]] = None,
576
569
  ) -> bool:
577
570
  """
578
571
  Update items in a collection.
@@ -627,9 +620,7 @@ class QdrantConnection(VectorDBConnection):
627
620
  computed = self.compute_embeddings_for_documents(
628
621
  collection_name,
629
622
  [documents[i]],
630
- getattr(self, "path", None)
631
- or getattr(self, "url", None)
632
- or getattr(self, "host", None),
623
+ getattr(self, "profile_name", None),
633
624
  )
634
625
  vector = computed[0] if computed else vector
635
626
  except Exception as e:
@@ -651,8 +642,8 @@ class QdrantConnection(VectorDBConnection):
651
642
  def delete_items(
652
643
  self,
653
644
  collection_name: str,
654
- ids: Optional[List[str]] = None,
655
- where: Optional[Dict[str, Any]] = None,
645
+ ids: Optional[list[str]] = None,
646
+ where: Optional[dict[str, Any]] = None,
656
647
  ) -> bool:
657
648
  """
658
649
  Delete items from a collection.
@@ -739,7 +730,7 @@ class QdrantConnection(VectorDBConnection):
739
730
  log_error(f"Failed to create collection: {e}")
740
731
  return False
741
732
 
742
- def prepare_restore(self, metadata: Dict[str, Any], data: Dict[str, Any]) -> bool:
733
+ def prepare_restore(self, metadata: dict[str, Any], data: dict[str, Any]) -> bool:
743
734
  """Provider-specific hook invoked before restoring data.
744
735
 
745
736
  The connection can use metadata and data to pre-create collections,
@@ -832,9 +823,9 @@ class QdrantConnection(VectorDBConnection):
832
823
  log_error("prepare_restore failed: %s", e)
833
824
  return False
834
825
 
835
- def get_connection_info(self) -> Dict[str, Any]:
826
+ def get_connection_info(self) -> dict[str, Any]:
836
827
  """Get information about the current connection."""
837
- info: Dict[str, Any] = {
828
+ info: dict[str, Any] = {
838
829
  "provider": "Qdrant",
839
830
  "connected": self.is_connected,
840
831
  }
@@ -852,7 +843,7 @@ class QdrantConnection(VectorDBConnection):
852
843
  info["mode"] = "memory"
853
844
  return info
854
845
 
855
- def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
846
+ def get_supported_filter_operators(self) -> list[dict[str, Any]]:
856
847
  """Get filter operators supported by Qdrant."""
857
848
  return [
858
849
  {"name": "=", "server_side": True},
@@ -1,7 +1,9 @@
1
1
  """Utilities for managing embedding models and vector dimensions."""
2
2
 
3
- from typing import Optional, Union, Tuple
4
- from sentence_transformers import SentenceTransformer
3
+ from __future__ import annotations # Allows us to use class names in typehints while lazyloading
4
+ from typing import Optional, Tuple, Any
5
+
6
+ # Lazy import: see below
5
7
  from vector_inspector.core.logging import log_info
6
8
 
7
9
  from vector_inspector.core.model_registry import get_model_registry
@@ -101,7 +103,7 @@ def get_available_models_for_dimension(dimension: int) -> list:
101
103
  return models
102
104
 
103
105
 
104
- def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
106
+ def load_embedding_model(model_name: str, model_type: str) -> SentenceTransformer | Any:
105
107
  """
106
108
  Load an embedding model (sentence-transformer or CLIP).
107
109
 
@@ -117,12 +119,16 @@ def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTran
117
119
 
118
120
  model = CLIPModel.from_pretrained(model_name)
119
121
  processor = CLIPProcessor.from_pretrained(model_name)
122
+ # Returns a tuple: (CLIPModel, CLIPProcessor)
120
123
  return (model, processor)
121
124
  else:
125
+ from sentence_transformers import SentenceTransformer
126
+
127
+ # Returns a SentenceTransformer instance
122
128
  return SentenceTransformer(model_name)
123
129
 
124
130
 
125
- def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
131
+ def encode_text(text: str, model: "SentenceTransformer" | Tuple, model_type: str) -> list:
126
132
  """
127
133
  Encode text using the appropriate model.
128
134
 
@@ -146,13 +152,15 @@ def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type:
146
152
  return text_features[0].cpu().numpy().tolist()
147
153
  else:
148
154
  # sentence-transformer
155
+ # Lazy import for type hint only
156
+ # from sentence_transformers import SentenceTransformer
149
157
  embedding = model.encode(text)
150
158
  return embedding.tolist()
151
159
 
152
160
 
153
161
  def get_embedding_model_for_dimension(
154
162
  dimension: int,
155
- ) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
163
+ ) -> Tuple["SentenceTransformer" | Tuple, str, str]:
156
164
  """
157
165
  Get a loaded embedding model for a specific dimension.
158
166
 
@@ -164,4 +172,5 @@ def get_embedding_model_for_dimension(
164
172
  """
165
173
  model_name, model_type = get_model_for_dimension(dimension)
166
174
  model = load_embedding_model(model_name, model_type)
175
+ # Returns a tuple: (loaded_model, model_name, model_type)
167
176
  return (model, model_name, model_type)
@@ -3,6 +3,7 @@
3
3
  Provides `log_info`, `log_error`, and `log_debug` helpers that delegate
4
4
  to the standard `logging` module but keep call sites concise.
5
5
  """
6
+
6
7
  import logging
7
8
  from typing import Any
8
9
 
@@ -12,7 +13,8 @@ if not _logger.handlers:
12
13
  formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
13
14
  handler.setFormatter(formatter)
14
15
  _logger.addHandler(handler)
15
- _logger.setLevel(logging.INFO)
16
+ # Default to WARNING to reduce console noise; set to DEBUG for troubleshooting
17
+ _logger.setLevel(logging.WARNING)
16
18
 
17
19
 
18
20
  def log_info(msg: str, *args: Any, **kwargs: Any) -> None:
vector_inspector/main.py CHANGED
@@ -1,10 +1,13 @@
1
1
  """Main entry point for Vector Inspector application."""
2
2
 
3
- import sys
4
3
  import os
4
+ import sys
5
+
6
+ from PySide6.QtCore import QTimer
5
7
  from PySide6.QtWidgets import QApplication
6
- from vector_inspector.ui.main_window import MainWindow
7
- from vector_inspector.core.logging import log_error
8
+
9
+ from vector_inspector import get_version
10
+ from vector_inspector.ui.loading_screen import show_loading_screen
8
11
 
9
12
  # Ensures the app looks in its own folder for the raw libraries
10
13
  sys.path.append(os.path.dirname(sys.executable))
@@ -16,24 +19,48 @@ def main():
16
19
  app.setApplicationName("Vector Inspector")
17
20
  app.setOrganizationName("Vector Inspector")
18
21
 
19
- # Telemetry: send launch ping if enabled
20
- try:
21
- from vector_inspector.services.telemetry_service import TelemetryService
22
- from vector_inspector import get_version, __version__
22
+ # Get version once for all uses
23
+ app_version = get_version()
24
+
25
+ # Show loading screen (if not disabled in settings)
26
+ loading = show_loading_screen(
27
+ app_name="Vector Inspector",
28
+ version=f"v{app_version}",
29
+ tagline="The missing toolset for your vector data",
30
+ loading_text="Initializing providers…"
31
+ )
32
+
33
+ # Heavy imports after loading screen is visible
34
+ if loading:
35
+ loading.set_loading_text("Loading main window...")
36
+ app.processEvents()
23
37
 
24
- app_version = None
38
+ from vector_inspector.core.logging import log_error
39
+ from vector_inspector.ui.main_window import MainWindow
40
+
41
+ def send_ping():
42
+ # Telemetry: send launch ping if enabled
25
43
  try:
26
- app_version = get_version()
27
- except Exception:
28
- app_version = __version__
29
- telemetry = TelemetryService()
30
- telemetry.send_launch_ping(app_version=app_version)
31
- except Exception as e:
32
- log_error(f"[Telemetry] Failed to send launch ping: {e}")
44
+ from vector_inspector.services.telemetry_service import TelemetryService
45
+
46
+ telemetry = TelemetryService()
47
+ telemetry.send_launch_ping(app_version=app_version)
48
+ except Exception as e:
49
+ log_error(f"[Telemetry] Failed to send launch ping: {e}")
50
+
51
+ if loading:
52
+ loading.set_loading_text("Preparing interface...")
53
+ app.processEvents()
33
54
 
34
55
  window = MainWindow()
35
56
  window.show()
36
57
 
58
+ # Always fade out loading screen automatically
59
+ if loading:
60
+ loading.fade_out()
61
+
62
+ QTimer.singleShot(0, lambda: send_ping())
63
+
37
64
  sys.exit(app.exec())
38
65
 
39
66
 
@@ -1,13 +1,12 @@
1
1
  """Service for backing up and restoring collections."""
2
2
 
3
- import json
4
- from typing import Dict, Any, Optional
3
+ from datetime import datetime, timezone
5
4
  from pathlib import Path
6
- from datetime import datetime
7
- import shutil
5
+ from typing import Optional
8
6
 
9
- from vector_inspector.core.logging import log_info, log_error, log_debug
10
- from .backup_helpers import write_backup_zip, read_backup_zip, normalize_embeddings
7
+ from vector_inspector.core.logging import log_debug, log_error, log_info
8
+
9
+ from .backup_helpers import normalize_embeddings, read_backup_zip, write_backup_zip
11
10
 
12
11
 
13
12
  class BackupRestoreService:
@@ -15,7 +14,11 @@ class BackupRestoreService:
15
14
 
16
15
  @staticmethod
17
16
  def backup_collection(
18
- connection, collection_name: str, backup_dir: str, include_embeddings: bool = True
17
+ connection,
18
+ collection_name: str,
19
+ backup_dir: str,
20
+ include_embeddings: bool = True,
21
+ profile_name: Optional[str] = None,
19
22
  ) -> Optional[str]:
20
23
  """
21
24
  Backup a collection to a directory.
@@ -25,6 +28,7 @@ class BackupRestoreService:
25
28
  collection_name: Name of collection to backup
26
29
  backup_dir: Directory to store backups
27
30
  include_embeddings: Whether to include embedding vectors
31
+ profile_name: Optional profile name for retrieving model config from settings
28
32
 
29
33
  Returns:
30
34
  Path to backup file or None if failed
@@ -50,13 +54,51 @@ class BackupRestoreService:
50
54
 
51
55
  backup_metadata = {
52
56
  "collection_name": collection_name,
53
- "backup_timestamp": datetime.now().isoformat(),
57
+ "backup_timestamp": datetime.now(tz=timezone.utc).isoformat(),
54
58
  "item_count": len(all_data["ids"]),
55
59
  "collection_info": collection_info,
56
60
  "include_embeddings": include_embeddings,
57
61
  }
58
-
59
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
62
+ # Include embedding model info when available to assist accurate restores
63
+ try:
64
+ embed_model = None
65
+ embed_model_type = None
66
+ # Prefer explicit collection_info entries
67
+ if collection_info and collection_info.get("embedding_model"):
68
+ embed_model = collection_info.get("embedding_model")
69
+ embed_model_type = collection_info.get("embedding_model_type")
70
+ else:
71
+ # Ask connection for a model hint (may consult settings/service)
72
+ try:
73
+ embed_model = connection.get_embedding_model(collection_name)
74
+ except Exception:
75
+ embed_model = None
76
+
77
+ # If not found yet, check app settings as a fallback
78
+ if not embed_model and profile_name:
79
+ try:
80
+ from vector_inspector.services.settings_service import SettingsService
81
+
82
+ settings = SettingsService()
83
+ model_info = settings.get_embedding_model(
84
+ profile_name,
85
+ collection_name,
86
+ )
87
+ if model_info:
88
+ embed_model = model_info.get("model")
89
+ embed_model_type = model_info.get("type", "sentence-transformer")
90
+ except Exception:
91
+ pass
92
+
93
+ if embed_model:
94
+ backup_metadata["embedding_model"] = embed_model
95
+ if embed_model_type:
96
+ backup_metadata["embedding_model_type"] = embed_model_type
97
+ except Exception as e:
98
+ # Embedding metadata is optional; log failure but do not abort backup.
99
+ log_debug("Failed to populate embedding metadata for %s: %s", collection_name, e)
100
+
101
+ timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
60
102
  backup_filename = f"{collection_name}_backup_{timestamp}.zip"
61
103
  backup_path = Path(backup_dir) / backup_filename
62
104
 
@@ -67,9 +109,14 @@ class BackupRestoreService:
67
109
  log_error("Backup failed: %s", e)
68
110
  return None
69
111
 
70
- @staticmethod
71
112
  def restore_collection(
72
- connection, backup_file: str, collection_name: Optional[str] = None, overwrite: bool = False
113
+ self,
114
+ connection,
115
+ backup_file: str,
116
+ collection_name: Optional[str] = None,
117
+ overwrite: bool = False,
118
+ recompute_embeddings: Optional[bool] = None,
119
+ profile_name: Optional[str] = None,
73
120
  ) -> bool:
74
121
  """
75
122
  Restore a collection from a backup file.
@@ -79,6 +126,11 @@ class BackupRestoreService:
79
126
  backup_file: Path to backup zip file
80
127
  collection_name: Optional new name for restored collection
81
128
  overwrite: Whether to overwrite existing collection
129
+ recompute_embeddings: How to handle embeddings during restore:
130
+ - None (default): Use stored embeddings as-is from backup (safest, fastest)
131
+ - True: Force recompute embeddings from documents using model metadata
132
+ - False: Omit embeddings entirely (documents/metadata only)
133
+ profile_name: Optional profile name for saving model config to app settings
82
134
 
83
135
  Returns:
84
136
  True if successful, False otherwise
@@ -96,8 +148,48 @@ class BackupRestoreService:
96
148
  restore_collection_name,
97
149
  )
98
150
  return False
99
- else:
100
- connection.delete_collection(restore_collection_name)
151
+ connection.delete_collection(restore_collection_name)
152
+ else:
153
+ # Collection does not exist on target; attempt to create it.
154
+ # Try to infer vector size from metadata or embedded vectors in backup.
155
+ try:
156
+ inferred_size = None
157
+ col_info = metadata.get("collection_info") if metadata else None
158
+ if (
159
+ col_info
160
+ and col_info.get("vector_dimension")
161
+ and isinstance(col_info.get("vector_dimension"), int)
162
+ ):
163
+ inferred_size = int(col_info.get("vector_dimension"))
164
+
165
+ # Fallback: inspect embeddings in backup data
166
+ if inferred_size is None and data and data.get("embeddings"):
167
+ first_emb = data.get("embeddings")[0]
168
+ if first_emb is not None:
169
+ inferred_size = len(first_emb)
170
+
171
+ # Final fallback: common default
172
+ if inferred_size is None:
173
+ log_error(
174
+ "Unable to infer vector dimension for collection %s from metadata or backup data; restore aborted.",
175
+ restore_collection_name,
176
+ )
177
+ return False
178
+
179
+ created = True
180
+ if hasattr(connection, "create_collection"):
181
+ created = connection.create_collection(
182
+ restore_collection_name, inferred_size
183
+ )
184
+
185
+ if not created:
186
+ log_error(
187
+ "Failed to create collection %s before restore", restore_collection_name
188
+ )
189
+ return False
190
+ except Exception as e:
191
+ log_error("Error while creating collection %s: %s", restore_collection_name, e)
192
+ return False
101
193
 
102
194
  # Provider-specific preparation hook
103
195
  if hasattr(connection, "prepare_restore"):
@@ -109,17 +201,138 @@ class BackupRestoreService:
109
201
  # Ensure embeddings normalized
110
202
  data = normalize_embeddings(data)
111
203
 
204
+ # Decide how to handle embeddings based on user choice
205
+ embeddings_to_use = None
206
+ stored_embeddings = data.get("embeddings")
207
+
208
+ if recompute_embeddings is False:
209
+ # User explicitly chose to omit embeddings
210
+ log_info("Restoring without embeddings (user choice)")
211
+ embeddings_to_use = None
212
+
213
+ elif recompute_embeddings is True:
214
+ # User explicitly chose to recompute embeddings
215
+ log_info("Recomputing embeddings from documents")
216
+ try:
217
+ from vector_inspector.core.embedding_utils import (
218
+ encode_text,
219
+ load_embedding_model,
220
+ )
221
+
222
+ model_name = metadata.get("embedding_model") if metadata else None
223
+ docs = data.get("documents", [])
224
+
225
+ if not model_name:
226
+ log_error(
227
+ "Cannot recompute: No embedding model available in backup metadata"
228
+ )
229
+ embeddings_to_use = None
230
+ elif not docs:
231
+ log_error("Cannot recompute: No documents available in backup")
232
+ embeddings_to_use = None
233
+ else:
234
+ model_type = metadata.get("embedding_model_type", "sentence-transformer")
235
+ log_info("Loading embedding model: %s (%s)", model_name, model_type)
236
+ model = load_embedding_model(model_name, model_type)
237
+ new_embeddings = []
238
+ if model_type == "clip":
239
+ # CLIP: encode per-document
240
+ for d in docs:
241
+ new_embeddings.append(encode_text(d, model, model_type))
242
+ else:
243
+ # sentence-transformer supports batch encode
244
+ new_embeddings = model.encode(docs, show_progress_bar=False).tolist()
245
+
246
+ embeddings_to_use = new_embeddings
247
+ log_info("Successfully recomputed %d embeddings", len(new_embeddings))
248
+ except Exception as e:
249
+ log_error("Failed to recompute embeddings: %s", e)
250
+ embeddings_to_use = None
251
+
252
+ else:
253
+ # Default (None): Use stored embeddings as-is if available
254
+ if stored_embeddings:
255
+ # Check dimension compatibility with target collection
256
+ try:
257
+ if stored_embeddings and len(stored_embeddings) > 0:
258
+ stored_dim = len(stored_embeddings[0])
259
+ target_dim = inferred_size # We already calculated this above
260
+
261
+ if stored_dim == target_dim:
262
+ log_info(
263
+ "Using stored embeddings from backup (dimension: %d)",
264
+ stored_dim,
265
+ )
266
+ embeddings_to_use = stored_embeddings
267
+ else:
268
+ log_error(
269
+ "Dimension mismatch: backup has %d, target needs %d. Omitting embeddings.",
270
+ stored_dim,
271
+ target_dim,
272
+ )
273
+ embeddings_to_use = None
274
+ else:
275
+ embeddings_to_use = stored_embeddings
276
+ except Exception as e:
277
+ log_error("Error checking embedding dimensions: %s", e)
278
+ # Try to use them anyway
279
+ embeddings_to_use = stored_embeddings
280
+ else:
281
+ log_info("No embeddings in backup to restore")
282
+ embeddings_to_use = None
283
+
112
284
  success = connection.add_items(
113
285
  restore_collection_name,
114
286
  documents=data.get("documents", []),
115
287
  metadatas=data.get("metadatas"),
116
288
  ids=data.get("ids"),
117
- embeddings=data.get("embeddings"),
289
+ embeddings=embeddings_to_use,
118
290
  )
119
291
 
120
292
  if success:
121
293
  log_info("Collection '%s' restored from backup", restore_collection_name)
122
294
  log_info("Restored %d items", len(data.get("ids", [])))
295
+
296
+ # Save model config to app settings if available
297
+ if profile_name and restore_collection_name and metadata:
298
+ try:
299
+ embed_model = metadata.get("embedding_model")
300
+ embed_model_type = metadata.get(
301
+ "embedding_model_type", "sentence-transformer"
302
+ )
303
+ if embed_model:
304
+ from vector_inspector.services.settings_service import SettingsService
305
+
306
+ settings = SettingsService()
307
+ settings.save_embedding_model(
308
+ profile_name,
309
+ restore_collection_name,
310
+ embed_model,
311
+ embed_model_type,
312
+ )
313
+ log_info(
314
+ "Saved model config to settings: %s (%s)",
315
+ embed_model,
316
+ embed_model_type,
317
+ )
318
+ except Exception as e:
319
+ log_error("Failed to save model config to settings: %s", e)
320
+
321
+ # Clear the cache for this collection so the info panel gets fresh data
322
+ if profile_name and restore_collection_name:
323
+ try:
324
+ from vector_inspector.core.cache_manager import get_cache_manager
325
+
326
+ cache = get_cache_manager()
327
+ # Use profile_name as the database identifier for cache
328
+ cache.invalidate(profile_name, restore_collection_name)
329
+ log_info(
330
+ "Cleared cache for restored collection: %s",
331
+ restore_collection_name,
332
+ )
333
+ except Exception as e:
334
+ log_error("Failed to clear cache after restore: %s", e)
335
+
123
336
  return True
124
337
 
125
338
  # Failure: attempt cleanup