vector-inspector 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/__init__.py +1 -1
- vector_inspector/core/connection_manager.py +91 -19
- vector_inspector/core/connections/base_connection.py +43 -43
- vector_inspector/core/connections/chroma_connection.py +1 -1
- vector_inspector/core/connections/pgvector_connection.py +11 -171
- vector_inspector/core/connections/pinecone_connection.py +596 -99
- vector_inspector/core/connections/qdrant_connection.py +35 -44
- vector_inspector/core/embedding_utils.py +14 -5
- vector_inspector/core/logging.py +3 -1
- vector_inspector/main.py +42 -15
- vector_inspector/services/backup_restore_service.py +228 -15
- vector_inspector/services/settings_service.py +71 -19
- vector_inspector/ui/components/backup_restore_dialog.py +215 -101
- vector_inspector/ui/components/connection_manager_panel.py +155 -14
- vector_inspector/ui/dialogs/cross_db_migration.py +126 -99
- vector_inspector/ui/dialogs/settings_dialog.py +13 -6
- vector_inspector/ui/loading_screen.py +169 -0
- vector_inspector/ui/main_window.py +44 -19
- vector_inspector/ui/services/dialog_service.py +1 -0
- vector_inspector/ui/views/collection_browser.py +36 -34
- vector_inspector/ui/views/connection_view.py +7 -1
- vector_inspector/ui/views/info_panel.py +118 -52
- vector_inspector/ui/views/metadata_view.py +30 -31
- vector_inspector/ui/views/search_view.py +20 -19
- vector_inspector/ui/views/visualization_view.py +18 -15
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/METADATA +17 -4
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/RECORD +30 -28
- vector_inspector-0.3.12.dist-info/licenses/LICENSE +1 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/entry_points.txt +0 -0
|
@@ -1,27 +1,22 @@
|
|
|
1
1
|
"""Qdrant connection manager."""
|
|
2
2
|
|
|
3
|
-
from typing import Optional, List, Dict, Any
|
|
4
3
|
import uuid
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
5
6
|
from qdrant_client import QdrantClient
|
|
6
7
|
from qdrant_client.models import (
|
|
7
8
|
Distance,
|
|
8
|
-
VectorParams,
|
|
9
|
-
PointStruct,
|
|
10
9
|
Filter,
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
MatchText,
|
|
14
|
-
MatchAny,
|
|
15
|
-
MatchExcept,
|
|
16
|
-
Range,
|
|
10
|
+
PointStruct,
|
|
11
|
+
VectorParams,
|
|
17
12
|
)
|
|
18
13
|
|
|
19
14
|
from vector_inspector.core.connections.base_connection import VectorDBConnection
|
|
20
|
-
from vector_inspector.core.logging import log_info, log_error, log_debug
|
|
21
|
-
from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
|
|
22
15
|
from vector_inspector.core.connections.qdrant_helpers.qdrant_embedding_resolver import (
|
|
23
16
|
resolve_embedding_model,
|
|
24
17
|
)
|
|
18
|
+
from vector_inspector.core.connections.qdrant_helpers.qdrant_filter_builder import build_filter
|
|
19
|
+
from vector_inspector.core.logging import log_error, log_info
|
|
25
20
|
|
|
26
21
|
|
|
27
22
|
class QdrantConnection(VectorDBConnection):
|
|
@@ -133,7 +128,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
133
128
|
except Exception:
|
|
134
129
|
return 0
|
|
135
130
|
|
|
136
|
-
def get_items(self, name: str, ids:
|
|
131
|
+
def get_items(self, name: str, ids: list[str]) -> dict[str, Any]:
|
|
137
132
|
"""
|
|
138
133
|
Get items by IDs (implementation for compatibility).
|
|
139
134
|
|
|
@@ -162,7 +157,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
162
157
|
log_error("Failed to get items: %s", e)
|
|
163
158
|
return {"documents": [], "metadatas": []}
|
|
164
159
|
|
|
165
|
-
def list_collections(self) ->
|
|
160
|
+
def list_collections(self) -> list[str]:
|
|
166
161
|
"""
|
|
167
162
|
Get list of all collections.
|
|
168
163
|
|
|
@@ -178,7 +173,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
178
173
|
log_error("Failed to list collections: %s", e)
|
|
179
174
|
return []
|
|
180
175
|
|
|
181
|
-
def get_collection_info(self, name: str) -> Optional[
|
|
176
|
+
def get_collection_info(self, name: str) -> Optional[dict[str, Any]]:
|
|
182
177
|
"""
|
|
183
178
|
Get collection metadata and statistics.
|
|
184
179
|
|
|
@@ -296,7 +291,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
296
291
|
model = load_embedding_model(model_name, model_type)
|
|
297
292
|
return (model, model_name, model_type)
|
|
298
293
|
|
|
299
|
-
def _build_qdrant_filter(self, where: Optional[
|
|
294
|
+
def _build_qdrant_filter(self, where: Optional[dict[str, Any]] = None) -> Optional[Filter]:
|
|
300
295
|
"""Delegate filter construction to helper module."""
|
|
301
296
|
try:
|
|
302
297
|
return build_filter(where)
|
|
@@ -307,12 +302,12 @@ class QdrantConnection(VectorDBConnection):
|
|
|
307
302
|
def query_collection(
|
|
308
303
|
self,
|
|
309
304
|
collection_name: str,
|
|
310
|
-
query_texts: Optional[
|
|
311
|
-
query_embeddings: Optional[
|
|
305
|
+
query_texts: Optional[list[str]] = None,
|
|
306
|
+
query_embeddings: Optional[list[list[float]]] = None,
|
|
312
307
|
n_results: int = 10,
|
|
313
|
-
where: Optional[
|
|
314
|
-
where_document: Optional[
|
|
315
|
-
) -> Optional[
|
|
308
|
+
where: Optional[dict[str, Any]] = None,
|
|
309
|
+
where_document: Optional[dict[str, Any]] = None,
|
|
310
|
+
) -> Optional[dict[str, Any]]:
|
|
316
311
|
"""
|
|
317
312
|
Query a collection for similar vectors.
|
|
318
313
|
|
|
@@ -357,9 +352,9 @@ class QdrantConnection(VectorDBConnection):
|
|
|
357
352
|
for query in queries:
|
|
358
353
|
# Embed text queries if needed
|
|
359
354
|
if isinstance(query, str):
|
|
360
|
-
# Generate embeddings for text query using
|
|
355
|
+
# Generate embeddings for text query using inherited method
|
|
361
356
|
try:
|
|
362
|
-
model, model_name, model_type = self.
|
|
357
|
+
model, model_name, model_type = self.load_embedding_model_for_collection(
|
|
363
358
|
collection_name
|
|
364
359
|
)
|
|
365
360
|
|
|
@@ -425,8 +420,8 @@ class QdrantConnection(VectorDBConnection):
|
|
|
425
420
|
collection_name: str,
|
|
426
421
|
limit: Optional[int] = None,
|
|
427
422
|
offset: Optional[int] = None,
|
|
428
|
-
where: Optional[
|
|
429
|
-
) -> Optional[
|
|
423
|
+
where: Optional[dict[str, Any]] = None,
|
|
424
|
+
) -> Optional[dict[str, Any]]:
|
|
430
425
|
"""
|
|
431
426
|
Get all items from a collection.
|
|
432
427
|
|
|
@@ -492,10 +487,10 @@ class QdrantConnection(VectorDBConnection):
|
|
|
492
487
|
def add_items(
|
|
493
488
|
self,
|
|
494
489
|
collection_name: str,
|
|
495
|
-
documents:
|
|
496
|
-
metadatas: Optional[
|
|
497
|
-
ids: Optional[
|
|
498
|
-
embeddings: Optional[
|
|
490
|
+
documents: list[str],
|
|
491
|
+
metadatas: Optional[list[dict[str, Any]]] = None,
|
|
492
|
+
ids: Optional[list[str]] = None,
|
|
493
|
+
embeddings: Optional[list[list[float]]] = None,
|
|
499
494
|
) -> bool:
|
|
500
495
|
"""
|
|
501
496
|
Add items to a collection.
|
|
@@ -532,9 +527,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
532
527
|
embeddings = self.compute_embeddings_for_documents(
|
|
533
528
|
collection_name,
|
|
534
529
|
documents,
|
|
535
|
-
getattr(self, "
|
|
536
|
-
or getattr(self, "url", None)
|
|
537
|
-
or getattr(self, "host", None),
|
|
530
|
+
getattr(self, "profile_name", None),
|
|
538
531
|
)
|
|
539
532
|
except Exception as e:
|
|
540
533
|
log_error("Embeddings are required for Qdrant and computing them failed: %s", e)
|
|
@@ -569,10 +562,10 @@ class QdrantConnection(VectorDBConnection):
|
|
|
569
562
|
def update_items(
|
|
570
563
|
self,
|
|
571
564
|
collection_name: str,
|
|
572
|
-
ids:
|
|
573
|
-
documents: Optional[
|
|
574
|
-
metadatas: Optional[
|
|
575
|
-
embeddings: Optional[
|
|
565
|
+
ids: list[str],
|
|
566
|
+
documents: Optional[list[str]] = None,
|
|
567
|
+
metadatas: Optional[list[dict[str, Any]]] = None,
|
|
568
|
+
embeddings: Optional[list[list[float]]] = None,
|
|
576
569
|
) -> bool:
|
|
577
570
|
"""
|
|
578
571
|
Update items in a collection.
|
|
@@ -627,9 +620,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
627
620
|
computed = self.compute_embeddings_for_documents(
|
|
628
621
|
collection_name,
|
|
629
622
|
[documents[i]],
|
|
630
|
-
getattr(self, "
|
|
631
|
-
or getattr(self, "url", None)
|
|
632
|
-
or getattr(self, "host", None),
|
|
623
|
+
getattr(self, "profile_name", None),
|
|
633
624
|
)
|
|
634
625
|
vector = computed[0] if computed else vector
|
|
635
626
|
except Exception as e:
|
|
@@ -651,8 +642,8 @@ class QdrantConnection(VectorDBConnection):
|
|
|
651
642
|
def delete_items(
|
|
652
643
|
self,
|
|
653
644
|
collection_name: str,
|
|
654
|
-
ids: Optional[
|
|
655
|
-
where: Optional[
|
|
645
|
+
ids: Optional[list[str]] = None,
|
|
646
|
+
where: Optional[dict[str, Any]] = None,
|
|
656
647
|
) -> bool:
|
|
657
648
|
"""
|
|
658
649
|
Delete items from a collection.
|
|
@@ -739,7 +730,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
739
730
|
log_error(f"Failed to create collection: {e}")
|
|
740
731
|
return False
|
|
741
732
|
|
|
742
|
-
def prepare_restore(self, metadata:
|
|
733
|
+
def prepare_restore(self, metadata: dict[str, Any], data: dict[str, Any]) -> bool:
|
|
743
734
|
"""Provider-specific hook invoked before restoring data.
|
|
744
735
|
|
|
745
736
|
The connection can use metadata and data to pre-create collections,
|
|
@@ -832,9 +823,9 @@ class QdrantConnection(VectorDBConnection):
|
|
|
832
823
|
log_error("prepare_restore failed: %s", e)
|
|
833
824
|
return False
|
|
834
825
|
|
|
835
|
-
def get_connection_info(self) ->
|
|
826
|
+
def get_connection_info(self) -> dict[str, Any]:
|
|
836
827
|
"""Get information about the current connection."""
|
|
837
|
-
info:
|
|
828
|
+
info: dict[str, Any] = {
|
|
838
829
|
"provider": "Qdrant",
|
|
839
830
|
"connected": self.is_connected,
|
|
840
831
|
}
|
|
@@ -852,7 +843,7 @@ class QdrantConnection(VectorDBConnection):
|
|
|
852
843
|
info["mode"] = "memory"
|
|
853
844
|
return info
|
|
854
845
|
|
|
855
|
-
def get_supported_filter_operators(self) ->
|
|
846
|
+
def get_supported_filter_operators(self) -> list[dict[str, Any]]:
|
|
856
847
|
"""Get filter operators supported by Qdrant."""
|
|
857
848
|
return [
|
|
858
849
|
{"name": "=", "server_side": True},
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Utilities for managing embedding models and vector dimensions."""
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from
|
|
3
|
+
from __future__ import annotations # Allows us to use class names in typehints while lazyloading
|
|
4
|
+
from typing import Optional, Tuple, Any
|
|
5
|
+
|
|
6
|
+
# Lazy import: see below
|
|
5
7
|
from vector_inspector.core.logging import log_info
|
|
6
8
|
|
|
7
9
|
from vector_inspector.core.model_registry import get_model_registry
|
|
@@ -101,7 +103,7 @@ def get_available_models_for_dimension(dimension: int) -> list:
|
|
|
101
103
|
return models
|
|
102
104
|
|
|
103
105
|
|
|
104
|
-
def load_embedding_model(model_name: str, model_type: str) ->
|
|
106
|
+
def load_embedding_model(model_name: str, model_type: str) -> SentenceTransformer | Any:
|
|
105
107
|
"""
|
|
106
108
|
Load an embedding model (sentence-transformer or CLIP).
|
|
107
109
|
|
|
@@ -117,12 +119,16 @@ def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTran
|
|
|
117
119
|
|
|
118
120
|
model = CLIPModel.from_pretrained(model_name)
|
|
119
121
|
processor = CLIPProcessor.from_pretrained(model_name)
|
|
122
|
+
# Returns a tuple: (CLIPModel, CLIPProcessor)
|
|
120
123
|
return (model, processor)
|
|
121
124
|
else:
|
|
125
|
+
from sentence_transformers import SentenceTransformer
|
|
126
|
+
|
|
127
|
+
# Returns a SentenceTransformer instance
|
|
122
128
|
return SentenceTransformer(model_name)
|
|
123
129
|
|
|
124
130
|
|
|
125
|
-
def encode_text(text: str, model:
|
|
131
|
+
def encode_text(text: str, model: "SentenceTransformer" | Tuple, model_type: str) -> list:
|
|
126
132
|
"""
|
|
127
133
|
Encode text using the appropriate model.
|
|
128
134
|
|
|
@@ -146,13 +152,15 @@ def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type:
|
|
|
146
152
|
return text_features[0].cpu().numpy().tolist()
|
|
147
153
|
else:
|
|
148
154
|
# sentence-transformer
|
|
155
|
+
# Lazy import for type hint only
|
|
156
|
+
# from sentence_transformers import SentenceTransformer
|
|
149
157
|
embedding = model.encode(text)
|
|
150
158
|
return embedding.tolist()
|
|
151
159
|
|
|
152
160
|
|
|
153
161
|
def get_embedding_model_for_dimension(
|
|
154
162
|
dimension: int,
|
|
155
|
-
) -> Tuple[
|
|
163
|
+
) -> Tuple["SentenceTransformer" | Tuple, str, str]:
|
|
156
164
|
"""
|
|
157
165
|
Get a loaded embedding model for a specific dimension.
|
|
158
166
|
|
|
@@ -164,4 +172,5 @@ def get_embedding_model_for_dimension(
|
|
|
164
172
|
"""
|
|
165
173
|
model_name, model_type = get_model_for_dimension(dimension)
|
|
166
174
|
model = load_embedding_model(model_name, model_type)
|
|
175
|
+
# Returns a tuple: (loaded_model, model_name, model_type)
|
|
167
176
|
return (model, model_name, model_type)
|
vector_inspector/core/logging.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
Provides `log_info`, `log_error`, and `log_debug` helpers that delegate
|
|
4
4
|
to the standard `logging` module but keep call sites concise.
|
|
5
5
|
"""
|
|
6
|
+
|
|
6
7
|
import logging
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
@@ -12,7 +13,8 @@ if not _logger.handlers:
|
|
|
12
13
|
formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
|
|
13
14
|
handler.setFormatter(formatter)
|
|
14
15
|
_logger.addHandler(handler)
|
|
15
|
-
|
|
16
|
+
# Default to WARNING to reduce console noise; set to DEBUG for troubleshooting
|
|
17
|
+
_logger.setLevel(logging.WARNING)
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def log_info(msg: str, *args: Any, **kwargs: Any) -> None:
|
vector_inspector/main.py
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
"""Main entry point for Vector Inspector application."""
|
|
2
2
|
|
|
3
|
-
import sys
|
|
4
3
|
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from PySide6.QtCore import QTimer
|
|
5
7
|
from PySide6.QtWidgets import QApplication
|
|
6
|
-
|
|
7
|
-
from vector_inspector
|
|
8
|
+
|
|
9
|
+
from vector_inspector import get_version
|
|
10
|
+
from vector_inspector.ui.loading_screen import show_loading_screen
|
|
8
11
|
|
|
9
12
|
# Ensures the app looks in its own folder for the raw libraries
|
|
10
13
|
sys.path.append(os.path.dirname(sys.executable))
|
|
@@ -16,24 +19,48 @@ def main():
|
|
|
16
19
|
app.setApplicationName("Vector Inspector")
|
|
17
20
|
app.setOrganizationName("Vector Inspector")
|
|
18
21
|
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
22
|
+
# Get version once for all uses
|
|
23
|
+
app_version = get_version()
|
|
24
|
+
|
|
25
|
+
# Show loading screen (if not disabled in settings)
|
|
26
|
+
loading = show_loading_screen(
|
|
27
|
+
app_name="Vector Inspector",
|
|
28
|
+
version=f"v{app_version}",
|
|
29
|
+
tagline="The missing toolset for your vector data",
|
|
30
|
+
loading_text="Initializing providers…"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Heavy imports after loading screen is visible
|
|
34
|
+
if loading:
|
|
35
|
+
loading.set_loading_text("Loading main window...")
|
|
36
|
+
app.processEvents()
|
|
23
37
|
|
|
24
|
-
|
|
38
|
+
from vector_inspector.core.logging import log_error
|
|
39
|
+
from vector_inspector.ui.main_window import MainWindow
|
|
40
|
+
|
|
41
|
+
def send_ping():
|
|
42
|
+
# Telemetry: send launch ping if enabled
|
|
25
43
|
try:
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
44
|
+
from vector_inspector.services.telemetry_service import TelemetryService
|
|
45
|
+
|
|
46
|
+
telemetry = TelemetryService()
|
|
47
|
+
telemetry.send_launch_ping(app_version=app_version)
|
|
48
|
+
except Exception as e:
|
|
49
|
+
log_error(f"[Telemetry] Failed to send launch ping: {e}")
|
|
50
|
+
|
|
51
|
+
if loading:
|
|
52
|
+
loading.set_loading_text("Preparing interface...")
|
|
53
|
+
app.processEvents()
|
|
33
54
|
|
|
34
55
|
window = MainWindow()
|
|
35
56
|
window.show()
|
|
36
57
|
|
|
58
|
+
# Always fade out loading screen automatically
|
|
59
|
+
if loading:
|
|
60
|
+
loading.fade_out()
|
|
61
|
+
|
|
62
|
+
QTimer.singleShot(0, lambda: send_ping())
|
|
63
|
+
|
|
37
64
|
sys.exit(app.exec())
|
|
38
65
|
|
|
39
66
|
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
"""Service for backing up and restoring collections."""
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Dict, Any, Optional
|
|
3
|
+
from datetime import datetime, timezone
|
|
5
4
|
from pathlib import Path
|
|
6
|
-
from
|
|
7
|
-
import shutil
|
|
5
|
+
from typing import Optional
|
|
8
6
|
|
|
9
|
-
from vector_inspector.core.logging import
|
|
10
|
-
|
|
7
|
+
from vector_inspector.core.logging import log_debug, log_error, log_info
|
|
8
|
+
|
|
9
|
+
from .backup_helpers import normalize_embeddings, read_backup_zip, write_backup_zip
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
class BackupRestoreService:
|
|
@@ -15,7 +14,11 @@ class BackupRestoreService:
|
|
|
15
14
|
|
|
16
15
|
@staticmethod
|
|
17
16
|
def backup_collection(
|
|
18
|
-
connection,
|
|
17
|
+
connection,
|
|
18
|
+
collection_name: str,
|
|
19
|
+
backup_dir: str,
|
|
20
|
+
include_embeddings: bool = True,
|
|
21
|
+
profile_name: Optional[str] = None,
|
|
19
22
|
) -> Optional[str]:
|
|
20
23
|
"""
|
|
21
24
|
Backup a collection to a directory.
|
|
@@ -25,6 +28,7 @@ class BackupRestoreService:
|
|
|
25
28
|
collection_name: Name of collection to backup
|
|
26
29
|
backup_dir: Directory to store backups
|
|
27
30
|
include_embeddings: Whether to include embedding vectors
|
|
31
|
+
connection_id: Optional connection ID for retrieving model config from settings
|
|
28
32
|
|
|
29
33
|
Returns:
|
|
30
34
|
Path to backup file or None if failed
|
|
@@ -50,13 +54,51 @@ class BackupRestoreService:
|
|
|
50
54
|
|
|
51
55
|
backup_metadata = {
|
|
52
56
|
"collection_name": collection_name,
|
|
53
|
-
"backup_timestamp": datetime.now().isoformat(),
|
|
57
|
+
"backup_timestamp": datetime.now(tz=timezone.utc).isoformat(),
|
|
54
58
|
"item_count": len(all_data["ids"]),
|
|
55
59
|
"collection_info": collection_info,
|
|
56
60
|
"include_embeddings": include_embeddings,
|
|
57
61
|
}
|
|
58
|
-
|
|
59
|
-
|
|
62
|
+
# Include embedding model info when available to assist accurate restores
|
|
63
|
+
try:
|
|
64
|
+
embed_model = None
|
|
65
|
+
embed_model_type = None
|
|
66
|
+
# Prefer explicit collection_info entries
|
|
67
|
+
if collection_info and collection_info.get("embedding_model"):
|
|
68
|
+
embed_model = collection_info.get("embedding_model")
|
|
69
|
+
embed_model_type = collection_info.get("embedding_model_type")
|
|
70
|
+
else:
|
|
71
|
+
# Ask connection for a model hint (may consult settings/service)
|
|
72
|
+
try:
|
|
73
|
+
embed_model = connection.get_embedding_model(collection_name)
|
|
74
|
+
except Exception:
|
|
75
|
+
embed_model = None
|
|
76
|
+
|
|
77
|
+
# If not found yet, check app settings as a fallback
|
|
78
|
+
if not embed_model and profile_name:
|
|
79
|
+
try:
|
|
80
|
+
from vector_inspector.services.settings_service import SettingsService
|
|
81
|
+
|
|
82
|
+
settings = SettingsService()
|
|
83
|
+
model_info = settings.get_embedding_model(
|
|
84
|
+
profile_name,
|
|
85
|
+
collection_name,
|
|
86
|
+
)
|
|
87
|
+
if model_info:
|
|
88
|
+
embed_model = model_info.get("model")
|
|
89
|
+
embed_model_type = model_info.get("type", "sentence-transformer")
|
|
90
|
+
except Exception:
|
|
91
|
+
pass
|
|
92
|
+
|
|
93
|
+
if embed_model:
|
|
94
|
+
backup_metadata["embedding_model"] = embed_model
|
|
95
|
+
if embed_model_type:
|
|
96
|
+
backup_metadata["embedding_model_type"] = embed_model_type
|
|
97
|
+
except Exception as e:
|
|
98
|
+
# Embedding metadata is optional; log failure but do not abort backup.
|
|
99
|
+
log_debug("Failed to populate embedding metadata for %s: %s", collection_name, e)
|
|
100
|
+
|
|
101
|
+
timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
|
|
60
102
|
backup_filename = f"{collection_name}_backup_{timestamp}.zip"
|
|
61
103
|
backup_path = Path(backup_dir) / backup_filename
|
|
62
104
|
|
|
@@ -67,9 +109,14 @@ class BackupRestoreService:
|
|
|
67
109
|
log_error("Backup failed: %s", e)
|
|
68
110
|
return None
|
|
69
111
|
|
|
70
|
-
@staticmethod
|
|
71
112
|
def restore_collection(
|
|
72
|
-
|
|
113
|
+
self,
|
|
114
|
+
connection,
|
|
115
|
+
backup_file: str,
|
|
116
|
+
collection_name: Optional[str] = None,
|
|
117
|
+
overwrite: bool = False,
|
|
118
|
+
recompute_embeddings: Optional[bool] = None,
|
|
119
|
+
profile_name: Optional[str] = None,
|
|
73
120
|
) -> bool:
|
|
74
121
|
"""
|
|
75
122
|
Restore a collection from a backup file.
|
|
@@ -79,6 +126,11 @@ class BackupRestoreService:
|
|
|
79
126
|
backup_file: Path to backup zip file
|
|
80
127
|
collection_name: Optional new name for restored collection
|
|
81
128
|
overwrite: Whether to overwrite existing collection
|
|
129
|
+
recompute_embeddings: How to handle embeddings during restore:
|
|
130
|
+
- None (default): Use stored embeddings as-is from backup (safest, fastest)
|
|
131
|
+
- True: Force recompute embeddings from documents using model metadata
|
|
132
|
+
- False: Omit embeddings entirely (documents/metadata only)
|
|
133
|
+
connection_id: Optional connection ID for saving model config to app settings
|
|
82
134
|
|
|
83
135
|
Returns:
|
|
84
136
|
True if successful, False otherwise
|
|
@@ -96,8 +148,48 @@ class BackupRestoreService:
|
|
|
96
148
|
restore_collection_name,
|
|
97
149
|
)
|
|
98
150
|
return False
|
|
99
|
-
|
|
100
|
-
|
|
151
|
+
connection.delete_collection(restore_collection_name)
|
|
152
|
+
else:
|
|
153
|
+
# Collection does not exist on target; attempt to create it.
|
|
154
|
+
# Try to infer vector size from metadata or embedded vectors in backup.
|
|
155
|
+
try:
|
|
156
|
+
inferred_size = None
|
|
157
|
+
col_info = metadata.get("collection_info") if metadata else None
|
|
158
|
+
if (
|
|
159
|
+
col_info
|
|
160
|
+
and col_info.get("vector_dimension")
|
|
161
|
+
and isinstance(col_info.get("vector_dimension"), int)
|
|
162
|
+
):
|
|
163
|
+
inferred_size = int(col_info.get("vector_dimension"))
|
|
164
|
+
|
|
165
|
+
# Fallback: inspect embeddings in backup data
|
|
166
|
+
if inferred_size is None and data and data.get("embeddings"):
|
|
167
|
+
first_emb = data.get("embeddings")[0]
|
|
168
|
+
if first_emb is not None:
|
|
169
|
+
inferred_size = len(first_emb)
|
|
170
|
+
|
|
171
|
+
# Final fallback: common default
|
|
172
|
+
if inferred_size is None:
|
|
173
|
+
log_error(
|
|
174
|
+
"Unable to infer vector dimension for collection %s from metadata or backup data; restore aborted.",
|
|
175
|
+
restore_collection_name,
|
|
176
|
+
)
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
created = True
|
|
180
|
+
if hasattr(connection, "create_collection"):
|
|
181
|
+
created = connection.create_collection(
|
|
182
|
+
restore_collection_name, inferred_size
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if not created:
|
|
186
|
+
log_error(
|
|
187
|
+
"Failed to create collection %s before restore", restore_collection_name
|
|
188
|
+
)
|
|
189
|
+
return False
|
|
190
|
+
except Exception as e:
|
|
191
|
+
log_error("Error while creating collection %s: %s", restore_collection_name, e)
|
|
192
|
+
return False
|
|
101
193
|
|
|
102
194
|
# Provider-specific preparation hook
|
|
103
195
|
if hasattr(connection, "prepare_restore"):
|
|
@@ -109,17 +201,138 @@ class BackupRestoreService:
|
|
|
109
201
|
# Ensure embeddings normalized
|
|
110
202
|
data = normalize_embeddings(data)
|
|
111
203
|
|
|
204
|
+
# Decide how to handle embeddings based on user choice
|
|
205
|
+
embeddings_to_use = None
|
|
206
|
+
stored_embeddings = data.get("embeddings")
|
|
207
|
+
|
|
208
|
+
if recompute_embeddings is False:
|
|
209
|
+
# User explicitly chose to omit embeddings
|
|
210
|
+
log_info("Restoring without embeddings (user choice)")
|
|
211
|
+
embeddings_to_use = None
|
|
212
|
+
|
|
213
|
+
elif recompute_embeddings is True:
|
|
214
|
+
# User explicitly chose to recompute embeddings
|
|
215
|
+
log_info("Recomputing embeddings from documents")
|
|
216
|
+
try:
|
|
217
|
+
from vector_inspector.core.embedding_utils import (
|
|
218
|
+
encode_text,
|
|
219
|
+
load_embedding_model,
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
model_name = metadata.get("embedding_model") if metadata else None
|
|
223
|
+
docs = data.get("documents", [])
|
|
224
|
+
|
|
225
|
+
if not model_name:
|
|
226
|
+
log_error(
|
|
227
|
+
"Cannot recompute: No embedding model available in backup metadata"
|
|
228
|
+
)
|
|
229
|
+
embeddings_to_use = None
|
|
230
|
+
elif not docs:
|
|
231
|
+
log_error("Cannot recompute: No documents available in backup")
|
|
232
|
+
embeddings_to_use = None
|
|
233
|
+
else:
|
|
234
|
+
model_type = metadata.get("embedding_model_type", "sentence-transformer")
|
|
235
|
+
log_info("Loading embedding model: %s (%s)", model_name, model_type)
|
|
236
|
+
model = load_embedding_model(model_name, model_type)
|
|
237
|
+
new_embeddings = []
|
|
238
|
+
if model_type == "clip":
|
|
239
|
+
# CLIP: encode per-document
|
|
240
|
+
for d in docs:
|
|
241
|
+
new_embeddings.append(encode_text(d, model, model_type))
|
|
242
|
+
else:
|
|
243
|
+
# sentence-transformer supports batch encode
|
|
244
|
+
new_embeddings = model.encode(docs, show_progress_bar=False).tolist()
|
|
245
|
+
|
|
246
|
+
embeddings_to_use = new_embeddings
|
|
247
|
+
log_info("Successfully recomputed %d embeddings", len(new_embeddings))
|
|
248
|
+
except Exception as e:
|
|
249
|
+
log_error("Failed to recompute embeddings: %s", e)
|
|
250
|
+
embeddings_to_use = None
|
|
251
|
+
|
|
252
|
+
else:
|
|
253
|
+
# Default (None): Use stored embeddings as-is if available
|
|
254
|
+
if stored_embeddings:
|
|
255
|
+
# Check dimension compatibility with target collection
|
|
256
|
+
try:
|
|
257
|
+
if stored_embeddings and len(stored_embeddings) > 0:
|
|
258
|
+
stored_dim = len(stored_embeddings[0])
|
|
259
|
+
target_dim = inferred_size # We already calculated this above
|
|
260
|
+
|
|
261
|
+
if stored_dim == target_dim:
|
|
262
|
+
log_info(
|
|
263
|
+
"Using stored embeddings from backup (dimension: %d)",
|
|
264
|
+
stored_dim,
|
|
265
|
+
)
|
|
266
|
+
embeddings_to_use = stored_embeddings
|
|
267
|
+
else:
|
|
268
|
+
log_error(
|
|
269
|
+
"Dimension mismatch: backup has %d, target needs %d. Omitting embeddings.",
|
|
270
|
+
stored_dim,
|
|
271
|
+
target_dim,
|
|
272
|
+
)
|
|
273
|
+
embeddings_to_use = None
|
|
274
|
+
else:
|
|
275
|
+
embeddings_to_use = stored_embeddings
|
|
276
|
+
except Exception as e:
|
|
277
|
+
log_error("Error checking embedding dimensions: %s", e)
|
|
278
|
+
# Try to use them anyway
|
|
279
|
+
embeddings_to_use = stored_embeddings
|
|
280
|
+
else:
|
|
281
|
+
log_info("No embeddings in backup to restore")
|
|
282
|
+
embeddings_to_use = None
|
|
283
|
+
|
|
112
284
|
success = connection.add_items(
|
|
113
285
|
restore_collection_name,
|
|
114
286
|
documents=data.get("documents", []),
|
|
115
287
|
metadatas=data.get("metadatas"),
|
|
116
288
|
ids=data.get("ids"),
|
|
117
|
-
embeddings=
|
|
289
|
+
embeddings=embeddings_to_use,
|
|
118
290
|
)
|
|
119
291
|
|
|
120
292
|
if success:
|
|
121
293
|
log_info("Collection '%s' restored from backup", restore_collection_name)
|
|
122
294
|
log_info("Restored %d items", len(data.get("ids", [])))
|
|
295
|
+
|
|
296
|
+
# Save model config to app settings if available
|
|
297
|
+
if profile_name and restore_collection_name and metadata:
|
|
298
|
+
try:
|
|
299
|
+
embed_model = metadata.get("embedding_model")
|
|
300
|
+
embed_model_type = metadata.get(
|
|
301
|
+
"embedding_model_type", "sentence-transformer"
|
|
302
|
+
)
|
|
303
|
+
if embed_model:
|
|
304
|
+
from vector_inspector.services.settings_service import SettingsService
|
|
305
|
+
|
|
306
|
+
settings = SettingsService()
|
|
307
|
+
settings.save_embedding_model(
|
|
308
|
+
profile_name,
|
|
309
|
+
restore_collection_name,
|
|
310
|
+
embed_model,
|
|
311
|
+
embed_model_type,
|
|
312
|
+
)
|
|
313
|
+
log_info(
|
|
314
|
+
"Saved model config to settings: %s (%s)",
|
|
315
|
+
embed_model,
|
|
316
|
+
embed_model_type,
|
|
317
|
+
)
|
|
318
|
+
except Exception as e:
|
|
319
|
+
log_error("Failed to save model config to settings: %s", e)
|
|
320
|
+
|
|
321
|
+
# Clear the cache for this collection so the info panel gets fresh data
|
|
322
|
+
if profile_name and restore_collection_name:
|
|
323
|
+
try:
|
|
324
|
+
from vector_inspector.core.cache_manager import get_cache_manager
|
|
325
|
+
|
|
326
|
+
cache = get_cache_manager()
|
|
327
|
+
# Use profile_name as the database identifier for cache
|
|
328
|
+
cache.invalidate(profile_name, restore_collection_name)
|
|
329
|
+
log_info(
|
|
330
|
+
"Cleared cache for restored collection: %s",
|
|
331
|
+
restore_collection_name,
|
|
332
|
+
)
|
|
333
|
+
except Exception as e:
|
|
334
|
+
log_error("Failed to clear cache after restore: %s", e)
|
|
335
|
+
|
|
123
336
|
return True
|
|
124
337
|
|
|
125
338
|
# Failure: attempt cleanup
|