vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/core/connection_manager.py +55 -49
- vector_inspector/core/connections/base_connection.py +41 -41
- vector_inspector/core/connections/chroma_connection.py +110 -86
- vector_inspector/core/connections/pinecone_connection.py +168 -182
- vector_inspector/core/connections/qdrant_connection.py +109 -126
- vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
- vector_inspector/core/connections/template_connection.py +55 -65
- vector_inspector/core/embedding_utils.py +32 -32
- vector_inspector/core/logging.py +27 -0
- vector_inspector/core/model_registry.py +4 -3
- vector_inspector/main.py +6 -2
- vector_inspector/services/backup_helpers.py +63 -0
- vector_inspector/services/backup_restore_service.py +73 -152
- vector_inspector/services/credential_service.py +33 -40
- vector_inspector/services/import_export_service.py +70 -67
- vector_inspector/services/profile_service.py +92 -94
- vector_inspector/services/settings_service.py +68 -48
- vector_inspector/services/visualization_service.py +40 -39
- vector_inspector/ui/components/splash_window.py +57 -0
- vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
- vector_inspector/ui/main_window.py +200 -146
- vector_inspector/ui/views/info_panel.py +208 -127
- vector_inspector/ui/views/metadata_view.py +8 -7
- vector_inspector/ui/views/search_view.py +97 -75
- vector_inspector/ui/views/visualization_view.py +140 -97
- vector_inspector/utils/version.py +5 -0
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
"""Service for backing up and restoring collections."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
import zipfile
|
|
5
4
|
from typing import Dict, Any, Optional
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from datetime import datetime
|
|
8
7
|
import shutil
|
|
9
8
|
|
|
9
|
+
from ..core.logging import log_info, log_error, log_debug
|
|
10
|
+
from .backup_helpers import write_backup_zip, read_backup_zip, normalize_embeddings
|
|
11
|
+
|
|
10
12
|
|
|
11
13
|
class BackupRestoreService:
|
|
12
14
|
"""Handles backup and restore operations for vector database collections."""
|
|
@@ -31,67 +33,41 @@ class BackupRestoreService:
|
|
|
31
33
|
Path to backup file or None if failed
|
|
32
34
|
"""
|
|
33
35
|
try:
|
|
34
|
-
# Create backup directory if it doesn't exist
|
|
35
36
|
Path(backup_dir).mkdir(parents=True, exist_ok=True)
|
|
36
|
-
|
|
37
|
-
# Get collection info
|
|
37
|
+
|
|
38
38
|
collection_info = connection.get_collection_info(collection_name)
|
|
39
39
|
if not collection_info:
|
|
40
|
-
|
|
40
|
+
log_error("Failed to get collection info for %s", collection_name)
|
|
41
41
|
return None
|
|
42
|
-
|
|
43
|
-
# Get all items from collection
|
|
42
|
+
|
|
44
43
|
all_data = connection.get_all_items(collection_name)
|
|
45
44
|
if not all_data or not all_data.get("ids"):
|
|
46
|
-
|
|
45
|
+
log_info("No data to backup from collection %s", collection_name)
|
|
47
46
|
return None
|
|
48
|
-
|
|
49
|
-
#
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
import numpy as np
|
|
53
|
-
if isinstance(all_data["embeddings"], np.ndarray):
|
|
54
|
-
all_data["embeddings"] = all_data["embeddings"].tolist()
|
|
55
|
-
elif isinstance(all_data["embeddings"], list):
|
|
56
|
-
# Convert any numpy arrays in the list
|
|
57
|
-
all_data["embeddings"] = [
|
|
58
|
-
emb.tolist() if isinstance(emb, np.ndarray) else emb
|
|
59
|
-
for emb in all_data["embeddings"]
|
|
60
|
-
]
|
|
61
|
-
except ImportError:
|
|
62
|
-
pass # numpy not available, assume already lists
|
|
63
|
-
|
|
64
|
-
# Remove embeddings if not needed (to save space)
|
|
47
|
+
|
|
48
|
+
# Normalize embeddings to plain lists
|
|
49
|
+
all_data = normalize_embeddings(all_data)
|
|
50
|
+
|
|
65
51
|
if not include_embeddings and "embeddings" in all_data:
|
|
66
52
|
del all_data["embeddings"]
|
|
67
|
-
|
|
68
|
-
# Create backup metadata
|
|
53
|
+
|
|
69
54
|
backup_metadata = {
|
|
70
55
|
"collection_name": collection_name,
|
|
71
56
|
"backup_timestamp": datetime.now().isoformat(),
|
|
72
57
|
"item_count": len(all_data["ids"]),
|
|
73
58
|
"collection_info": collection_info,
|
|
74
|
-
"include_embeddings": include_embeddings
|
|
59
|
+
"include_embeddings": include_embeddings,
|
|
75
60
|
}
|
|
76
|
-
|
|
77
|
-
# Create backup filename with timestamp
|
|
61
|
+
|
|
78
62
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
79
63
|
backup_filename = f"{collection_name}_backup_{timestamp}.zip"
|
|
80
64
|
backup_path = Path(backup_dir) / backup_filename
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
# Write metadata
|
|
85
|
-
zipf.writestr('metadata.json', json.dumps(backup_metadata, indent=2))
|
|
86
|
-
|
|
87
|
-
# Write collection data
|
|
88
|
-
zipf.writestr('data.json', json.dumps(all_data, indent=2))
|
|
89
|
-
|
|
90
|
-
print(f"Backup created: {backup_path}")
|
|
65
|
+
|
|
66
|
+
write_backup_zip(backup_path, backup_metadata, all_data)
|
|
67
|
+
log_info("Backup created: %s", backup_path)
|
|
91
68
|
return str(backup_path)
|
|
92
|
-
|
|
93
69
|
except Exception as e:
|
|
94
|
-
|
|
70
|
+
log_error("Backup failed: %s", e)
|
|
95
71
|
return None
|
|
96
72
|
|
|
97
73
|
@staticmethod
|
|
@@ -113,104 +89,60 @@ class BackupRestoreService:
|
|
|
113
89
|
Returns:
|
|
114
90
|
True if successful, False otherwise
|
|
115
91
|
"""
|
|
92
|
+
restore_collection_name = None
|
|
116
93
|
try:
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
metadata_str = zipf.read('metadata.json').decode('utf-8')
|
|
121
|
-
metadata = json.loads(metadata_str)
|
|
122
|
-
|
|
123
|
-
# Read data
|
|
124
|
-
data_str = zipf.read('data.json').decode('utf-8')
|
|
125
|
-
data = json.loads(data_str)
|
|
126
|
-
|
|
127
|
-
# Determine collection name
|
|
128
|
-
restore_collection_name = collection_name or metadata["collection_name"]
|
|
129
|
-
|
|
130
|
-
# Check if collection exists
|
|
94
|
+
metadata, data = read_backup_zip(backup_file)
|
|
95
|
+
restore_collection_name = collection_name or metadata.get("collection_name")
|
|
96
|
+
|
|
131
97
|
existing_collections = connection.list_collections()
|
|
132
98
|
if restore_collection_name in existing_collections:
|
|
133
99
|
if not overwrite:
|
|
134
|
-
|
|
100
|
+
log_info("Collection %s already exists. Use overwrite=True to replace it.", restore_collection_name)
|
|
135
101
|
return False
|
|
136
102
|
else:
|
|
137
|
-
# Delete existing collection
|
|
138
103
|
connection.delete_collection(restore_collection_name)
|
|
139
|
-
|
|
140
|
-
#
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
if metadata.get("collection_info") and "vector_size" in metadata["collection_info"]:
|
|
146
|
-
vector_size = metadata["collection_info"]["vector_size"]
|
|
147
|
-
elif data.get("embeddings") and len(data["embeddings"]) > 0:
|
|
148
|
-
vector_size = len(data["embeddings"][0])
|
|
149
|
-
|
|
150
|
-
if not vector_size:
|
|
151
|
-
print("Cannot determine vector size for Qdrant collection")
|
|
152
|
-
return False
|
|
153
|
-
|
|
154
|
-
# Create collection
|
|
155
|
-
distance = metadata.get("collection_info", {}).get("distance", "Cosine")
|
|
156
|
-
if not connection.create_collection(restore_collection_name, vector_size, distance):
|
|
157
|
-
print(f"Failed to create collection {restore_collection_name}")
|
|
104
|
+
|
|
105
|
+
# Provider-specific preparation hook
|
|
106
|
+
if hasattr(connection, "prepare_restore"):
|
|
107
|
+
ok = connection.prepare_restore(metadata, data)
|
|
108
|
+
if not ok:
|
|
109
|
+
log_error("Provider prepare_restore failed for %s", restore_collection_name)
|
|
158
110
|
return False
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
try:
|
|
164
|
-
from sentence_transformers import SentenceTransformer
|
|
165
|
-
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
166
|
-
documents = data.get("documents", [])
|
|
167
|
-
data["embeddings"] = model.encode(documents, show_progress_bar=True).tolist()
|
|
168
|
-
except Exception as e:
|
|
169
|
-
print(f"Failed to generate embeddings: {e}")
|
|
170
|
-
return False
|
|
171
|
-
|
|
172
|
-
# Keep IDs as strings - Qdrant's _to_uuid method handles conversion
|
|
173
|
-
# Just ensure all IDs are strings
|
|
174
|
-
original_ids = data.get("ids", [])
|
|
175
|
-
data["ids"] = [str(id_val) for id_val in original_ids]
|
|
176
|
-
|
|
177
|
-
# Add items to collection
|
|
111
|
+
|
|
112
|
+
# Ensure embeddings normalized
|
|
113
|
+
data = normalize_embeddings(data)
|
|
114
|
+
|
|
178
115
|
success = connection.add_items(
|
|
179
116
|
restore_collection_name,
|
|
180
117
|
documents=data.get("documents", []),
|
|
181
118
|
metadatas=data.get("metadatas"),
|
|
182
119
|
ids=data.get("ids"),
|
|
183
|
-
embeddings=data.get("embeddings")
|
|
120
|
+
embeddings=data.get("embeddings"),
|
|
184
121
|
)
|
|
185
|
-
|
|
122
|
+
|
|
186
123
|
if success:
|
|
187
|
-
|
|
188
|
-
|
|
124
|
+
log_info("Collection '%s' restored from backup", restore_collection_name)
|
|
125
|
+
log_info("Restored %d items", len(data.get("ids", [])))
|
|
189
126
|
return True
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
try:
|
|
194
|
-
if restore_collection_name in connection.list_collections():
|
|
195
|
-
print(f"Cleaning up failed restore: deleting collection '{restore_collection_name}'")
|
|
196
|
-
connection.delete_collection(restore_collection_name)
|
|
197
|
-
except Exception as cleanup_error:
|
|
198
|
-
print(f"Warning: Failed to clean up collection: {cleanup_error}")
|
|
199
|
-
return False
|
|
200
|
-
|
|
201
|
-
except Exception as e:
|
|
202
|
-
print(f"Restore failed: {e}")
|
|
203
|
-
import traceback
|
|
204
|
-
traceback.print_exc()
|
|
205
|
-
|
|
206
|
-
# Clean up partially created collection
|
|
127
|
+
|
|
128
|
+
# Failure: attempt cleanup
|
|
129
|
+
log_error("Failed to restore collection %s", restore_collection_name)
|
|
207
130
|
try:
|
|
208
131
|
if restore_collection_name in connection.list_collections():
|
|
209
|
-
|
|
132
|
+
log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
|
|
210
133
|
connection.delete_collection(restore_collection_name)
|
|
211
134
|
except Exception as cleanup_error:
|
|
212
|
-
|
|
213
|
-
|
|
135
|
+
log_error("Warning: Failed to clean up collection: %s", cleanup_error)
|
|
136
|
+
return False
|
|
137
|
+
|
|
138
|
+
except Exception as e:
|
|
139
|
+
log_error("Restore failed: %s", e)
|
|
140
|
+
try:
|
|
141
|
+
if restore_collection_name and restore_collection_name in connection.list_collections():
|
|
142
|
+
log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
|
|
143
|
+
connection.delete_collection(restore_collection_name)
|
|
144
|
+
except Exception as cleanup_error:
|
|
145
|
+
log_error("Warning: Failed to clean up collection: %s", cleanup_error)
|
|
214
146
|
return False
|
|
215
147
|
|
|
216
148
|
@staticmethod
|
|
@@ -224,38 +156,27 @@ class BackupRestoreService:
|
|
|
224
156
|
Returns:
|
|
225
157
|
List of backup file information dictionaries
|
|
226
158
|
"""
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if not backup_path.exists():
|
|
230
|
-
return []
|
|
231
|
-
|
|
232
|
-
backups = []
|
|
233
|
-
for backup_file in backup_path.glob("*_backup_*.zip"):
|
|
234
|
-
try:
|
|
235
|
-
# Read metadata from backup
|
|
236
|
-
with zipfile.ZipFile(backup_file, 'r') as zipf:
|
|
237
|
-
metadata_str = zipf.read('metadata.json').decode('utf-8')
|
|
238
|
-
metadata = json.loads(metadata_str)
|
|
239
|
-
|
|
240
|
-
backups.append({
|
|
241
|
-
"file_path": str(backup_file),
|
|
242
|
-
"file_name": backup_file.name,
|
|
243
|
-
"collection_name": metadata.get("collection_name", "Unknown"),
|
|
244
|
-
"timestamp": metadata.get("backup_timestamp", "Unknown"),
|
|
245
|
-
"item_count": metadata.get("item_count", 0),
|
|
246
|
-
"file_size": backup_file.stat().st_size
|
|
247
|
-
})
|
|
248
|
-
except Exception:
|
|
249
|
-
# Skip invalid backup files
|
|
250
|
-
continue
|
|
251
|
-
|
|
252
|
-
# Sort by timestamp (newest first)
|
|
253
|
-
backups.sort(key=lambda x: x["timestamp"], reverse=True)
|
|
254
|
-
return backups
|
|
255
|
-
|
|
256
|
-
except Exception as e:
|
|
257
|
-
print(f"Failed to list backups: {e}")
|
|
159
|
+
backup_path = Path(backup_dir)
|
|
160
|
+
if not backup_path.exists():
|
|
258
161
|
return []
|
|
162
|
+
|
|
163
|
+
backups = []
|
|
164
|
+
for backup_file in backup_path.glob("*_backup_*.zip"):
|
|
165
|
+
try:
|
|
166
|
+
metadata, _ = read_backup_zip(backup_file)
|
|
167
|
+
backups.append({
|
|
168
|
+
"file_path": str(backup_file),
|
|
169
|
+
"file_name": backup_file.name,
|
|
170
|
+
"collection_name": metadata.get("collection_name", "Unknown"),
|
|
171
|
+
"timestamp": metadata.get("backup_timestamp", "Unknown"),
|
|
172
|
+
"item_count": metadata.get("item_count", 0),
|
|
173
|
+
"file_size": backup_file.stat().st_size,
|
|
174
|
+
})
|
|
175
|
+
except Exception:
|
|
176
|
+
continue
|
|
177
|
+
|
|
178
|
+
backups.sort(key=lambda x: x["timestamp"], reverse=True)
|
|
179
|
+
return backups
|
|
259
180
|
|
|
260
181
|
@staticmethod
|
|
261
182
|
def delete_backup(backup_file: str) -> bool:
|
|
@@ -272,5 +193,5 @@ class BackupRestoreService:
|
|
|
272
193
|
Path(backup_file).unlink()
|
|
273
194
|
return True
|
|
274
195
|
except Exception as e:
|
|
275
|
-
|
|
196
|
+
log_error("Failed to delete backup: %s", e)
|
|
276
197
|
return False
|
|
@@ -2,123 +2,117 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Optional
|
|
4
4
|
import json
|
|
5
|
+
from vector_inspector.core.logging import log_info, log_error
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class CredentialService:
|
|
8
9
|
"""Handles secure storage and retrieval of credentials using system keychains.
|
|
9
|
-
|
|
10
|
+
|
|
10
11
|
Falls back to in-memory storage if keyring is not available (not recommended for production).
|
|
11
12
|
"""
|
|
12
|
-
|
|
13
|
+
|
|
13
14
|
SERVICE_NAME = "vector-inspector"
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
def __init__(self):
|
|
16
17
|
"""Initialize credential service with keyring if available."""
|
|
17
18
|
self._use_keyring = False
|
|
18
19
|
self._memory_store = {} # Fallback in-memory storage
|
|
19
|
-
|
|
20
|
+
|
|
20
21
|
try:
|
|
21
22
|
import keyring
|
|
23
|
+
|
|
22
24
|
self._keyring = keyring
|
|
23
25
|
self._use_keyring = True
|
|
24
26
|
except ImportError:
|
|
25
|
-
|
|
27
|
+
log_info(
|
|
28
|
+
"Warning: keyring module not available. Credentials will not be persisted securely."
|
|
29
|
+
)
|
|
26
30
|
self._keyring = None
|
|
27
|
-
|
|
31
|
+
|
|
28
32
|
def store_credentials(self, profile_id: str, credentials: dict) -> bool:
|
|
29
33
|
"""
|
|
30
34
|
Store credentials for a profile.
|
|
31
|
-
|
|
35
|
+
|
|
32
36
|
Args:
|
|
33
37
|
profile_id: Unique profile identifier
|
|
34
38
|
credentials: Dictionary of credential data (api_key, password, etc.)
|
|
35
|
-
|
|
39
|
+
|
|
36
40
|
Returns:
|
|
37
41
|
True if successful, False otherwise
|
|
38
42
|
"""
|
|
39
43
|
try:
|
|
40
44
|
credential_key = f"profile:{profile_id}"
|
|
41
45
|
credential_json = json.dumps(credentials)
|
|
42
|
-
|
|
46
|
+
|
|
43
47
|
if self._use_keyring:
|
|
44
|
-
self._keyring.set_password(
|
|
45
|
-
self.SERVICE_NAME,
|
|
46
|
-
credential_key,
|
|
47
|
-
credential_json
|
|
48
|
-
)
|
|
48
|
+
self._keyring.set_password(self.SERVICE_NAME, credential_key, credential_json)
|
|
49
49
|
else:
|
|
50
50
|
# Fallback to in-memory (not persistent)
|
|
51
51
|
self._memory_store[credential_key] = credential_json
|
|
52
|
-
|
|
52
|
+
|
|
53
53
|
return True
|
|
54
54
|
except Exception as e:
|
|
55
|
-
|
|
55
|
+
log_error("Failed to store credentials: %s", e)
|
|
56
56
|
return False
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
def get_credentials(self, profile_id: str) -> Optional[dict]:
|
|
59
59
|
"""
|
|
60
60
|
Retrieve credentials for a profile.
|
|
61
|
-
|
|
61
|
+
|
|
62
62
|
Args:
|
|
63
63
|
profile_id: Unique profile identifier
|
|
64
|
-
|
|
64
|
+
|
|
65
65
|
Returns:
|
|
66
66
|
Dictionary of credential data, or None if not found
|
|
67
67
|
"""
|
|
68
68
|
try:
|
|
69
69
|
credential_key = f"profile:{profile_id}"
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
if self._use_keyring:
|
|
72
|
-
credential_json = self._keyring.get_password(
|
|
73
|
-
self.SERVICE_NAME,
|
|
74
|
-
credential_key
|
|
75
|
-
)
|
|
72
|
+
credential_json = self._keyring.get_password(self.SERVICE_NAME, credential_key)
|
|
76
73
|
else:
|
|
77
74
|
# Fallback to in-memory
|
|
78
75
|
credential_json = self._memory_store.get(credential_key)
|
|
79
|
-
|
|
76
|
+
|
|
80
77
|
if credential_json:
|
|
81
78
|
return json.loads(credential_json)
|
|
82
79
|
return None
|
|
83
80
|
except Exception as e:
|
|
84
|
-
|
|
81
|
+
log_error("Failed to retrieve credentials: %s", e)
|
|
85
82
|
return None
|
|
86
|
-
|
|
83
|
+
|
|
87
84
|
def delete_credentials(self, profile_id: str) -> bool:
|
|
88
85
|
"""
|
|
89
86
|
Delete stored credentials for a profile.
|
|
90
|
-
|
|
87
|
+
|
|
91
88
|
Args:
|
|
92
89
|
profile_id: Unique profile identifier
|
|
93
|
-
|
|
90
|
+
|
|
94
91
|
Returns:
|
|
95
92
|
True if successful, False otherwise
|
|
96
93
|
"""
|
|
97
94
|
try:
|
|
98
95
|
credential_key = f"profile:{profile_id}"
|
|
99
|
-
|
|
96
|
+
|
|
100
97
|
if self._use_keyring:
|
|
101
98
|
try:
|
|
102
|
-
self._keyring.delete_password(
|
|
103
|
-
self.SERVICE_NAME,
|
|
104
|
-
credential_key
|
|
105
|
-
)
|
|
99
|
+
self._keyring.delete_password(self.SERVICE_NAME, credential_key)
|
|
106
100
|
except self._keyring.errors.PasswordDeleteError:
|
|
107
101
|
# Credential doesn't exist, that's okay
|
|
108
102
|
pass
|
|
109
103
|
else:
|
|
110
104
|
# Fallback to in-memory
|
|
111
105
|
self._memory_store.pop(credential_key, None)
|
|
112
|
-
|
|
106
|
+
|
|
113
107
|
return True
|
|
114
108
|
except Exception as e:
|
|
115
|
-
|
|
109
|
+
log_error("Failed to delete credentials: %s", e)
|
|
116
110
|
return False
|
|
117
|
-
|
|
111
|
+
|
|
118
112
|
def is_keyring_available(self) -> bool:
|
|
119
113
|
"""Check if system keyring is available."""
|
|
120
114
|
return self._use_keyring
|
|
121
|
-
|
|
115
|
+
|
|
122
116
|
def clear_all_credentials(self):
|
|
123
117
|
"""Clear all stored credentials. Use with caution!"""
|
|
124
118
|
if not self._use_keyring:
|
|
@@ -126,5 +120,4 @@ class CredentialService:
|
|
|
126
120
|
else:
|
|
127
121
|
# For keyring, we'd need to track all profile IDs
|
|
128
122
|
# This is typically not needed, but can be implemented if required
|
|
129
|
-
|
|
130
|
-
|
|
123
|
+
log_info("Warning: clear_all_credentials not implemented for keyring backend")
|