vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. vector_inspector/core/connection_manager.py +55 -49
  2. vector_inspector/core/connections/base_connection.py +41 -41
  3. vector_inspector/core/connections/chroma_connection.py +110 -86
  4. vector_inspector/core/connections/pinecone_connection.py +168 -182
  5. vector_inspector/core/connections/qdrant_connection.py +109 -126
  6. vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
  7. vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
  8. vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
  9. vector_inspector/core/connections/template_connection.py +55 -65
  10. vector_inspector/core/embedding_utils.py +32 -32
  11. vector_inspector/core/logging.py +27 -0
  12. vector_inspector/core/model_registry.py +4 -3
  13. vector_inspector/main.py +6 -2
  14. vector_inspector/services/backup_helpers.py +63 -0
  15. vector_inspector/services/backup_restore_service.py +73 -152
  16. vector_inspector/services/credential_service.py +33 -40
  17. vector_inspector/services/import_export_service.py +70 -67
  18. vector_inspector/services/profile_service.py +92 -94
  19. vector_inspector/services/settings_service.py +68 -48
  20. vector_inspector/services/visualization_service.py +40 -39
  21. vector_inspector/ui/components/splash_window.py +57 -0
  22. vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
  23. vector_inspector/ui/main_window.py +200 -146
  24. vector_inspector/ui/views/info_panel.py +208 -127
  25. vector_inspector/ui/views/metadata_view.py +8 -7
  26. vector_inspector/ui/views/search_view.py +97 -75
  27. vector_inspector/ui/views/visualization_view.py +140 -97
  28. vector_inspector/utils/version.py +5 -0
  29. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
  30. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
  31. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
  32. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -1,12 +1,14 @@
1
1
  """Service for backing up and restoring collections."""
2
2
 
3
3
  import json
4
- import zipfile
5
4
  from typing import Dict, Any, Optional
6
5
  from pathlib import Path
7
6
  from datetime import datetime
8
7
  import shutil
9
8
 
9
+ from ..core.logging import log_info, log_error, log_debug
10
+ from .backup_helpers import write_backup_zip, read_backup_zip, normalize_embeddings
11
+
10
12
 
11
13
  class BackupRestoreService:
12
14
  """Handles backup and restore operations for vector database collections."""
@@ -31,67 +33,41 @@ class BackupRestoreService:
31
33
  Path to backup file or None if failed
32
34
  """
33
35
  try:
34
- # Create backup directory if it doesn't exist
35
36
  Path(backup_dir).mkdir(parents=True, exist_ok=True)
36
-
37
- # Get collection info
37
+
38
38
  collection_info = connection.get_collection_info(collection_name)
39
39
  if not collection_info:
40
- print(f"Failed to get collection info for {collection_name}")
40
+ log_error("Failed to get collection info for %s", collection_name)
41
41
  return None
42
-
43
- # Get all items from collection
42
+
44
43
  all_data = connection.get_all_items(collection_name)
45
44
  if not all_data or not all_data.get("ids"):
46
- print(f"No data to backup from collection {collection_name}")
45
+ log_info("No data to backup from collection %s", collection_name)
47
46
  return None
48
-
49
- # Convert numpy arrays to lists for JSON serialization
50
- if "embeddings" in all_data:
51
- try:
52
- import numpy as np
53
- if isinstance(all_data["embeddings"], np.ndarray):
54
- all_data["embeddings"] = all_data["embeddings"].tolist()
55
- elif isinstance(all_data["embeddings"], list):
56
- # Convert any numpy arrays in the list
57
- all_data["embeddings"] = [
58
- emb.tolist() if isinstance(emb, np.ndarray) else emb
59
- for emb in all_data["embeddings"]
60
- ]
61
- except ImportError:
62
- pass # numpy not available, assume already lists
63
-
64
- # Remove embeddings if not needed (to save space)
47
+
48
+ # Normalize embeddings to plain lists
49
+ all_data = normalize_embeddings(all_data)
50
+
65
51
  if not include_embeddings and "embeddings" in all_data:
66
52
  del all_data["embeddings"]
67
-
68
- # Create backup metadata
53
+
69
54
  backup_metadata = {
70
55
  "collection_name": collection_name,
71
56
  "backup_timestamp": datetime.now().isoformat(),
72
57
  "item_count": len(all_data["ids"]),
73
58
  "collection_info": collection_info,
74
- "include_embeddings": include_embeddings
59
+ "include_embeddings": include_embeddings,
75
60
  }
76
-
77
- # Create backup filename with timestamp
61
+
78
62
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
79
63
  backup_filename = f"{collection_name}_backup_{timestamp}.zip"
80
64
  backup_path = Path(backup_dir) / backup_filename
81
-
82
- # Create zip file with data and metadata
83
- with zipfile.ZipFile(backup_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
84
- # Write metadata
85
- zipf.writestr('metadata.json', json.dumps(backup_metadata, indent=2))
86
-
87
- # Write collection data
88
- zipf.writestr('data.json', json.dumps(all_data, indent=2))
89
-
90
- print(f"Backup created: {backup_path}")
65
+
66
+ write_backup_zip(backup_path, backup_metadata, all_data)
67
+ log_info("Backup created: %s", backup_path)
91
68
  return str(backup_path)
92
-
93
69
  except Exception as e:
94
- print(f"Backup failed: {e}")
70
+ log_error("Backup failed: %s", e)
95
71
  return None
96
72
 
97
73
  @staticmethod
@@ -113,104 +89,60 @@ class BackupRestoreService:
113
89
  Returns:
114
90
  True if successful, False otherwise
115
91
  """
92
+ restore_collection_name = None
116
93
  try:
117
- # Extract backup
118
- with zipfile.ZipFile(backup_file, 'r') as zipf:
119
- # Read metadata
120
- metadata_str = zipf.read('metadata.json').decode('utf-8')
121
- metadata = json.loads(metadata_str)
122
-
123
- # Read data
124
- data_str = zipf.read('data.json').decode('utf-8')
125
- data = json.loads(data_str)
126
-
127
- # Determine collection name
128
- restore_collection_name = collection_name or metadata["collection_name"]
129
-
130
- # Check if collection exists
94
+ metadata, data = read_backup_zip(backup_file)
95
+ restore_collection_name = collection_name or metadata.get("collection_name")
96
+
131
97
  existing_collections = connection.list_collections()
132
98
  if restore_collection_name in existing_collections:
133
99
  if not overwrite:
134
- print(f"Collection {restore_collection_name} already exists. Use overwrite=True to replace it.")
100
+ log_info("Collection %s already exists. Use overwrite=True to replace it.", restore_collection_name)
135
101
  return False
136
102
  else:
137
- # Delete existing collection
138
103
  connection.delete_collection(restore_collection_name)
139
-
140
- # Check if this is Qdrant - need to create collection first
141
- from vector_inspector.core.connections.qdrant_connection import QdrantConnection
142
- if isinstance(connection, QdrantConnection):
143
- # Get vector size from collection info or embeddings
144
- vector_size = None
145
- if metadata.get("collection_info") and "vector_size" in metadata["collection_info"]:
146
- vector_size = metadata["collection_info"]["vector_size"]
147
- elif data.get("embeddings") and len(data["embeddings"]) > 0:
148
- vector_size = len(data["embeddings"][0])
149
-
150
- if not vector_size:
151
- print("Cannot determine vector size for Qdrant collection")
152
- return False
153
-
154
- # Create collection
155
- distance = metadata.get("collection_info", {}).get("distance", "Cosine")
156
- if not connection.create_collection(restore_collection_name, vector_size, distance):
157
- print(f"Failed to create collection {restore_collection_name}")
104
+
105
+ # Provider-specific preparation hook
106
+ if hasattr(connection, "prepare_restore"):
107
+ ok = connection.prepare_restore(metadata, data)
108
+ if not ok:
109
+ log_error("Provider prepare_restore failed for %s", restore_collection_name)
158
110
  return False
159
-
160
- # Check if embeddings are missing and need to be generated
161
- if not data.get("embeddings"):
162
- print("Embeddings missing in backup. Generating embeddings...")
163
- try:
164
- from sentence_transformers import SentenceTransformer
165
- model = SentenceTransformer("all-MiniLM-L6-v2")
166
- documents = data.get("documents", [])
167
- data["embeddings"] = model.encode(documents, show_progress_bar=True).tolist()
168
- except Exception as e:
169
- print(f"Failed to generate embeddings: {e}")
170
- return False
171
-
172
- # Keep IDs as strings - Qdrant's _to_uuid method handles conversion
173
- # Just ensure all IDs are strings
174
- original_ids = data.get("ids", [])
175
- data["ids"] = [str(id_val) for id_val in original_ids]
176
-
177
- # Add items to collection
111
+
112
+ # Ensure embeddings normalized
113
+ data = normalize_embeddings(data)
114
+
178
115
  success = connection.add_items(
179
116
  restore_collection_name,
180
117
  documents=data.get("documents", []),
181
118
  metadatas=data.get("metadatas"),
182
119
  ids=data.get("ids"),
183
- embeddings=data.get("embeddings")
120
+ embeddings=data.get("embeddings"),
184
121
  )
185
-
122
+
186
123
  if success:
187
- print(f"Collection '{restore_collection_name}' restored from backup")
188
- print(f"Restored {len(data.get('ids', []))} items")
124
+ log_info("Collection '%s' restored from backup", restore_collection_name)
125
+ log_info("Restored %d items", len(data.get("ids", [])))
189
126
  return True
190
- else:
191
- print("Failed to restore collection")
192
- # Clean up partially created collection
193
- try:
194
- if restore_collection_name in connection.list_collections():
195
- print(f"Cleaning up failed restore: deleting collection '{restore_collection_name}'")
196
- connection.delete_collection(restore_collection_name)
197
- except Exception as cleanup_error:
198
- print(f"Warning: Failed to clean up collection: {cleanup_error}")
199
- return False
200
-
201
- except Exception as e:
202
- print(f"Restore failed: {e}")
203
- import traceback
204
- traceback.print_exc()
205
-
206
- # Clean up partially created collection
127
+
128
+ # Failure: attempt cleanup
129
+ log_error("Failed to restore collection %s", restore_collection_name)
207
130
  try:
208
131
  if restore_collection_name in connection.list_collections():
209
- print(f"Cleaning up failed restore: deleting collection '{restore_collection_name}'")
132
+ log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
210
133
  connection.delete_collection(restore_collection_name)
211
134
  except Exception as cleanup_error:
212
- print(f"Warning: Failed to clean up collection: {cleanup_error}")
213
-
135
+ log_error("Warning: Failed to clean up collection: %s", cleanup_error)
136
+ return False
137
+
138
+ except Exception as e:
139
+ log_error("Restore failed: %s", e)
140
+ try:
141
+ if restore_collection_name and restore_collection_name in connection.list_collections():
142
+ log_info("Cleaning up failed restore: deleting collection '%s'", restore_collection_name)
143
+ connection.delete_collection(restore_collection_name)
144
+ except Exception as cleanup_error:
145
+ log_error("Warning: Failed to clean up collection: %s", cleanup_error)
214
146
  return False
215
147
 
216
148
  @staticmethod
@@ -224,38 +156,27 @@ class BackupRestoreService:
224
156
  Returns:
225
157
  List of backup file information dictionaries
226
158
  """
227
- try:
228
- backup_path = Path(backup_dir)
229
- if not backup_path.exists():
230
- return []
231
-
232
- backups = []
233
- for backup_file in backup_path.glob("*_backup_*.zip"):
234
- try:
235
- # Read metadata from backup
236
- with zipfile.ZipFile(backup_file, 'r') as zipf:
237
- metadata_str = zipf.read('metadata.json').decode('utf-8')
238
- metadata = json.loads(metadata_str)
239
-
240
- backups.append({
241
- "file_path": str(backup_file),
242
- "file_name": backup_file.name,
243
- "collection_name": metadata.get("collection_name", "Unknown"),
244
- "timestamp": metadata.get("backup_timestamp", "Unknown"),
245
- "item_count": metadata.get("item_count", 0),
246
- "file_size": backup_file.stat().st_size
247
- })
248
- except Exception:
249
- # Skip invalid backup files
250
- continue
251
-
252
- # Sort by timestamp (newest first)
253
- backups.sort(key=lambda x: x["timestamp"], reverse=True)
254
- return backups
255
-
256
- except Exception as e:
257
- print(f"Failed to list backups: {e}")
159
+ backup_path = Path(backup_dir)
160
+ if not backup_path.exists():
258
161
  return []
162
+
163
+ backups = []
164
+ for backup_file in backup_path.glob("*_backup_*.zip"):
165
+ try:
166
+ metadata, _ = read_backup_zip(backup_file)
167
+ backups.append({
168
+ "file_path": str(backup_file),
169
+ "file_name": backup_file.name,
170
+ "collection_name": metadata.get("collection_name", "Unknown"),
171
+ "timestamp": metadata.get("backup_timestamp", "Unknown"),
172
+ "item_count": metadata.get("item_count", 0),
173
+ "file_size": backup_file.stat().st_size,
174
+ })
175
+ except Exception:
176
+ continue
177
+
178
+ backups.sort(key=lambda x: x["timestamp"], reverse=True)
179
+ return backups
259
180
 
260
181
  @staticmethod
261
182
  def delete_backup(backup_file: str) -> bool:
@@ -272,5 +193,5 @@ class BackupRestoreService:
272
193
  Path(backup_file).unlink()
273
194
  return True
274
195
  except Exception as e:
275
- print(f"Failed to delete backup: {e}")
196
+ log_error("Failed to delete backup: %s", e)
276
197
  return False
@@ -2,123 +2,117 @@
2
2
 
3
3
  from typing import Optional
4
4
  import json
5
+ from vector_inspector.core.logging import log_info, log_error
5
6
 
6
7
 
7
8
  class CredentialService:
8
9
  """Handles secure storage and retrieval of credentials using system keychains.
9
-
10
+
10
11
  Falls back to in-memory storage if keyring is not available (not recommended for production).
11
12
  """
12
-
13
+
13
14
  SERVICE_NAME = "vector-inspector"
14
-
15
+
15
16
  def __init__(self):
16
17
  """Initialize credential service with keyring if available."""
17
18
  self._use_keyring = False
18
19
  self._memory_store = {} # Fallback in-memory storage
19
-
20
+
20
21
  try:
21
22
  import keyring
23
+
22
24
  self._keyring = keyring
23
25
  self._use_keyring = True
24
26
  except ImportError:
25
- print("Warning: keyring module not available. Credentials will not be persisted securely.")
27
+ log_info(
28
+ "Warning: keyring module not available. Credentials will not be persisted securely."
29
+ )
26
30
  self._keyring = None
27
-
31
+
28
32
  def store_credentials(self, profile_id: str, credentials: dict) -> bool:
29
33
  """
30
34
  Store credentials for a profile.
31
-
35
+
32
36
  Args:
33
37
  profile_id: Unique profile identifier
34
38
  credentials: Dictionary of credential data (api_key, password, etc.)
35
-
39
+
36
40
  Returns:
37
41
  True if successful, False otherwise
38
42
  """
39
43
  try:
40
44
  credential_key = f"profile:{profile_id}"
41
45
  credential_json = json.dumps(credentials)
42
-
46
+
43
47
  if self._use_keyring:
44
- self._keyring.set_password(
45
- self.SERVICE_NAME,
46
- credential_key,
47
- credential_json
48
- )
48
+ self._keyring.set_password(self.SERVICE_NAME, credential_key, credential_json)
49
49
  else:
50
50
  # Fallback to in-memory (not persistent)
51
51
  self._memory_store[credential_key] = credential_json
52
-
52
+
53
53
  return True
54
54
  except Exception as e:
55
- print(f"Failed to store credentials: {e}")
55
+ log_error("Failed to store credentials: %s", e)
56
56
  return False
57
-
57
+
58
58
  def get_credentials(self, profile_id: str) -> Optional[dict]:
59
59
  """
60
60
  Retrieve credentials for a profile.
61
-
61
+
62
62
  Args:
63
63
  profile_id: Unique profile identifier
64
-
64
+
65
65
  Returns:
66
66
  Dictionary of credential data, or None if not found
67
67
  """
68
68
  try:
69
69
  credential_key = f"profile:{profile_id}"
70
-
70
+
71
71
  if self._use_keyring:
72
- credential_json = self._keyring.get_password(
73
- self.SERVICE_NAME,
74
- credential_key
75
- )
72
+ credential_json = self._keyring.get_password(self.SERVICE_NAME, credential_key)
76
73
  else:
77
74
  # Fallback to in-memory
78
75
  credential_json = self._memory_store.get(credential_key)
79
-
76
+
80
77
  if credential_json:
81
78
  return json.loads(credential_json)
82
79
  return None
83
80
  except Exception as e:
84
- print(f"Failed to retrieve credentials: {e}")
81
+ log_error("Failed to retrieve credentials: %s", e)
85
82
  return None
86
-
83
+
87
84
  def delete_credentials(self, profile_id: str) -> bool:
88
85
  """
89
86
  Delete stored credentials for a profile.
90
-
87
+
91
88
  Args:
92
89
  profile_id: Unique profile identifier
93
-
90
+
94
91
  Returns:
95
92
  True if successful, False otherwise
96
93
  """
97
94
  try:
98
95
  credential_key = f"profile:{profile_id}"
99
-
96
+
100
97
  if self._use_keyring:
101
98
  try:
102
- self._keyring.delete_password(
103
- self.SERVICE_NAME,
104
- credential_key
105
- )
99
+ self._keyring.delete_password(self.SERVICE_NAME, credential_key)
106
100
  except self._keyring.errors.PasswordDeleteError:
107
101
  # Credential doesn't exist, that's okay
108
102
  pass
109
103
  else:
110
104
  # Fallback to in-memory
111
105
  self._memory_store.pop(credential_key, None)
112
-
106
+
113
107
  return True
114
108
  except Exception as e:
115
- print(f"Failed to delete credentials: {e}")
109
+ log_error("Failed to delete credentials: %s", e)
116
110
  return False
117
-
111
+
118
112
  def is_keyring_available(self) -> bool:
119
113
  """Check if system keyring is available."""
120
114
  return self._use_keyring
121
-
115
+
122
116
  def clear_all_credentials(self):
123
117
  """Clear all stored credentials. Use with caution!"""
124
118
  if not self._use_keyring:
@@ -126,5 +120,4 @@ class CredentialService:
126
120
  else:
127
121
  # For keyring, we'd need to track all profile IDs
128
122
  # This is typically not needed, but can be implemented if required
129
- print("Warning: clear_all_credentials not implemented for keyring backend")
130
-
123
+ log_info("Warning: clear_all_credentials not implemented for keyring backend")