vector_inspector-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/__init__.py +3 -0
- vector_inspector/__main__.py +4 -0
- vector_inspector/core/__init__.py +1 -0
- vector_inspector/core/connections/__init__.py +7 -0
- vector_inspector/core/connections/base_connection.py +233 -0
- vector_inspector/core/connections/chroma_connection.py +384 -0
- vector_inspector/core/connections/qdrant_connection.py +723 -0
- vector_inspector/core/connections/template_connection.py +346 -0
- vector_inspector/main.py +21 -0
- vector_inspector/services/__init__.py +1 -0
- vector_inspector/services/backup_restore_service.py +286 -0
- vector_inspector/services/filter_service.py +72 -0
- vector_inspector/services/import_export_service.py +287 -0
- vector_inspector/services/settings_service.py +60 -0
- vector_inspector/services/visualization_service.py +116 -0
- vector_inspector/ui/__init__.py +1 -0
- vector_inspector/ui/components/__init__.py +1 -0
- vector_inspector/ui/components/backup_restore_dialog.py +350 -0
- vector_inspector/ui/components/filter_builder.py +370 -0
- vector_inspector/ui/components/item_dialog.py +118 -0
- vector_inspector/ui/components/loading_dialog.py +30 -0
- vector_inspector/ui/main_window.py +288 -0
- vector_inspector/ui/views/__init__.py +1 -0
- vector_inspector/ui/views/collection_browser.py +112 -0
- vector_inspector/ui/views/connection_view.py +423 -0
- vector_inspector/ui/views/metadata_view.py +555 -0
- vector_inspector/ui/views/search_view.py +268 -0
- vector_inspector/ui/views/visualization_view.py +245 -0
- vector_inspector-0.2.0.dist-info/METADATA +382 -0
- vector_inspector-0.2.0.dist-info/RECORD +32 -0
- vector_inspector-0.2.0.dist-info/WHEEL +4 -0
- vector_inspector-0.2.0.dist-info/entry_points.txt +5 -0

vector_inspector/core/connections/template_connection.py
ADDED
@@ -0,0 +1,346 @@
"""Template for implementing a new vector database connection.

Copy this file and replace 'Template' with your database name.
Implement all abstract methods according to your database's API.
"""

from typing import Optional, List, Dict, Any
from .base_connection import VectorDBConnection


class TemplateConnection(VectorDBConnection):
    """Template vector database connection.

    Replace this with your database provider name (e.g., PineconeConnection, QdrantConnection).
    """

    def __init__(self, **kwargs):
        """
        Initialize connection parameters.

        Args:
            **kwargs: Provider-specific connection parameters
                (e.g., api_key, host, port, credentials, etc.)
        """
        # Store your connection parameters here
        self._client = None
        # Add your provider-specific attributes

    def connect(self) -> bool:
        """
        Establish connection to the vector database.

        Returns:
            True if connection successful, False otherwise
        """
        try:
            # Initialize your database client here
            # self._client = YourDatabaseClient(...)
            return True
        except Exception as e:
            print(f"Connection failed: {e}")
            return False

    def disconnect(self):
        """Close connection to the vector database."""
        # Clean up your connection
        self._client = None

    @property
    def is_connected(self) -> bool:
        """
        Check if connected to the vector database.

        Returns:
            True if connected, False otherwise
        """
        # Return whether the client is active
        return self._client is not None

    def list_collections(self) -> List[str]:
        """
        Get list of all collections/indexes.

        Returns:
            List of collection/index names
        """
        if not self._client:
            return []
        try:
            # Call your database API to list collections
            # collections = self._client.list_collections()
            # return [col.name for col in collections]
            return []
        except Exception as e:
            print(f"Failed to list collections: {e}")
            return []

    def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
        """
        Get collection metadata and statistics.

        Args:
            name: Collection/index name

        Returns:
            Dictionary with collection info:
            - name: Collection name
            - count: Number of items
            - metadata_fields: List of available metadata field names
        """
        if not self._client:
            return None

        try:
            # Get collection stats from your database
            # collection = self._client.get_collection(name)
            # count = collection.count()
            # metadata_fields = collection.get_metadata_fields()

            return {
                "name": name,
                "count": 0,  # Replace with actual count
                "metadata_fields": [],  # Replace with actual fields
            }
        except Exception as e:
            print(f"Failed to get collection info: {e}")
            return None

    def query_collection(
        self,
        collection_name: str,
        query_texts: Optional[List[str]] = None,
        query_embeddings: Optional[List[List[float]]] = None,
        n_results: int = 10,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Query a collection for similar vectors.

        Args:
            collection_name: Name of collection to query
            query_texts: Text queries to embed and search
            query_embeddings: Direct embedding vectors to search
            n_results: Number of results to return
            where: Metadata filter
            where_document: Document content filter

        Returns:
            Query results dictionary with keys:
            - ids: List of result IDs
            - distances: List of distances/scores
            - documents: List of document texts
            - metadatas: List of metadata dicts
            - embeddings: List of embedding vectors (optional)
        """
        if not self._client:
            return None

        try:
            # Perform similarity search
            # results = self._client.query(
            #     collection=collection_name,
            #     query_embeddings=query_embeddings,
            #     n_results=n_results,
            #     filter=where
            # )

            # Transform results to standard format
            return {
                "ids": [],
                "distances": [],
                "documents": [],
                "metadatas": [],
                "embeddings": []
            }
        except Exception as e:
            print(f"Query failed: {e}")
            return None

    def get_all_items(
        self,
        collection_name: str,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Get all items from a collection.

        Args:
            collection_name: Name of collection
            limit: Maximum number of items to return
            offset: Number of items to skip
            where: Metadata filter

        Returns:
            Dictionary with collection items:
            - ids: List of item IDs
            - documents: List of document texts
            - metadatas: List of metadata dicts
            - embeddings: List of embedding vectors
        """
        if not self._client:
            return None

        try:
            # Fetch items from collection with pagination
            # results = self._client.fetch(
            #     collection=collection_name,
            #     limit=limit,
            #     offset=offset,
            #     filter=where
            # )

            return {
                "ids": [],
                "documents": [],
                "metadatas": [],
                "embeddings": []
            }
        except Exception as e:
            print(f"Failed to get items: {e}")
            return None

    def add_items(
        self,
        collection_name: str,
        documents: List[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None,
        embeddings: Optional[List[List[float]]] = None,
    ) -> bool:
        """
        Add items to a collection.

        Args:
            collection_name: Name of collection
            documents: Document texts
            metadatas: Metadata for each document
            ids: IDs for each document
            embeddings: Pre-computed embeddings

        Returns:
            True if successful, False otherwise
        """
        if not self._client:
            return False

        try:
            # Add items to the collection
            # self._client.upsert(
            #     collection=collection_name,
            #     documents=documents,
            #     metadatas=metadatas,
            #     ids=ids,
            #     embeddings=embeddings
            # )
            return True
        except Exception as e:
            print(f"Failed to add items: {e}")
            return False

    def update_items(
        self,
        collection_name: str,
        ids: List[str],
        documents: Optional[List[str]] = None,
        metadatas: Optional[List[Dict[str, Any]]] = None,
        embeddings: Optional[List[List[float]]] = None,
    ) -> bool:
        """
        Update items in a collection.

        Args:
            collection_name: Name of collection
            ids: IDs of items to update
            documents: New document texts
            metadatas: New metadata
            embeddings: New embeddings

        Returns:
            True if successful, False otherwise
        """
        if not self._client:
            return False

        try:
            # Update existing items
            # self._client.update(
            #     collection=collection_name,
            #     ids=ids,
            #     documents=documents,
            #     metadatas=metadatas,
            #     embeddings=embeddings
            # )
            return True
        except Exception as e:
            print(f"Failed to update items: {e}")
            return False

    def delete_items(
        self,
        collection_name: str,
        ids: Optional[List[str]] = None,
        where: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Delete items from a collection.

        Args:
            collection_name: Name of collection
            ids: IDs of items to delete
            where: Metadata filter for items to delete

        Returns:
            True if successful, False otherwise
        """
        if not self._client:
            return False

        try:
            # Delete items
            # self._client.delete(
            #     collection=collection_name,
            #     ids=ids,
            #     filter=where
            # )
            return True
        except Exception as e:
            print(f"Failed to delete items: {e}")
            return False

    def delete_collection(self, name: str) -> bool:
        """
        Delete an entire collection.

        Args:
            name: Collection name

        Returns:
            True if successful, False otherwise
        """
        if not self._client:
            return False

        try:
            # Delete the collection
            # self._client.delete_collection(name)
            return True
        except Exception as e:
            print(f"Failed to delete collection: {e}")
            return False

    def get_connection_info(self) -> Dict[str, Any]:
        """
        Get information about the current connection.

        Returns:
            Dictionary with connection details
        """
        return {
            "provider": "Template",  # Replace with your provider name
            "connected": self.is_connected,
            # Add provider-specific details here
        }
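
Every backend in this package (ChromaConnection, QdrantConnection, or a copy of this template) shares the VectorDBConnection interface above, so the UI views and services can drive any of them interchangeably. The snippet below is an illustrative sketch, not part of the wheel: it instantiates the stub above and walks the interface exactly as documented; a real provider class would be constructed with its provider-specific kwargs instead.

# Illustrative only (not shipped in the wheel): exercising the shared connection API
# against the TemplateConnection stub defined above.
from vector_inspector.core.connections.template_connection import TemplateConnection

conn = TemplateConnection()
if conn.connect():
    for name in conn.list_collections():
        info = conn.get_collection_info(name)
        print(name, info["count"] if info else "unknown")

    # Similarity search; the stub returns the documented (empty) result shape.
    results = conn.query_collection(
        "my_collection",
        query_texts=["example query"],
        n_results=5,
    )
    if results:
        print(results["ids"], results["distances"])

    conn.disconnect()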
vector_inspector/main.py
ADDED
@@ -0,0 +1,21 @@
"""Main entry point for Vector Inspector application."""

import sys
from PySide6.QtWidgets import QApplication
from vector_inspector.ui.main_window import MainWindow


def main():
    """Launch the Vector Inspector application."""
    app = QApplication(sys.argv)
    app.setApplicationName("Vector Inspector")
    app.setOrganizationName("Vector Inspector")

    window = MainWindow()
    window.show()

    sys.exit(app.exec())


if __name__ == "__main__":
    main()
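
Because main() constructs the QApplication itself, the application can be started programmatically once the wheel is installed; the wheel also ships a 4-line vector_inspector/__main__.py (listed above but not expanded here), so `python -m vector_inspector` presumably works as well. A minimal launch sketch, assuming the package and PySide6 are installed:

# Minimal, illustrative launch (assumes the wheel and PySide6 are installed).
# main() builds the QApplication, shows MainWindow, and blocks until the app exits.
from vector_inspector.main import main

main()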

vector_inspector/services/__init__.py
ADDED
@@ -0,0 +1 @@
"""Services for embeddings, visualization, and data processing."""

vector_inspector/services/backup_restore_service.py
ADDED
@@ -0,0 +1,286 @@
"""Service for backing up and restoring collections."""

import json
import zipfile
from typing import Dict, Any, Optional
from pathlib import Path
from datetime import datetime
import shutil


class BackupRestoreService:
    """Handles backup and restore operations for vector database collections."""

    @staticmethod
    def backup_collection(
        connection,
        collection_name: str,
        backup_dir: str,
        include_embeddings: bool = True
    ) -> Optional[str]:
        """
        Backup a collection to a directory.

        Args:
            connection: Vector database connection
            collection_name: Name of collection to backup
            backup_dir: Directory to store backups
            include_embeddings: Whether to include embedding vectors

        Returns:
            Path to backup file or None if failed
        """
        try:
            # Create backup directory if it doesn't exist
            Path(backup_dir).mkdir(parents=True, exist_ok=True)

            # Get collection info
            collection_info = connection.get_collection_info(collection_name)
            if not collection_info:
                print(f"Failed to get collection info for {collection_name}")
                return None

            # Get all items from collection
            all_data = connection.get_all_items(collection_name)
            if not all_data or not all_data.get("ids"):
                print(f"No data to backup from collection {collection_name}")
                return None

            # Convert numpy arrays to lists for JSON serialization
            if "embeddings" in all_data:
                try:
                    import numpy as np
                    if isinstance(all_data["embeddings"], np.ndarray):
                        all_data["embeddings"] = all_data["embeddings"].tolist()
                    elif isinstance(all_data["embeddings"], list):
                        # Convert any numpy arrays in the list
                        all_data["embeddings"] = [
                            emb.tolist() if isinstance(emb, np.ndarray) else emb
                            for emb in all_data["embeddings"]
                        ]
                except ImportError:
                    pass  # numpy not available, assume already lists

            # Remove embeddings if not needed (to save space)
            if not include_embeddings and "embeddings" in all_data:
                del all_data["embeddings"]

            # Create backup metadata
            backup_metadata = {
                "collection_name": collection_name,
                "backup_timestamp": datetime.now().isoformat(),
                "item_count": len(all_data["ids"]),
                "collection_info": collection_info,
                "include_embeddings": include_embeddings
            }

            # Create backup filename with timestamp
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_filename = f"{collection_name}_backup_{timestamp}.zip"
            backup_path = Path(backup_dir) / backup_filename

            # Create zip file with data and metadata
            with zipfile.ZipFile(backup_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                # Write metadata
                zipf.writestr('metadata.json', json.dumps(backup_metadata, indent=2))

                # Write collection data
                zipf.writestr('data.json', json.dumps(all_data, indent=2))

            print(f"Backup created: {backup_path}")
            return str(backup_path)

        except Exception as e:
            print(f"Backup failed: {e}")
            return None

    @staticmethod
    def restore_collection(
        connection,
        backup_file: str,
        collection_name: Optional[str] = None,
        overwrite: bool = False
    ) -> bool:
        """
        Restore a collection from a backup file.

        Args:
            connection: Vector database connection
            backup_file: Path to backup zip file
            collection_name: Optional new name for restored collection
            overwrite: Whether to overwrite existing collection

        Returns:
            True if successful, False otherwise
        """
        try:
            # Extract backup
            with zipfile.ZipFile(backup_file, 'r') as zipf:
                # Read metadata
                metadata_str = zipf.read('metadata.json').decode('utf-8')
                metadata = json.loads(metadata_str)

                # Read data
                data_str = zipf.read('data.json').decode('utf-8')
                data = json.loads(data_str)

            # Determine collection name
            restore_collection_name = collection_name or metadata["collection_name"]

            # Check if collection exists
            existing_collections = connection.list_collections()
            if restore_collection_name in existing_collections:
                if not overwrite:
                    print(f"Collection {restore_collection_name} already exists. Use overwrite=True to replace it.")
                    return False
                else:
                    # Delete existing collection
                    connection.delete_collection(restore_collection_name)

            # Check if this is Qdrant - need to create collection first
            from vector_inspector.core.connections.qdrant_connection import QdrantConnection
            if isinstance(connection, QdrantConnection):
                # Get vector size from collection info or embeddings
                vector_size = None
                if metadata.get("collection_info") and "vector_size" in metadata["collection_info"]:
                    vector_size = metadata["collection_info"]["vector_size"]
                elif data.get("embeddings") and len(data["embeddings"]) > 0:
                    vector_size = len(data["embeddings"][0])

                if not vector_size:
                    print("Cannot determine vector size for Qdrant collection")
                    return False

                # Create collection
                distance = metadata.get("collection_info", {}).get("distance", "Cosine")
                if not connection.create_collection(restore_collection_name, vector_size, distance):
                    print(f"Failed to create collection {restore_collection_name}")
                    return False

                # Check if embeddings are missing and need to be generated
                if not data.get("embeddings"):
                    print("Embeddings missing in backup. Generating embeddings...")
                    try:
                        from sentence_transformers import SentenceTransformer
                        model = SentenceTransformer("all-MiniLM-L6-v2")
                        documents = data.get("documents", [])
                        data["embeddings"] = model.encode(documents, show_progress_bar=True).tolist()
                    except Exception as e:
                        print(f"Failed to generate embeddings: {e}")
                        return False

                # Convert IDs to Qdrant-compatible format (integers or UUIDs)
                # Store original IDs in metadata
                original_ids = data.get("ids", [])
                qdrant_ids = []
                metadatas = data.get("metadatas", [])

                for i, orig_id in enumerate(original_ids):
                    # Try to convert to integer, otherwise use index
                    try:
                        # If it's like "doc_123", extract the number
                        if isinstance(orig_id, str) and "_" in orig_id:
                            qdrant_id = int(orig_id.split("_")[-1])
                        else:
                            qdrant_id = int(orig_id)
                    except (ValueError, AttributeError):
                        # Use index as ID if can't convert
                        qdrant_id = i

                    qdrant_ids.append(qdrant_id)

                    # Store original ID in metadata
                    if i < len(metadatas):
                        if metadatas[i] is None:
                            metadatas[i] = {}
                        metadatas[i]["original_id"] = orig_id
                    else:
                        metadatas.append({"original_id": orig_id})

                data["ids"] = qdrant_ids
                data["metadatas"] = metadatas

            # Add items to collection
            success = connection.add_items(
                restore_collection_name,
                documents=data.get("documents", []),
                metadatas=data.get("metadatas"),
                ids=data.get("ids"),
                embeddings=data.get("embeddings")
            )

            if success:
                print(f"Collection '{restore_collection_name}' restored from backup")
                print(f"Restored {len(data.get('ids', []))} items")
                return True
            else:
                print("Failed to restore collection")
                return False

        except Exception as e:
            print(f"Restore failed: {e}")
            import traceback
            traceback.print_exc()
            return False

    @staticmethod
    def list_backups(backup_dir: str) -> list:
        """
        List all backup files in a directory.

        Args:
            backup_dir: Directory containing backups

        Returns:
            List of backup file information dictionaries
        """
        try:
            backup_path = Path(backup_dir)
            if not backup_path.exists():
                return []

            backups = []
            for backup_file in backup_path.glob("*_backup_*.zip"):
                try:
                    # Read metadata from backup
                    with zipfile.ZipFile(backup_file, 'r') as zipf:
                        metadata_str = zipf.read('metadata.json').decode('utf-8')
                        metadata = json.loads(metadata_str)

                    backups.append({
                        "file_path": str(backup_file),
                        "file_name": backup_file.name,
                        "collection_name": metadata.get("collection_name", "Unknown"),
                        "timestamp": metadata.get("backup_timestamp", "Unknown"),
                        "item_count": metadata.get("item_count", 0),
                        "file_size": backup_file.stat().st_size
                    })
                except Exception:
                    # Skip invalid backup files
                    continue

            # Sort by timestamp (newest first)
            backups.sort(key=lambda x: x["timestamp"], reverse=True)
            return backups

        except Exception as e:
            print(f"Failed to list backups: {e}")
            return []

    @staticmethod
    def delete_backup(backup_file: str) -> bool:
        """
        Delete a backup file.

        Args:
            backup_file: Path to backup file to delete

        Returns:
            True if successful, False otherwise
        """
        try:
            Path(backup_file).unlink()
            return True
        except Exception as e:
            print(f"Failed to delete backup: {e}")
            return False
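
A usage sketch for the service (illustrative, not shipped in the wheel): every call below matches the signatures shown above, while `conn`, the collection name, and the backup directory are placeholders.

# Round-trip demo: back up a collection, list available backups, then restore it
# under a new name. `conn` stands in for any connected VectorDBConnection
# (e.g. a ChromaConnection or QdrantConnection instance).
from vector_inspector.services.backup_restore_service import BackupRestoreService


def backup_and_restore(conn) -> None:
    backup_path = BackupRestoreService.backup_collection(
        conn, "documents", backup_dir="./backups", include_embeddings=True
    )

    for info in BackupRestoreService.list_backups("./backups"):
        print(info["file_name"], info["collection_name"], info["item_count"])

    if backup_path:
        BackupRestoreService.restore_collection(
            conn, backup_path, collection_name="documents_restored", overwrite=True
        )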