vector-inspector 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. vector_inspector/__init__.py +3 -0
  2. vector_inspector/__main__.py +4 -0
  3. vector_inspector/core/__init__.py +1 -0
  4. vector_inspector/core/connections/__init__.py +7 -0
  5. vector_inspector/core/connections/base_connection.py +233 -0
  6. vector_inspector/core/connections/chroma_connection.py +384 -0
  7. vector_inspector/core/connections/qdrant_connection.py +723 -0
  8. vector_inspector/core/connections/template_connection.py +346 -0
  9. vector_inspector/main.py +21 -0
  10. vector_inspector/services/__init__.py +1 -0
  11. vector_inspector/services/backup_restore_service.py +286 -0
  12. vector_inspector/services/filter_service.py +72 -0
  13. vector_inspector/services/import_export_service.py +287 -0
  14. vector_inspector/services/settings_service.py +60 -0
  15. vector_inspector/services/visualization_service.py +116 -0
  16. vector_inspector/ui/__init__.py +1 -0
  17. vector_inspector/ui/components/__init__.py +1 -0
  18. vector_inspector/ui/components/backup_restore_dialog.py +350 -0
  19. vector_inspector/ui/components/filter_builder.py +370 -0
  20. vector_inspector/ui/components/item_dialog.py +118 -0
  21. vector_inspector/ui/components/loading_dialog.py +30 -0
  22. vector_inspector/ui/main_window.py +288 -0
  23. vector_inspector/ui/views/__init__.py +1 -0
  24. vector_inspector/ui/views/collection_browser.py +112 -0
  25. vector_inspector/ui/views/connection_view.py +423 -0
  26. vector_inspector/ui/views/metadata_view.py +555 -0
  27. vector_inspector/ui/views/search_view.py +268 -0
  28. vector_inspector/ui/views/visualization_view.py +245 -0
  29. vector_inspector-0.2.0.dist-info/METADATA +382 -0
  30. vector_inspector-0.2.0.dist-info/RECORD +32 -0
  31. vector_inspector-0.2.0.dist-info/WHEEL +4 -0
  32. vector_inspector-0.2.0.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,346 @@
1
+ """Template for implementing a new vector database connection.
2
+
3
+ Copy this file and replace 'Template' with your database name.
4
+ Implement all abstract methods according to your database's API.
5
+ """
6
+
7
+ from typing import Optional, List, Dict, Any
8
+ from .base_connection import VectorDBConnection
9
+
10
+
11
class TemplateConnection(VectorDBConnection):
    """Skeleton connection class for adding a new vector database provider.

    Rename the class after your provider (e.g., PineconeConnection,
    QdrantConnection) and replace every placeholder body with real calls
    to the provider's client library. Each method prints a diagnostic and
    returns a failure value instead of raising.
    """

    def __init__(self, **kwargs):
        """
        Prepare the connection object.

        Args:
            **kwargs: Provider-specific connection parameters
                (e.g., api_key, host, port, credentials, etc.)
        """
        # Keep whatever you need from kwargs as attributes here.
        # The client handle starts out unset.
        self._client = None

    def connect(self) -> bool:
        """
        Open the connection to the vector database.

        Returns:
            True if connection successful, False otherwise
        """
        try:
            # Create the provider client here, for example:
            # self._client = YourDatabaseClient(...)
            connected = True
        except Exception as err:
            print(f"Connection failed: {err}")
            connected = False
        return connected

    def disconnect(self):
        """Drop the client handle; add provider-specific cleanup here."""
        self._client = None

    @property
    def is_connected(self) -> bool:
        """
        Report whether a client handle is currently held.

        Returns:
            True if connected, False otherwise
        """
        has_client = self._client is not None
        return has_client

    def list_collections(self) -> List[str]:
        """
        List the names of all collections/indexes.

        Returns:
            List of collection/index names (empty when not connected or on error)
        """
        if self._client:
            try:
                # Ask the database for its collections, for example:
                # collections = self._client.list_collections()
                # return [col.name for col in collections]
                return []
            except Exception as err:
                print(f"Failed to list collections: {err}")
        return []

    def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
        """
        Describe a collection.

        Args:
            name: Collection/index name

        Returns:
            None when not connected or on error, otherwise a dictionary with:
                - name: Collection name
                - count: Number of items
                - metadata_fields: List of available metadata field names
        """
        if self._client:
            try:
                # Read the statistics from the database, for example:
                # collection = self._client.get_collection(name)
                # count = collection.count()
                # metadata_fields = collection.get_metadata_fields()
                info = {
                    "name": name,
                    "count": 0,  # Replace with actual count
                    "metadata_fields": [],  # Replace with actual fields
                }
                return info
            except Exception as err:
                print(f"Failed to get collection info: {err}")
        return None

    def query_collection(
        self,
        collection_name: str,
        query_texts: Optional[List[str]] = None,
        query_embeddings: Optional[List[List[float]]] = None,
        n_results: int = 10,
        where: Optional[Dict[str, Any]] = None,
        where_document: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Run a similarity search against a collection.

        Args:
            collection_name: Name of collection to query
            query_texts: Text queries to embed and search
            query_embeddings: Direct embedding vectors to search
            n_results: Number of results to return
            where: Metadata filter
            where_document: Document content filter

        Returns:
            None when not connected or on error, otherwise a dictionary with:
                - ids: List of result IDs
                - distances: List of distances/scores
                - documents: List of document texts
                - metadatas: List of metadata dicts
                - embeddings: List of embedding vectors (optional)
        """
        if self._client:
            try:
                # Run the search, for example:
                # results = self._client.query(
                #     collection=collection_name,
                #     query_embeddings=query_embeddings,
                #     n_results=n_results,
                #     filter=where
                # )

                # Then reshape the provider's response into this layout.
                hits = {
                    "ids": [],
                    "distances": [],
                    "documents": [],
                    "metadatas": [],
                    "embeddings": [],
                }
                return hits
            except Exception as err:
                print(f"Query failed: {err}")
        return None

    def get_all_items(
        self,
        collection_name: str,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where: Optional[Dict[str, Any]] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Fetch items from a collection, optionally paginated and filtered.

        Args:
            collection_name: Name of collection
            limit: Maximum number of items to return
            offset: Number of items to skip
            where: Metadata filter

        Returns:
            None when not connected or on error, otherwise a dictionary with:
                - ids: List of item IDs
                - documents: List of document texts
                - metadatas: List of metadata dicts
                - embeddings: List of embedding vectors
        """
        if self._client:
            try:
                # Page through the collection, for example:
                # results = self._client.fetch(
                #     collection=collection_name,
                #     limit=limit,
                #     offset=offset,
                #     filter=where
                # )
                items = {
                    "ids": [],
                    "documents": [],
                    "metadatas": [],
                    "embeddings": [],
                }
                return items
            except Exception as err:
                print(f"Failed to get items: {err}")
        return None

    def add_items(
        self,
        collection_name: str,
        documents: List[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        ids: Optional[List[str]] = None,
        embeddings: Optional[List[List[float]]] = None,
    ) -> bool:
        """
        Insert items into a collection.

        Args:
            collection_name: Name of collection
            documents: Document texts
            metadatas: Metadata for each document
            ids: IDs for each document
            embeddings: Pre-computed embeddings

        Returns:
            True if successful, False otherwise
        """
        if self._client:
            try:
                # Write the items, for example:
                # self._client.upsert(
                #     collection=collection_name,
                #     documents=documents,
                #     metadatas=metadatas,
                #     ids=ids,
                #     embeddings=embeddings
                # )
                return True
            except Exception as err:
                print(f"Failed to add items: {err}")
        return False

    def update_items(
        self,
        collection_name: str,
        ids: List[str],
        documents: Optional[List[str]] = None,
        metadatas: Optional[List[Dict[str, Any]]] = None,
        embeddings: Optional[List[List[float]]] = None,
    ) -> bool:
        """
        Modify existing items in a collection.

        Args:
            collection_name: Name of collection
            ids: IDs of items to update
            documents: New document texts
            metadatas: New metadata
            embeddings: New embeddings

        Returns:
            True if successful, False otherwise
        """
        if self._client:
            try:
                # Apply the changes, for example:
                # self._client.update(
                #     collection=collection_name,
                #     ids=ids,
                #     documents=documents,
                #     metadatas=metadatas,
                #     embeddings=embeddings
                # )
                return True
            except Exception as err:
                print(f"Failed to update items: {err}")
        return False

    def delete_items(
        self,
        collection_name: str,
        ids: Optional[List[str]] = None,
        where: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Remove items from a collection.

        Args:
            collection_name: Name of collection
            ids: IDs of items to delete
            where: Metadata filter for items to delete

        Returns:
            True if successful, False otherwise
        """
        if self._client:
            try:
                # Remove the items, for example:
                # self._client.delete(
                #     collection=collection_name,
                #     ids=ids,
                #     filter=where
                # )
                return True
            except Exception as err:
                print(f"Failed to delete items: {err}")
        return False

    def delete_collection(self, name: str) -> bool:
        """
        Remove a whole collection.

        Args:
            name: Collection name

        Returns:
            True if successful, False otherwise
        """
        if self._client:
            try:
                # Drop the collection, for example:
                # self._client.delete_collection(name)
                return True
            except Exception as err:
                print(f"Failed to delete collection: {err}")
        return False

    def get_connection_info(self) -> Dict[str, Any]:
        """
        Summarise the current connection.

        Returns:
            Dictionary with connection details
        """
        details = {
            "provider": "Template",  # Replace with your provider name
            "connected": self.is_connected,
            # Add provider-specific details here
        }
        return details
@@ -0,0 +1,21 @@
1
+ """Main entry point for Vector Inspector application."""
2
+
3
+ import sys
4
+ from PySide6.QtWidgets import QApplication
5
+ from vector_inspector.ui.main_window import MainWindow
6
+
7
+
8
def main():
    """Start the Qt application, show the main window, and exit with Qt's status code."""
    application = QApplication(sys.argv)
    application.setApplicationName("Vector Inspector")
    application.setOrganizationName("Vector Inspector")

    main_window = MainWindow()
    main_window.show()

    # exec() blocks in the Qt event loop; its return value becomes the
    # process exit status.
    exit_code = application.exec()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
@@ -0,0 +1 @@
1
+ """Services for embeddings, visualization, and data processing."""
@@ -0,0 +1,286 @@
1
+ """Service for backing up and restoring collections."""
2
+
3
+ import json
4
+ import zipfile
5
+ from typing import Dict, Any, Optional
6
+ from pathlib import Path
7
+ from datetime import datetime
8
+ import shutil
9
+
10
+
11
class BackupRestoreService:
    """Handles backup and restore operations for vector database collections.

    A backup is a zip archive with two JSON members: ``metadata.json``
    (collection name, timestamp, item count, collection info) and
    ``data.json`` (the dictionary returned by ``connection.get_all_items``).
    Every method reports failure through its return value and prints a
    diagnostic instead of raising.
    """

    # Characters that cannot safely appear in a file name on every platform.
    _UNSAFE_FILENAME_CHARS = '/\\:*?"<>|'

    @staticmethod
    def _json_default(value):
        """JSON fallback: serialize array-like objects (e.g. numpy arrays or scalars) via ``tolist()``."""
        to_list = getattr(value, "tolist", None)
        if callable(to_list):
            return to_list()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    @staticmethod
    def _parse_int_id(orig_id):
        """Return the non-negative integer encoded in ``orig_id``, or None if there is none.

        Strings containing an underscore (e.g. ``"doc_123"``) are parsed from
        their last underscore-separated segment; anything else is passed to
        ``int()`` directly.
        """
        try:
            if isinstance(orig_id, str) and "_" in orig_id:
                parsed = int(orig_id.split("_")[-1])
            else:
                parsed = int(orig_id)
        except (ValueError, TypeError, AttributeError, OverflowError):
            return None
        # Negative values are rejected so they get a generated ID instead.
        return parsed if parsed >= 0 else None

    @staticmethod
    def _to_qdrant_ids(original_ids) -> list:
        """Map arbitrary IDs to unique non-negative integers, one per input ID.

        IDs that parse to an unused integer keep that integer. IDs that cannot
        be parsed, or whose parsed value is already taken, fall back to their
        list position, or to the smallest unused integer when the position is
        taken as well. Uniqueness matters because two items sharing an ID
        would overwrite each other on insert.
        """
        assigned = []
        used = set()
        pending = []  # positions that still need a generated ID

        # First pass: parsed IDs take priority over generated ones.
        for position, orig_id in enumerate(original_ids):
            candidate = BackupRestoreService._parse_int_id(orig_id)
            if candidate is None or candidate in used:
                assigned.append(None)
                pending.append(position)
            else:
                used.add(candidate)
                assigned.append(candidate)

        # Second pass: hand out fallback IDs without colliding with any other.
        next_free = 0
        for position in pending:
            candidate = position
            if candidate in used:
                while next_free in used:
                    next_free += 1
                candidate = next_free
            used.add(candidate)
            assigned[position] = candidate

        return assigned

    @staticmethod
    def backup_collection(
        connection,
        collection_name: str,
        backup_dir: str,
        include_embeddings: bool = True
    ) -> Optional[str]:
        """
        Backup a collection to a zip archive inside a directory.

        The dictionary returned by the connection is never modified; the
        archive is built from a shallow copy.

        Args:
            connection: Vector database connection
            collection_name: Name of collection to backup
            backup_dir: Directory to store backups (created if missing)
            include_embeddings: Whether to include embedding vectors

        Returns:
            Path to backup file, or None if the collection has no info,
            has no items, or the backup failed
        """
        backup_path = None
        try:
            # Create backup directory if it doesn't exist
            Path(backup_dir).mkdir(parents=True, exist_ok=True)

            collection_info = connection.get_collection_info(collection_name)
            if not collection_info:
                print(f"Failed to get collection info for {collection_name}")
                return None

            fetched = connection.get_all_items(collection_name)
            if not fetched or not fetched.get("ids"):
                print(f"No data to backup from collection {collection_name}")
                return None

            # Work on a copy so the caller's data keeps its embeddings.
            all_data = dict(fetched)
            if not include_embeddings:
                all_data.pop("embeddings", None)

            # One clock reading keeps the metadata and file name in agreement.
            created_at = datetime.now()
            backup_metadata = {
                "collection_name": collection_name,
                "backup_timestamp": created_at.isoformat(),
                "item_count": len(all_data["ids"]),
                "collection_info": collection_info,
                "include_embeddings": include_embeddings
            }

            # Serialize before touching the disk so a serialization error
            # cannot leave a half-written archive behind.
            metadata_json = json.dumps(
                backup_metadata, indent=2, default=BackupRestoreService._json_default
            )
            data_json = json.dumps(
                all_data, indent=2, default=BackupRestoreService._json_default
            )

            # Path separators and similar characters in the collection name
            # must not leak into the file name.
            unsafe = BackupRestoreService._UNSAFE_FILENAME_CHARS
            safe_name = "".join("_" if ch in unsafe else ch for ch in collection_name)
            timestamp = created_at.strftime("%Y%m%d_%H%M%S")
            backup_filename = f"{safe_name}_backup_{timestamp}.zip"
            backup_path = Path(backup_dir) / backup_filename

            with zipfile.ZipFile(backup_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                zipf.writestr('metadata.json', metadata_json)
                zipf.writestr('data.json', data_json)

            print(f"Backup created: {backup_path}")
            return str(backup_path)

        except Exception as e:
            print(f"Backup failed: {e}")
            # Remove a partially written archive so it is not mistaken for a backup.
            if backup_path is not None:
                try:
                    if backup_path.exists():
                        backup_path.unlink()
                except OSError:
                    pass
            return None

    @staticmethod
    def restore_collection(
        connection,
        backup_file: str,
        collection_name: Optional[str] = None,
        overwrite: bool = False
    ) -> bool:
        """
        Restore a collection from a backup file.

        An existing collection is only deleted after every check that can
        fail beforehand (backup readable, embeddings available, vector size
        known) has passed.

        Args:
            connection: Vector database connection
            backup_file: Path to backup zip file
            collection_name: Optional new name for restored collection
            overwrite: Whether to overwrite existing collection

        Returns:
            True if successful, False otherwise
        """
        try:
            with zipfile.ZipFile(backup_file, 'r') as zipf:
                metadata = json.loads(zipf.read('metadata.json').decode('utf-8'))
                data = json.loads(zipf.read('data.json').decode('utf-8'))

            restore_collection_name = collection_name or metadata["collection_name"]

            collection_exists = restore_collection_name in connection.list_collections()
            if collection_exists and not overwrite:
                print(f"Collection {restore_collection_name} already exists. Use overwrite=True to replace it.")
                return False

            # Qdrant needs the collection created up front. If the Qdrant
            # connection module cannot be imported, the connection cannot be
            # an instance of it.
            try:
                from vector_inspector.core.connections.qdrant_connection import QdrantConnection
                is_qdrant = isinstance(connection, QdrantConnection)
            except ImportError:
                is_qdrant = False

            collection_info = metadata.get("collection_info") or {}
            vector_size = None

            if is_qdrant:
                backup_has_embeddings = bool(data.get("embeddings"))
                if not backup_has_embeddings:
                    print("Embeddings missing in backup. Generating embeddings...")
                    try:
                        from sentence_transformers import SentenceTransformer
                        model = SentenceTransformer("all-MiniLM-L6-v2")
                        documents = data.get("documents", [])
                        data["embeddings"] = model.encode(documents, show_progress_bar=True).tolist()
                    except Exception as e:
                        print(f"Failed to generate embeddings: {e}")
                        return False

                embeddings = data.get("embeddings")
                declared_size = collection_info.get("vector_size")
                # Freshly generated embeddings dictate the size, because the
                # size recorded in the backup may belong to another model.
                if declared_size and (backup_has_embeddings or not embeddings):
                    vector_size = declared_size
                elif embeddings:
                    vector_size = len(embeddings[0])

                if not vector_size:
                    print("Cannot determine vector size for Qdrant collection")
                    return False

            # Destructive step, deliberately after all validation above.
            if collection_exists:
                connection.delete_collection(restore_collection_name)

            if is_qdrant:
                distance = collection_info.get("distance", "Cosine")
                if not connection.create_collection(restore_collection_name, vector_size, distance):
                    print(f"Failed to create collection {restore_collection_name}")
                    return False

                # Convert IDs to unique integers and keep each original ID
                # in the item's metadata.
                original_ids = data.get("ids") or []
                metadatas = list(data.get("metadatas") or [])
                missing = len(original_ids) - len(metadatas)
                metadatas.extend({} for _ in range(missing))

                for position, orig_id in enumerate(original_ids):
                    if metadatas[position] is None:
                        metadatas[position] = {}
                    metadatas[position]["original_id"] = orig_id

                data["ids"] = BackupRestoreService._to_qdrant_ids(original_ids)
                data["metadatas"] = metadatas

            success = connection.add_items(
                restore_collection_name,
                documents=data.get("documents", []),
                metadatas=data.get("metadatas"),
                ids=data.get("ids"),
                embeddings=data.get("embeddings")
            )

            if success:
                print(f"Collection '{restore_collection_name}' restored from backup")
                print(f"Restored {len(data.get('ids', []))} items")
                return True
            else:
                print("Failed to restore collection")
                return False

        except Exception as e:
            print(f"Restore failed: {e}")
            import traceback
            traceback.print_exc()
            return False

    @staticmethod
    def list_backups(backup_dir: str) -> list:
        """
        List all backup files in a directory.

        Only files matching ``*_backup_*.zip`` with a readable
        ``metadata.json`` are reported; anything else is skipped.

        Args:
            backup_dir: Directory containing backups

        Returns:
            List of backup file information dictionaries, newest first
        """
        try:
            backup_path = Path(backup_dir)
            if not backup_path.exists():
                return []

            backups = []
            for backup_file in backup_path.glob("*_backup_*.zip"):
                try:
                    with zipfile.ZipFile(backup_file, 'r') as zipf:
                        metadata = json.loads(zipf.read('metadata.json').decode('utf-8'))

                    backups.append({
                        "file_path": str(backup_file),
                        "file_name": backup_file.name,
                        "collection_name": metadata.get("collection_name", "Unknown"),
                        "timestamp": metadata.get("backup_timestamp", "Unknown"),
                        "item_count": metadata.get("item_count", 0),
                        "file_size": backup_file.stat().st_size
                    })
                except Exception:
                    # Skip invalid backup files
                    continue

            # Compare as text so one non-string timestamp cannot break the
            # sort and hide every backup.
            backups.sort(key=lambda entry: str(entry["timestamp"]), reverse=True)
            return backups

        except Exception as e:
            print(f"Failed to list backups: {e}")
            return []

    @staticmethod
    def delete_backup(backup_file: str) -> bool:
        """
        Delete a backup file.

        Args:
            backup_file: Path to backup file to delete

        Returns:
            True if successful, False otherwise
        """
        try:
            Path(backup_file).unlink()
            return True
        except Exception as e:
            print(f"Failed to delete backup: {e}")
            return False