vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/core/connection_manager.py +55 -49
- vector_inspector/core/connections/base_connection.py +41 -41
- vector_inspector/core/connections/chroma_connection.py +110 -86
- vector_inspector/core/connections/pinecone_connection.py +168 -182
- vector_inspector/core/connections/qdrant_connection.py +109 -126
- vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
- vector_inspector/core/connections/template_connection.py +55 -65
- vector_inspector/core/embedding_utils.py +32 -32
- vector_inspector/core/logging.py +27 -0
- vector_inspector/core/model_registry.py +4 -3
- vector_inspector/main.py +6 -2
- vector_inspector/services/backup_helpers.py +63 -0
- vector_inspector/services/backup_restore_service.py +73 -152
- vector_inspector/services/credential_service.py +33 -40
- vector_inspector/services/import_export_service.py +70 -67
- vector_inspector/services/profile_service.py +92 -94
- vector_inspector/services/settings_service.py +68 -48
- vector_inspector/services/visualization_service.py +40 -39
- vector_inspector/ui/components/splash_window.py +57 -0
- vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
- vector_inspector/ui/main_window.py +200 -146
- vector_inspector/ui/views/info_panel.py +208 -127
- vector_inspector/ui/views/metadata_view.py +8 -7
- vector_inspector/ui/views/search_view.py +97 -75
- vector_inspector/ui/views/visualization_view.py +140 -97
- vector_inspector/utils/version.py +5 -0
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0
|
@@ -6,18 +6,19 @@ Implement all abstract methods according to your database's API.
|
|
|
6
6
|
|
|
7
7
|
from typing import Optional, List, Dict, Any
|
|
8
8
|
from .base_connection import VectorDBConnection
|
|
9
|
+
from vector_inspector.core.logging import log_error
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class TemplateConnection(VectorDBConnection):
|
|
12
13
|
"""Template vector database connection.
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
Replace this with your database provider name (e.g., PineconeConnection, QdrantConnection).
|
|
15
16
|
"""
|
|
16
|
-
|
|
17
|
+
|
|
17
18
|
def __init__(self, **kwargs):
|
|
18
19
|
"""
|
|
19
20
|
Initialize connection parameters.
|
|
20
|
-
|
|
21
|
+
|
|
21
22
|
Args:
|
|
22
23
|
**kwargs: Provider-specific connection parameters
|
|
23
24
|
(e.g., api_key, host, port, credentials, etc.)
|
|
@@ -25,11 +26,11 @@ class TemplateConnection(VectorDBConnection):
|
|
|
25
26
|
# Store your connection parameters here
|
|
26
27
|
self._client = None
|
|
27
28
|
# Add your provider-specific attributes
|
|
28
|
-
|
|
29
|
+
|
|
29
30
|
def connect(self) -> bool:
|
|
30
31
|
"""
|
|
31
32
|
Establish connection to the vector database.
|
|
32
|
-
|
|
33
|
+
|
|
33
34
|
Returns:
|
|
34
35
|
True if connection successful, False otherwise
|
|
35
36
|
"""
|
|
@@ -38,29 +39,29 @@ class TemplateConnection(VectorDBConnection):
|
|
|
38
39
|
# self._client = YourDatabaseClient(...)
|
|
39
40
|
return True
|
|
40
41
|
except Exception as e:
|
|
41
|
-
|
|
42
|
+
log_error("Connection failed: %s", e)
|
|
42
43
|
return False
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
def disconnect(self):
|
|
45
46
|
"""Close connection to the vector database."""
|
|
46
47
|
# Clean up your connection
|
|
47
48
|
self._client = None
|
|
48
|
-
|
|
49
|
+
|
|
49
50
|
@property
|
|
50
51
|
def is_connected(self) -> bool:
|
|
51
52
|
"""
|
|
52
53
|
Check if connected to the vector database.
|
|
53
|
-
|
|
54
|
+
|
|
54
55
|
Returns:
|
|
55
56
|
True if connected, False otherwise
|
|
56
57
|
"""
|
|
57
58
|
# Return whether the client is active
|
|
58
59
|
return self._client is not None
|
|
59
|
-
|
|
60
|
+
|
|
60
61
|
def list_collections(self) -> List[str]:
|
|
61
62
|
"""
|
|
62
63
|
Get list of all collections/indexes.
|
|
63
|
-
|
|
64
|
+
|
|
64
65
|
Returns:
|
|
65
66
|
List of collection/index names
|
|
66
67
|
"""
|
|
@@ -72,16 +73,16 @@ class TemplateConnection(VectorDBConnection):
|
|
|
72
73
|
# return [col.name for col in collections]
|
|
73
74
|
return []
|
|
74
75
|
except Exception as e:
|
|
75
|
-
|
|
76
|
+
log_error("Failed to list collections: %s", e)
|
|
76
77
|
return []
|
|
77
|
-
|
|
78
|
+
|
|
78
79
|
def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
|
|
79
80
|
"""
|
|
80
81
|
Get collection metadata and statistics.
|
|
81
|
-
|
|
82
|
+
|
|
82
83
|
Args:
|
|
83
84
|
name: Collection/index name
|
|
84
|
-
|
|
85
|
+
|
|
85
86
|
Returns:
|
|
86
87
|
Dictionary with collection info:
|
|
87
88
|
- name: Collection name
|
|
@@ -90,22 +91,22 @@ class TemplateConnection(VectorDBConnection):
|
|
|
90
91
|
"""
|
|
91
92
|
if not self._client:
|
|
92
93
|
return None
|
|
93
|
-
|
|
94
|
+
|
|
94
95
|
try:
|
|
95
96
|
# Get collection stats from your database
|
|
96
97
|
# collection = self._client.get_collection(name)
|
|
97
98
|
# count = collection.count()
|
|
98
99
|
# metadata_fields = collection.get_metadata_fields()
|
|
99
|
-
|
|
100
|
+
|
|
100
101
|
return {
|
|
101
102
|
"name": name,
|
|
102
103
|
"count": 0, # Replace with actual count
|
|
103
104
|
"metadata_fields": [], # Replace with actual fields
|
|
104
105
|
}
|
|
105
106
|
except Exception as e:
|
|
106
|
-
|
|
107
|
+
log_error("Failed to get collection info: %s", e)
|
|
107
108
|
return None
|
|
108
|
-
|
|
109
|
+
|
|
109
110
|
def query_collection(
|
|
110
111
|
self,
|
|
111
112
|
collection_name: str,
|
|
@@ -117,7 +118,7 @@ class TemplateConnection(VectorDBConnection):
|
|
|
117
118
|
) -> Optional[Dict[str, Any]]:
|
|
118
119
|
"""
|
|
119
120
|
Query a collection for similar vectors.
|
|
120
|
-
|
|
121
|
+
|
|
121
122
|
Args:
|
|
122
123
|
collection_name: Name of collection to query
|
|
123
124
|
query_texts: Text queries to embed and search
|
|
@@ -125,7 +126,7 @@ class TemplateConnection(VectorDBConnection):
|
|
|
125
126
|
n_results: Number of results to return
|
|
126
127
|
where: Metadata filter
|
|
127
128
|
where_document: Document content filter
|
|
128
|
-
|
|
129
|
+
|
|
129
130
|
Returns:
|
|
130
131
|
Query results dictionary with keys:
|
|
131
132
|
- ids: List of result IDs
|
|
@@ -136,7 +137,7 @@ class TemplateConnection(VectorDBConnection):
|
|
|
136
137
|
"""
|
|
137
138
|
if not self._client:
|
|
138
139
|
return None
|
|
139
|
-
|
|
140
|
+
|
|
140
141
|
try:
|
|
141
142
|
# Perform similarity search
|
|
142
143
|
# results = self._client.query(
|
|
@@ -145,19 +146,13 @@ class TemplateConnection(VectorDBConnection):
|
|
|
145
146
|
# n_results=n_results,
|
|
146
147
|
# filter=where
|
|
147
148
|
# )
|
|
148
|
-
|
|
149
|
+
|
|
149
150
|
# Transform results to standard format
|
|
150
|
-
return {
|
|
151
|
-
"ids": [],
|
|
152
|
-
"distances": [],
|
|
153
|
-
"documents": [],
|
|
154
|
-
"metadatas": [],
|
|
155
|
-
"embeddings": []
|
|
156
|
-
}
|
|
151
|
+
return {"ids": [], "distances": [], "documents": [], "metadatas": [], "embeddings": []}
|
|
157
152
|
except Exception as e:
|
|
158
|
-
|
|
153
|
+
log_error("Query failed: %s", e)
|
|
159
154
|
return None
|
|
160
|
-
|
|
155
|
+
|
|
161
156
|
def get_all_items(
|
|
162
157
|
self,
|
|
163
158
|
collection_name: str,
|
|
@@ -167,13 +162,13 @@ class TemplateConnection(VectorDBConnection):
|
|
|
167
162
|
) -> Optional[Dict[str, Any]]:
|
|
168
163
|
"""
|
|
169
164
|
Get all items from a collection.
|
|
170
|
-
|
|
165
|
+
|
|
171
166
|
Args:
|
|
172
167
|
collection_name: Name of collection
|
|
173
168
|
limit: Maximum number of items to return
|
|
174
169
|
offset: Number of items to skip
|
|
175
170
|
where: Metadata filter
|
|
176
|
-
|
|
171
|
+
|
|
177
172
|
Returns:
|
|
178
173
|
Dictionary with collection items:
|
|
179
174
|
- ids: List of item IDs
|
|
@@ -183,7 +178,7 @@ class TemplateConnection(VectorDBConnection):
|
|
|
183
178
|
"""
|
|
184
179
|
if not self._client:
|
|
185
180
|
return None
|
|
186
|
-
|
|
181
|
+
|
|
187
182
|
try:
|
|
188
183
|
# Fetch items from collection with pagination
|
|
189
184
|
# results = self._client.fetch(
|
|
@@ -192,17 +187,12 @@ class TemplateConnection(VectorDBConnection):
|
|
|
192
187
|
# offset=offset,
|
|
193
188
|
# filter=where
|
|
194
189
|
# )
|
|
195
|
-
|
|
196
|
-
return {
|
|
197
|
-
"ids": [],
|
|
198
|
-
"documents": [],
|
|
199
|
-
"metadatas": [],
|
|
200
|
-
"embeddings": []
|
|
201
|
-
}
|
|
190
|
+
|
|
191
|
+
return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
|
|
202
192
|
except Exception as e:
|
|
203
|
-
|
|
193
|
+
log_error("Failed to get items: %s", e)
|
|
204
194
|
return None
|
|
205
|
-
|
|
195
|
+
|
|
206
196
|
def add_items(
|
|
207
197
|
self,
|
|
208
198
|
collection_name: str,
|
|
@@ -213,20 +203,20 @@ class TemplateConnection(VectorDBConnection):
|
|
|
213
203
|
) -> bool:
|
|
214
204
|
"""
|
|
215
205
|
Add items to a collection.
|
|
216
|
-
|
|
206
|
+
|
|
217
207
|
Args:
|
|
218
208
|
collection_name: Name of collection
|
|
219
209
|
documents: Document texts
|
|
220
210
|
metadatas: Metadata for each document
|
|
221
211
|
ids: IDs for each document
|
|
222
212
|
embeddings: Pre-computed embeddings
|
|
223
|
-
|
|
213
|
+
|
|
224
214
|
Returns:
|
|
225
215
|
True if successful, False otherwise
|
|
226
216
|
"""
|
|
227
217
|
if not self._client:
|
|
228
218
|
return False
|
|
229
|
-
|
|
219
|
+
|
|
230
220
|
try:
|
|
231
221
|
# Add items to the collection
|
|
232
222
|
# self._client.upsert(
|
|
@@ -238,9 +228,9 @@ class TemplateConnection(VectorDBConnection):
|
|
|
238
228
|
# )
|
|
239
229
|
return True
|
|
240
230
|
except Exception as e:
|
|
241
|
-
|
|
231
|
+
log_error("Failed to add items: %s", e)
|
|
242
232
|
return False
|
|
243
|
-
|
|
233
|
+
|
|
244
234
|
def update_items(
|
|
245
235
|
self,
|
|
246
236
|
collection_name: str,
|
|
@@ -251,20 +241,20 @@ class TemplateConnection(VectorDBConnection):
|
|
|
251
241
|
) -> bool:
|
|
252
242
|
"""
|
|
253
243
|
Update items in a collection.
|
|
254
|
-
|
|
244
|
+
|
|
255
245
|
Args:
|
|
256
246
|
collection_name: Name of collection
|
|
257
247
|
ids: IDs of items to update
|
|
258
248
|
documents: New document texts
|
|
259
249
|
metadatas: New metadata
|
|
260
250
|
embeddings: New embeddings
|
|
261
|
-
|
|
251
|
+
|
|
262
252
|
Returns:
|
|
263
253
|
True if successful, False otherwise
|
|
264
254
|
"""
|
|
265
255
|
if not self._client:
|
|
266
256
|
return False
|
|
267
|
-
|
|
257
|
+
|
|
268
258
|
try:
|
|
269
259
|
# Update existing items
|
|
270
260
|
# self._client.update(
|
|
@@ -276,9 +266,9 @@ class TemplateConnection(VectorDBConnection):
|
|
|
276
266
|
# )
|
|
277
267
|
return True
|
|
278
268
|
except Exception as e:
|
|
279
|
-
|
|
269
|
+
log_error("Failed to update items: %s", e)
|
|
280
270
|
return False
|
|
281
|
-
|
|
271
|
+
|
|
282
272
|
def delete_items(
|
|
283
273
|
self,
|
|
284
274
|
collection_name: str,
|
|
@@ -287,18 +277,18 @@ class TemplateConnection(VectorDBConnection):
|
|
|
287
277
|
) -> bool:
|
|
288
278
|
"""
|
|
289
279
|
Delete items from a collection.
|
|
290
|
-
|
|
280
|
+
|
|
291
281
|
Args:
|
|
292
282
|
collection_name: Name of collection
|
|
293
283
|
ids: IDs of items to delete
|
|
294
284
|
where: Metadata filter for items to delete
|
|
295
|
-
|
|
285
|
+
|
|
296
286
|
Returns:
|
|
297
287
|
True if successful, False otherwise
|
|
298
288
|
"""
|
|
299
289
|
if not self._client:
|
|
300
290
|
return False
|
|
301
|
-
|
|
291
|
+
|
|
302
292
|
try:
|
|
303
293
|
# Delete items
|
|
304
294
|
# self._client.delete(
|
|
@@ -308,34 +298,34 @@ class TemplateConnection(VectorDBConnection):
|
|
|
308
298
|
# )
|
|
309
299
|
return True
|
|
310
300
|
except Exception as e:
|
|
311
|
-
|
|
301
|
+
log_error("Failed to delete items: %s", e)
|
|
312
302
|
return False
|
|
313
|
-
|
|
303
|
+
|
|
314
304
|
def delete_collection(self, name: str) -> bool:
|
|
315
305
|
"""
|
|
316
306
|
Delete an entire collection.
|
|
317
|
-
|
|
307
|
+
|
|
318
308
|
Args:
|
|
319
309
|
name: Collection name
|
|
320
|
-
|
|
310
|
+
|
|
321
311
|
Returns:
|
|
322
312
|
True if successful, False otherwise
|
|
323
313
|
"""
|
|
324
314
|
if not self._client:
|
|
325
315
|
return False
|
|
326
|
-
|
|
316
|
+
|
|
327
317
|
try:
|
|
328
318
|
# Delete the collection
|
|
329
319
|
# self._client.delete_collection(name)
|
|
330
320
|
return True
|
|
331
321
|
except Exception as e:
|
|
332
|
-
|
|
322
|
+
log_error("Failed to delete collection: %s", e)
|
|
333
323
|
return False
|
|
334
|
-
|
|
324
|
+
|
|
335
325
|
def get_connection_info(self) -> Dict[str, Any]:
|
|
336
326
|
"""
|
|
337
327
|
Get information about the current connection.
|
|
338
|
-
|
|
328
|
+
|
|
339
329
|
Returns:
|
|
340
330
|
Dictionary with connection details
|
|
341
331
|
"""
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Optional, Union, Tuple
|
|
4
4
|
from sentence_transformers import SentenceTransformer
|
|
5
|
+
from vector_inspector.core.logging import log_info
|
|
5
6
|
|
|
6
7
|
from .model_registry import get_model_registry
|
|
7
8
|
|
|
@@ -12,19 +13,17 @@ DEFAULT_MODEL = ("all-MiniLM-L6-v2", "sentence-transformer")
|
|
|
12
13
|
|
|
13
14
|
def _get_dimension_to_model_dict():
    """Build the dimension -> models lookup from the model registry.

    Returns:
        Dict mapping each dimension reported by the registry to a list of
        (name, type, description) tuples, one per model registered for
        that dimension.
    """
    model_registry = get_model_registry()
    return {
        dim: [
            (entry.name, entry.type, entry.description)
            for entry in model_registry.get_models_by_dimension(dim)
        ]
        for dim in model_registry.get_all_dimensions()
    }
|
|
29
28
|
|
|
30
29
|
|
|
@@ -35,37 +34,37 @@ DIMENSION_TO_MODEL = _get_dimension_to_model_dict()
|
|
|
35
34
|
def get_model_for_dimension(dimension: int, prefer_multimodal: bool = True) -> Tuple[str, str]:
    """
    Pick an embedding model name and type for a given vector dimension.

    Args:
        dimension: The vector dimension size
        prefer_multimodal: If True and several models exist for this dimension,
            pick a multi-modal (CLIP) model ahead of text-only ones

    Returns:
        Tuple of (model_name, model_type) where model_type is "sentence-transformer" or "clip"
    """
    registry = get_model_registry()
    candidates = registry.get_models_by_dimension(dimension)

    if not candidates:
        # No exact match: retry with the closest dimension the registry knows.
        nearest_dim = registry.find_closest_dimension(dimension)
        if nearest_dim:
            candidates = registry.get_models_by_dimension(nearest_dim)

    if not candidates:
        return DEFAULT_MODEL

    # The preference only matters when there is an actual choice to make.
    if prefer_multimodal and len(candidates) > 1:
        for candidate in candidates:
            if candidate.modality == "multimodal" or candidate.type == "clip":
                return (candidate.name, candidate.type)

    # Single candidate, or no multimodal candidate: take the first entry.
    first_choice = candidates[0]
    return (first_choice.name, first_choice.type)
|
|
71
70
|
|
|
@@ -74,10 +73,10 @@ def get_available_models_for_dimension(dimension: int) -> list:
|
|
|
74
73
|
"""
|
|
75
74
|
Get all available model options for a given dimension.
|
|
76
75
|
Includes both predefined (from registry) and custom user-added models.
|
|
77
|
-
|
|
76
|
+
|
|
78
77
|
Args:
|
|
79
78
|
dimension: The vector dimension size
|
|
80
|
-
|
|
79
|
+
|
|
81
80
|
Returns:
|
|
82
81
|
List of tuples: [(model_name, model_type, description), ...]
|
|
83
82
|
"""
|
|
@@ -85,39 +84,37 @@ def get_available_models_for_dimension(dimension: int) -> list:
|
|
|
85
84
|
registry = get_model_registry()
|
|
86
85
|
registry_models = registry.get_models_by_dimension(dimension)
|
|
87
86
|
models = [(m.name, m.type, m.description) for m in registry_models]
|
|
88
|
-
|
|
87
|
+
|
|
89
88
|
# Add custom models from settings
|
|
90
89
|
try:
|
|
91
90
|
from ..services.settings_service import SettingsService
|
|
91
|
+
|
|
92
92
|
settings = SettingsService()
|
|
93
93
|
custom_models = settings.get_custom_embedding_models(dimension)
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
for model in custom_models:
|
|
96
96
|
# Format: (model_name, model_type, description)
|
|
97
|
-
models.append((
|
|
98
|
-
model["name"],
|
|
99
|
-
model["type"],
|
|
100
|
-
f"{model['description']} (custom)"
|
|
101
|
-
))
|
|
97
|
+
models.append((model["name"], model["type"], f"{model['description']} (custom)"))
|
|
102
98
|
except Exception as e:
|
|
103
|
-
|
|
104
|
-
|
|
99
|
+
log_info("Warning: Could not load custom models: %s", e)
|
|
100
|
+
|
|
105
101
|
return models
|
|
106
102
|
|
|
107
103
|
|
|
108
104
|
def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
|
|
109
105
|
"""
|
|
110
106
|
Load an embedding model (sentence-transformer or CLIP).
|
|
111
|
-
|
|
107
|
+
|
|
112
108
|
Args:
|
|
113
109
|
model_name: Name of the model to load
|
|
114
110
|
model_type: Type of model ("sentence-transformer" or "clip")
|
|
115
|
-
|
|
111
|
+
|
|
116
112
|
Returns:
|
|
117
113
|
Loaded model (SentenceTransformer or CLIP model)
|
|
118
114
|
"""
|
|
119
115
|
if model_type == "clip":
|
|
120
116
|
from transformers import CLIPModel, CLIPProcessor
|
|
117
|
+
|
|
121
118
|
model = CLIPModel.from_pretrained(model_name)
|
|
122
119
|
processor = CLIPProcessor.from_pretrained(model_name)
|
|
123
120
|
return (model, processor)
|
|
@@ -128,17 +125,18 @@ def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTran
|
|
|
128
125
|
def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
|
|
129
126
|
"""
|
|
130
127
|
Encode text using the appropriate model.
|
|
131
|
-
|
|
128
|
+
|
|
132
129
|
Args:
|
|
133
130
|
text: Text to encode
|
|
134
131
|
model: The loaded model (SentenceTransformer or (CLIPModel, CLIPProcessor) tuple)
|
|
135
132
|
model_type: Type of model ("sentence-transformer" or "clip")
|
|
136
|
-
|
|
133
|
+
|
|
137
134
|
Returns:
|
|
138
135
|
Embedding vector as a list
|
|
139
136
|
"""
|
|
140
137
|
if model_type == "clip":
|
|
141
138
|
import torch
|
|
139
|
+
|
|
142
140
|
clip_model, processor = model
|
|
143
141
|
inputs = processor(text=[text], return_tensors="pt", padding=True)
|
|
144
142
|
with torch.no_grad():
|
|
@@ -152,13 +150,15 @@ def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type:
|
|
|
152
150
|
return embedding.tolist()
|
|
153
151
|
|
|
154
152
|
|
|
155
|
-
def get_embedding_model_for_dimension(
|
|
153
|
+
def get_embedding_model_for_dimension(
|
|
154
|
+
dimension: int,
|
|
155
|
+
) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
|
|
156
156
|
"""
|
|
157
157
|
Get a loaded embedding model for a specific dimension.
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
Args:
|
|
160
160
|
dimension: The vector dimension size
|
|
161
|
-
|
|
161
|
+
|
|
162
162
|
Returns:
|
|
163
163
|
Tuple of (loaded_model, model_name, model_type)
|
|
164
164
|
"""
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Tiny logging wrapper for consistent logs across the project.

Provides `log_info`, `log_error`, and `log_debug` helpers that delegate
to the standard `logging` module but keep call sites concise.
"""
import logging
from typing import Any

# One package-wide logger shared by every helper in this module.
_logger = logging.getLogger("vector_inspector")
if not _logger.handlers:
    # Only attach the stream handler when none is present, so handlers are
    # not duplicated if the logger was already configured.
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    handler.setFormatter(formatter)
    _logger.addHandler(handler)
    # NOTE(review): assumed to belong to the first-time setup branch; confirm
    # against the original indentation.
    _logger.setLevel(logging.INFO)

# `stacklevel=2` makes the record's funcName/lineno/pathname describe the code
# that called the helper instead of the helper itself.
_CALLER_STACKLEVEL = 2


def log_info(msg: str, *args: Any, **kwargs: Any) -> None:
    """Log `msg` at INFO level on the package logger.

    Args:
        msg: Message or %-style format string.
        *args: Values interpolated lazily into `msg` by `logging`.
        **kwargs: Passed through to `Logger.info` (e.g. `exc_info`, `extra`).
            An explicit `stacklevel` is respected as given.
    """
    kwargs.setdefault("stacklevel", _CALLER_STACKLEVEL)
    _logger.info(msg, *args, **kwargs)


def log_error(msg: str, *args: Any, **kwargs: Any) -> None:
    """Log `msg` at ERROR level on the package logger.

    Args:
        msg: Message or %-style format string.
        *args: Values interpolated lazily into `msg` by `logging`.
        **kwargs: Passed through to `Logger.error`; pass `exc_info=True`
            from an `except` block to include the traceback.
            An explicit `stacklevel` is respected as given.
    """
    kwargs.setdefault("stacklevel", _CALLER_STACKLEVEL)
    _logger.error(msg, *args, **kwargs)


def log_debug(msg: str, *args: Any, **kwargs: Any) -> None:
    """Log `msg` at DEBUG level on the package logger.

    Args:
        msg: Message or %-style format string.
        *args: Values interpolated lazily into `msg` by `logging`.
        **kwargs: Passed through to `Logger.debug`.
            An explicit `stacklevel` is respected as given.
    """
    kwargs.setdefault("stacklevel", _CALLER_STACKLEVEL)
    _logger.debug(msg, *args, **kwargs)
|
|
@@ -4,6 +4,7 @@ import json
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import List, Dict, Optional, Tuple
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
+
from vector_inspector.core.logging import log_info, log_error
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
@dataclass
|
|
@@ -63,7 +64,7 @@ class EmbeddingModelRegistry:
|
|
|
63
64
|
registry_path = Path(__file__).parent.parent / "config" / "known_embedding_models.json"
|
|
64
65
|
|
|
65
66
|
if not registry_path.exists():
|
|
66
|
-
|
|
67
|
+
log_info("Warning: Model registry not found at %s", registry_path)
|
|
67
68
|
return
|
|
68
69
|
|
|
69
70
|
try:
|
|
@@ -83,10 +84,10 @@ class EmbeddingModelRegistry:
|
|
|
83
84
|
# Index by name
|
|
84
85
|
self._name_index[model_info.name.lower()] = model_info
|
|
85
86
|
|
|
86
|
-
|
|
87
|
+
log_info("Loaded %d models from registry", len(self._models))
|
|
87
88
|
#...
|
|
88
89
|
except Exception as e:
|
|
89
|
-
|
|
90
|
+
log_error("Error loading model registry: %s", e)
|
|
90
91
|
|
|
91
92
|
def get_models_by_dimension(self, dimension: int) -> List[ModelInfo]:
|
|
92
93
|
"""Get all models for a specific dimension.
|
vector_inspector/main.py
CHANGED
|
@@ -1,19 +1,23 @@
|
|
|
1
1
|
"""Main entry point for Vector Inspector application."""
|
|
2
2
|
|
|
3
3
|
import sys
|
|
4
|
+
import os
|
|
4
5
|
from PySide6.QtWidgets import QApplication
|
|
5
6
|
from vector_inspector.ui.main_window import MainWindow
|
|
6
7
|
|
|
8
|
+
# Add the directory containing the running executable to the import path so
# libraries placed next to it (presumably by a packaged build) can be imported.
|
|
9
|
+
sys.path.append(os.path.dirname(sys.executable))
|
|
10
|
+
|
|
7
11
|
|
|
8
12
|
def main():
    """Start Vector Inspector: create the Qt app, show the main window, run the loop.

    The process exits with the status returned by the Qt event loop.
    """
    qt_app = QApplication(sys.argv)
    # The application name and the organization name share the same label.
    for apply_label in (qt_app.setApplicationName, qt_app.setOrganizationName):
        apply_label("Vector Inspector")

    main_window = MainWindow()
    main_window.show()

    exit_status = qt_app.exec()
    sys.exit(exit_status)
|
|
18
22
|
|
|
19
23
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Helpers for backup/restore: zip read/write and embedding normalization.
|
|
2
|
+
|
|
3
|
+
Minimal, well-tested helpers to keep `BackupRestoreService` concise.
|
|
4
|
+
"""
|
|
5
|
+
import json
|
|
6
|
+
import zipfile
|
|
7
|
+
from typing import Tuple, Dict, Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def write_backup_zip(path, metadata: Dict[str, Any], data: Dict[str, Any]):
    """Write metadata and data into a zip file at `path`.

    The archive contains two members, `metadata.json` and `data.json`, each
    holding the corresponding dict serialized as indented JSON.

    Args:
        path: Destination of the archive; may be a pathlib.Path or string.
        metadata: JSON-serializable backup metadata.
        data: JSON-serializable backup payload.

    Raises:
        TypeError / ValueError: If `metadata` or `data` cannot be serialized
            to JSON. In that case `path` is not created or modified.
    """
    # Serialize before opening the archive: opening in 'w' mode truncates any
    # existing file, so failing afterwards would leave a broken backup behind.
    metadata_json = json.dumps(metadata, indent=2)
    data_json = json.dumps(data, indent=2)

    with zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipf.writestr('metadata.json', metadata_json)
        zipf.writestr('data.json', data_json)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def read_backup_zip(path) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Load the two JSON members of a backup archive.

    Reads `metadata.json` first and `data.json` second, decoding each as
    UTF-8 before parsing.

    Returns (metadata, data).
    """
    with zipfile.ZipFile(path, 'r') as archive:
        # The generator is consumed by the unpacking while the archive is open.
        metadata, data = (
            json.loads(archive.read(member).decode('utf-8'))
            for member in ('metadata.json', 'data.json')
        )
    return metadata, data
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _to_plain(value, np):
    """Return `value` with numpy arrays and scalars replaced by plain Python objects."""
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, np.generic):
        # numpy scalar (e.g. np.float32) -> matching Python scalar
        return value.item()
    if isinstance(value, (list, tuple)):
        return [_to_plain(element, np) for element in value]
    return value


def normalize_embeddings(data: Dict[str, Any]) -> Dict[str, Any]:
    """Ensure embeddings in `data` are plain python lists (no numpy objects).

    Handles a single numpy array, a list/tuple of numpy arrays, and nested
    lists whose elements are numpy scalars, so the result can be passed to
    `json.dumps`. A missing or `None` `embeddings` entry is left untouched.

    This mutates and returns the same `data` dict for convenience.

    Args:
        data: Dict that may carry an `embeddings` entry.

    Returns:
        The same `data` object, with `data['embeddings']` converted in place.
    """
    emb = data.get('embeddings')
    if emb is None:
        return data

    try:
        import numpy as np
    except Exception:
        # Deliberately broad (best effort): without a usable numpy there can
        # be no numpy objects to convert, so assume data is serializable.
        return data

    if isinstance(emb, (np.ndarray, list, tuple)):
        data['embeddings'] = _to_plain(emb, np)
    return data
|