vector-inspector 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/__init__.py +1 -1
- vector_inspector/core/connection_manager.py +91 -19
- vector_inspector/core/connections/base_connection.py +43 -43
- vector_inspector/core/connections/chroma_connection.py +1 -1
- vector_inspector/core/connections/pgvector_connection.py +11 -171
- vector_inspector/core/connections/pinecone_connection.py +596 -99
- vector_inspector/core/connections/qdrant_connection.py +35 -44
- vector_inspector/core/embedding_utils.py +14 -5
- vector_inspector/core/logging.py +3 -1
- vector_inspector/main.py +42 -15
- vector_inspector/services/backup_restore_service.py +228 -15
- vector_inspector/services/settings_service.py +71 -19
- vector_inspector/ui/components/backup_restore_dialog.py +215 -101
- vector_inspector/ui/components/connection_manager_panel.py +155 -14
- vector_inspector/ui/dialogs/cross_db_migration.py +126 -99
- vector_inspector/ui/dialogs/settings_dialog.py +13 -6
- vector_inspector/ui/loading_screen.py +169 -0
- vector_inspector/ui/main_window.py +44 -19
- vector_inspector/ui/services/dialog_service.py +1 -0
- vector_inspector/ui/views/collection_browser.py +36 -34
- vector_inspector/ui/views/connection_view.py +7 -1
- vector_inspector/ui/views/info_panel.py +118 -52
- vector_inspector/ui/views/metadata_view.py +30 -31
- vector_inspector/ui/views/search_view.py +20 -19
- vector_inspector/ui/views/visualization_view.py +18 -15
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/METADATA +17 -4
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/RECORD +30 -28
- vector_inspector-0.3.12.dist-info/licenses/LICENSE +1 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/entry_points.txt +0 -0
vector_inspector/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Vector Inspector - A comprehensive desktop application for vector database visualization."""
|
|
2
2
|
|
|
3
|
-
__version__ = "0.3.
|
|
3
|
+
__version__ = "0.3.12" # Keep in sync with pyproject.toml for dev mode fallback
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def get_version():
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
"""Connection manager for handling multiple vector database connections."""
|
|
2
2
|
|
|
3
3
|
import uuid
|
|
4
|
-
from typing import Dict, Optional, List, Any
|
|
5
4
|
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
6
7
|
from PySide6.QtCore import QObject, Signal
|
|
7
8
|
|
|
8
|
-
from .connections.base_connection import VectorDBConnection
|
|
9
9
|
from vector_inspector.core.logging import log_error
|
|
10
10
|
|
|
11
|
+
from .connections.base_connection import VectorDBConnection
|
|
12
|
+
|
|
11
13
|
|
|
12
14
|
class ConnectionState(Enum):
|
|
13
15
|
"""Possible connection states."""
|
|
@@ -27,7 +29,7 @@ class ConnectionInstance:
|
|
|
27
29
|
name: str,
|
|
28
30
|
provider: str,
|
|
29
31
|
connection: VectorDBConnection,
|
|
30
|
-
config:
|
|
32
|
+
config: dict[str, Any],
|
|
31
33
|
):
|
|
32
34
|
"""
|
|
33
35
|
Initialize a connection instance.
|
|
@@ -42,12 +44,17 @@ class ConnectionInstance:
|
|
|
42
44
|
self.id = connection_id
|
|
43
45
|
self.name = name
|
|
44
46
|
self.provider = provider
|
|
45
|
-
self.
|
|
47
|
+
self.database = connection
|
|
46
48
|
self.config = config
|
|
47
49
|
self.state = ConnectionState.DISCONNECTED
|
|
48
|
-
self.active_collection:
|
|
49
|
-
self.collections:
|
|
50
|
-
self.error_message:
|
|
50
|
+
self.active_collection: str | None = None
|
|
51
|
+
self.collections: list[str] = []
|
|
52
|
+
self.error_message: str | None = None
|
|
53
|
+
|
|
54
|
+
# Set profile_name on the underlying connection object so it can be used
|
|
55
|
+
# for settings lookups (embedding models, etc.)
|
|
56
|
+
# Note: This dynamically adds an attribute to the connection object
|
|
57
|
+
self.database.profile_name = name # type: ignore[attr-defined]
|
|
51
58
|
|
|
52
59
|
def get_display_name(self) -> str:
|
|
53
60
|
"""Get a display-friendly connection name."""
|
|
@@ -59,6 +66,59 @@ class ConnectionInstance:
|
|
|
59
66
|
return f"{self.name} > {self.active_collection}"
|
|
60
67
|
return self.name
|
|
61
68
|
|
|
69
|
+
def __getattr__(self, name):
|
|
70
|
+
"""Forward unknown attribute lookups to the underlying database connection.
|
|
71
|
+
|
|
72
|
+
This allows `ConnectionInstance` to act as a thin wrapper while
|
|
73
|
+
exposing the provider-specific API (e.g. `get_all_items`,
|
|
74
|
+
`query_collection`) without callers needing to access
|
|
75
|
+
`.database` explicitly.
|
|
76
|
+
"""
|
|
77
|
+
return getattr(self.database, name)
|
|
78
|
+
|
|
79
|
+
# Convenience proxy methods to forward common operations to the underlying
|
|
80
|
+
# VectorDBConnection. This prevents callers from needing to access
|
|
81
|
+
# `instance.database` directly and centralizes error handling.
|
|
82
|
+
def list_collections(self) -> list[str]:
|
|
83
|
+
"""Return list of collections from the underlying database connection.
|
|
84
|
+
|
|
85
|
+
Falls back to the cached `collections` attribute on error.
|
|
86
|
+
"""
|
|
87
|
+
try:
|
|
88
|
+
return self.database.list_collections()
|
|
89
|
+
except Exception:
|
|
90
|
+
return self.collections or []
|
|
91
|
+
|
|
92
|
+
def connect(self) -> bool:
|
|
93
|
+
"""Proxy to connect the underlying database connection."""
|
|
94
|
+
return self.database.connect()
|
|
95
|
+
|
|
96
|
+
def disconnect(self) -> None:
|
|
97
|
+
"""Proxy to disconnect the underlying database connection; logs errors."""
|
|
98
|
+
try:
|
|
99
|
+
self.database.disconnect()
|
|
100
|
+
except Exception as e:
|
|
101
|
+
log_error("Error disconnecting underlying database: %s", e)
|
|
102
|
+
|
|
103
|
+
@property
|
|
104
|
+
def is_connected(self) -> bool:
|
|
105
|
+
"""Whether the underlying database connection is currently connected."""
|
|
106
|
+
return getattr(self.database, "is_connected", False)
|
|
107
|
+
|
|
108
|
+
def get_collection_info(self, collection_name: str):
|
|
109
|
+
"""Proxy to get collection-specific information."""
|
|
110
|
+
try:
|
|
111
|
+
return self.database.get_collection_info(collection_name)
|
|
112
|
+
except Exception:
|
|
113
|
+
return None
|
|
114
|
+
|
|
115
|
+
def delete_collection(self, collection_name: str) -> bool:
|
|
116
|
+
"""Proxy to delete a collection on the underlying database connection."""
|
|
117
|
+
try:
|
|
118
|
+
return self.database.delete_collection(collection_name)
|
|
119
|
+
except Exception:
|
|
120
|
+
return False
|
|
121
|
+
|
|
62
122
|
|
|
63
123
|
class ConnectionManager(QObject):
|
|
64
124
|
"""Manages multiple vector database connections and saved profiles.
|
|
@@ -85,16 +145,28 @@ class ConnectionManager(QObject):
|
|
|
85
145
|
def __init__(self):
|
|
86
146
|
"""Initialize the connection manager."""
|
|
87
147
|
super().__init__()
|
|
88
|
-
self._connections:
|
|
89
|
-
self._active_connection_id:
|
|
148
|
+
self._connections: dict[str, ConnectionInstance] = {}
|
|
149
|
+
self._active_connection_id: str | None = None
|
|
150
|
+
|
|
151
|
+
def get_active_collection(self) -> str | None:
|
|
152
|
+
"""
|
|
153
|
+
Get the active collection name for the currently active connection.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
The active collection name, or None if no active connection or collection.
|
|
157
|
+
"""
|
|
158
|
+
active_conn = self.get_active_connection()
|
|
159
|
+
if active_conn:
|
|
160
|
+
return active_conn.active_collection
|
|
161
|
+
return None
|
|
90
162
|
|
|
91
163
|
def create_connection(
|
|
92
164
|
self,
|
|
93
165
|
name: str,
|
|
94
166
|
provider: str,
|
|
95
167
|
connection: VectorDBConnection,
|
|
96
|
-
config:
|
|
97
|
-
connection_id: str = None,
|
|
168
|
+
config: dict[str, Any],
|
|
169
|
+
connection_id: str | None = None,
|
|
98
170
|
) -> str:
|
|
99
171
|
"""
|
|
100
172
|
Create a new connection instance (not yet connected).
|
|
@@ -138,17 +210,17 @@ class ConnectionManager(QObject):
|
|
|
138
210
|
if connection_id in self._connections:
|
|
139
211
|
self.connection_opened.emit(connection_id)
|
|
140
212
|
|
|
141
|
-
def get_connection(self, connection_id: str) ->
|
|
213
|
+
def get_connection(self, connection_id: str) -> ConnectionInstance | None:
|
|
142
214
|
"""Get a connection instance by ID."""
|
|
143
215
|
return self._connections.get(connection_id)
|
|
144
216
|
|
|
145
|
-
def get_active_connection(self) ->
|
|
217
|
+
def get_active_connection(self) -> ConnectionInstance | None:
|
|
146
218
|
"""Get the currently active connection instance."""
|
|
147
219
|
if self._active_connection_id:
|
|
148
220
|
return self._connections.get(self._active_connection_id)
|
|
149
221
|
return None
|
|
150
222
|
|
|
151
|
-
def get_active_connection_id(self) ->
|
|
223
|
+
def get_active_connection_id(self) -> str | None:
|
|
152
224
|
"""Get the currently active connection ID."""
|
|
153
225
|
return self._active_connection_id
|
|
154
226
|
|
|
@@ -185,7 +257,7 @@ class ConnectionManager(QObject):
|
|
|
185
257
|
|
|
186
258
|
# Disconnect the connection
|
|
187
259
|
try:
|
|
188
|
-
instance.
|
|
260
|
+
instance.disconnect()
|
|
189
261
|
except Exception as e:
|
|
190
262
|
log_error("Error disconnecting: %s", e)
|
|
191
263
|
|
|
@@ -206,7 +278,7 @@ class ConnectionManager(QObject):
|
|
|
206
278
|
return True
|
|
207
279
|
|
|
208
280
|
def update_connection_state(
|
|
209
|
-
self, connection_id: str, state: ConnectionState, error:
|
|
281
|
+
self, connection_id: str, state: ConnectionState, error: str | None = None
|
|
210
282
|
):
|
|
211
283
|
"""
|
|
212
284
|
Update the state of a connection.
|
|
@@ -225,7 +297,7 @@ class ConnectionManager(QObject):
|
|
|
225
297
|
instance.error_message = None
|
|
226
298
|
self.connection_state_changed.emit(connection_id, state)
|
|
227
299
|
|
|
228
|
-
def update_collections(self, connection_id: str, collections:
|
|
300
|
+
def update_collections(self, connection_id: str, collections: list[str]):
|
|
229
301
|
"""
|
|
230
302
|
Update the collections list for a connection.
|
|
231
303
|
|
|
@@ -238,7 +310,7 @@ class ConnectionManager(QObject):
|
|
|
238
310
|
instance.collections = collections
|
|
239
311
|
self.collections_updated.emit(connection_id, collections)
|
|
240
312
|
|
|
241
|
-
def set_active_collection(self, connection_id: str, collection_name:
|
|
313
|
+
def set_active_collection(self, connection_id: str, collection_name: str | None):
|
|
242
314
|
"""
|
|
243
315
|
Set the active collection for a connection.
|
|
244
316
|
|
|
@@ -251,7 +323,7 @@ class ConnectionManager(QObject):
|
|
|
251
323
|
instance.active_collection = collection_name
|
|
252
324
|
self.active_collection_changed.emit(connection_id, collection_name)
|
|
253
325
|
|
|
254
|
-
def get_all_connections(self) ->
|
|
326
|
+
def get_all_connections(self) -> list[ConnectionInstance]:
|
|
255
327
|
"""Get list of all connection instances."""
|
|
256
328
|
return list(self._connections.values())
|
|
257
329
|
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Abstract base class for vector database connections."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
5
6
|
from vector_inspector.core.logging import log_error
|
|
6
7
|
|
|
7
8
|
|
|
@@ -39,7 +40,7 @@ class VectorDBConnection(ABC):
|
|
|
39
40
|
pass
|
|
40
41
|
|
|
41
42
|
@abstractmethod
|
|
42
|
-
def list_collections(self) ->
|
|
43
|
+
def list_collections(self) -> list[str]:
|
|
43
44
|
"""
|
|
44
45
|
Get list of all collections/indexes.
|
|
45
46
|
|
|
@@ -49,7 +50,7 @@ class VectorDBConnection(ABC):
|
|
|
49
50
|
pass
|
|
50
51
|
|
|
51
52
|
@abstractmethod
|
|
52
|
-
def get_collection_info(self, name: str) ->
|
|
53
|
+
def get_collection_info(self, name: str) -> dict[str, Any] | None:
|
|
53
54
|
"""
|
|
54
55
|
Get collection metadata and statistics.
|
|
55
56
|
|
|
@@ -73,16 +74,16 @@ class VectorDBConnection(ABC):
|
|
|
73
74
|
def add_items(
|
|
74
75
|
self,
|
|
75
76
|
collection_name: str,
|
|
76
|
-
documents:
|
|
77
|
-
metadatas:
|
|
78
|
-
ids:
|
|
79
|
-
embeddings:
|
|
77
|
+
documents: list[str],
|
|
78
|
+
metadatas: list[dict[str, Any]] | None = None,
|
|
79
|
+
ids: list[str] | None = None,
|
|
80
|
+
embeddings: list[list[float]] | None = None,
|
|
80
81
|
) -> bool:
|
|
81
82
|
"""Add items to a collection."""
|
|
82
83
|
pass
|
|
83
84
|
|
|
84
85
|
@abstractmethod
|
|
85
|
-
def get_items(self, name: str, ids:
|
|
86
|
+
def get_items(self, name: str, ids: list[str]) -> dict[str, Any]:
|
|
86
87
|
"""Retrieve items by original ids. Should return a dict with 'documents' and 'metadatas'."""
|
|
87
88
|
pass
|
|
88
89
|
|
|
@@ -100,12 +101,12 @@ class VectorDBConnection(ABC):
|
|
|
100
101
|
def query_collection(
|
|
101
102
|
self,
|
|
102
103
|
collection_name: str,
|
|
103
|
-
query_texts:
|
|
104
|
-
query_embeddings:
|
|
104
|
+
query_texts: list[str] | None = None,
|
|
105
|
+
query_embeddings: list[list[float]] | None = None,
|
|
105
106
|
n_results: int = 10,
|
|
106
|
-
where:
|
|
107
|
-
where_document:
|
|
108
|
-
) ->
|
|
107
|
+
where: dict[str, Any] | None = None,
|
|
108
|
+
where_document: dict[str, Any] | None = None,
|
|
109
|
+
) -> dict[str, Any] | None:
|
|
109
110
|
"""
|
|
110
111
|
Query a collection for similar vectors.
|
|
111
112
|
|
|
@@ -131,10 +132,10 @@ class VectorDBConnection(ABC):
|
|
|
131
132
|
def get_all_items(
|
|
132
133
|
self,
|
|
133
134
|
collection_name: str,
|
|
134
|
-
limit:
|
|
135
|
-
offset:
|
|
136
|
-
where:
|
|
137
|
-
) ->
|
|
135
|
+
limit: int | None = None,
|
|
136
|
+
offset: int | None = None,
|
|
137
|
+
where: dict[str, Any] | None = None,
|
|
138
|
+
) -> dict[str, Any] | None:
|
|
138
139
|
"""
|
|
139
140
|
Get all items from a collection.
|
|
140
141
|
|
|
@@ -157,10 +158,10 @@ class VectorDBConnection(ABC):
|
|
|
157
158
|
def update_items(
|
|
158
159
|
self,
|
|
159
160
|
collection_name: str,
|
|
160
|
-
ids:
|
|
161
|
-
documents:
|
|
162
|
-
metadatas:
|
|
163
|
-
embeddings:
|
|
161
|
+
ids: list[str],
|
|
162
|
+
documents: list[str] | None = None,
|
|
163
|
+
metadatas: list[dict[str, Any]] | None = None,
|
|
164
|
+
embeddings: list[list[float]] | None = None,
|
|
164
165
|
) -> bool:
|
|
165
166
|
"""
|
|
166
167
|
Update items in a collection.
|
|
@@ -181,8 +182,8 @@ class VectorDBConnection(ABC):
|
|
|
181
182
|
def delete_items(
|
|
182
183
|
self,
|
|
183
184
|
collection_name: str,
|
|
184
|
-
ids:
|
|
185
|
-
where:
|
|
185
|
+
ids: list[str] | None = None,
|
|
186
|
+
where: dict[str, Any] | None = None,
|
|
186
187
|
) -> bool:
|
|
187
188
|
"""
|
|
188
189
|
Delete items from a collection.
|
|
@@ -199,7 +200,7 @@ class VectorDBConnection(ABC):
|
|
|
199
200
|
|
|
200
201
|
# Optional: Methods that may be provider-specific but useful to define
|
|
201
202
|
|
|
202
|
-
def get_connection_info(self) ->
|
|
203
|
+
def get_connection_info(self) -> dict[str, Any]:
|
|
203
204
|
"""
|
|
204
205
|
Get information about the current connection.
|
|
205
206
|
|
|
@@ -208,7 +209,7 @@ class VectorDBConnection(ABC):
|
|
|
208
209
|
"""
|
|
209
210
|
return {"provider": self.__class__.__name__, "connected": self.is_connected}
|
|
210
211
|
|
|
211
|
-
def get_supported_filter_operators(self) ->
|
|
212
|
+
def get_supported_filter_operators(self) -> list[dict[str, Any]]:
|
|
212
213
|
"""
|
|
213
214
|
Get list of filter operators supported by this provider.
|
|
214
215
|
|
|
@@ -228,9 +229,7 @@ class VectorDBConnection(ABC):
|
|
|
228
229
|
{"name": "contains", "server_side": False},
|
|
229
230
|
]
|
|
230
231
|
|
|
231
|
-
def get_embedding_model(
|
|
232
|
-
self, collection_name: str, connection_id: Optional[str] = None
|
|
233
|
-
) -> Optional[str]:
|
|
232
|
+
def get_embedding_model(self, collection_name: str) -> str | None:
|
|
234
233
|
"""
|
|
235
234
|
Get the embedding model used for a collection.
|
|
236
235
|
|
|
@@ -260,11 +259,12 @@ class VectorDBConnection(ABC):
|
|
|
260
259
|
return metadata["_embedding_model"]
|
|
261
260
|
|
|
262
261
|
# Finally, check user settings (for collections we can't modify)
|
|
263
|
-
|
|
262
|
+
profile_name = getattr(self, "profile_name", None)
|
|
263
|
+
if profile_name:
|
|
264
264
|
from vector_inspector.services.settings_service import SettingsService
|
|
265
265
|
|
|
266
266
|
settings = SettingsService()
|
|
267
|
-
model_info = settings.get_embedding_model(
|
|
267
|
+
model_info = settings.get_embedding_model(profile_name, collection_name)
|
|
268
268
|
if model_info:
|
|
269
269
|
return model_info["model"]
|
|
270
270
|
|
|
@@ -274,7 +274,7 @@ class VectorDBConnection(ABC):
|
|
|
274
274
|
return None
|
|
275
275
|
|
|
276
276
|
def load_embedding_model_for_collection(
|
|
277
|
-
self, collection_name: str,
|
|
277
|
+
self, collection_name: str, profile_name_override: str | None = None
|
|
278
278
|
):
|
|
279
279
|
"""
|
|
280
280
|
Resolve and load an embedding model for a collection.
|
|
@@ -289,17 +289,18 @@ class VectorDBConnection(ABC):
|
|
|
289
289
|
(loaded_model, model_name, model_type)
|
|
290
290
|
"""
|
|
291
291
|
try:
|
|
292
|
-
from vector_inspector.services.settings_service import SettingsService
|
|
293
292
|
from vector_inspector.core.embedding_utils import (
|
|
294
|
-
load_embedding_model,
|
|
295
|
-
get_embedding_model_for_dimension,
|
|
296
293
|
DEFAULT_MODEL,
|
|
294
|
+
get_embedding_model_for_dimension,
|
|
295
|
+
load_embedding_model,
|
|
297
296
|
)
|
|
297
|
+
from vector_inspector.services.settings_service import SettingsService
|
|
298
298
|
|
|
299
299
|
# 1) settings
|
|
300
|
-
|
|
300
|
+
profile_name = profile_name_override or getattr(self, "profile_name", None)
|
|
301
|
+
if profile_name:
|
|
301
302
|
settings = SettingsService()
|
|
302
|
-
cfg = settings.get_embedding_model(
|
|
303
|
+
cfg = settings.get_embedding_model(profile_name, collection_name)
|
|
303
304
|
if cfg and cfg.get("model"):
|
|
304
305
|
model_name = cfg.get("model")
|
|
305
306
|
model_type = cfg.get("type", "sentence-transformer")
|
|
@@ -336,8 +337,8 @@ class VectorDBConnection(ABC):
|
|
|
336
337
|
raise
|
|
337
338
|
|
|
338
339
|
def compute_embeddings_for_documents(
|
|
339
|
-
self, collection_name: str, documents:
|
|
340
|
-
) ->
|
|
340
|
+
self, collection_name: str, documents: list[str], profile_name_override: str | None = None
|
|
341
|
+
) -> list[list[float]]:
|
|
341
342
|
"""
|
|
342
343
|
Compute embeddings for a list of documents using the resolved model for the collection.
|
|
343
344
|
|
|
@@ -345,15 +346,14 @@ class VectorDBConnection(ABC):
|
|
|
345
346
|
raises an exception.
|
|
346
347
|
"""
|
|
347
348
|
model, model_name, model_type = self.load_embedding_model_for_collection(
|
|
348
|
-
collection_name,
|
|
349
|
+
collection_name, profile_name_override
|
|
349
350
|
)
|
|
350
351
|
|
|
351
352
|
# Use batch encoding when available (sentence-transformer), otherwise per-doc
|
|
352
353
|
if model_type != "clip":
|
|
353
354
|
# sentence-transformer-like models support batch encode
|
|
354
355
|
return model.encode(documents, show_progress_bar=False).tolist()
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
from vector_inspector.core.embedding_utils import encode_text
|
|
356
|
+
# CLIP - use encode_text helper for each document
|
|
357
|
+
from vector_inspector.core.embedding_utils import encode_text
|
|
358
358
|
|
|
359
|
-
|
|
359
|
+
return [encode_text(d, model, model_type) for d in documents]
|
|
@@ -389,7 +389,7 @@ class ChromaDBConnection(VectorDBConnection):
|
|
|
389
389
|
if not embeddings and documents:
|
|
390
390
|
try:
|
|
391
391
|
embeddings = self.compute_embeddings_for_documents(
|
|
392
|
-
collection_name, documents, getattr(self, "
|
|
392
|
+
collection_name, documents, getattr(self, "profile_name", None)
|
|
393
393
|
)
|
|
394
394
|
except Exception as e:
|
|
395
395
|
log_error("Failed to compute embeddings for Chroma add_items: %s", e)
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
"""PgVector/PostgreSQL connection manager."""
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
4
3
|
import json
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
5
6
|
import psycopg2
|
|
6
7
|
from psycopg2 import sql
|
|
7
8
|
|
|
8
|
-
from vector_inspector.core.logging import log_info
|
|
9
|
-
|
|
10
9
|
## No need to import register_vector; pgvector extension is enabled at table creation
|
|
11
10
|
from vector_inspector.core.connections.base_connection import VectorDBConnection
|
|
12
11
|
from vector_inspector.core.logging import log_error, log_info
|
|
@@ -525,52 +524,12 @@ class PgVectorConnection(VectorDBConnection):
|
|
|
525
524
|
# If caller provided query texts (not embeddings), compute embeddings using configured model
|
|
526
525
|
if (not query_embeddings) and query_texts:
|
|
527
526
|
try:
|
|
528
|
-
from vector_inspector.
|
|
529
|
-
from vector_inspector.core.embedding_utils import (
|
|
530
|
-
load_embedding_model,
|
|
531
|
-
get_embedding_model_for_dimension,
|
|
532
|
-
DEFAULT_MODEL,
|
|
533
|
-
encode_text,
|
|
534
|
-
)
|
|
527
|
+
from vector_inspector.core.embedding_utils import encode_text
|
|
535
528
|
|
|
536
|
-
|
|
537
|
-
model_type =
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
settings = SettingsService()
|
|
541
|
-
model_info = settings.get_embedding_model(self.database, collection_name)
|
|
542
|
-
if model_info:
|
|
543
|
-
model_name = model_info.get("model")
|
|
544
|
-
model_type = model_info.get("type", "sentence-transformer")
|
|
545
|
-
|
|
546
|
-
# 2) collection metadata
|
|
547
|
-
if not model_name:
|
|
548
|
-
coll_info = self.get_collection_info(collection_name)
|
|
549
|
-
if coll_info and coll_info.get("embedding_model"):
|
|
550
|
-
model_name = coll_info.get("embedding_model")
|
|
551
|
-
model_type = coll_info.get("embedding_model_type", "stored")
|
|
552
|
-
|
|
553
|
-
# 3) dimension-based fallback
|
|
554
|
-
loaded_model = None
|
|
555
|
-
if not model_name:
|
|
556
|
-
# Try to get vector dimension
|
|
557
|
-
dim = None
|
|
558
|
-
coll_info = self.get_collection_info(collection_name)
|
|
559
|
-
if coll_info and coll_info.get("vector_dimension"):
|
|
560
|
-
try:
|
|
561
|
-
dim = int(coll_info.get("vector_dimension"))
|
|
562
|
-
except Exception:
|
|
563
|
-
dim = None
|
|
564
|
-
if dim:
|
|
565
|
-
loaded_model, model_name, model_type = get_embedding_model_for_dimension(
|
|
566
|
-
dim
|
|
567
|
-
)
|
|
568
|
-
else:
|
|
569
|
-
# Use default model
|
|
570
|
-
model_name, model_type = DEFAULT_MODEL
|
|
571
|
-
|
|
572
|
-
if not loaded_model:
|
|
573
|
-
loaded_model = load_embedding_model(model_name, model_type)
|
|
529
|
+
# Use inherited method to resolve and load the embedding model
|
|
530
|
+
loaded_model, model_name, model_type = self.load_embedding_model_for_collection(
|
|
531
|
+
collection_name
|
|
532
|
+
)
|
|
574
533
|
|
|
575
534
|
# Compute embeddings for the provided query_texts (use helper for CLIP)
|
|
576
535
|
if model_type != "clip":
|
|
@@ -820,70 +779,14 @@ class PgVectorConnection(VectorDBConnection):
|
|
|
820
779
|
self._last_regenerated_count = 0
|
|
821
780
|
if (not embeddings) and documents:
|
|
822
781
|
try:
|
|
823
|
-
#
|
|
824
|
-
from vector_inspector.services.settings_service import SettingsService
|
|
825
|
-
from vector_inspector.core.embedding_utils import (
|
|
826
|
-
load_embedding_model,
|
|
827
|
-
get_embedding_model_for_dimension,
|
|
828
|
-
DEFAULT_MODEL,
|
|
829
|
-
)
|
|
830
|
-
|
|
831
|
-
model_name = None
|
|
832
|
-
model_type = None
|
|
833
|
-
|
|
834
|
-
# 1) settings
|
|
835
|
-
settings = SettingsService()
|
|
836
|
-
model_info = settings.get_embedding_model(self.database, collection_name)
|
|
837
|
-
if model_info:
|
|
838
|
-
model_name = model_info.get("model")
|
|
839
|
-
model_type = model_info.get("type", "sentence-transformer")
|
|
840
|
-
|
|
841
|
-
# 2) collection metadata
|
|
842
|
-
if not model_name:
|
|
843
|
-
coll_info = self.get_collection_info(collection_name)
|
|
844
|
-
if coll_info and coll_info.get("embedding_model"):
|
|
845
|
-
model_name = coll_info.get("embedding_model")
|
|
846
|
-
model_type = coll_info.get("embedding_model_type", "stored")
|
|
847
|
-
|
|
848
|
-
# 3) dimension-based fallback
|
|
849
|
-
loaded_model = None
|
|
850
|
-
if not model_name:
|
|
851
|
-
# Try to get vector dimension
|
|
852
|
-
dim = None
|
|
853
|
-
coll_info = self.get_collection_info(collection_name)
|
|
854
|
-
if coll_info and coll_info.get("vector_dimension"):
|
|
855
|
-
try:
|
|
856
|
-
dim = int(coll_info.get("vector_dimension"))
|
|
857
|
-
except Exception:
|
|
858
|
-
dim = None
|
|
859
|
-
if dim:
|
|
860
|
-
loaded_model, model_name, model_type = (
|
|
861
|
-
get_embedding_model_for_dimension(dim)
|
|
862
|
-
)
|
|
863
|
-
else:
|
|
864
|
-
# Use default model
|
|
865
|
-
model_name, model_type = DEFAULT_MODEL
|
|
866
|
-
|
|
867
|
-
# Load model if not already loaded
|
|
868
|
-
if not loaded_model:
|
|
869
|
-
loaded_model = load_embedding_model(model_name, model_type)
|
|
870
|
-
|
|
782
|
+
# Use inherited method to compute embeddings
|
|
871
783
|
# Compute embeddings only for documents that are present
|
|
872
784
|
compute_idxs = [i for i, d in enumerate(documents) if d]
|
|
873
785
|
if compute_idxs:
|
|
874
786
|
docs_to_compute = [documents[i] for i in compute_idxs]
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
docs_to_compute, show_progress_bar=False
|
|
879
|
-
).tolist()
|
|
880
|
-
else:
|
|
881
|
-
# CLIP type - encode per document using helper
|
|
882
|
-
from vector_inspector.core.embedding_utils import encode_text
|
|
883
|
-
|
|
884
|
-
computed = [
|
|
885
|
-
encode_text(d, loaded_model, model_type) for d in docs_to_compute
|
|
886
|
-
]
|
|
787
|
+
computed = self.compute_embeddings_for_documents(
|
|
788
|
+
collection_name, docs_to_compute
|
|
789
|
+
)
|
|
887
790
|
embeddings_local = [None] * len(ids)
|
|
888
791
|
for idx, emb in zip(compute_idxs, computed):
|
|
889
792
|
embeddings_local[idx] = emb
|
|
@@ -1075,66 +978,3 @@ class PgVectorConnection(VectorDBConnection):
|
|
|
1075
978
|
return []
|
|
1076
979
|
log_info("[pgvector] _parse_vector: unhandled type %s, returning []", type(vector_str))
|
|
1077
980
|
return []
|
|
1078
|
-
|
|
1079
|
-
def compute_embeddings_for_documents(
|
|
1080
|
-
self, collection_name: str, documents: list[str]
|
|
1081
|
-
) -> list[list[float]] | None:
|
|
1082
|
-
"""
|
|
1083
|
-
Compute embeddings for a list of documents using the configured/default model for the collection.
|
|
1084
|
-
Returns a list of embeddings, or None on failure.
|
|
1085
|
-
"""
|
|
1086
|
-
try:
|
|
1087
|
-
from vector_inspector.services.settings_service import SettingsService
|
|
1088
|
-
from vector_inspector.core.embedding_utils import (
|
|
1089
|
-
load_embedding_model,
|
|
1090
|
-
get_embedding_model_for_dimension,
|
|
1091
|
-
DEFAULT_MODEL,
|
|
1092
|
-
encode_text,
|
|
1093
|
-
)
|
|
1094
|
-
|
|
1095
|
-
model_name = None
|
|
1096
|
-
model_type = None
|
|
1097
|
-
|
|
1098
|
-
# 1) settings
|
|
1099
|
-
settings = SettingsService()
|
|
1100
|
-
model_info = settings.get_embedding_model(self.database, collection_name)
|
|
1101
|
-
if model_info:
|
|
1102
|
-
model_name = model_info.get("model")
|
|
1103
|
-
model_type = model_info.get("type", "sentence-transformer")
|
|
1104
|
-
|
|
1105
|
-
# 2) collection metadata
|
|
1106
|
-
if not model_name:
|
|
1107
|
-
coll_info = self.get_collection_info(collection_name)
|
|
1108
|
-
if coll_info and coll_info.get("embedding_model"):
|
|
1109
|
-
model_name = coll_info.get("embedding_model")
|
|
1110
|
-
model_type = coll_info.get("embedding_model_type", "stored")
|
|
1111
|
-
|
|
1112
|
-
# 3) dimension-based fallback
|
|
1113
|
-
loaded_model = None
|
|
1114
|
-
if not model_name:
|
|
1115
|
-
# Try to get vector dimension
|
|
1116
|
-
dim = None
|
|
1117
|
-
coll_info = self.get_collection_info(collection_name)
|
|
1118
|
-
if coll_info and coll_info.get("vector_dimension"):
|
|
1119
|
-
try:
|
|
1120
|
-
dim = int(coll_info.get("vector_dimension"))
|
|
1121
|
-
except Exception:
|
|
1122
|
-
dim = None
|
|
1123
|
-
if dim:
|
|
1124
|
-
loaded_model, model_name, model_type = get_embedding_model_for_dimension(dim)
|
|
1125
|
-
else:
|
|
1126
|
-
model_name, model_type = DEFAULT_MODEL
|
|
1127
|
-
|
|
1128
|
-
# Load model
|
|
1129
|
-
if not loaded_model:
|
|
1130
|
-
loaded_model = load_embedding_model(model_name, model_type)
|
|
1131
|
-
|
|
1132
|
-
# Compute embeddings for all documents
|
|
1133
|
-
if model_type != "clip":
|
|
1134
|
-
embeddings = loaded_model.encode(documents, show_progress_bar=False).tolist()
|
|
1135
|
-
else:
|
|
1136
|
-
embeddings = [encode_text(d, loaded_model, model_type) for d in documents]
|
|
1137
|
-
return embeddings
|
|
1138
|
-
except Exception as e:
|
|
1139
|
-
log_error("Failed to compute embeddings: %s", e)
|
|
1140
|
-
return None
|