vector-inspector 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -261,7 +261,7 @@ class VectorDBConnection(ABC):
261
261
 
262
262
  # Finally, check user settings (for collections we can't modify)
263
263
  if connection_id:
264
- from ...services.settings_service import SettingsService
264
+ from vector_inspector.services.settings_service import SettingsService
265
265
 
266
266
  settings = SettingsService()
267
267
  model_info = settings.get_embedding_model(connection_id, collection_name)
@@ -272,3 +272,88 @@ class VectorDBConnection(ABC):
272
272
  except Exception as e:
273
273
  log_error("Failed to get embedding model: %s", e)
274
274
  return None
275
+
276
def load_embedding_model_for_collection(
    self, collection_name: str, connection_id: Optional[str] = None
):
    """
    Resolve and load an embedding model for a collection.

    Resolution order:
    1. User settings (SettingsService) - only consulted when connection_id is given
    2. Collection metadata (get_collection_info)
    3. Dimension-based registry (embedding_utils.get_embedding_model_for_dimension)
    4. DEFAULT_MODEL

    Args:
        collection_name: Name of the collection to resolve a model for.
        connection_id: Optional connection identifier used to look up user settings.

    Returns:
        (loaded_model, model_name, model_type)

    Raises:
        Exception: Any error from the settings lookup, from loading a model named
            in settings/metadata, or from loading the default model is logged and
            re-raised. Failures of get_collection_info and of the dimension-based
            lookup are logged and resolution falls through to the next step.
    """
    try:
        # Imported lazily, as in the rest of this module's methods.
        from vector_inspector.services.settings_service import SettingsService
        from vector_inspector.core.embedding_utils import (
            load_embedding_model,
            get_embedding_model_for_dimension,
            DEFAULT_MODEL,
        )

        # 1) settings
        if connection_id:
            settings = SettingsService()
            cfg = settings.get_embedding_model(connection_id, collection_name)
            if cfg and cfg.get("model"):
                model_name = cfg.get("model")
                model_type = cfg.get("type", "sentence-transformer")
                model = load_embedding_model(model_name, model_type)
                return (model, model_name, model_type)

        # 2) collection metadata
        try:
            info = self.get_collection_info(collection_name)
        except Exception as info_err:
            # Best-effort: metadata is optional, but record why it was unavailable
            # instead of silently discarding the error.
            log_error("Failed to get collection info for %s: %s", collection_name, info_err)
            info = None

        if info and info.get("embedding_model"):
            model_name = info.get("embedding_model")
            model_type = info.get("embedding_model_type", "sentence-transformer")
            model = load_embedding_model(model_name, model_type)
            return (model, model_name, model_type)

        # 3) dimension based
        if info and info.get("vector_dimension"):
            try:
                dim = int(info.get("vector_dimension"))
                model, model_name, model_type = get_embedding_model_for_dimension(dim)
                return (model, model_name, model_type)
            except Exception as dim_err:
                # Best-effort: fall through to the default model, but leave a trace
                # so an unexpected fallback can be diagnosed.
                log_error(
                    "Dimension-based model lookup failed for %s: %s",
                    collection_name,
                    dim_err,
                )

        # 4) fallback
        model_name, model_type = DEFAULT_MODEL
        model = load_embedding_model(model_name, model_type)
        return (model, model_name, model_type)
    except Exception as e:
        log_error("Failed to load embedding model for collection %s: %s", collection_name, e)
        raise
337
+
338
def compute_embeddings_for_documents(
    self, collection_name: str, documents: List[str], connection_id: Optional[str] = None
) -> List[List[float]]:
    """
    Compute embeddings for a list of documents using the resolved model for the collection.

    Args:
        collection_name: Collection whose embedding model should be used.
        documents: Texts to embed.
        connection_id: Optional connection identifier forwarded to
            load_embedding_model_for_collection.

    Returns:
        A list of embedding vectors (one per document). An empty list is
        returned for empty input without resolving or loading any model.

    Raises:
        Exception: Propagated unchanged if model resolution or encoding fails.
    """
    # Nothing to encode: skip model resolution/loading entirely.
    if not documents:
        return []

    # The resolved model name is not needed here, only the model and its type.
    model, _, model_type = self.load_embedding_model_for_collection(
        collection_name, connection_id
    )

    # Use batch encoding when available (sentence-transformer), otherwise per-doc
    if model_type != "clip":
        # sentence-transformer-like models support batch encode
        return model.encode(documents, show_progress_bar=False).tolist()

    # CLIP - use encode_text helper for each document
    from vector_inspector.core.embedding_utils import encode_text

    return [encode_text(d, model, model_type) for d in documents]
@@ -8,7 +8,7 @@ from chromadb.api import ClientAPI
8
8
  from chromadb.api.models.Collection import Collection
9
9
  from chromadb import Documents, EmbeddingFunction, Embeddings
10
10
 
11
- from .base_connection import VectorDBConnection
11
+ from vector_inspector.core.connections.base_connection import VectorDBConnection
12
12
  from vector_inspector.core.logging import log_info, log_error
13
13
 
14
14
 
@@ -28,7 +28,7 @@ class DimensionAwareEmbeddingFunction(EmbeddingFunction):
28
28
  if self._initialized:
29
29
  return
30
30
 
31
- from ..embedding_utils import get_embedding_model_for_dimension
31
+ from vector_inspector.core.embedding_utils import get_embedding_model_for_dimension
32
32
 
33
33
  log_info("[ChromaDB] Loading embedding model for %dd vectors...", self.expected_dimension)
34
34
  self.model, self.model_name, self.model_type = get_embedding_model_for_dimension(
@@ -45,7 +45,7 @@ class DimensionAwareEmbeddingFunction(EmbeddingFunction):
45
45
  def __call__(self, input: Documents) -> Embeddings:
46
46
  """Embed documents using the dimension-appropriate model."""
47
47
  self._ensure_model_loaded()
48
- from ..embedding_utils import encode_text
48
+ from vector_inspector.core.embedding_utils import encode_text
49
49
 
50
50
  embeddings = []
51
51
  for text in input:
@@ -385,6 +385,16 @@ class ChromaDBConnection(VectorDBConnection):
385
385
  return False
386
386
 
387
387
  try:
388
+ # If embeddings not provided, compute using collection model
389
+ if not embeddings and documents:
390
+ try:
391
+ embeddings = self.compute_embeddings_for_documents(
392
+ collection_name, documents, getattr(self, "connection_id", None)
393
+ )
394
+ except Exception as e:
395
+ log_error("Failed to compute embeddings for Chroma add_items: %s", e)
396
+ return False
397
+
388
398
  collection.add(
389
399
  documents=documents,
390
400
  metadatas=metadatas, # type: ignore
@@ -422,6 +432,16 @@ class ChromaDBConnection(VectorDBConnection):
422
432
  return False
423
433
 
424
434
  try:
435
+ # If embeddings not provided but documents changed, compute embeddings
436
+ if (not embeddings) and documents:
437
+ try:
438
+ embeddings = self.compute_embeddings_for_documents(
439
+ collection_name, documents, getattr(self, "connection_id", None)
440
+ )
441
+ except Exception as e:
442
+ log_error("Failed to compute embeddings for Chroma update_items: %s", e)
443
+ return False
444
+
425
445
  collection.update(
426
446
  ids=ids,
427
447
  documents=documents,