vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. vector_inspector/core/connection_manager.py +55 -49
  2. vector_inspector/core/connections/base_connection.py +41 -41
  3. vector_inspector/core/connections/chroma_connection.py +110 -86
  4. vector_inspector/core/connections/pinecone_connection.py +168 -182
  5. vector_inspector/core/connections/qdrant_connection.py +109 -126
  6. vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
  7. vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
  8. vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
  9. vector_inspector/core/connections/template_connection.py +55 -65
  10. vector_inspector/core/embedding_utils.py +32 -32
  11. vector_inspector/core/logging.py +27 -0
  12. vector_inspector/core/model_registry.py +4 -3
  13. vector_inspector/main.py +6 -2
  14. vector_inspector/services/backup_helpers.py +63 -0
  15. vector_inspector/services/backup_restore_service.py +73 -152
  16. vector_inspector/services/credential_service.py +33 -40
  17. vector_inspector/services/import_export_service.py +70 -67
  18. vector_inspector/services/profile_service.py +92 -94
  19. vector_inspector/services/settings_service.py +68 -48
  20. vector_inspector/services/visualization_service.py +40 -39
  21. vector_inspector/ui/components/splash_window.py +57 -0
  22. vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
  23. vector_inspector/ui/main_window.py +200 -146
  24. vector_inspector/ui/views/info_panel.py +208 -127
  25. vector_inspector/ui/views/metadata_view.py +8 -7
  26. vector_inspector/ui/views/search_view.py +97 -75
  27. vector_inspector/ui/views/visualization_view.py +140 -97
  28. vector_inspector/utils/version.py +5 -0
  29. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
  30. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
  31. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
  32. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -6,18 +6,19 @@ Implement all abstract methods according to your database's API.
6
6
 
7
7
  from typing import Optional, List, Dict, Any
8
8
  from .base_connection import VectorDBConnection
9
+ from vector_inspector.core.logging import log_error
9
10
 
10
11
 
11
12
  class TemplateConnection(VectorDBConnection):
12
13
  """Template vector database connection.
13
-
14
+
14
15
  Replace this with your database provider name (e.g., PineconeConnection, QdrantConnection).
15
16
  """
16
-
17
+
17
18
  def __init__(self, **kwargs):
18
19
  """
19
20
  Initialize connection parameters.
20
-
21
+
21
22
  Args:
22
23
  **kwargs: Provider-specific connection parameters
23
24
  (e.g., api_key, host, port, credentials, etc.)
@@ -25,11 +26,11 @@ class TemplateConnection(VectorDBConnection):
25
26
  # Store your connection parameters here
26
27
  self._client = None
27
28
  # Add your provider-specific attributes
28
-
29
+
29
30
  def connect(self) -> bool:
30
31
  """
31
32
  Establish connection to the vector database.
32
-
33
+
33
34
  Returns:
34
35
  True if connection successful, False otherwise
35
36
  """
@@ -38,29 +39,29 @@ class TemplateConnection(VectorDBConnection):
38
39
  # self._client = YourDatabaseClient(...)
39
40
  return True
40
41
  except Exception as e:
41
- print(f"Connection failed: {e}")
42
+ log_error("Connection failed: %s", e)
42
43
  return False
43
-
44
+
44
45
  def disconnect(self):
45
46
  """Close connection to the vector database."""
46
47
  # Clean up your connection
47
48
  self._client = None
48
-
49
+
49
50
  @property
50
51
  def is_connected(self) -> bool:
51
52
  """
52
53
  Check if connected to the vector database.
53
-
54
+
54
55
  Returns:
55
56
  True if connected, False otherwise
56
57
  """
57
58
  # Return whether the client is active
58
59
  return self._client is not None
59
-
60
+
60
61
  def list_collections(self) -> List[str]:
61
62
  """
62
63
  Get list of all collections/indexes.
63
-
64
+
64
65
  Returns:
65
66
  List of collection/index names
66
67
  """
@@ -72,16 +73,16 @@ class TemplateConnection(VectorDBConnection):
72
73
  # return [col.name for col in collections]
73
74
  return []
74
75
  except Exception as e:
75
- print(f"Failed to list collections: {e}")
76
+ log_error("Failed to list collections: %s", e)
76
77
  return []
77
-
78
+
78
79
  def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
79
80
  """
80
81
  Get collection metadata and statistics.
81
-
82
+
82
83
  Args:
83
84
  name: Collection/index name
84
-
85
+
85
86
  Returns:
86
87
  Dictionary with collection info:
87
88
  - name: Collection name
@@ -90,22 +91,22 @@ class TemplateConnection(VectorDBConnection):
90
91
  """
91
92
  if not self._client:
92
93
  return None
93
-
94
+
94
95
  try:
95
96
  # Get collection stats from your database
96
97
  # collection = self._client.get_collection(name)
97
98
  # count = collection.count()
98
99
  # metadata_fields = collection.get_metadata_fields()
99
-
100
+
100
101
  return {
101
102
  "name": name,
102
103
  "count": 0, # Replace with actual count
103
104
  "metadata_fields": [], # Replace with actual fields
104
105
  }
105
106
  except Exception as e:
106
- print(f"Failed to get collection info: {e}")
107
+ log_error("Failed to get collection info: %s", e)
107
108
  return None
108
-
109
+
109
110
  def query_collection(
110
111
  self,
111
112
  collection_name: str,
@@ -117,7 +118,7 @@ class TemplateConnection(VectorDBConnection):
117
118
  ) -> Optional[Dict[str, Any]]:
118
119
  """
119
120
  Query a collection for similar vectors.
120
-
121
+
121
122
  Args:
122
123
  collection_name: Name of collection to query
123
124
  query_texts: Text queries to embed and search
@@ -125,7 +126,7 @@ class TemplateConnection(VectorDBConnection):
125
126
  n_results: Number of results to return
126
127
  where: Metadata filter
127
128
  where_document: Document content filter
128
-
129
+
129
130
  Returns:
130
131
  Query results dictionary with keys:
131
132
  - ids: List of result IDs
@@ -136,7 +137,7 @@ class TemplateConnection(VectorDBConnection):
136
137
  """
137
138
  if not self._client:
138
139
  return None
139
-
140
+
140
141
  try:
141
142
  # Perform similarity search
142
143
  # results = self._client.query(
@@ -145,19 +146,13 @@ class TemplateConnection(VectorDBConnection):
145
146
  # n_results=n_results,
146
147
  # filter=where
147
148
  # )
148
-
149
+
149
150
  # Transform results to standard format
150
- return {
151
- "ids": [],
152
- "distances": [],
153
- "documents": [],
154
- "metadatas": [],
155
- "embeddings": []
156
- }
151
+ return {"ids": [], "distances": [], "documents": [], "metadatas": [], "embeddings": []}
157
152
  except Exception as e:
158
- print(f"Query failed: {e}")
153
+ log_error("Query failed: %s", e)
159
154
  return None
160
-
155
+
161
156
  def get_all_items(
162
157
  self,
163
158
  collection_name: str,
@@ -167,13 +162,13 @@ class TemplateConnection(VectorDBConnection):
167
162
  ) -> Optional[Dict[str, Any]]:
168
163
  """
169
164
  Get all items from a collection.
170
-
165
+
171
166
  Args:
172
167
  collection_name: Name of collection
173
168
  limit: Maximum number of items to return
174
169
  offset: Number of items to skip
175
170
  where: Metadata filter
176
-
171
+
177
172
  Returns:
178
173
  Dictionary with collection items:
179
174
  - ids: List of item IDs
@@ -183,7 +178,7 @@ class TemplateConnection(VectorDBConnection):
183
178
  """
184
179
  if not self._client:
185
180
  return None
186
-
181
+
187
182
  try:
188
183
  # Fetch items from collection with pagination
189
184
  # results = self._client.fetch(
@@ -192,17 +187,12 @@ class TemplateConnection(VectorDBConnection):
192
187
  # offset=offset,
193
188
  # filter=where
194
189
  # )
195
-
196
- return {
197
- "ids": [],
198
- "documents": [],
199
- "metadatas": [],
200
- "embeddings": []
201
- }
190
+
191
+ return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
202
192
  except Exception as e:
203
- print(f"Failed to get items: {e}")
193
+ log_error("Failed to get items: %s", e)
204
194
  return None
205
-
195
+
206
196
  def add_items(
207
197
  self,
208
198
  collection_name: str,
@@ -213,20 +203,20 @@ class TemplateConnection(VectorDBConnection):
213
203
  ) -> bool:
214
204
  """
215
205
  Add items to a collection.
216
-
206
+
217
207
  Args:
218
208
  collection_name: Name of collection
219
209
  documents: Document texts
220
210
  metadatas: Metadata for each document
221
211
  ids: IDs for each document
222
212
  embeddings: Pre-computed embeddings
223
-
213
+
224
214
  Returns:
225
215
  True if successful, False otherwise
226
216
  """
227
217
  if not self._client:
228
218
  return False
229
-
219
+
230
220
  try:
231
221
  # Add items to the collection
232
222
  # self._client.upsert(
@@ -238,9 +228,9 @@ class TemplateConnection(VectorDBConnection):
238
228
  # )
239
229
  return True
240
230
  except Exception as e:
241
- print(f"Failed to add items: {e}")
231
+ log_error("Failed to add items: %s", e)
242
232
  return False
243
-
233
+
244
234
  def update_items(
245
235
  self,
246
236
  collection_name: str,
@@ -251,20 +241,20 @@ class TemplateConnection(VectorDBConnection):
251
241
  ) -> bool:
252
242
  """
253
243
  Update items in a collection.
254
-
244
+
255
245
  Args:
256
246
  collection_name: Name of collection
257
247
  ids: IDs of items to update
258
248
  documents: New document texts
259
249
  metadatas: New metadata
260
250
  embeddings: New embeddings
261
-
251
+
262
252
  Returns:
263
253
  True if successful, False otherwise
264
254
  """
265
255
  if not self._client:
266
256
  return False
267
-
257
+
268
258
  try:
269
259
  # Update existing items
270
260
  # self._client.update(
@@ -276,9 +266,9 @@ class TemplateConnection(VectorDBConnection):
276
266
  # )
277
267
  return True
278
268
  except Exception as e:
279
- print(f"Failed to update items: {e}")
269
+ log_error("Failed to update items: %s", e)
280
270
  return False
281
-
271
+
282
272
  def delete_items(
283
273
  self,
284
274
  collection_name: str,
@@ -287,18 +277,18 @@ class TemplateConnection(VectorDBConnection):
287
277
  ) -> bool:
288
278
  """
289
279
  Delete items from a collection.
290
-
280
+
291
281
  Args:
292
282
  collection_name: Name of collection
293
283
  ids: IDs of items to delete
294
284
  where: Metadata filter for items to delete
295
-
285
+
296
286
  Returns:
297
287
  True if successful, False otherwise
298
288
  """
299
289
  if not self._client:
300
290
  return False
301
-
291
+
302
292
  try:
303
293
  # Delete items
304
294
  # self._client.delete(
@@ -308,34 +298,34 @@ class TemplateConnection(VectorDBConnection):
308
298
  # )
309
299
  return True
310
300
  except Exception as e:
311
- print(f"Failed to delete items: {e}")
301
+ log_error("Failed to delete items: %s", e)
312
302
  return False
313
-
303
+
314
304
  def delete_collection(self, name: str) -> bool:
315
305
  """
316
306
  Delete an entire collection.
317
-
307
+
318
308
  Args:
319
309
  name: Collection name
320
-
310
+
321
311
  Returns:
322
312
  True if successful, False otherwise
323
313
  """
324
314
  if not self._client:
325
315
  return False
326
-
316
+
327
317
  try:
328
318
  # Delete the collection
329
319
  # self._client.delete_collection(name)
330
320
  return True
331
321
  except Exception as e:
332
- print(f"Failed to delete collection: {e}")
322
+ log_error("Failed to delete collection: %s", e)
333
323
  return False
334
-
324
+
335
325
  def get_connection_info(self) -> Dict[str, Any]:
336
326
  """
337
327
  Get information about the current connection.
338
-
328
+
339
329
  Returns:
340
330
  Dictionary with connection details
341
331
  """
@@ -2,6 +2,7 @@
2
2
 
3
3
  from typing import Optional, Union, Tuple
4
4
  from sentence_transformers import SentenceTransformer
5
+ from vector_inspector.core.logging import log_info
5
6
 
6
7
  from .model_registry import get_model_registry
7
8
 
@@ -12,19 +13,17 @@ DEFAULT_MODEL = ("all-MiniLM-L6-v2", "sentence-transformer")
12
13
 
13
14
  def _get_dimension_to_model_dict():
14
15
  """Build dimension->models dictionary from registry.
15
-
16
+
16
17
  Returns:
17
18
  Dict mapping dimension to list of (name, type, description) tuples
18
19
  """
19
20
  registry = get_model_registry()
20
21
  dimension_map = {}
21
-
22
+
22
23
  for dimension in registry.get_all_dimensions():
23
24
  models = registry.get_models_by_dimension(dimension)
24
- dimension_map[dimension] = [
25
- (m.name, m.type, m.description) for m in models
26
- ]
27
-
25
+ dimension_map[dimension] = [(m.name, m.type, m.description) for m in models]
26
+
28
27
  return dimension_map
29
28
 
30
29
 
@@ -35,37 +34,37 @@ DIMENSION_TO_MODEL = _get_dimension_to_model_dict()
35
34
  def get_model_for_dimension(dimension: int, prefer_multimodal: bool = True) -> Tuple[str, str]:
36
35
  """
37
36
  Get the appropriate embedding model name and type for a given vector dimension.
38
-
37
+
39
38
  Args:
40
39
  dimension: The vector dimension size
41
- prefer_multimodal: If True and multiple models exist for this dimension,
40
+ prefer_multimodal: If True and multiple models exist for this dimension,
42
41
  prefer multi-modal (CLIP) over text-only models
43
-
42
+
44
43
  Returns:
45
44
  Tuple of (model_name, model_type) where model_type is "sentence-transformer" or "clip"
46
45
  """
47
46
  registry = get_model_registry()
48
47
  models = registry.get_models_by_dimension(dimension)
49
-
48
+
50
49
  if not models:
51
50
  # Find the closest dimension if exact match not found
52
51
  closest_dim = registry.find_closest_dimension(dimension)
53
52
  if closest_dim:
54
53
  models = registry.get_models_by_dimension(closest_dim)
55
-
54
+
56
55
  if not models:
57
56
  return DEFAULT_MODEL
58
-
57
+
59
58
  if len(models) == 1:
60
59
  return (models[0].name, models[0].type)
61
-
60
+
62
61
  # Multiple models available - apply preference
63
62
  if prefer_multimodal:
64
63
  # Prefer CLIP/multimodal
65
64
  for model in models:
66
65
  if model.modality == "multimodal" or model.type == "clip":
67
66
  return (model.name, model.type)
68
-
67
+
69
68
  # Default to first option
70
69
  return (models[0].name, models[0].type)
71
70
 
@@ -74,10 +73,10 @@ def get_available_models_for_dimension(dimension: int) -> list:
74
73
  """
75
74
  Get all available model options for a given dimension.
76
75
  Includes both predefined (from registry) and custom user-added models.
77
-
76
+
78
77
  Args:
79
78
  dimension: The vector dimension size
80
-
79
+
81
80
  Returns:
82
81
  List of tuples: [(model_name, model_type, description), ...]
83
82
  """
@@ -85,39 +84,37 @@ def get_available_models_for_dimension(dimension: int) -> list:
85
84
  registry = get_model_registry()
86
85
  registry_models = registry.get_models_by_dimension(dimension)
87
86
  models = [(m.name, m.type, m.description) for m in registry_models]
88
-
87
+
89
88
  # Add custom models from settings
90
89
  try:
91
90
  from ..services.settings_service import SettingsService
91
+
92
92
  settings = SettingsService()
93
93
  custom_models = settings.get_custom_embedding_models(dimension)
94
-
94
+
95
95
  for model in custom_models:
96
96
  # Format: (model_name, model_type, description)
97
- models.append((
98
- model["name"],
99
- model["type"],
100
- f"{model['description']} (custom)"
101
- ))
97
+ models.append((model["name"], model["type"], f"{model['description']} (custom)"))
102
98
  except Exception as e:
103
- print(f"Warning: Could not load custom models: {e}")
104
-
99
+ log_info("Warning: Could not load custom models: %s", e)
100
+
105
101
  return models
106
102
 
107
103
 
108
104
  def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
109
105
  """
110
106
  Load an embedding model (sentence-transformer or CLIP).
111
-
107
+
112
108
  Args:
113
109
  model_name: Name of the model to load
114
110
  model_type: Type of model ("sentence-transformer" or "clip")
115
-
111
+
116
112
  Returns:
117
113
  Loaded model (SentenceTransformer or CLIP model)
118
114
  """
119
115
  if model_type == "clip":
120
116
  from transformers import CLIPModel, CLIPProcessor
117
+
121
118
  model = CLIPModel.from_pretrained(model_name)
122
119
  processor = CLIPProcessor.from_pretrained(model_name)
123
120
  return (model, processor)
@@ -128,17 +125,18 @@ def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTran
128
125
  def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
129
126
  """
130
127
  Encode text using the appropriate model.
131
-
128
+
132
129
  Args:
133
130
  text: Text to encode
134
131
  model: The loaded model (SentenceTransformer or (CLIPModel, CLIPProcessor) tuple)
135
132
  model_type: Type of model ("sentence-transformer" or "clip")
136
-
133
+
137
134
  Returns:
138
135
  Embedding vector as a list
139
136
  """
140
137
  if model_type == "clip":
141
138
  import torch
139
+
142
140
  clip_model, processor = model
143
141
  inputs = processor(text=[text], return_tensors="pt", padding=True)
144
142
  with torch.no_grad():
@@ -152,13 +150,15 @@ def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type:
152
150
  return embedding.tolist()
153
151
 
154
152
 
155
- def get_embedding_model_for_dimension(dimension: int) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
153
+ def get_embedding_model_for_dimension(
154
+ dimension: int,
155
+ ) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
156
156
  """
157
157
  Get a loaded embedding model for a specific dimension.
158
-
158
+
159
159
  Args:
160
160
  dimension: The vector dimension size
161
-
161
+
162
162
  Returns:
163
163
  Tuple of (loaded_model, model_name, model_type)
164
164
  """
@@ -0,0 +1,27 @@
1
+ """Tiny logging wrapper for consistent logs across the project.
2
+
3
+ Provides `log_info`, `log_error`, and `log_debug` helpers that delegate
4
+ to the standard `logging` module but keep call sites concise.
5
+ """
6
+ import logging
7
+ from typing import Any
8
+
9
+ _logger = logging.getLogger("vector_inspector")
10
+ if not _logger.handlers:
11
+ handler = logging.StreamHandler()
12
+ formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
13
+ handler.setFormatter(formatter)
14
+ _logger.addHandler(handler)
15
+ _logger.setLevel(logging.INFO)
16
+
17
+
18
def log_info(msg: str, *args: Any, **kwargs: Any) -> None:
    """Emit `msg` at INFO level on the shared project logger.

    Args:
        msg: Message, optionally a %-style format string.
        *args: Values lazily merged into `msg` by the logging module.
        **kwargs: Passed through unchanged to the logger call
            (e.g. ``exc_info``, ``extra``).
    """
    _logger.log(logging.INFO, msg, *args, **kwargs)
20
+
21
+
22
def log_error(msg: str, *args: Any, **kwargs: Any) -> None:
    """Emit `msg` at ERROR level on the shared project logger.

    Args:
        msg: Message, optionally a %-style format string.
        *args: Values lazily merged into `msg` by the logging module.
        **kwargs: Passed through unchanged to the logger call
            (e.g. ``exc_info``, ``extra``).
    """
    _logger.log(logging.ERROR, msg, *args, **kwargs)
24
+
25
+
26
def log_debug(msg: str, *args: Any, **kwargs: Any) -> None:
    """Emit `msg` at DEBUG level on the shared project logger.

    Args:
        msg: Message, optionally a %-style format string.
        *args: Values lazily merged into `msg` by the logging module.
        **kwargs: Passed through unchanged to the logger call
            (e.g. ``exc_info``, ``extra``).
    """
    _logger.log(logging.DEBUG, msg, *args, **kwargs)
@@ -4,6 +4,7 @@ import json
4
4
  from pathlib import Path
5
5
  from typing import List, Dict, Optional, Tuple
6
6
  from dataclasses import dataclass
7
+ from vector_inspector.core.logging import log_info, log_error
7
8
 
8
9
 
9
10
  @dataclass
@@ -63,7 +64,7 @@ class EmbeddingModelRegistry:
63
64
  registry_path = Path(__file__).parent.parent / "config" / "known_embedding_models.json"
64
65
 
65
66
  if not registry_path.exists():
66
- print(f"Warning: Model registry not found at {registry_path}")
67
+ log_info("Warning: Model registry not found at %s", registry_path)
67
68
  return
68
69
 
69
70
  try:
@@ -83,10 +84,10 @@ class EmbeddingModelRegistry:
83
84
  # Index by name
84
85
  self._name_index[model_info.name.lower()] = model_info
85
86
 
86
- print(f"Loaded {len(self._models)} models from registry")
87
+ log_info("Loaded %d models from registry", len(self._models))
87
88
  #...
88
89
  except Exception as e:
89
- print(f"Error loading model registry: {e}")
90
+ log_error("Error loading model registry: %s", e)
90
91
 
91
92
  def get_models_by_dimension(self, dimension: int) -> List[ModelInfo]:
92
93
  """Get all models for a specific dimension.
vector_inspector/main.py CHANGED
@@ -1,19 +1,23 @@
1
1
  """Main entry point for Vector Inspector application."""
2
2
 
3
3
  import sys
4
+ import os
4
5
  from PySide6.QtWidgets import QApplication
5
6
  from vector_inspector.ui.main_window import MainWindow
6
7
 
8
+ # Ensures the app looks in its own folder for the raw libraries
9
+ sys.path.append(os.path.dirname(sys.executable))
10
+
7
11
 
8
12
  def main():
9
13
  """Launch the Vector Inspector application."""
10
14
  app = QApplication(sys.argv)
11
15
  app.setApplicationName("Vector Inspector")
12
16
  app.setOrganizationName("Vector Inspector")
13
-
17
+
14
18
  window = MainWindow()
15
19
  window.show()
16
-
20
+
17
21
  sys.exit(app.exec())
18
22
 
19
23
 
@@ -0,0 +1,63 @@
1
+ """Helpers for backup/restore: zip read/write and embedding normalization.
2
+
3
+ Minimal, well-tested helpers to keep `BackupRestoreService` concise.
4
+ """
5
+ import json
6
+ import zipfile
7
+ from typing import Tuple, Dict, Any
8
+
9
+
10
def write_backup_zip(path, metadata: Dict[str, Any], data: Dict[str, Any]):
    """Create a deflate-compressed backup archive at `path`.

    The archive receives two members, ``metadata.json`` and ``data.json``,
    each holding the 2-space-indented JSON serialization of the matching
    argument.

    Args:
        path: Destination of the zip file (pathlib.Path or string).
        metadata: JSON-serializable backup metadata.
        data: JSON-serializable backup payload.
    """
    # Order matters: metadata is serialized and written before data.
    members = (("metadata.json", metadata), ("data.json", data))
    with zipfile.ZipFile(path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for member_name, payload in members:
            archive.writestr(member_name, json.dumps(payload, indent=2))
18
+
19
+
20
def read_backup_zip(path) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Load the two JSON members of a backup archive.

    Args:
        path: Location of the zip file to open for reading.

    Returns:
        ``(metadata, data)``: the parsed contents of ``metadata.json`` and
        ``data.json``, in that order.
    """

    def _load_member(archive, member_name):
        # Members are decoded as UTF-8 text before JSON parsing.
        return json.loads(archive.read(member_name).decode("utf-8"))

    with zipfile.ZipFile(path, mode="r") as archive:
        metadata = _load_member(archive, "metadata.json")
        data = _load_member(archive, "data.json")
    return metadata, data
31
+
32
+
33
def normalize_embeddings(data: Dict[str, Any]) -> Dict[str, Any]:
    """Ensure embeddings in `data` are plain Python lists (no numpy objects).

    Conversion is recursive: numpy arrays become (nested) lists, numpy
    scalars such as ``np.float32`` become native Python scalars, and
    list/tuple containers are rebuilt as lists at every depth. The previous
    implementation only handled a top-level array or arrays sitting directly
    inside a list, so numpy scalars in nested lists could still break
    ``json.dumps``.

    This mutates and returns the same `data` dict for convenience.

    Args:
        data: Mapping that may hold an ``'embeddings'`` entry.

    Returns:
        The same `data` object. It is left untouched when the key is
        missing, its value is None, or numpy cannot be imported.
    """
    if data.get('embeddings') is None:
        return data

    try:
        import numpy as np
    except Exception:
        # Deliberately broad best-effort guard (kept from the original):
        # without a working numpy, assume the data is already serializable.
        return data

    def _to_plain(value):
        # ndarray.tolist() already yields native scalars at every depth.
        if isinstance(value, np.ndarray):
            return value.tolist()
        # Stand-alone numpy scalars (np.float32, np.int64, ...).
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, (list, tuple)):
            return [_to_plain(item) for item in value]
        return value

    data['embeddings'] = _to_plain(data['embeddings'])
    return data
+ return data