vector-inspector 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/core/cache_manager.py +159 -0
- vector_inspector/core/connection_manager.py +277 -0
- vector_inspector/core/connections/chroma_connection.py +90 -5
- vector_inspector/core/connections/qdrant_connection.py +62 -8
- vector_inspector/core/embedding_utils.py +140 -0
- vector_inspector/services/backup_restore_service.py +3 -29
- vector_inspector/services/credential_service.py +130 -0
- vector_inspector/services/filter_service.py +1 -1
- vector_inspector/services/profile_service.py +409 -0
- vector_inspector/services/settings_service.py +20 -1
- vector_inspector/services/visualization_service.py +11 -7
- vector_inspector/ui/components/connection_manager_panel.py +320 -0
- vector_inspector/ui/components/profile_manager_panel.py +518 -0
- vector_inspector/ui/dialogs/__init__.py +5 -0
- vector_inspector/ui/dialogs/cross_db_migration.py +364 -0
- vector_inspector/ui/dialogs/embedding_config_dialog.py +176 -0
- vector_inspector/ui/main_window.py +429 -181
- vector_inspector/ui/views/connection_view.py +43 -8
- vector_inspector/ui/views/info_panel.py +226 -80
- vector_inspector/ui/views/metadata_view.py +136 -28
- vector_inspector/ui/views/search_view.py +43 -3
- {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/METADATA +5 -3
- vector_inspector-0.2.7.dist-info/RECORD +45 -0
- vector_inspector-0.2.5.dist-info/RECORD +0 -35
- {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/WHEEL +0 -0
- {vector_inspector-0.2.5.dist-info → vector_inspector-0.2.7.dist-info}/entry_points.txt +0 -0
vector_inspector/core/connections/qdrant_connection.py
@@ -50,16 +50,22 @@ class QdrantConnection(VectorDBConnection):
             True if connection successful, False otherwise
         """
         try:
+            # Common parameters for stability
+            common_params = {
+                'check_compatibility': False,
+                'timeout': 300,  # 5 minutes timeout for long operations
+            }
+
             if self.path:
                 # Local/embedded mode
-                self._client = QdrantClient(path=self.path)
+                self._client = QdrantClient(path=self.path, **common_params)
             elif self.url:
                 # Full URL provided
                 self._client = QdrantClient(
                     url=self.url,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-
+                    **common_params
                 )
             elif self.host:
                 # Host and port provided
@@ -68,11 +74,11 @@ class QdrantConnection(VectorDBConnection):
                     port=self.port,
                     api_key=self.api_key,
                     prefer_grpc=self.prefer_grpc,
-
+                    **common_params
                 )
             else:
                 # Default to in-memory client
-                self._client = QdrantClient(":memory:")
+                self._client = QdrantClient(":memory:", **common_params)

             # Test connection
             self._client.get_collections()
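
Note: both new parameters are passed straight through to QdrantClient. timeout is the request timeout in seconds; check_compatibility=False disables the client/server version check that recent qdrant-client releases perform on connect. A minimal standalone sketch of the resulting call, assuming a qdrant-client version that accepts check_compatibility and a hypothetical local server URL:

    from qdrant_client import QdrantClient

    # Mirrors the patched connect() path; the URL is a placeholder
    common_params = {
        "check_compatibility": False,  # skip client/server version check
        "timeout": 300,                # tolerate long operations (seconds)
    }
    client = QdrantClient(url="http://localhost:6333", **common_params)
    client.get_collections()  # same liveness probe the patch performs
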
@@ -251,6 +257,14 @@ class QdrantConnection(VectorDBConnection):
             "distance_metric": distance_metric,
         }

+        # Check for embedding model metadata (if collection creator stored it)
+        if hasattr(collection_info.config, 'metadata') and collection_info.config.metadata:
+            metadata = collection_info.config.metadata
+            if 'embedding_model' in metadata:
+                result['embedding_model'] = metadata['embedding_model']
+            if 'embedding_model_type' in metadata:
+                result['embedding_model_type'] = metadata['embedding_model_type']
+
         if config_details:
             result['config'] = config_details

@@ -260,6 +274,46 @@ class QdrantConnection(VectorDBConnection):
             print(f"Failed to get collection info: {e}")
             return None

+    def _get_embedding_model_for_collection(self, collection_name: str):
+        """Get the appropriate embedding model for a collection based on stored metadata, settings, or dimension."""
+        from ..embedding_utils import get_model_for_dimension, load_embedding_model, DEFAULT_MODEL
+
+        # Get collection info to determine vector dimension and check metadata
+        collection_info = self.get_collection_info(collection_name)
+        if not collection_info:
+            # Default if we can't determine
+            print(f"Warning: Could not determine collection info for {collection_name}, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 1: Check if collection metadata has embedding model info (most reliable)
+        if 'embedding_model' in collection_info:
+            model_name = collection_info['embedding_model']
+            model_type = collection_info.get('embedding_model_type', 'sentence-transformer')
+            print(f"Using stored embedding model '{model_name}' ({model_type}) for collection '{collection_name}'")
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Priority 2: Check user settings for manual override (skip in connection class)
+        # Settings lookup is done in the UI layer where connection_id is available
+
+        # Priority 3: Fall back to dimension-based guessing (least reliable)
+        vector_dim = collection_info.get("vector_dimension")
+        if not vector_dim or vector_dim == "Unknown":
+            print(f"Warning: No vector dimension in collection info, using default")
+            model_name, model_type = DEFAULT_MODEL
+            model = load_embedding_model(model_name, model_type)
+            return (model, model_name, model_type)
+
+        # Get the appropriate model for this dimension
+        model_name, model_type = get_model_for_dimension(vector_dim)
+        model = load_embedding_model(model_name, model_type)
+
+        print(f"⚠️ Guessing {model_type} model '{model_name}' based on dimension {vector_dim} for '{collection_name}'")
+        print(f"   To specify the correct model, use Settings > Configure Collection Embedding Models")
+        return (model, model_name, model_type)
+
     def _build_qdrant_filter(self, where: Optional[Dict[str, Any]] = None) -> Optional[Filter]:
         """
         Build Qdrant filter from ChromaDB-style where clause.
@@ -374,11 +428,11 @@ class QdrantConnection(VectorDBConnection):
         for query in queries:
             # Embed text queries if needed
             if isinstance(query, str):
-                # Generate embeddings for text query
+                # Generate embeddings for text query using appropriate model for this collection
                 try:
-
-
-                    query_vector =
+                    model, model_name, model_type = self._get_embedding_model_for_collection(collection_name)
+                    from ..embedding_utils import encode_text
+                    query_vector = encode_text(query, model, model_type)
                 except Exception as e:
                     print(f"Failed to embed query text: {e}")
                     continue
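
Taken together with the new helper above, text queries are now embedded with a model resolved per collection rather than a single hard-coded one. A rough sketch of the flow, assuming a connected QdrantConnection instance named conn (hypothetical):

    # conn is a hypothetical connected QdrantConnection
    model, model_name, model_type = conn._get_embedding_model_for_collection("my_docs")

    from vector_inspector.core.embedding_utils import encode_text
    query_vector = encode_text("how do I reset my password?", model, model_type)
    # the search then proceeds with query_vector as before
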
vector_inspector/core/embedding_utils.py (new file)
@@ -0,0 +1,140 @@
+"""Utilities for managing embedding models and vector dimensions."""
+
+from typing import Optional, Union, Tuple
+from sentence_transformers import SentenceTransformer
+
+
+# Mapping of vector dimensions to appropriate models
+# Format: dimension -> list of (model_name, model_type, description)
+# Listed in order of preference for ambiguous cases
+DIMENSION_TO_MODEL = {
+    384: [
+        ("all-MiniLM-L6-v2", "sentence-transformer", "Fast text embeddings"),
+    ],
+    512: [
+        ("openai/clip-vit-base-patch32", "clip", "Multi-modal (text + images)"),
+        ("paraphrase-albert-small-v2", "sentence-transformer", "Text-only paraphrase"),
+    ],
+    768: [
+        ("all-mpnet-base-v2", "sentence-transformer", "High quality text embeddings"),
+    ],
+    1024: [
+        ("all-roberta-large-v1", "sentence-transformer", "Large text embeddings"),
+    ],
+    1536: [
+        ("gtr-t5-large", "sentence-transformer", "Very large text embeddings"),
+    ],
+}
+
+# Default model to use when dimension is unknown or not mapped
+DEFAULT_MODEL = ("all-MiniLM-L6-v2", "sentence-transformer")
+
+
+def get_model_for_dimension(dimension: int, prefer_multimodal: bool = True) -> Tuple[str, str]:
+    """
+    Get the appropriate embedding model name and type for a given vector dimension.
+
+    Args:
+        dimension: The vector dimension size
+        prefer_multimodal: If True and multiple models exist for this dimension,
+                           prefer multi-modal (CLIP) over text-only models
+
+    Returns:
+        Tuple of (model_name, model_type) where model_type is "sentence-transformer" or "clip"
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        models = DIMENSION_TO_MODEL[dimension]
+        if len(models) == 1:
+            return (models[0][0], models[0][1])
+
+        # Multiple models available - apply preference
+        if prefer_multimodal:
+            # Prefer CLIP/multimodal
+            for model_name, model_type, desc in models:
+                if model_type == "clip":
+                    return (model_name, model_type)
+
+        # Default to first option
+        return (models[0][0], models[0][1])
+
+    # Find the closest dimension if exact match not found
+    closest_dim = min(DIMENSION_TO_MODEL.keys(), key=lambda x: abs(x - dimension))
+    models = DIMENSION_TO_MODEL[closest_dim]
+    return (models[0][0], models[0][1])
+
+
+def get_available_models_for_dimension(dimension: int) -> list:
+    """
+    Get all available model options for a given dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        List of tuples: [(model_name, model_type, description), ...]
+    """
+    if dimension in DIMENSION_TO_MODEL:
+        return DIMENSION_TO_MODEL[dimension]
+    return []
+
+
+def load_embedding_model(model_name: str, model_type: str) -> Union[SentenceTransformer, any]:
+    """
+    Load an embedding model (sentence-transformer or CLIP).
+
+    Args:
+        model_name: Name of the model to load
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Loaded model (SentenceTransformer or CLIP model)
+    """
+    if model_type == "clip":
+        from transformers import CLIPModel, CLIPProcessor
+        model = CLIPModel.from_pretrained(model_name)
+        processor = CLIPProcessor.from_pretrained(model_name)
+        return (model, processor)
+    else:
+        return SentenceTransformer(model_name)
+
+
+def encode_text(text: str, model: Union[SentenceTransformer, Tuple], model_type: str) -> list:
+    """
+    Encode text using the appropriate model.
+
+    Args:
+        text: Text to encode
+        model: The loaded model (SentenceTransformer or (CLIPModel, CLIPProcessor) tuple)
+        model_type: Type of model ("sentence-transformer" or "clip")
+
+    Returns:
+        Embedding vector as a list
+    """
+    if model_type == "clip":
+        import torch
+        clip_model, processor = model
+        inputs = processor(text=[text], return_tensors="pt", padding=True)
+        with torch.no_grad():
+            text_features = clip_model.get_text_features(**inputs)
+        # Normalize the features (CLIP embeddings are typically normalized)
+        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+        return text_features[0].cpu().numpy().tolist()
+    else:
+        # sentence-transformer
+        embedding = model.encode(text)
+        return embedding.tolist()
+
+
+def get_embedding_model_for_dimension(dimension: int) -> Tuple[Union[SentenceTransformer, Tuple], str, str]:
+    """
+    Get a loaded embedding model for a specific dimension.
+
+    Args:
+        dimension: The vector dimension size
+
+    Returns:
+        Tuple of (loaded_model, model_name, model_type)
+    """
+    model_name, model_type = get_model_for_dimension(dimension)
+    model = load_embedding_model(model_name, model_type)
+    return (model, model_name, model_type)
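
For orientation, a minimal usage sketch of the new module (the 384 lookup resolving to all-MiniLM-L6-v2 follows from the DIMENSION_TO_MODEL table above; the model is downloaded on first load):

    from vector_inspector.core.embedding_utils import (
        get_model_for_dimension,
        load_embedding_model,
        encode_text,
    )

    model_name, model_type = get_model_for_dimension(384)  # ("all-MiniLM-L6-v2", "sentence-transformer")
    model = load_embedding_model(model_name, model_type)
    vector = encode_text("hello world", model, model_type)  # 384-element list of floats

Note that an unmapped dimension falls through to the closest key: 500, for example, resolves to the 512 entry and thus to CLIP by default.
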
vector_inspector/services/backup_restore_service.py
@@ -169,36 +169,10 @@ class BackupRestoreService:
             print(f"Failed to generate embeddings: {e}")
             return False

-        #
-        #
+        # Keep IDs as strings - Qdrant's _to_uuid method handles conversion
+        # Just ensure all IDs are strings
         original_ids = data.get("ids", [])
-
-        metadatas = data.get("metadatas", [])
-
-        for i, orig_id in enumerate(original_ids):
-            # Try to convert to integer, otherwise use index
-            try:
-                # If it's like "doc_123", extract the number
-                if isinstance(orig_id, str) and "_" in orig_id:
-                    qdrant_id = int(orig_id.split("_")[-1])
-                else:
-                    qdrant_id = int(orig_id)
-            except (ValueError, AttributeError):
-                # Use index as ID if can't convert
-                qdrant_id = i
-
-            qdrant_ids.append(qdrant_id)
-
-            # Store original ID in metadata
-            if i < len(metadatas):
-                if metadatas[i] is None:
-                    metadatas[i] = {}
-                metadatas[i]["original_id"] = orig_id
-            else:
-                metadatas.append({"original_id": orig_id})
-
-        data["ids"] = qdrant_ids
-        data["metadatas"] = metadatas
+        data["ids"] = [str(id_val) for id_val in original_ids]

         # Add items to collection
         success = connection.add_items(
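
The removed block coerced IDs such as "doc_123" to integers, which is lossy ("a_123" and "b_123" both collapse to 123) and forced the original ID into metadata; the replacement keeps IDs verbatim. A hypothetical illustration of the behavioural difference:

    # Old behaviour (removed): lossy integer extraction
    int("doc_123".split("_")[-1])      # -> 123; "a_123" and "b_123" collide

    # New behaviour: IDs stay strings end to end
    [str(i) for i in ["doc_123", 42]]  # -> ["doc_123", "42"]
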
vector_inspector/services/credential_service.py (new file)
@@ -0,0 +1,130 @@
+"""Service for secure credential storage using system keychains."""
+
+from typing import Optional
+import json
+
+
+class CredentialService:
+    """Handles secure storage and retrieval of credentials using system keychains.
+
+    Falls back to in-memory storage if keyring is not available (not recommended for production).
+    """
+
+    SERVICE_NAME = "vector-inspector"
+
+    def __init__(self):
+        """Initialize credential service with keyring if available."""
+        self._use_keyring = False
+        self._memory_store = {}  # Fallback in-memory storage
+
+        try:
+            import keyring
+            self._keyring = keyring
+            self._use_keyring = True
+        except ImportError:
+            print("Warning: keyring module not available. Credentials will not be persisted securely.")
+            self._keyring = None
+
+    def store_credentials(self, profile_id: str, credentials: dict) -> bool:
+        """
+        Store credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+            credentials: Dictionary of credential data (api_key, password, etc.)
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+            credential_json = json.dumps(credentials)
+
+            if self._use_keyring:
+                self._keyring.set_password(
+                    self.SERVICE_NAME,
+                    credential_key,
+                    credential_json
+                )
+            else:
+                # Fallback to in-memory (not persistent)
+                self._memory_store[credential_key] = credential_json
+
+            return True
+        except Exception as e:
+            print(f"Failed to store credentials: {e}")
+            return False
+
+    def get_credentials(self, profile_id: str) -> Optional[dict]:
+        """
+        Retrieve credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            Dictionary of credential data, or None if not found
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                credential_json = self._keyring.get_password(
+                    self.SERVICE_NAME,
+                    credential_key
+                )
+            else:
+                # Fallback to in-memory
+                credential_json = self._memory_store.get(credential_key)
+
+            if credential_json:
+                return json.loads(credential_json)
+            return None
+        except Exception as e:
+            print(f"Failed to retrieve credentials: {e}")
+            return None
+
+    def delete_credentials(self, profile_id: str) -> bool:
+        """
+        Delete stored credentials for a profile.
+
+        Args:
+            profile_id: Unique profile identifier
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            credential_key = f"profile:{profile_id}"
+
+            if self._use_keyring:
+                try:
+                    self._keyring.delete_password(
+                        self.SERVICE_NAME,
+                        credential_key
+                    )
+                except self._keyring.errors.PasswordDeleteError:
+                    # Credential doesn't exist, that's okay
+                    pass
+            else:
+                # Fallback to in-memory
+                self._memory_store.pop(credential_key, None)
+
+            return True
+        except Exception as e:
+            print(f"Failed to delete credentials: {e}")
+            return False
+
+    def is_keyring_available(self) -> bool:
+        """Check if system keyring is available."""
+        return self._use_keyring
+
+    def clear_all_credentials(self):
+        """Clear all stored credentials. Use with caution!"""
+        if not self._use_keyring:
+            self._memory_store.clear()
+        else:
+            # For keyring, we'd need to track all profile IDs
+            # This is typically not needed, but can be implemented if required
+            print("Warning: clear_all_credentials not implemented for keyring backend")
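
A minimal sketch of how a caller might use the new service; the profile ID and payload here are made up:

    creds = CredentialService()

    if creds.store_credentials("prod-qdrant", {"api_key": "..."}):
        print(creds.get_credentials("prod-qdrant"))  # {'api_key': '...'}
        if not creds.is_keyring_available():
            print("Warning: credentials held in memory for this session only")
    creds.delete_credentials("prod-qdrant")
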
vector_inspector/services/filter_service.py
@@ -66,7 +66,7 @@ def apply_client_side_filters(data: Dict[str, Any], filters: List[Dict[str, Any]
         "metadatas": [metadatas[i] for i in keep_indices if i < len(metadatas)],
     }

-    if embeddings:
+    if embeddings is not None and len(embeddings) > 0:
         filtered_data["embeddings"] = [embeddings[i] for i in keep_indices if i < len(embeddings)]

     return filtered_data