vector-inspector 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/core/connection_manager.py +55 -49
- vector_inspector/core/connections/base_connection.py +41 -41
- vector_inspector/core/connections/chroma_connection.py +110 -86
- vector_inspector/core/connections/pinecone_connection.py +168 -182
- vector_inspector/core/connections/qdrant_connection.py +109 -126
- vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
- vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
- vector_inspector/core/connections/template_connection.py +55 -65
- vector_inspector/core/embedding_utils.py +32 -32
- vector_inspector/core/logging.py +27 -0
- vector_inspector/core/model_registry.py +4 -3
- vector_inspector/main.py +6 -2
- vector_inspector/services/backup_helpers.py +63 -0
- vector_inspector/services/backup_restore_service.py +73 -152
- vector_inspector/services/credential_service.py +33 -40
- vector_inspector/services/import_export_service.py +70 -67
- vector_inspector/services/profile_service.py +92 -94
- vector_inspector/services/settings_service.py +68 -48
- vector_inspector/services/visualization_service.py +40 -39
- vector_inspector/ui/components/splash_window.py +57 -0
- vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
- vector_inspector/ui/main_window.py +200 -146
- vector_inspector/ui/views/info_panel.py +208 -127
- vector_inspector/ui/views/metadata_view.py +8 -7
- vector_inspector/ui/views/search_view.py +97 -75
- vector_inspector/ui/views/visualization_view.py +140 -97
- vector_inspector/utils/version.py +5 -0
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/METADATA +9 -2
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/RECORD +32 -25
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.1.dist-info → vector_inspector-0.3.3.dist-info}/entry_points.txt +0 -0
|
@@ -6,20 +6,18 @@ from pinecone import Pinecone, ServerlessSpec
|
|
|
6
6
|
from pinecone.exceptions import PineconeException
|
|
7
7
|
|
|
8
8
|
from .base_connection import VectorDBConnection
|
|
9
|
+
from vector_inspector.core.logging import log_error
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class PineconeConnection(VectorDBConnection):
|
|
12
13
|
"""Manages connection to Pinecone and provides query interface."""
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
def __init__(
|
|
15
|
-
self,
|
|
16
|
-
api_key: str,
|
|
17
|
-
environment: Optional[str] = None,
|
|
18
|
-
index_host: Optional[str] = None
|
|
16
|
+
self, api_key: str, environment: Optional[str] = None, index_host: Optional[str] = None
|
|
19
17
|
):
|
|
20
18
|
"""
|
|
21
19
|
Initialize Pinecone connection.
|
|
22
|
-
|
|
20
|
+
|
|
23
21
|
Args:
|
|
24
22
|
api_key: Pinecone API key
|
|
25
23
|
environment: Pinecone environment (optional, auto-detected)
|
|
@@ -31,41 +29,41 @@ class PineconeConnection(VectorDBConnection):
|
|
|
31
29
|
self._client: Optional[Pinecone] = None
|
|
32
30
|
self._current_index = None
|
|
33
31
|
self._current_index_name: Optional[str] = None
|
|
34
|
-
|
|
32
|
+
|
|
35
33
|
def connect(self) -> bool:
|
|
36
34
|
"""
|
|
37
35
|
Establish connection to Pinecone.
|
|
38
|
-
|
|
36
|
+
|
|
39
37
|
Returns:
|
|
40
38
|
True if connection successful, False otherwise
|
|
41
39
|
"""
|
|
42
40
|
try:
|
|
43
41
|
# Initialize Pinecone client
|
|
44
42
|
self._client = Pinecone(api_key=self.api_key)
|
|
45
|
-
|
|
43
|
+
|
|
46
44
|
# Test connection by listing indexes
|
|
47
45
|
self._client.list_indexes()
|
|
48
46
|
return True
|
|
49
47
|
except Exception as e:
|
|
50
|
-
|
|
48
|
+
log_error("Connection failed: %s", e)
|
|
51
49
|
self._client = None # Reset client on failure
|
|
52
50
|
return False
|
|
53
|
-
|
|
51
|
+
|
|
54
52
|
def disconnect(self):
|
|
55
53
|
"""Close connection to Pinecone."""
|
|
56
54
|
self._client = None
|
|
57
55
|
self._current_index = None
|
|
58
56
|
self._current_index_name = None
|
|
59
|
-
|
|
57
|
+
|
|
60
58
|
@property
|
|
61
59
|
def is_connected(self) -> bool:
|
|
62
60
|
"""Check if connected to Pinecone."""
|
|
63
61
|
return self._client is not None
|
|
64
|
-
|
|
62
|
+
|
|
65
63
|
def list_collections(self) -> List[str]:
|
|
66
64
|
"""
|
|
67
65
|
Get list of all indexes (collections in Pinecone terminology).
|
|
68
|
-
|
|
66
|
+
|
|
69
67
|
Returns:
|
|
70
68
|
List of index names
|
|
71
69
|
"""
|
|
@@ -75,14 +73,14 @@ class PineconeConnection(VectorDBConnection):
|
|
|
75
73
|
indexes = self._client.list_indexes()
|
|
76
74
|
return [str(idx.name) for idx in indexes] # type: ignore
|
|
77
75
|
except Exception as e:
|
|
78
|
-
|
|
76
|
+
log_error("Failed to list indexes: %s", e)
|
|
79
77
|
return []
|
|
80
|
-
|
|
78
|
+
|
|
81
79
|
def _get_index(self, name: str):
|
|
82
80
|
"""Get or create index reference."""
|
|
83
81
|
if not self._client:
|
|
84
82
|
return None
|
|
85
|
-
|
|
83
|
+
|
|
86
84
|
try:
|
|
87
85
|
# Cache the current index to avoid repeated lookups
|
|
88
86
|
if self._current_index_name != name:
|
|
@@ -90,38 +88,38 @@ class PineconeConnection(VectorDBConnection):
|
|
|
90
88
|
self._current_index_name = name
|
|
91
89
|
return self._current_index
|
|
92
90
|
except Exception as e:
|
|
93
|
-
|
|
91
|
+
log_error("Failed to get index: %s", e)
|
|
94
92
|
return None
|
|
95
|
-
|
|
93
|
+
|
|
96
94
|
def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
|
|
97
95
|
"""
|
|
98
96
|
Get index metadata and statistics.
|
|
99
|
-
|
|
97
|
+
|
|
100
98
|
Args:
|
|
101
99
|
name: Index name
|
|
102
|
-
|
|
100
|
+
|
|
103
101
|
Returns:
|
|
104
102
|
Dictionary with index info
|
|
105
103
|
"""
|
|
106
104
|
if not self._client:
|
|
107
105
|
return None
|
|
108
|
-
|
|
106
|
+
|
|
109
107
|
try:
|
|
110
108
|
# Get index description
|
|
111
109
|
index_description = self._client.describe_index(name)
|
|
112
|
-
|
|
110
|
+
|
|
113
111
|
# Get index stats
|
|
114
112
|
index = self._get_index(name)
|
|
115
113
|
if not index:
|
|
116
114
|
return None
|
|
117
|
-
|
|
115
|
+
|
|
118
116
|
stats = index.describe_index_stats()
|
|
119
|
-
|
|
117
|
+
|
|
120
118
|
# Extract information
|
|
121
|
-
total_vector_count = stats.get(
|
|
119
|
+
total_vector_count = stats.get("total_vector_count", 0)
|
|
122
120
|
dimension = index_description.dimension
|
|
123
121
|
metric = index_description.metric
|
|
124
|
-
|
|
122
|
+
|
|
125
123
|
# Get metadata fields from a sample query (if vectors exist)
|
|
126
124
|
metadata_fields = []
|
|
127
125
|
if total_vector_count > 0:
|
|
@@ -129,46 +127,50 @@ class PineconeConnection(VectorDBConnection):
|
|
|
129
127
|
# Query for a small sample to see metadata structure
|
|
130
128
|
dimension_val = int(dimension) if dimension else 0
|
|
131
129
|
sample_query = index.query(
|
|
132
|
-
vector=[0.0] * dimension_val,
|
|
133
|
-
top_k=1,
|
|
134
|
-
include_metadata=True
|
|
130
|
+
vector=[0.0] * dimension_val, top_k=1, include_metadata=True
|
|
135
131
|
)
|
|
136
|
-
if hasattr(sample_query,
|
|
132
|
+
if hasattr(sample_query, "matches") and sample_query.matches: # type: ignore
|
|
137
133
|
metadata = sample_query.matches[0].metadata # type: ignore
|
|
138
134
|
if metadata:
|
|
139
135
|
metadata_fields = list(metadata.keys())
|
|
140
136
|
except Exception:
|
|
141
137
|
pass # Metadata fields will remain empty
|
|
142
|
-
|
|
138
|
+
|
|
143
139
|
return {
|
|
144
140
|
"name": name,
|
|
145
141
|
"count": total_vector_count,
|
|
146
142
|
"metadata_fields": metadata_fields,
|
|
147
143
|
"vector_dimension": dimension,
|
|
148
144
|
"distance_metric": str(metric).upper() if metric else "UNKNOWN",
|
|
149
|
-
"host": str(index_description.host)
|
|
150
|
-
|
|
151
|
-
|
|
145
|
+
"host": str(index_description.host)
|
|
146
|
+
if hasattr(index_description, "host")
|
|
147
|
+
else "N/A",
|
|
148
|
+
"status": index_description.status.get("state", "unknown")
|
|
149
|
+
if hasattr(index_description.status, "get")
|
|
150
|
+
else str(index_description.status), # type: ignore
|
|
151
|
+
"spec": str(index_description.spec)
|
|
152
|
+
if hasattr(index_description, "spec")
|
|
153
|
+
else "N/A",
|
|
152
154
|
}
|
|
153
155
|
except Exception as e:
|
|
154
|
-
|
|
156
|
+
log_error("Failed to get index info: %s", e)
|
|
155
157
|
return None
|
|
156
|
-
|
|
158
|
+
|
|
157
159
|
def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
|
|
158
160
|
"""
|
|
159
161
|
Create a new index.
|
|
160
|
-
|
|
162
|
+
|
|
161
163
|
Args:
|
|
162
164
|
name: Index name
|
|
163
165
|
vector_size: Dimension of vectors
|
|
164
166
|
distance: Distance metric (Cosine, Euclidean, DotProduct)
|
|
165
|
-
|
|
167
|
+
|
|
166
168
|
Returns:
|
|
167
169
|
True if successful, False otherwise
|
|
168
170
|
"""
|
|
169
171
|
if not self._client:
|
|
170
172
|
return False
|
|
171
|
-
|
|
173
|
+
|
|
172
174
|
try:
|
|
173
175
|
# Map distance names to Pinecone metrics
|
|
174
176
|
metric_map = {
|
|
@@ -178,33 +180,34 @@ class PineconeConnection(VectorDBConnection):
|
|
|
178
180
|
"dot": "dotproduct",
|
|
179
181
|
}
|
|
180
182
|
metric = metric_map.get(distance.lower(), "cosine")
|
|
181
|
-
|
|
183
|
+
|
|
182
184
|
# Create serverless index (default configuration)
|
|
183
185
|
self._client.create_index(
|
|
184
186
|
name=name,
|
|
185
187
|
dimension=vector_size,
|
|
186
188
|
metric=metric,
|
|
187
|
-
spec=ServerlessSpec(
|
|
188
|
-
cloud='aws',
|
|
189
|
-
region='us-east-1'
|
|
190
|
-
)
|
|
189
|
+
spec=ServerlessSpec(cloud="aws", region="us-east-1"),
|
|
191
190
|
)
|
|
192
|
-
|
|
191
|
+
|
|
193
192
|
# Wait for index to be ready
|
|
194
193
|
max_wait = 60 # seconds
|
|
195
194
|
start_time = time.time()
|
|
196
195
|
while time.time() - start_time < max_wait:
|
|
197
196
|
desc = self._client.describe_index(name)
|
|
198
|
-
status =
|
|
199
|
-
|
|
197
|
+
status = (
|
|
198
|
+
desc.status.get("state", "unknown")
|
|
199
|
+
if hasattr(desc.status, "get")
|
|
200
|
+
else str(desc.status)
|
|
201
|
+
) # type: ignore
|
|
202
|
+
if status.lower() == "ready":
|
|
200
203
|
return True
|
|
201
204
|
time.sleep(2)
|
|
202
|
-
|
|
205
|
+
|
|
203
206
|
return False
|
|
204
207
|
except Exception as e:
|
|
205
|
-
|
|
208
|
+
log_error("Failed to create index: %s", e)
|
|
206
209
|
return False
|
|
207
|
-
|
|
210
|
+
|
|
208
211
|
def add_items(
|
|
209
212
|
self,
|
|
210
213
|
collection_name: str,
|
|
@@ -215,111 +218,107 @@ class PineconeConnection(VectorDBConnection):
|
|
|
215
218
|
) -> bool:
|
|
216
219
|
"""
|
|
217
220
|
Add items to an index.
|
|
218
|
-
|
|
221
|
+
|
|
219
222
|
Args:
|
|
220
223
|
collection_name: Name of index
|
|
221
224
|
documents: Document texts (stored in metadata)
|
|
222
225
|
metadatas: Metadata for each vector
|
|
223
226
|
ids: IDs for each vector
|
|
224
227
|
embeddings: Pre-computed embeddings (required for Pinecone)
|
|
225
|
-
|
|
228
|
+
|
|
226
229
|
Returns:
|
|
227
230
|
True if successful, False otherwise
|
|
228
231
|
"""
|
|
229
232
|
if not embeddings:
|
|
230
|
-
|
|
233
|
+
log_error("Embeddings are required for Pinecone")
|
|
231
234
|
return False
|
|
232
|
-
|
|
235
|
+
|
|
233
236
|
index = self._get_index(collection_name)
|
|
234
237
|
if not index:
|
|
235
238
|
return False
|
|
236
|
-
|
|
239
|
+
|
|
237
240
|
try:
|
|
238
241
|
# Generate IDs if not provided
|
|
239
242
|
if not ids:
|
|
240
243
|
ids = [f"vec_{i}" for i in range(len(embeddings))]
|
|
241
|
-
|
|
244
|
+
|
|
242
245
|
# Prepare vectors for upsert
|
|
243
246
|
vectors = []
|
|
244
247
|
for i, embedding in enumerate(embeddings):
|
|
245
248
|
metadata = {}
|
|
246
249
|
if metadatas and i < len(metadatas):
|
|
247
250
|
metadata = metadatas[i].copy()
|
|
248
|
-
|
|
251
|
+
|
|
249
252
|
# Add document text to metadata
|
|
250
253
|
if documents and i < len(documents):
|
|
251
|
-
metadata[
|
|
252
|
-
|
|
253
|
-
vectors.append({
|
|
254
|
-
|
|
255
|
-
'values': embedding,
|
|
256
|
-
'metadata': metadata
|
|
257
|
-
})
|
|
258
|
-
|
|
254
|
+
metadata["document"] = documents[i]
|
|
255
|
+
|
|
256
|
+
vectors.append({"id": ids[i], "values": embedding, "metadata": metadata})
|
|
257
|
+
|
|
259
258
|
# Upsert in batches of 100 (Pinecone limit)
|
|
260
259
|
batch_size = 100
|
|
261
260
|
for i in range(0, len(vectors), batch_size):
|
|
262
|
-
batch = vectors[i:i + batch_size]
|
|
261
|
+
batch = vectors[i : i + batch_size]
|
|
263
262
|
index.upsert(vectors=batch)
|
|
264
|
-
|
|
263
|
+
|
|
265
264
|
return True
|
|
266
265
|
except Exception as e:
|
|
267
|
-
|
|
266
|
+
log_error("Failed to add items: %s", e)
|
|
268
267
|
return False
|
|
269
|
-
|
|
268
|
+
|
|
270
269
|
def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
|
|
271
270
|
"""
|
|
272
271
|
Retrieve items by IDs.
|
|
273
|
-
|
|
272
|
+
|
|
274
273
|
Args:
|
|
275
274
|
name: Index name
|
|
276
275
|
ids: List of vector IDs
|
|
277
|
-
|
|
276
|
+
|
|
278
277
|
Returns:
|
|
279
278
|
Dictionary with documents and metadatas
|
|
280
279
|
"""
|
|
281
280
|
index = self._get_index(name)
|
|
282
281
|
if not index:
|
|
283
282
|
return {"documents": [], "metadatas": []}
|
|
284
|
-
|
|
283
|
+
|
|
285
284
|
try:
|
|
286
285
|
# Fetch vectors
|
|
287
286
|
result = index.fetch(ids=ids)
|
|
288
|
-
|
|
287
|
+
|
|
289
288
|
documents = []
|
|
290
289
|
metadatas = []
|
|
291
|
-
|
|
290
|
+
|
|
292
291
|
for vid in ids:
|
|
293
292
|
if vid in result.vectors:
|
|
294
293
|
vector_data = result.vectors[vid]
|
|
295
294
|
metadata = vector_data.metadata or {}
|
|
296
|
-
|
|
295
|
+
|
|
297
296
|
# Extract document from metadata
|
|
298
|
-
doc = metadata.pop(
|
|
297
|
+
doc = metadata.pop("document", "")
|
|
299
298
|
documents.append(doc)
|
|
300
299
|
metadatas.append(metadata)
|
|
301
300
|
else:
|
|
302
|
-
documents.append(
|
|
301
|
+
documents.append("")
|
|
303
302
|
metadatas.append({})
|
|
304
|
-
|
|
303
|
+
|
|
305
304
|
return {"documents": documents, "metadatas": metadatas}
|
|
306
305
|
except Exception as e:
|
|
307
|
-
|
|
306
|
+
log_error("Failed to get items: %s", e)
|
|
308
307
|
return {"documents": [], "metadatas": []}
|
|
309
|
-
|
|
308
|
+
|
|
310
309
|
def delete_collection(self, name: str) -> bool:
|
|
311
310
|
"""
|
|
312
311
|
Delete an index.
|
|
313
|
-
|
|
312
|
+
|
|
314
313
|
Args:
|
|
315
314
|
name: Index name
|
|
316
|
-
|
|
315
|
+
|
|
317
316
|
Returns:
|
|
318
317
|
True if successful, False otherwise
|
|
319
318
|
"""
|
|
320
319
|
if not self._client:
|
|
321
320
|
return False
|
|
322
|
-
|
|
321
|
+
|
|
323
322
|
try:
|
|
324
323
|
self._client.delete_index(name)
|
|
325
324
|
if self._current_index_name == name:
|
|
@@ -327,29 +326,29 @@ class PineconeConnection(VectorDBConnection):
|
|
|
327
326
|
self._current_index_name = None
|
|
328
327
|
return True
|
|
329
328
|
except Exception as e:
|
|
330
|
-
|
|
329
|
+
log_error("Failed to delete index: %s", e)
|
|
331
330
|
return False
|
|
332
|
-
|
|
331
|
+
|
|
333
332
|
def count_collection(self, name: str) -> int:
|
|
334
333
|
"""
|
|
335
334
|
Return the number of vectors in the index.
|
|
336
|
-
|
|
335
|
+
|
|
337
336
|
Args:
|
|
338
337
|
name: Index name
|
|
339
|
-
|
|
338
|
+
|
|
340
339
|
Returns:
|
|
341
340
|
Number of vectors
|
|
342
341
|
"""
|
|
343
342
|
index = self._get_index(name)
|
|
344
343
|
if not index:
|
|
345
344
|
return 0
|
|
346
|
-
|
|
345
|
+
|
|
347
346
|
try:
|
|
348
347
|
stats = index.describe_index_stats()
|
|
349
|
-
return stats.get(
|
|
348
|
+
return stats.get("total_vector_count", 0)
|
|
350
349
|
except Exception:
|
|
351
350
|
return 0
|
|
352
|
-
|
|
351
|
+
|
|
353
352
|
def _get_embedding_function_for_collection(self, collection_name: str):
|
|
354
353
|
"""
|
|
355
354
|
Returns embedding function and model type for a given collection, matching ChromaDB/Qdrant API.
|
|
@@ -363,6 +362,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
363
362
|
|
|
364
363
|
# Prefer user-configured model for this collection
|
|
365
364
|
from vector_inspector.services.settings_service import SettingsService
|
|
365
|
+
|
|
366
366
|
model = None
|
|
367
367
|
model_type: str = "sentence-transformer"
|
|
368
368
|
if hasattr(self, "connection_id") and collection_name:
|
|
@@ -370,12 +370,14 @@ class PineconeConnection(VectorDBConnection):
|
|
|
370
370
|
cfg = settings.get_embedding_model(getattr(self, "connection_id", ""), collection_name)
|
|
371
371
|
if cfg and cfg.get("model") and cfg.get("type"):
|
|
372
372
|
from vector_inspector.core.embedding_utils import load_embedding_model
|
|
373
|
+
|
|
373
374
|
model = load_embedding_model(cfg["model"], cfg["type"])
|
|
374
375
|
model_type = str(cfg["type"]) or "sentence-transformer"
|
|
375
376
|
|
|
376
377
|
# Fallback to dimension-based model if none configured
|
|
377
378
|
if model is None:
|
|
378
379
|
from vector_inspector.core.embedding_utils import get_embedding_model_for_dimension
|
|
380
|
+
|
|
379
381
|
if dim_int is None:
|
|
380
382
|
dim_int = 384 # default for MiniLM
|
|
381
383
|
loaded_model, _, inferred_type = get_embedding_model_for_dimension(dim_int)
|
|
@@ -383,6 +385,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
383
385
|
model_type = str(inferred_type) or "sentence-transformer"
|
|
384
386
|
|
|
385
387
|
from vector_inspector.core.embedding_utils import encode_text
|
|
388
|
+
|
|
386
389
|
def embedding_fn(text: str):
|
|
387
390
|
return encode_text(text, model, model_type)
|
|
388
391
|
|
|
@@ -399,7 +402,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
399
402
|
) -> Optional[Dict[str, Any]]:
|
|
400
403
|
"""
|
|
401
404
|
Query an index for similar vectors.
|
|
402
|
-
|
|
405
|
+
|
|
403
406
|
Args:
|
|
404
407
|
collection_name: Name of index
|
|
405
408
|
query_texts: Text queries (will be embedded if provided)
|
|
@@ -418,13 +421,13 @@ class PineconeConnection(VectorDBConnection):
|
|
|
418
421
|
query_texts = None
|
|
419
422
|
|
|
420
423
|
if not query_embeddings:
|
|
421
|
-
|
|
424
|
+
log_error("Query embeddings are required for Pinecone")
|
|
422
425
|
return None
|
|
423
|
-
|
|
426
|
+
|
|
424
427
|
index = self._get_index(collection_name)
|
|
425
428
|
if not index:
|
|
426
429
|
return None
|
|
427
|
-
|
|
430
|
+
|
|
428
431
|
try:
|
|
429
432
|
# Pinecone queries one vector at a time
|
|
430
433
|
all_ids = []
|
|
@@ -432,54 +435,54 @@ class PineconeConnection(VectorDBConnection):
|
|
|
432
435
|
all_documents = []
|
|
433
436
|
all_metadatas = []
|
|
434
437
|
all_embeddings = []
|
|
435
|
-
|
|
438
|
+
|
|
436
439
|
for query_vector in query_embeddings:
|
|
437
440
|
# Build filter if provided
|
|
438
441
|
filter_dict = None
|
|
439
442
|
if where:
|
|
440
443
|
filter_dict = self._convert_filter(where)
|
|
441
|
-
|
|
444
|
+
|
|
442
445
|
result = index.query(
|
|
443
446
|
vector=query_vector,
|
|
444
447
|
top_k=n_results,
|
|
445
448
|
include_metadata=True,
|
|
446
449
|
include_values=True,
|
|
447
|
-
filter=filter_dict
|
|
450
|
+
filter=filter_dict,
|
|
448
451
|
)
|
|
449
|
-
|
|
452
|
+
|
|
450
453
|
# Extract results
|
|
451
454
|
ids = []
|
|
452
455
|
distances = []
|
|
453
456
|
documents = []
|
|
454
457
|
metadatas = []
|
|
455
458
|
embeddings = []
|
|
456
|
-
|
|
457
|
-
if hasattr(result,
|
|
459
|
+
|
|
460
|
+
if hasattr(result, "matches"):
|
|
458
461
|
for match in result.matches: # type: ignore
|
|
459
462
|
ids.append(match.id) # type: ignore
|
|
460
463
|
# Convert similarity to distance for cosine metric
|
|
461
|
-
score = getattr(match,
|
|
464
|
+
score = getattr(match, "score", None)
|
|
462
465
|
if score is not None:
|
|
463
466
|
distances.append(1.0 - score)
|
|
464
467
|
else:
|
|
465
468
|
distances.append(None)
|
|
466
|
-
|
|
469
|
+
|
|
467
470
|
metadata = match.metadata or {} # type: ignore
|
|
468
|
-
doc = metadata.pop(
|
|
471
|
+
doc = metadata.pop("document", "")
|
|
469
472
|
documents.append(doc)
|
|
470
473
|
metadatas.append(metadata)
|
|
471
|
-
|
|
472
|
-
if hasattr(match,
|
|
474
|
+
|
|
475
|
+
if hasattr(match, "values") and match.values: # type: ignore
|
|
473
476
|
embeddings.append(match.values) # type: ignore
|
|
474
477
|
else:
|
|
475
478
|
embeddings.append([])
|
|
476
|
-
|
|
479
|
+
|
|
477
480
|
all_ids.append(ids)
|
|
478
481
|
all_distances.append(distances)
|
|
479
482
|
all_documents.append(documents)
|
|
480
483
|
all_metadatas.append(metadatas)
|
|
481
484
|
all_embeddings.append(embeddings)
|
|
482
|
-
|
|
485
|
+
|
|
483
486
|
return {
|
|
484
487
|
"ids": all_ids,
|
|
485
488
|
"distances": all_distances,
|
|
@@ -488,21 +491,21 @@ class PineconeConnection(VectorDBConnection):
|
|
|
488
491
|
"embeddings": all_embeddings,
|
|
489
492
|
}
|
|
490
493
|
except Exception as e:
|
|
491
|
-
print(f"Query failed: {e}")
|
|
492
494
|
import traceback
|
|
493
|
-
|
|
495
|
+
|
|
496
|
+
log_error("Query failed: %s\n%s", e, traceback.format_exc())
|
|
494
497
|
return None
|
|
495
|
-
|
|
498
|
+
|
|
496
499
|
def _convert_filter(self, where: Dict[str, Any]) -> Dict[str, Any]:
|
|
497
500
|
"""
|
|
498
501
|
Convert generic filter to Pinecone filter format.
|
|
499
|
-
|
|
502
|
+
|
|
500
503
|
Pinecone supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
|
|
501
504
|
"""
|
|
502
505
|
# Simple conversion - map field equality
|
|
503
506
|
# For more complex filters, this would need expansion
|
|
504
507
|
pinecone_filter = {}
|
|
505
|
-
|
|
508
|
+
|
|
506
509
|
for key, value in where.items():
|
|
507
510
|
if isinstance(value, dict):
|
|
508
511
|
# Handle operator-based filters
|
|
@@ -510,9 +513,9 @@ class PineconeConnection(VectorDBConnection):
|
|
|
510
513
|
else:
|
|
511
514
|
# Simple equality
|
|
512
515
|
pinecone_filter[key] = {"$eq": value}
|
|
513
|
-
|
|
516
|
+
|
|
514
517
|
return pinecone_filter
|
|
515
|
-
|
|
518
|
+
|
|
516
519
|
def get_all_items(
|
|
517
520
|
self,
|
|
518
521
|
collection_name: str,
|
|
@@ -522,100 +525,90 @@ class PineconeConnection(VectorDBConnection):
|
|
|
522
525
|
) -> Optional[Dict[str, Any]]:
|
|
523
526
|
"""
|
|
524
527
|
Get all items from an index using pagination.
|
|
525
|
-
|
|
528
|
+
|
|
526
529
|
Note: Uses Pinecone's list() method which returns a generator of ID lists.
|
|
527
530
|
Offset-based pagination is simulated by skipping items.
|
|
528
|
-
|
|
531
|
+
|
|
529
532
|
Args:
|
|
530
533
|
collection_name: Name of index
|
|
531
534
|
limit: Maximum number of items to return
|
|
532
535
|
offset: Number of items to skip
|
|
533
536
|
where: Metadata filter (not supported in list operation)
|
|
534
|
-
|
|
537
|
+
|
|
535
538
|
Returns:
|
|
536
539
|
Index items or None if failed
|
|
537
540
|
"""
|
|
538
541
|
index = self._get_index(collection_name)
|
|
539
542
|
if not index:
|
|
540
543
|
return None
|
|
541
|
-
|
|
544
|
+
|
|
542
545
|
try:
|
|
543
546
|
ids_to_fetch = []
|
|
544
547
|
items_collected = 0
|
|
545
548
|
items_skipped = 0
|
|
546
549
|
target_offset = offset or 0
|
|
547
550
|
target_limit = limit or 100
|
|
548
|
-
|
|
551
|
+
|
|
549
552
|
# list() returns a generator that yields lists of IDs
|
|
550
553
|
for id_list in index.list(): # type: ignore
|
|
551
554
|
if not id_list:
|
|
552
555
|
continue
|
|
553
|
-
|
|
556
|
+
|
|
554
557
|
# Handle offset by skipping items
|
|
555
558
|
for vid in id_list:
|
|
556
559
|
if items_skipped < target_offset:
|
|
557
560
|
items_skipped += 1
|
|
558
561
|
continue
|
|
559
|
-
|
|
562
|
+
|
|
560
563
|
if items_collected < target_limit:
|
|
561
564
|
ids_to_fetch.append(vid)
|
|
562
565
|
items_collected += 1
|
|
563
566
|
else:
|
|
564
567
|
break
|
|
565
|
-
|
|
568
|
+
|
|
566
569
|
# Stop if we have enough
|
|
567
570
|
if items_collected >= target_limit:
|
|
568
571
|
break
|
|
569
|
-
|
|
572
|
+
|
|
570
573
|
# If no IDs found, return empty result
|
|
571
574
|
if not ids_to_fetch:
|
|
572
|
-
return {
|
|
573
|
-
|
|
574
|
-
"documents": [],
|
|
575
|
-
"metadatas": [],
|
|
576
|
-
"embeddings": []
|
|
577
|
-
}
|
|
578
|
-
|
|
575
|
+
return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
|
|
576
|
+
|
|
579
577
|
# Fetch the actual vector data in batches (Pinecone fetch limit is 1000)
|
|
580
578
|
batch_size = 1000
|
|
581
579
|
all_ids = []
|
|
582
580
|
all_documents = []
|
|
583
581
|
all_metadatas = []
|
|
584
582
|
all_embeddings = []
|
|
585
|
-
|
|
583
|
+
|
|
586
584
|
for i in range(0, len(ids_to_fetch), batch_size):
|
|
587
|
-
batch_ids = ids_to_fetch[i:i + batch_size]
|
|
585
|
+
batch_ids = ids_to_fetch[i : i + batch_size]
|
|
588
586
|
fetch_result = index.fetch(ids=batch_ids)
|
|
589
|
-
|
|
587
|
+
|
|
590
588
|
for vid in batch_ids:
|
|
591
589
|
if vid in fetch_result.vectors:
|
|
592
590
|
vector_data = fetch_result.vectors[vid]
|
|
593
591
|
all_ids.append(vid)
|
|
594
|
-
|
|
592
|
+
|
|
595
593
|
metadata = vector_data.metadata.copy() if vector_data.metadata else {}
|
|
596
|
-
doc = metadata.pop(
|
|
594
|
+
doc = metadata.pop("document", "")
|
|
597
595
|
all_documents.append(doc)
|
|
598
596
|
all_metadatas.append(metadata)
|
|
599
597
|
all_embeddings.append(vector_data.values)
|
|
600
|
-
|
|
598
|
+
|
|
601
599
|
return {
|
|
602
600
|
"ids": all_ids,
|
|
603
601
|
"documents": all_documents,
|
|
604
602
|
"metadatas": all_metadatas,
|
|
605
|
-
"embeddings": all_embeddings
|
|
603
|
+
"embeddings": all_embeddings,
|
|
606
604
|
}
|
|
607
|
-
|
|
605
|
+
|
|
608
606
|
except Exception as e:
|
|
609
|
-
print(f"Failed to get all items: {e}")
|
|
610
607
|
import traceback
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
"metadatas": [],
|
|
616
|
-
"embeddings": []
|
|
617
|
-
}
|
|
618
|
-
|
|
608
|
+
|
|
609
|
+
log_error("Failed to get all items: %s\n%s", e, traceback.format_exc())
|
|
610
|
+
return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
|
|
611
|
+
|
|
619
612
|
def update_items(
|
|
620
613
|
self,
|
|
621
614
|
collection_name: str,
|
|
@@ -626,27 +619,27 @@ class PineconeConnection(VectorDBConnection):
|
|
|
626
619
|
) -> bool:
|
|
627
620
|
"""
|
|
628
621
|
Update items in an index.
|
|
629
|
-
|
|
622
|
+
|
|
630
623
|
Note: Pinecone updates via upsert (add_items can be used)
|
|
631
|
-
|
|
624
|
+
|
|
632
625
|
Args:
|
|
633
626
|
collection_name: Name of index
|
|
634
627
|
ids: IDs of items to update
|
|
635
628
|
documents: New document texts
|
|
636
629
|
metadatas: New metadata
|
|
637
630
|
embeddings: New embeddings
|
|
638
|
-
|
|
631
|
+
|
|
639
632
|
Returns:
|
|
640
633
|
True if successful, False otherwise
|
|
641
634
|
"""
|
|
642
635
|
index = self._get_index(collection_name)
|
|
643
636
|
if not index:
|
|
644
637
|
return False
|
|
645
|
-
|
|
638
|
+
|
|
646
639
|
try:
|
|
647
640
|
# Fetch existing vectors to preserve data not being updated
|
|
648
641
|
existing = index.fetch(ids=ids)
|
|
649
|
-
|
|
642
|
+
|
|
650
643
|
vectors = []
|
|
651
644
|
for i, vid in enumerate(ids):
|
|
652
645
|
# Start with existing data
|
|
@@ -660,32 +653,28 @@ class PineconeConnection(VectorDBConnection):
|
|
|
660
653
|
continue
|
|
661
654
|
values = embeddings[i]
|
|
662
655
|
metadata = {}
|
|
663
|
-
|
|
656
|
+
|
|
664
657
|
# Update metadata
|
|
665
658
|
if metadatas and i < len(metadatas):
|
|
666
659
|
metadata.update(metadatas[i])
|
|
667
|
-
|
|
660
|
+
|
|
668
661
|
# Update document
|
|
669
662
|
if documents and i < len(documents):
|
|
670
|
-
metadata[
|
|
671
|
-
|
|
672
|
-
vectors.append({
|
|
673
|
-
|
|
674
|
-
'values': values,
|
|
675
|
-
'metadata': metadata
|
|
676
|
-
})
|
|
677
|
-
|
|
663
|
+
metadata["document"] = documents[i]
|
|
664
|
+
|
|
665
|
+
vectors.append({"id": vid, "values": values, "metadata": metadata})
|
|
666
|
+
|
|
678
667
|
# Upsert in batches
|
|
679
668
|
batch_size = 100
|
|
680
669
|
for i in range(0, len(vectors), batch_size):
|
|
681
|
-
batch = vectors[i:i + batch_size]
|
|
670
|
+
batch = vectors[i : i + batch_size]
|
|
682
671
|
index.upsert(vectors=batch)
|
|
683
|
-
|
|
672
|
+
|
|
684
673
|
return True
|
|
685
674
|
except Exception as e:
|
|
686
|
-
|
|
675
|
+
log_error("Failed to update items: %s", e)
|
|
687
676
|
return False
|
|
688
|
-
|
|
677
|
+
|
|
689
678
|
def delete_items(
|
|
690
679
|
self,
|
|
691
680
|
collection_name: str,
|
|
@@ -694,19 +683,19 @@ class PineconeConnection(VectorDBConnection):
|
|
|
694
683
|
) -> bool:
|
|
695
684
|
"""
|
|
696
685
|
Delete items from an index.
|
|
697
|
-
|
|
686
|
+
|
|
698
687
|
Args:
|
|
699
688
|
collection_name: Name of index
|
|
700
689
|
ids: IDs of items to delete
|
|
701
690
|
where: Metadata filter for items to delete
|
|
702
|
-
|
|
691
|
+
|
|
703
692
|
Returns:
|
|
704
693
|
True if successful, False otherwise
|
|
705
694
|
"""
|
|
706
695
|
index = self._get_index(collection_name)
|
|
707
696
|
if not index:
|
|
708
697
|
return False
|
|
709
|
-
|
|
698
|
+
|
|
710
699
|
try:
|
|
711
700
|
if ids:
|
|
712
701
|
# Delete by IDs
|
|
@@ -718,24 +707,21 @@ class PineconeConnection(VectorDBConnection):
|
|
|
718
707
|
else:
|
|
719
708
|
# Delete all (use with caution)
|
|
720
709
|
index.delete(delete_all=True)
|
|
721
|
-
|
|
710
|
+
|
|
722
711
|
return True
|
|
723
712
|
except Exception as e:
|
|
724
|
-
|
|
713
|
+
log_error("Failed to delete items: %s", e)
|
|
725
714
|
return False
|
|
726
|
-
|
|
715
|
+
|
|
727
716
|
def get_connection_info(self) -> Dict[str, Any]:
|
|
728
717
|
"""
|
|
729
718
|
Get information about the current connection.
|
|
730
|
-
|
|
719
|
+
|
|
731
720
|
Returns:
|
|
732
721
|
Dictionary with connection details
|
|
733
722
|
"""
|
|
734
|
-
info = {
|
|
735
|
-
|
|
736
|
-
"connected": self.is_connected
|
|
737
|
-
}
|
|
738
|
-
|
|
723
|
+
info = {"provider": "Pinecone", "connected": self.is_connected}
|
|
724
|
+
|
|
739
725
|
if self.is_connected and self._client:
|
|
740
726
|
try:
|
|
741
727
|
# Get account/environment info if available
|
|
@@ -743,13 +729,13 @@ class PineconeConnection(VectorDBConnection):
|
|
|
743
729
|
info["index_count"] = len(indexes)
|
|
744
730
|
except Exception:
|
|
745
731
|
pass
|
|
746
|
-
|
|
732
|
+
|
|
747
733
|
return info
|
|
748
|
-
|
|
734
|
+
|
|
749
735
|
def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
|
|
750
736
|
"""
|
|
751
737
|
Get filter operators supported by Pinecone.
|
|
752
|
-
|
|
738
|
+
|
|
753
739
|
Returns:
|
|
754
740
|
List of operator dictionaries
|
|
755
741
|
"""
|