vector-inspector 0.3.11__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vector_inspector/__init__.py +1 -1
- vector_inspector/core/connection_manager.py +91 -19
- vector_inspector/core/connections/base_connection.py +43 -43
- vector_inspector/core/connections/chroma_connection.py +1 -1
- vector_inspector/core/connections/pgvector_connection.py +11 -171
- vector_inspector/core/connections/pinecone_connection.py +596 -99
- vector_inspector/core/connections/qdrant_connection.py +35 -44
- vector_inspector/core/embedding_utils.py +14 -5
- vector_inspector/core/logging.py +3 -1
- vector_inspector/main.py +42 -15
- vector_inspector/services/backup_restore_service.py +228 -15
- vector_inspector/services/settings_service.py +71 -19
- vector_inspector/ui/components/backup_restore_dialog.py +215 -101
- vector_inspector/ui/components/connection_manager_panel.py +155 -14
- vector_inspector/ui/dialogs/cross_db_migration.py +126 -99
- vector_inspector/ui/dialogs/settings_dialog.py +13 -6
- vector_inspector/ui/loading_screen.py +169 -0
- vector_inspector/ui/main_window.py +44 -19
- vector_inspector/ui/services/dialog_service.py +1 -0
- vector_inspector/ui/views/collection_browser.py +36 -34
- vector_inspector/ui/views/connection_view.py +7 -1
- vector_inspector/ui/views/info_panel.py +118 -52
- vector_inspector/ui/views/metadata_view.py +30 -31
- vector_inspector/ui/views/search_view.py +20 -19
- vector_inspector/ui/views/visualization_view.py +18 -15
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/METADATA +17 -4
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/RECORD +30 -28
- vector_inspector-0.3.12.dist-info/licenses/LICENSE +1 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/WHEEL +0 -0
- {vector_inspector-0.3.11.dist-info → vector_inspector-0.3.12.dist-info}/entry_points.txt +0 -0
|
@@ -1,9 +1,27 @@
|
|
|
1
|
-
"""Pinecone connection manager.
|
|
1
|
+
"""Pinecone connection manager.
|
|
2
|
+
|
|
3
|
+
Namespace Best Practices:
|
|
4
|
+
-------------------------
|
|
5
|
+
Pinecone supports namespaces within indexes to organize vectors. In Vector Inspector,
|
|
6
|
+
namespaces are specified using the format: 'index_name::namespace'
|
|
7
|
+
|
|
8
|
+
IMPORTANT: Always use named namespaces (e.g., 'my-index::production') rather than
|
|
9
|
+
the default namespace (just 'my-index'). The default namespace has limitations:
|
|
10
|
+
- Not reported in describe_index_stats() API
|
|
11
|
+
- Not visible in Pinecone data browser
|
|
12
|
+
- Vectors exist and are queryable, but discovery is limited
|
|
13
|
+
|
|
14
|
+
Named namespaces work perfectly and are fully visible in all interfaces.
|
|
15
|
+
|
|
16
|
+
Examples:
|
|
17
|
+
- Good: 'embeddings::production', 'embeddings::staging', 'embeddings::dev'
|
|
18
|
+
- Avoid: 'embeddings' (uses default namespace with limited visibility)
|
|
19
|
+
"""
|
|
2
20
|
|
|
3
|
-
from typing import Optional, List, Dict, Any
|
|
4
21
|
import time
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
|
|
5
24
|
from pinecone import Pinecone, ServerlessSpec
|
|
6
|
-
from pinecone.exceptions import PineconeException
|
|
7
25
|
|
|
8
26
|
from vector_inspector.core.connections.base_connection import VectorDBConnection
|
|
9
27
|
from vector_inspector.core.logging import log_error
|
|
@@ -29,6 +47,42 @@ class PineconeConnection(VectorDBConnection):
|
|
|
29
47
|
self._client: Optional[Pinecone] = None
|
|
30
48
|
self._current_index = None
|
|
31
49
|
self._current_index_name: Optional[str] = None
|
|
50
|
+
self._hosted_models: dict[str, Optional[str]] = {} # Cache: index_name -> model_name
|
|
51
|
+
|
|
52
|
+
@staticmethod
|
|
53
|
+
def _parse_collection_name(collection_name: str) -> tuple[str, str]:
|
|
54
|
+
"""
|
|
55
|
+
Parse a collection name into (index_name, namespace).
|
|
56
|
+
|
|
57
|
+
Format: 'index_name' or 'index_name::namespace'
|
|
58
|
+
Empty namespace is represented as empty string ''.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
collection_name: Collection name, optionally with namespace
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Tuple of (index_name, namespace)
|
|
65
|
+
"""
|
|
66
|
+
if "::" in collection_name:
|
|
67
|
+
parts = collection_name.split("::", 1)
|
|
68
|
+
return parts[0], parts[1]
|
|
69
|
+
return collection_name, ""
|
|
70
|
+
|
|
71
|
+
@staticmethod
|
|
72
|
+
def _format_collection_name(index_name: str, namespace: str) -> str:
|
|
73
|
+
"""
|
|
74
|
+
Format an index name and namespace into a collection name.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
index_name: Name of the Pinecone index
|
|
78
|
+
namespace: Namespace within the index (empty string for default)
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
Formatted collection name
|
|
82
|
+
"""
|
|
83
|
+
if namespace:
|
|
84
|
+
return f"{index_name}::{namespace}"
|
|
85
|
+
return index_name
|
|
32
86
|
|
|
33
87
|
def connect(self) -> bool:
|
|
34
88
|
"""
|
|
@@ -60,18 +114,58 @@ class PineconeConnection(VectorDBConnection):
|
|
|
60
114
|
"""Check if connected to Pinecone."""
|
|
61
115
|
return self._client is not None
|
|
62
116
|
|
|
63
|
-
def list_collections(self) ->
|
|
117
|
+
def list_collections(self) -> list[str]:
|
|
64
118
|
"""
|
|
65
|
-
Get list of all indexes
|
|
119
|
+
Get list of all indexes and their namespaces.
|
|
120
|
+
|
|
121
|
+
Returns collection names in format:
|
|
122
|
+
- 'index_name' for default namespace
|
|
123
|
+
- 'index_name::namespace' for named namespaces
|
|
66
124
|
|
|
67
125
|
Returns:
|
|
68
|
-
List of index
|
|
126
|
+
List of collection names (index::namespace combinations)
|
|
69
127
|
"""
|
|
70
128
|
if not self._client:
|
|
71
129
|
return []
|
|
72
130
|
try:
|
|
73
131
|
indexes = self._client.list_indexes()
|
|
74
|
-
|
|
132
|
+
collections = []
|
|
133
|
+
|
|
134
|
+
for idx in indexes:
|
|
135
|
+
index_name = str(idx.name) # type: ignore
|
|
136
|
+
|
|
137
|
+
try:
|
|
138
|
+
# Get stats to discover namespaces
|
|
139
|
+
index = self._client.Index(index_name)
|
|
140
|
+
stats = index.describe_index_stats()
|
|
141
|
+
|
|
142
|
+
# Extract namespaces from stats
|
|
143
|
+
namespaces_info = stats.get("namespaces", {})
|
|
144
|
+
|
|
145
|
+
if not namespaces_info:
|
|
146
|
+
# No vectors yet, just add the index with default namespace
|
|
147
|
+
collections.append(index_name)
|
|
148
|
+
else:
|
|
149
|
+
# Add entry for each namespace that has vectors
|
|
150
|
+
for namespace, ns_stats in namespaces_info.items():
|
|
151
|
+
vector_count = ns_stats.get("vector_count", 0)
|
|
152
|
+
if vector_count > 0:
|
|
153
|
+
collections.append(
|
|
154
|
+
self._format_collection_name(index_name, namespace)
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# If no namespaces have vectors, still show the index
|
|
158
|
+
if not collections or not any(
|
|
159
|
+
c.startswith(f"{index_name}") for c in collections
|
|
160
|
+
):
|
|
161
|
+
collections.append(index_name)
|
|
162
|
+
|
|
163
|
+
except Exception as e:
|
|
164
|
+
log_error("Failed to get namespace info for index %s: %s", index_name, e)
|
|
165
|
+
# Fallback: just add the index name
|
|
166
|
+
collections.append(index_name)
|
|
167
|
+
|
|
168
|
+
return collections
|
|
75
169
|
except Exception as e:
|
|
76
170
|
log_error("Failed to list indexes: %s", e)
|
|
77
171
|
return []
|
|
@@ -91,12 +185,98 @@ class PineconeConnection(VectorDBConnection):
|
|
|
91
185
|
log_error("Failed to get index: %s", e)
|
|
92
186
|
return None
|
|
93
187
|
|
|
94
|
-
def
|
|
188
|
+
def _check_hosted_model(self, index_name: str) -> Optional[str]:
|
|
189
|
+
"""
|
|
190
|
+
Check if an index uses a Pinecone-hosted embedding model.
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
index_name: Name of the Pinecone index
|
|
194
|
+
|
|
195
|
+
Returns:
|
|
196
|
+
Model name if hosted model is used, None otherwise
|
|
197
|
+
"""
|
|
198
|
+
# Check cache first
|
|
199
|
+
if index_name in self._hosted_models:
|
|
200
|
+
return self._hosted_models[index_name]
|
|
201
|
+
|
|
202
|
+
# Query index description to check for hosted model
|
|
203
|
+
if not self._client:
|
|
204
|
+
return None
|
|
205
|
+
|
|
206
|
+
try:
|
|
207
|
+
index_description = self._client.describe_index(index_name)
|
|
208
|
+
hosted_model = None
|
|
209
|
+
|
|
210
|
+
# Check for model in embed field (Pinecone's hosted model info)
|
|
211
|
+
if hasattr(index_description, "embed"):
|
|
212
|
+
embed = index_description.embed
|
|
213
|
+
# embed might be a dict or an object
|
|
214
|
+
if isinstance(embed, dict) and "model" in embed:
|
|
215
|
+
hosted_model = embed["model"]
|
|
216
|
+
elif hasattr(embed, "model") and embed.model:
|
|
217
|
+
hosted_model = embed.model
|
|
218
|
+
# Also check spec (legacy/alternative location)
|
|
219
|
+
elif hasattr(index_description, "spec"):
|
|
220
|
+
spec = index_description.spec
|
|
221
|
+
if hasattr(spec, "model") and spec.model:
|
|
222
|
+
hosted_model = spec.model
|
|
223
|
+
elif hasattr(spec, "index_config") and hasattr(spec.index_config, "model"):
|
|
224
|
+
hosted_model = spec.index_config.model
|
|
225
|
+
|
|
226
|
+
# Cache the result
|
|
227
|
+
self._hosted_models[index_name] = hosted_model
|
|
228
|
+
if hosted_model:
|
|
229
|
+
log_error("✓ Detected Pinecone hosted model for '%s': %s", index_name, hosted_model)
|
|
230
|
+
return hosted_model
|
|
231
|
+
except Exception as e:
|
|
232
|
+
log_error("Failed to check hosted model for index %s: %s", index_name, e)
|
|
233
|
+
return None
|
|
234
|
+
|
|
235
|
+
def _embed_with_inference_api(
|
|
236
|
+
self, model: str, texts: list[str], input_type: str = "query"
|
|
237
|
+
) -> list[list[float]]:
|
|
238
|
+
"""
|
|
239
|
+
Use Pinecone's inference API to embed texts.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
model: Model name (e.g., 'llama-text-embed-v2')
|
|
243
|
+
texts: List of texts to embed
|
|
244
|
+
input_type: 'query' or 'passage'
|
|
245
|
+
|
|
246
|
+
Returns:
|
|
247
|
+
List of embedding vectors
|
|
248
|
+
|
|
249
|
+
Raises:
|
|
250
|
+
Exception if inference API is not available or fails
|
|
251
|
+
"""
|
|
252
|
+
if not self._client or not hasattr(self._client, "inference"):
|
|
253
|
+
raise Exception("Pinecone inference API not available on this client")
|
|
254
|
+
|
|
255
|
+
try:
|
|
256
|
+
result = self._client.inference.embed(
|
|
257
|
+
model=model, inputs=texts, parameters={"input_type": input_type}
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
# Extract embeddings from result
|
|
261
|
+
embeddings = []
|
|
262
|
+
for item in result:
|
|
263
|
+
if hasattr(item, "values"):
|
|
264
|
+
embeddings.append(item.values)
|
|
265
|
+
elif isinstance(item, dict) and "values" in item:
|
|
266
|
+
embeddings.append(item["values"])
|
|
267
|
+
else:
|
|
268
|
+
raise Exception(f"Unexpected inference API response format: {type(item)}")
|
|
269
|
+
|
|
270
|
+
return embeddings
|
|
271
|
+
except Exception as e:
|
|
272
|
+
raise Exception(f"Inference API embedding failed: {e}") from e
|
|
273
|
+
|
|
274
|
+
def get_collection_info(self, name: str) -> Optional[dict[str, Any]]:
|
|
95
275
|
"""
|
|
96
|
-
Get index metadata and statistics.
|
|
276
|
+
Get index metadata and statistics for a specific namespace.
|
|
97
277
|
|
|
98
278
|
Args:
|
|
99
|
-
name:
|
|
279
|
+
name: Collection name (format: 'index' or 'index::namespace')
|
|
100
280
|
|
|
101
281
|
Returns:
|
|
102
282
|
Dictionary with index info
|
|
@@ -105,21 +285,51 @@ class PineconeConnection(VectorDBConnection):
|
|
|
105
285
|
return None
|
|
106
286
|
|
|
107
287
|
try:
|
|
288
|
+
# Parse collection name to get index and namespace
|
|
289
|
+
index_name, namespace = self._parse_collection_name(name)
|
|
290
|
+
|
|
108
291
|
# Get index description
|
|
109
|
-
index_description = self._client.describe_index(
|
|
292
|
+
index_description = self._client.describe_index(index_name)
|
|
110
293
|
|
|
111
294
|
# Get index stats
|
|
112
|
-
index = self._get_index(
|
|
295
|
+
index = self._get_index(index_name)
|
|
113
296
|
if not index:
|
|
114
297
|
return None
|
|
115
298
|
|
|
299
|
+
# Get all stats (describe_index_stats returns stats for all namespaces)
|
|
116
300
|
stats = index.describe_index_stats()
|
|
117
301
|
|
|
118
|
-
# Extract information
|
|
119
|
-
|
|
302
|
+
# Extract information for this specific namespace
|
|
303
|
+
if namespace and "namespaces" in stats:
|
|
304
|
+
namespace_stats = stats["namespaces"].get(namespace, {})
|
|
305
|
+
total_vector_count = namespace_stats.get("vector_count", 0)
|
|
306
|
+
else:
|
|
307
|
+
# For default namespace or when no namespaces exist
|
|
308
|
+
total_vector_count = stats.get("total_vector_count", 0)
|
|
309
|
+
|
|
120
310
|
dimension = index_description.dimension
|
|
121
311
|
metric = index_description.metric
|
|
122
312
|
|
|
313
|
+
# Check if index uses a Pinecone-hosted embedding model
|
|
314
|
+
hosted_model = None
|
|
315
|
+
if hasattr(index_description, "embed"):
|
|
316
|
+
embed = index_description.embed
|
|
317
|
+
# embed might be a dict or an object
|
|
318
|
+
if isinstance(embed, dict) and "model" in embed:
|
|
319
|
+
hosted_model = embed["model"]
|
|
320
|
+
elif hasattr(embed, "model") and embed.model:
|
|
321
|
+
hosted_model = embed.model
|
|
322
|
+
# Also check spec (legacy/alternative location)
|
|
323
|
+
elif hasattr(index_description, "spec"):
|
|
324
|
+
spec = index_description.spec
|
|
325
|
+
if hasattr(spec, "model") and spec.model:
|
|
326
|
+
hosted_model = spec.model
|
|
327
|
+
elif hasattr(spec, "index_config") and hasattr(spec.index_config, "model"):
|
|
328
|
+
hosted_model = spec.index_config.model
|
|
329
|
+
|
|
330
|
+
# Cache the hosted model info for this index
|
|
331
|
+
self._hosted_models[index_name] = hosted_model
|
|
332
|
+
|
|
123
333
|
# Get metadata fields from a sample query (if vectors exist)
|
|
124
334
|
metadata_fields = []
|
|
125
335
|
if total_vector_count > 0:
|
|
@@ -127,7 +337,10 @@ class PineconeConnection(VectorDBConnection):
|
|
|
127
337
|
# Query for a small sample to see metadata structure
|
|
128
338
|
dimension_val = int(dimension) if dimension else 0
|
|
129
339
|
sample_query = index.query(
|
|
130
|
-
vector=[0.0] * dimension_val,
|
|
340
|
+
vector=[0.0] * dimension_val,
|
|
341
|
+
top_k=1,
|
|
342
|
+
include_metadata=True,
|
|
343
|
+
namespace=namespace,
|
|
131
344
|
)
|
|
132
345
|
if hasattr(sample_query, "matches") and sample_query.matches: # type: ignore
|
|
133
346
|
metadata = sample_query.matches[0].metadata # type: ignore
|
|
@@ -136,8 +349,10 @@ class PineconeConnection(VectorDBConnection):
|
|
|
136
349
|
except Exception:
|
|
137
350
|
pass # Metadata fields will remain empty
|
|
138
351
|
|
|
139
|
-
|
|
352
|
+
info_dict = {
|
|
140
353
|
"name": name,
|
|
354
|
+
"index_name": index_name,
|
|
355
|
+
"namespace": namespace if namespace else "(default)",
|
|
141
356
|
"count": total_vector_count,
|
|
142
357
|
"metadata_fields": metadata_fields,
|
|
143
358
|
"vector_dimension": dimension,
|
|
@@ -152,6 +367,13 @@ class PineconeConnection(VectorDBConnection):
|
|
|
152
367
|
if hasattr(index_description, "spec")
|
|
153
368
|
else "N/A",
|
|
154
369
|
}
|
|
370
|
+
|
|
371
|
+
# Add hosted model info if detected
|
|
372
|
+
if hosted_model:
|
|
373
|
+
info_dict["embedding_model"] = hosted_model
|
|
374
|
+
info_dict["embedding_model_type"] = "pinecone-hosted"
|
|
375
|
+
|
|
376
|
+
return info_dict
|
|
155
377
|
except Exception as e:
|
|
156
378
|
log_error("Failed to get index info: %s", e)
|
|
157
379
|
return None
|
|
@@ -160,8 +382,16 @@ class PineconeConnection(VectorDBConnection):
|
|
|
160
382
|
"""
|
|
161
383
|
Create a new index.
|
|
162
384
|
|
|
385
|
+
Note: In Pinecone, indexes are created but namespaces are implicit.
|
|
386
|
+
If name includes '::namespace', only the index will be created.
|
|
387
|
+
Namespaces are automatically created when data is added to them.
|
|
388
|
+
|
|
389
|
+
IMPORTANT: For Pinecone, it's recommended to always use named namespaces
|
|
390
|
+
(e.g., 'index::production' rather than just 'index') because the default
|
|
391
|
+
namespace has limitations with visibility in stats API and data browser.
|
|
392
|
+
|
|
163
393
|
Args:
|
|
164
|
-
name: Index name
|
|
394
|
+
name: Index name (format: 'index::namespace' recommended, or 'index' alone)
|
|
165
395
|
vector_size: Dimension of vectors
|
|
166
396
|
distance: Distance metric (Cosine, Euclidean, DotProduct)
|
|
167
397
|
|
|
@@ -172,6 +402,25 @@ class PineconeConnection(VectorDBConnection):
|
|
|
172
402
|
return False
|
|
173
403
|
|
|
174
404
|
try:
|
|
405
|
+
# Parse name - only use index part for creation
|
|
406
|
+
index_name, namespace = self._parse_collection_name(name)
|
|
407
|
+
|
|
408
|
+
# Warn if using default namespace
|
|
409
|
+
if not namespace:
|
|
410
|
+
log_error(
|
|
411
|
+
"RECOMMENDATION: Consider using a named namespace (e.g., '%s::main') "
|
|
412
|
+
"instead of the default namespace. Named namespaces are fully visible "
|
|
413
|
+
"in Pinecone's data browser and stats API.",
|
|
414
|
+
index_name,
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
if namespace:
|
|
418
|
+
log_error(
|
|
419
|
+
"Note: Creating index '%s'. Namespace '%s' will be created when data is added.",
|
|
420
|
+
index_name,
|
|
421
|
+
namespace,
|
|
422
|
+
)
|
|
423
|
+
|
|
175
424
|
# Map distance names to Pinecone metrics
|
|
176
425
|
metric_map = {
|
|
177
426
|
"cosine": "cosine",
|
|
@@ -183,7 +432,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
183
432
|
|
|
184
433
|
# Create serverless index (default configuration)
|
|
185
434
|
self._client.create_index(
|
|
186
|
-
name=
|
|
435
|
+
name=index_name,
|
|
187
436
|
dimension=vector_size,
|
|
188
437
|
metric=metric,
|
|
189
438
|
spec=ServerlessSpec(cloud="aws", region="us-east-1"),
|
|
@@ -193,7 +442,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
193
442
|
max_wait = 60 # seconds
|
|
194
443
|
start_time = time.time()
|
|
195
444
|
while time.time() - start_time < max_wait:
|
|
196
|
-
desc = self._client.describe_index(
|
|
445
|
+
desc = self._client.describe_index(index_name)
|
|
197
446
|
status = (
|
|
198
447
|
desc.status.get("state", "unknown")
|
|
199
448
|
if hasattr(desc.status, "get")
|
|
@@ -211,16 +460,16 @@ class PineconeConnection(VectorDBConnection):
|
|
|
211
460
|
def add_items(
|
|
212
461
|
self,
|
|
213
462
|
collection_name: str,
|
|
214
|
-
documents:
|
|
215
|
-
metadatas: Optional[
|
|
216
|
-
ids: Optional[
|
|
217
|
-
embeddings: Optional[
|
|
463
|
+
documents: list[str],
|
|
464
|
+
metadatas: Optional[list[dict[str, Any]]] = None,
|
|
465
|
+
ids: Optional[list[str]] = None,
|
|
466
|
+
embeddings: Optional[list[list[float]]] = None,
|
|
218
467
|
) -> bool:
|
|
219
468
|
"""
|
|
220
|
-
Add items to an index.
|
|
469
|
+
Add items to an index namespace.
|
|
221
470
|
|
|
222
471
|
Args:
|
|
223
|
-
collection_name:
|
|
472
|
+
collection_name: Collection name (format: 'index' or 'index::namespace')
|
|
224
473
|
documents: Document texts (stored in metadata)
|
|
225
474
|
metadatas: Metadata for each vector
|
|
226
475
|
ids: IDs for each vector
|
|
@@ -229,11 +478,14 @@ class PineconeConnection(VectorDBConnection):
|
|
|
229
478
|
Returns:
|
|
230
479
|
True if successful, False otherwise
|
|
231
480
|
"""
|
|
481
|
+
# Parse collection name
|
|
482
|
+
index_name, namespace = self._parse_collection_name(collection_name)
|
|
483
|
+
|
|
232
484
|
# If embeddings not provided, compute using base helper
|
|
233
485
|
if not embeddings and documents:
|
|
234
486
|
try:
|
|
235
487
|
embeddings = self.compute_embeddings_for_documents(
|
|
236
|
-
collection_name, documents, getattr(self, "
|
|
488
|
+
collection_name, documents, getattr(self, "profile_name", None)
|
|
237
489
|
)
|
|
238
490
|
except Exception as e:
|
|
239
491
|
log_error("Embeddings are required for Pinecone and computing them failed: %s", e)
|
|
@@ -243,7 +495,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
243
495
|
log_error("Embeddings are required for Pinecone but none were provided or computed")
|
|
244
496
|
return False
|
|
245
497
|
|
|
246
|
-
index = self._get_index(
|
|
498
|
+
index = self._get_index(index_name)
|
|
247
499
|
if not index:
|
|
248
500
|
return False
|
|
249
501
|
|
|
@@ -265,35 +517,46 @@ class PineconeConnection(VectorDBConnection):
|
|
|
265
517
|
|
|
266
518
|
vectors.append({"id": ids[i], "values": embedding, "metadata": metadata})
|
|
267
519
|
|
|
268
|
-
# Upsert in batches of 100 (Pinecone limit)
|
|
520
|
+
# Upsert in batches of 100 (Pinecone limit) with namespace
|
|
269
521
|
batch_size = 100
|
|
270
522
|
for i in range(0, len(vectors), batch_size):
|
|
271
523
|
batch = vectors[i : i + batch_size]
|
|
272
|
-
|
|
524
|
+
# For default namespace, omit the namespace parameter
|
|
525
|
+
if namespace:
|
|
526
|
+
index.upsert(vectors=batch, namespace=namespace)
|
|
527
|
+
else:
|
|
528
|
+
index.upsert(vectors=batch)
|
|
273
529
|
|
|
274
530
|
return True
|
|
275
531
|
except Exception as e:
|
|
276
532
|
log_error("Failed to add items: %s", e)
|
|
277
533
|
return False
|
|
278
534
|
|
|
279
|
-
def get_items(self, name: str, ids:
|
|
535
|
+
def get_items(self, name: str, ids: list[str]) -> dict[str, Any]:
|
|
280
536
|
"""
|
|
281
|
-
Retrieve items by IDs.
|
|
537
|
+
Retrieve items by IDs from a namespace.
|
|
282
538
|
|
|
283
539
|
Args:
|
|
284
|
-
name:
|
|
540
|
+
name: Collection name (format: 'index' or 'index::namespace')
|
|
285
541
|
ids: List of vector IDs
|
|
286
542
|
|
|
287
543
|
Returns:
|
|
288
544
|
Dictionary with documents and metadatas
|
|
289
545
|
"""
|
|
290
|
-
|
|
546
|
+
# Parse collection name
|
|
547
|
+
index_name, namespace = self._parse_collection_name(name)
|
|
548
|
+
|
|
549
|
+
index = self._get_index(index_name)
|
|
291
550
|
if not index:
|
|
292
551
|
return {"documents": [], "metadatas": []}
|
|
293
552
|
|
|
294
553
|
try:
|
|
295
|
-
# Fetch vectors
|
|
296
|
-
|
|
554
|
+
# Fetch vectors from namespace
|
|
555
|
+
# For default namespace, omit the namespace parameter
|
|
556
|
+
if namespace:
|
|
557
|
+
result = index.fetch(ids=ids, namespace=namespace)
|
|
558
|
+
else:
|
|
559
|
+
result = index.fetch(ids=ids)
|
|
297
560
|
|
|
298
561
|
documents = []
|
|
299
562
|
metadatas = []
|
|
@@ -318,10 +581,13 @@ class PineconeConnection(VectorDBConnection):
|
|
|
318
581
|
|
|
319
582
|
def delete_collection(self, name: str) -> bool:
|
|
320
583
|
"""
|
|
321
|
-
Delete an index.
|
|
584
|
+
Delete an index or clear a namespace.
|
|
585
|
+
|
|
586
|
+
If name is just an index name, deletes the entire index.
|
|
587
|
+
If name includes '::namespace', deletes all vectors in that namespace.
|
|
322
588
|
|
|
323
589
|
Args:
|
|
324
|
-
name:
|
|
590
|
+
name: Collection name (format: 'index' or 'index::namespace')
|
|
325
591
|
|
|
326
592
|
Returns:
|
|
327
593
|
True if successful, False otherwise
|
|
@@ -330,31 +596,52 @@ class PineconeConnection(VectorDBConnection):
|
|
|
330
596
|
return False
|
|
331
597
|
|
|
332
598
|
try:
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
599
|
+
# Parse collection name
|
|
600
|
+
index_name, namespace = self._parse_collection_name(name)
|
|
601
|
+
|
|
602
|
+
if namespace:
|
|
603
|
+
# Delete all vectors in the namespace (keeps index and other namespaces)
|
|
604
|
+
index = self._get_index(index_name)
|
|
605
|
+
if not index:
|
|
606
|
+
return False
|
|
607
|
+
index.delete(delete_all=True, namespace=namespace)
|
|
608
|
+
else:
|
|
609
|
+
# Delete the entire index
|
|
610
|
+
self._client.delete_index(index_name)
|
|
611
|
+
if self._current_index_name == index_name:
|
|
612
|
+
self._current_index = None
|
|
613
|
+
self._current_index_name = None
|
|
614
|
+
|
|
337
615
|
return True
|
|
338
616
|
except Exception as e:
|
|
339
|
-
log_error("Failed to delete
|
|
617
|
+
log_error("Failed to delete collection: %s", e)
|
|
340
618
|
return False
|
|
341
619
|
|
|
342
620
|
def count_collection(self, name: str) -> int:
|
|
343
621
|
"""
|
|
344
|
-
Return the number of vectors in the
|
|
622
|
+
Return the number of vectors in the namespace.
|
|
345
623
|
|
|
346
624
|
Args:
|
|
347
|
-
name:
|
|
625
|
+
name: Collection name (format: 'index' or 'index::namespace')
|
|
348
626
|
|
|
349
627
|
Returns:
|
|
350
628
|
Number of vectors
|
|
351
629
|
"""
|
|
352
|
-
|
|
630
|
+
# Parse collection name
|
|
631
|
+
index_name, namespace = self._parse_collection_name(name)
|
|
632
|
+
|
|
633
|
+
index = self._get_index(index_name)
|
|
353
634
|
if not index:
|
|
354
635
|
return 0
|
|
355
636
|
|
|
356
637
|
try:
|
|
357
638
|
stats = index.describe_index_stats()
|
|
639
|
+
|
|
640
|
+
# Get count for specific namespace
|
|
641
|
+
if namespace and "namespaces" in stats:
|
|
642
|
+
namespace_stats = stats["namespaces"].get(namespace, {})
|
|
643
|
+
return namespace_stats.get("vector_count", 0)
|
|
644
|
+
|
|
358
645
|
return stats.get("total_vector_count", 0)
|
|
359
646
|
except Exception:
|
|
360
647
|
return 0
|
|
@@ -362,8 +649,23 @@ class PineconeConnection(VectorDBConnection):
|
|
|
362
649
|
def _get_embedding_function_for_collection(self, collection_name: str):
|
|
363
650
|
"""
|
|
364
651
|
Returns embedding function and model type for a given collection, matching ChromaDB/Qdrant API.
|
|
652
|
+
|
|
653
|
+
Note: For collections using Pinecone-hosted models, this should not be called.
|
|
654
|
+
Text queries are handled directly by Pinecone.
|
|
365
655
|
"""
|
|
366
656
|
info = self.get_collection_info(collection_name)
|
|
657
|
+
|
|
658
|
+
# Check if this collection uses a Pinecone-hosted model
|
|
659
|
+
if info and info.get("embedding_model_type") == "pinecone-hosted":
|
|
660
|
+
hosted_model = info.get("embedding_model", "unknown")
|
|
661
|
+
log_error(
|
|
662
|
+
"Warning: Attempting to generate local embeddings for collection '%s' "
|
|
663
|
+
"that uses Pinecone-hosted model '%s'. This may indicate a configuration issue. "
|
|
664
|
+
"Consider using text queries instead.",
|
|
665
|
+
collection_name,
|
|
666
|
+
hosted_model,
|
|
667
|
+
)
|
|
668
|
+
|
|
367
669
|
dim = info.get("vector_dimension") if info else None
|
|
368
670
|
try:
|
|
369
671
|
dim_int = int(dim) if dim is not None else None
|
|
@@ -375,9 +677,10 @@ class PineconeConnection(VectorDBConnection):
|
|
|
375
677
|
|
|
376
678
|
model = None
|
|
377
679
|
model_type: str = "sentence-transformer"
|
|
378
|
-
|
|
680
|
+
profile_name = getattr(self, "profile_name", None)
|
|
681
|
+
if profile_name and collection_name:
|
|
379
682
|
settings = SettingsService()
|
|
380
|
-
cfg = settings.get_embedding_model(
|
|
683
|
+
cfg = settings.get_embedding_model(profile_name, collection_name)
|
|
381
684
|
if cfg and cfg.get("model") and cfg.get("type"):
|
|
382
685
|
from vector_inspector.core.embedding_utils import load_embedding_model
|
|
383
686
|
|
|
@@ -404,37 +707,55 @@ class PineconeConnection(VectorDBConnection):
|
|
|
404
707
|
def query_collection(
|
|
405
708
|
self,
|
|
406
709
|
collection_name: str,
|
|
407
|
-
query_texts: Optional[
|
|
408
|
-
query_embeddings: Optional[
|
|
710
|
+
query_texts: Optional[list[str]] = None,
|
|
711
|
+
query_embeddings: Optional[list[list[float]]] = None,
|
|
409
712
|
n_results: int = 10,
|
|
410
|
-
where: Optional[
|
|
411
|
-
|
|
412
|
-
) -> Optional[
|
|
713
|
+
where: Optional[dict[str, Any]] = None,
|
|
714
|
+
_where_document: Optional[dict[str, Any]] = None,
|
|
715
|
+
) -> Optional[dict[str, Any]]:
|
|
413
716
|
"""
|
|
414
|
-
Query
|
|
717
|
+
Query a namespace for similar vectors.
|
|
718
|
+
|
|
719
|
+
For indexes with hosted models, uses direct text-based search API.
|
|
415
720
|
|
|
416
721
|
Args:
|
|
417
|
-
collection_name:
|
|
418
|
-
query_texts: Text queries (
|
|
722
|
+
collection_name: Collection name (format: 'index' or 'index::namespace')
|
|
723
|
+
query_texts: Text queries (for hosted models, searches directly; otherwise embedded locally)
|
|
419
724
|
query_embeddings: Query embedding vectors
|
|
420
725
|
n_results: Number of results to return
|
|
421
726
|
where: Metadata filter
|
|
422
|
-
|
|
727
|
+
_where_document: Document content filter (not directly supported)
|
|
423
728
|
Returns:
|
|
424
729
|
Query results or None if failed
|
|
425
730
|
"""
|
|
731
|
+
# Parse collection name
|
|
732
|
+
index_name, namespace = self._parse_collection_name(collection_name)
|
|
426
733
|
|
|
427
|
-
#
|
|
734
|
+
# Check if index uses hosted model
|
|
735
|
+
hosted_model = self._check_hosted_model(index_name)
|
|
736
|
+
|
|
737
|
+
# If hosted model and text queries, use direct text search
|
|
738
|
+
if hosted_model and query_texts and query_embeddings is None:
|
|
739
|
+
log_error("Using Pinecone hosted model '%s' for text-based search", hosted_model)
|
|
740
|
+
return self._query_with_hosted_model(
|
|
741
|
+
index_name, namespace, query_texts, n_results, where
|
|
742
|
+
)
|
|
743
|
+
|
|
744
|
+
# Otherwise, use vector-based query
|
|
745
|
+
# If query_embeddings not provided, embed the query texts
|
|
428
746
|
if query_embeddings is None and query_texts:
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
747
|
+
try:
|
|
748
|
+
embedding_fn, _ = self._get_embedding_function_for_collection(collection_name)
|
|
749
|
+
query_embeddings = [embedding_fn(q) for q in query_texts]
|
|
750
|
+
except Exception as e:
|
|
751
|
+
log_error("Failed to generate embeddings for query. Error: %s", e)
|
|
752
|
+
return None
|
|
432
753
|
|
|
433
754
|
if not query_embeddings:
|
|
434
755
|
log_error("Query embeddings are required for Pinecone")
|
|
435
756
|
return None
|
|
436
757
|
|
|
437
|
-
index = self._get_index(
|
|
758
|
+
index = self._get_index(index_name)
|
|
438
759
|
if not index:
|
|
439
760
|
return None
|
|
440
761
|
|
|
@@ -452,13 +773,24 @@ class PineconeConnection(VectorDBConnection):
|
|
|
452
773
|
if where:
|
|
453
774
|
filter_dict = self._convert_filter(where)
|
|
454
775
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
776
|
+
# For default namespace, omit the namespace parameter
|
|
777
|
+
if namespace:
|
|
778
|
+
result = index.query(
|
|
779
|
+
vector=query_vector,
|
|
780
|
+
top_k=n_results,
|
|
781
|
+
include_metadata=True,
|
|
782
|
+
include_values=True,
|
|
783
|
+
filter=filter_dict,
|
|
784
|
+
namespace=namespace,
|
|
785
|
+
)
|
|
786
|
+
else:
|
|
787
|
+
result = index.query(
|
|
788
|
+
vector=query_vector,
|
|
789
|
+
top_k=n_results,
|
|
790
|
+
include_metadata=True,
|
|
791
|
+
include_values=True,
|
|
792
|
+
filter=filter_dict,
|
|
793
|
+
)
|
|
462
794
|
|
|
463
795
|
# Extract results
|
|
464
796
|
ids = []
|
|
@@ -506,7 +838,139 @@ class PineconeConnection(VectorDBConnection):
|
|
|
506
838
|
log_error("Query failed: %s\n%s", e, traceback.format_exc())
|
|
507
839
|
return None
|
|
508
840
|
|
|
509
|
-
def
|
|
841
|
+
def _query_with_hosted_model(
    self,
    index_name: str,
    namespace: str,
    query_texts: list[str],
    n_results: int,
    where: Optional[dict[str, Any]] = None,
) -> Optional[dict[str, Any]]:
    """
    Query using Pinecone-hosted embedding model with direct text search.

    Uses index.search() with text inputs - Pinecone embeds the text server-side.
    One search() call is issued per entry in ``query_texts``.

    Args:
        index_name: Name of the Pinecone index
        namespace: Namespace within the index (falsy means "do not pass one")
        query_texts: Text queries (embedded server-side by Pinecone)
        n_results: Number of results to return
        where: Metadata filter

    Returns:
        Dict with the keys ``ids``, ``distances``, ``documents``, ``metadatas``
        and ``embeddings``; each value holds one inner list per query text and
        the inner lists are index-aligned with each other. Returns None if the
        index cannot be obtained or any call raises.
    """
    index = self._get_index(index_name)
    if not index:
        return None

    def _read(hit: Any, key: str, default: Any = None) -> Any:
        # Hits are handled both as plain dicts and as attribute-style objects.
        if isinstance(hit, dict):
            return hit.get(key, default)
        return getattr(hit, key, default)

    try:
        # The filter does not depend on the query text, so convert it once.
        filter_dict = self._convert_filter(where) if where else None

        all_ids = []
        all_distances = []
        all_documents = []
        all_metadatas = []
        all_embeddings = []

        # Pinecone queries one text at a time for hosted models
        for query_text in query_texts:
            # Use index.search() with text input format for hosted models
            query_dict: dict[str, Any] = {
                "inputs": {"text": query_text},
                "top_k": n_results,
            }
            if filter_dict:
                query_dict["filter"] = filter_dict

            search_params: dict[str, Any] = {"query": query_dict}

            # Add namespace if specified
            # NOTE(review): recent Pinecone SDK releases appear to declare
            # `namespace` as a required argument of Index.search(); if so, the
            # default-namespace path raises here and this method returns None.
            # Confirm against the SDK version this project pins.
            if namespace:
                search_params["namespace"] = namespace

            # Note: search() doesn't use include_metadata/include_values like query()
            result = index.search(**search_params)

            # search() returns {'result': {'hits': [...]}} structure
            # while query() returns {'matches': [...]} structure
            if hasattr(result, "result") and hasattr(result.result, "hits"):
                hits = result.result.hits
            elif isinstance(result, dict) and "result" in result and "hits" in result["result"]:
                hits = result["result"]["hits"]
            else:
                log_error("Unexpected search response structure: %s", result)
                hits = []

            ids = []
            distances = []
            documents = []
            metadatas = []
            embeddings = []

            for hit in hits:
                # Extract ID (search uses '_id' not 'id')
                hit_id = _read(hit, "_id")
                if not hit_id:
                    # Skip the whole hit: appending its score/fields without an
                    # ID would shift every later entry out of alignment.
                    continue
                ids.append(hit_id)

                # Extract score (search uses '_score' not 'score')
                score = _read(hit, "_score")
                # Convert similarity to distance for cosine metric
                distances.append(1.0 - score if score is not None else None)

                # Extract fields (search uses 'fields' not 'metadata')
                fields = _read(hit, "fields", {})
                if isinstance(fields, dict):
                    metadata = dict(fields)
                    documents.append(metadata.pop("document", ""))
                    metadatas.append(metadata)
                else:
                    documents.append("")
                    metadatas.append({})

                # search() doesn't return vector values
                embeddings.append([])

            all_ids.append(ids)
            all_distances.append(distances)
            all_documents.append(documents)
            all_metadatas.append(metadatas)
            all_embeddings.append(embeddings)

        return {
            "ids": all_ids,
            "distances": all_distances,
            "documents": all_documents,
            "metadatas": all_metadatas,
            "embeddings": all_embeddings,
        }
    except Exception as e:
        import traceback

        log_error("Text query with hosted model failed: %s\n%s", e, traceback.format_exc())
        return None
|
|
972
|
+
|
|
973
|
+
def _convert_filter(self, where: dict[str, Any]) -> dict[str, Any]:
|
|
510
974
|
"""
|
|
511
975
|
Convert generic filter to Pinecone filter format.
|
|
512
976
|
|
|
@@ -531,16 +995,16 @@ class PineconeConnection(VectorDBConnection):
|
|
|
531
995
|
collection_name: str,
|
|
532
996
|
limit: Optional[int] = None,
|
|
533
997
|
offset: Optional[int] = None,
|
|
534
|
-
where: Optional[
|
|
535
|
-
) -> Optional[
|
|
998
|
+
where: Optional[dict[str, Any]] = None,
|
|
999
|
+
) -> Optional[dict[str, Any]]:
|
|
536
1000
|
"""
|
|
537
|
-
Get all items from
|
|
1001
|
+
Get all items from a namespace using pagination.
|
|
538
1002
|
|
|
539
1003
|
Note: Uses Pinecone's list() method which returns a generator of ID lists.
|
|
540
1004
|
Offset-based pagination is simulated by skipping items.
|
|
541
1005
|
|
|
542
1006
|
Args:
|
|
543
|
-
collection_name:
|
|
1007
|
+
collection_name: Collection name (format: 'index' or 'index::namespace')
|
|
544
1008
|
limit: Maximum number of items to return
|
|
545
1009
|
offset: Number of items to skip
|
|
546
1010
|
where: Metadata filter (not supported in list operation)
|
|
@@ -548,7 +1012,10 @@ class PineconeConnection(VectorDBConnection):
|
|
|
548
1012
|
Returns:
|
|
549
1013
|
Index items or None if failed
|
|
550
1014
|
"""
|
|
551
|
-
|
|
1015
|
+
# Parse collection name
|
|
1016
|
+
index_name, namespace = self._parse_collection_name(collection_name)
|
|
1017
|
+
|
|
1018
|
+
index = self._get_index(index_name)
|
|
552
1019
|
if not index:
|
|
553
1020
|
return None
|
|
554
1021
|
|
|
@@ -560,7 +1027,9 @@ class PineconeConnection(VectorDBConnection):
|
|
|
560
1027
|
target_limit = limit or 100
|
|
561
1028
|
|
|
562
1029
|
# list() returns a generator that yields lists of IDs
|
|
563
|
-
|
|
1030
|
+
# For default namespace, omit the namespace parameter
|
|
1031
|
+
id_generator = index.list(namespace=namespace) if namespace else index.list() # type: ignore
|
|
1032
|
+
for id_list in id_generator:
|
|
564
1033
|
if not id_list:
|
|
565
1034
|
continue
|
|
566
1035
|
|
|
@@ -593,7 +1062,11 @@ class PineconeConnection(VectorDBConnection):
|
|
|
593
1062
|
|
|
594
1063
|
for i in range(0, len(ids_to_fetch), batch_size):
|
|
595
1064
|
batch_ids = ids_to_fetch[i : i + batch_size]
|
|
596
|
-
|
|
1065
|
+
# For default namespace, omit the namespace parameter
|
|
1066
|
+
if namespace:
|
|
1067
|
+
fetch_result = index.fetch(ids=batch_ids, namespace=namespace)
|
|
1068
|
+
else:
|
|
1069
|
+
fetch_result = index.fetch(ids=batch_ids)
|
|
597
1070
|
|
|
598
1071
|
for vid in batch_ids:
|
|
599
1072
|
if vid in fetch_result.vectors:
|
|
@@ -622,18 +1095,18 @@ class PineconeConnection(VectorDBConnection):
|
|
|
622
1095
|
def update_items(
|
|
623
1096
|
self,
|
|
624
1097
|
collection_name: str,
|
|
625
|
-
ids:
|
|
626
|
-
documents: Optional[
|
|
627
|
-
metadatas: Optional[
|
|
628
|
-
embeddings: Optional[
|
|
1098
|
+
ids: list[str],
|
|
1099
|
+
documents: Optional[list[str]] = None,
|
|
1100
|
+
metadatas: Optional[list[dict[str, Any]]] = None,
|
|
1101
|
+
embeddings: Optional[list[list[float]]] = None,
|
|
629
1102
|
) -> bool:
|
|
630
1103
|
"""
|
|
631
|
-
Update items in
|
|
1104
|
+
Update items in a namespace.
|
|
632
1105
|
|
|
633
1106
|
Note: Pinecone updates via upsert (add_items can be used)
|
|
634
1107
|
|
|
635
1108
|
Args:
|
|
636
|
-
collection_name:
|
|
1109
|
+
collection_name: Collection name (format: 'index' or 'index::namespace')
|
|
637
1110
|
ids: IDs of items to update
|
|
638
1111
|
documents: New document texts
|
|
639
1112
|
metadatas: New metadata
|
|
@@ -642,13 +1115,20 @@ class PineconeConnection(VectorDBConnection):
|
|
|
642
1115
|
Returns:
|
|
643
1116
|
True if successful, False otherwise
|
|
644
1117
|
"""
|
|
645
|
-
|
|
1118
|
+
# Parse collection name
|
|
1119
|
+
index_name, namespace = self._parse_collection_name(collection_name)
|
|
1120
|
+
|
|
1121
|
+
index = self._get_index(index_name)
|
|
646
1122
|
if not index:
|
|
647
1123
|
return False
|
|
648
1124
|
|
|
649
1125
|
try:
|
|
650
1126
|
# Fetch existing vectors to preserve data not being updated
|
|
651
|
-
|
|
1127
|
+
# For default namespace, omit the namespace parameter
|
|
1128
|
+
if namespace:
|
|
1129
|
+
existing = index.fetch(ids=ids, namespace=namespace)
|
|
1130
|
+
else:
|
|
1131
|
+
existing = index.fetch(ids=ids)
|
|
652
1132
|
|
|
653
1133
|
vectors = []
|
|
654
1134
|
for i, vid in enumerate(ids):
|
|
@@ -688,11 +1168,15 @@ class PineconeConnection(VectorDBConnection):
|
|
|
688
1168
|
|
|
689
1169
|
vectors.append({"id": vid, "values": values, "metadata": metadata})
|
|
690
1170
|
|
|
691
|
-
# Upsert in batches
|
|
1171
|
+
# Upsert in batches with namespace
|
|
692
1172
|
batch_size = 100
|
|
693
1173
|
for i in range(0, len(vectors), batch_size):
|
|
694
1174
|
batch = vectors[i : i + batch_size]
|
|
695
|
-
|
|
1175
|
+
# For default namespace, omit the namespace parameter
|
|
1176
|
+
if namespace:
|
|
1177
|
+
index.upsert(vectors=batch, namespace=namespace)
|
|
1178
|
+
else:
|
|
1179
|
+
index.upsert(vectors=batch)
|
|
696
1180
|
|
|
697
1181
|
return True
|
|
698
1182
|
except Exception as e:
|
|
@@ -702,42 +1186,55 @@ class PineconeConnection(VectorDBConnection):
|
|
|
702
1186
|
def delete_items(
    self,
    collection_name: str,
    ids: Optional[list[str]] = None,
    where: Optional[dict[str, Any]] = None,
) -> bool:
    """
    Delete items from a namespace.

    Exactly one deletion mode is used, chosen in this order: by ``ids`` when
    non-empty, else by ``where`` when non-empty, else delete everything in the
    namespace. An explicitly empty ``ids`` list (with no filter) is treated as
    "nothing to delete" rather than as a request to delete everything.

    Args:
        collection_name: Collection name (format: 'index' or 'index::namespace')
        ids: IDs of items to delete
        where: Metadata filter for items to delete

    Returns:
        True if successful, False otherwise
    """
    # Parse collection name
    index_name, namespace = self._parse_collection_name(collection_name)

    index = self._get_index(index_name)
    if not index:
        return False

    # For default namespace, omit the namespace parameter
    scope: dict[str, Any] = {"namespace": namespace} if namespace else {}

    try:
        if ids:
            # Delete by IDs in namespace
            index.delete(ids=ids, **scope)
        elif where:
            # Delete by filter in namespace
            filter_dict = self._convert_filter(where)
            index.delete(filter=filter_dict, **scope)
        elif ids is not None:
            # Caller passed an empty ID list: do nothing. Falling through to
            # delete_all here would wipe the namespace for an empty selection.
            return True
        else:
            # Delete all in namespace (use with caution)
            index.delete(delete_all=True, **scope)

        return True
    except Exception as e:
        log_error("Failed to delete items: %s", e)
        return False
|
|
739
1236
|
|
|
740
|
-
def get_connection_info(self) ->
|
|
1237
|
+
def get_connection_info(self) -> dict[str, Any]:
|
|
741
1238
|
"""
|
|
742
1239
|
Get information about the current connection.
|
|
743
1240
|
|
|
@@ -756,7 +1253,7 @@ class PineconeConnection(VectorDBConnection):
|
|
|
756
1253
|
|
|
757
1254
|
return info
|
|
758
1255
|
|
|
759
|
-
def get_supported_filter_operators(self) ->
|
|
1256
|
+
def get_supported_filter_operators(self) -> list[dict[str, Any]]:
|
|
760
1257
|
"""
|
|
761
1258
|
Get filter operators supported by Pinecone.
|
|
762
1259
|
|