vector-inspector 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. vector_inspector/core/connection_manager.py +55 -49
  2. vector_inspector/core/connections/base_connection.py +41 -41
  3. vector_inspector/core/connections/chroma_connection.py +110 -86
  4. vector_inspector/core/connections/pinecone_connection.py +168 -182
  5. vector_inspector/core/connections/qdrant_connection.py +109 -126
  6. vector_inspector/core/connections/qdrant_helpers/__init__.py +4 -0
  7. vector_inspector/core/connections/qdrant_helpers/qdrant_embedding_resolver.py +35 -0
  8. vector_inspector/core/connections/qdrant_helpers/qdrant_filter_builder.py +51 -0
  9. vector_inspector/core/connections/template_connection.py +55 -65
  10. vector_inspector/core/embedding_utils.py +32 -32
  11. vector_inspector/core/logging.py +27 -0
  12. vector_inspector/core/model_registry.py +4 -3
  13. vector_inspector/main.py +6 -2
  14. vector_inspector/services/backup_helpers.py +63 -0
  15. vector_inspector/services/backup_restore_service.py +73 -152
  16. vector_inspector/services/credential_service.py +33 -40
  17. vector_inspector/services/import_export_service.py +70 -67
  18. vector_inspector/services/profile_service.py +92 -94
  19. vector_inspector/services/settings_service.py +68 -48
  20. vector_inspector/services/visualization_service.py +40 -39
  21. vector_inspector/ui/components/splash_window.py +57 -0
  22. vector_inspector/ui/dialogs/cross_db_migration.py +6 -5
  23. vector_inspector/ui/main_window.py +200 -146
  24. vector_inspector/ui/views/info_panel.py +208 -127
  25. vector_inspector/ui/views/metadata_view.py +8 -7
  26. vector_inspector/ui/views/search_view.py +97 -75
  27. vector_inspector/ui/views/visualization_view.py +140 -97
  28. vector_inspector/utils/version.py +5 -0
  29. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/METADATA +10 -2
  30. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/RECORD +32 -25
  31. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/WHEEL +0 -0
  32. {vector_inspector-0.3.2.dist-info → vector_inspector-0.3.4.dist-info}/entry_points.txt +0 -0
@@ -6,20 +6,18 @@ from pinecone import Pinecone, ServerlessSpec
6
6
  from pinecone.exceptions import PineconeException
7
7
 
8
8
  from .base_connection import VectorDBConnection
9
+ from vector_inspector.core.logging import log_error
9
10
 
10
11
 
11
12
  class PineconeConnection(VectorDBConnection):
12
13
  """Manages connection to Pinecone and provides query interface."""
13
-
14
+
14
15
  def __init__(
15
- self,
16
- api_key: str,
17
- environment: Optional[str] = None,
18
- index_host: Optional[str] = None
16
+ self, api_key: str, environment: Optional[str] = None, index_host: Optional[str] = None
19
17
  ):
20
18
  """
21
19
  Initialize Pinecone connection.
22
-
20
+
23
21
  Args:
24
22
  api_key: Pinecone API key
25
23
  environment: Pinecone environment (optional, auto-detected)
@@ -31,41 +29,41 @@ class PineconeConnection(VectorDBConnection):
31
29
  self._client: Optional[Pinecone] = None
32
30
  self._current_index = None
33
31
  self._current_index_name: Optional[str] = None
34
-
32
+
35
33
  def connect(self) -> bool:
36
34
  """
37
35
  Establish connection to Pinecone.
38
-
36
+
39
37
  Returns:
40
38
  True if connection successful, False otherwise
41
39
  """
42
40
  try:
43
41
  # Initialize Pinecone client
44
42
  self._client = Pinecone(api_key=self.api_key)
45
-
43
+
46
44
  # Test connection by listing indexes
47
45
  self._client.list_indexes()
48
46
  return True
49
47
  except Exception as e:
50
- print(f"Connection failed: {e}")
48
+ log_error("Connection failed: %s", e)
51
49
  self._client = None # Reset client on failure
52
50
  return False
53
-
51
+
54
52
  def disconnect(self):
55
53
  """Close connection to Pinecone."""
56
54
  self._client = None
57
55
  self._current_index = None
58
56
  self._current_index_name = None
59
-
57
+
60
58
  @property
61
59
  def is_connected(self) -> bool:
62
60
  """Check if connected to Pinecone."""
63
61
  return self._client is not None
64
-
62
+
65
63
  def list_collections(self) -> List[str]:
66
64
  """
67
65
  Get list of all indexes (collections in Pinecone terminology).
68
-
66
+
69
67
  Returns:
70
68
  List of index names
71
69
  """
@@ -75,14 +73,14 @@ class PineconeConnection(VectorDBConnection):
75
73
  indexes = self._client.list_indexes()
76
74
  return [str(idx.name) for idx in indexes] # type: ignore
77
75
  except Exception as e:
78
- print(f"Failed to list indexes: {e}")
76
+ log_error("Failed to list indexes: %s", e)
79
77
  return []
80
-
78
+
81
79
  def _get_index(self, name: str):
82
80
  """Get or create index reference."""
83
81
  if not self._client:
84
82
  return None
85
-
83
+
86
84
  try:
87
85
  # Cache the current index to avoid repeated lookups
88
86
  if self._current_index_name != name:
@@ -90,38 +88,38 @@ class PineconeConnection(VectorDBConnection):
90
88
  self._current_index_name = name
91
89
  return self._current_index
92
90
  except Exception as e:
93
- print(f"Failed to get index: {e}")
91
+ log_error("Failed to get index: %s", e)
94
92
  return None
95
-
93
+
96
94
  def get_collection_info(self, name: str) -> Optional[Dict[str, Any]]:
97
95
  """
98
96
  Get index metadata and statistics.
99
-
97
+
100
98
  Args:
101
99
  name: Index name
102
-
100
+
103
101
  Returns:
104
102
  Dictionary with index info
105
103
  """
106
104
  if not self._client:
107
105
  return None
108
-
106
+
109
107
  try:
110
108
  # Get index description
111
109
  index_description = self._client.describe_index(name)
112
-
110
+
113
111
  # Get index stats
114
112
  index = self._get_index(name)
115
113
  if not index:
116
114
  return None
117
-
115
+
118
116
  stats = index.describe_index_stats()
119
-
117
+
120
118
  # Extract information
121
- total_vector_count = stats.get('total_vector_count', 0)
119
+ total_vector_count = stats.get("total_vector_count", 0)
122
120
  dimension = index_description.dimension
123
121
  metric = index_description.metric
124
-
122
+
125
123
  # Get metadata fields from a sample query (if vectors exist)
126
124
  metadata_fields = []
127
125
  if total_vector_count > 0:
@@ -129,46 +127,50 @@ class PineconeConnection(VectorDBConnection):
129
127
  # Query for a small sample to see metadata structure
130
128
  dimension_val = int(dimension) if dimension else 0
131
129
  sample_query = index.query(
132
- vector=[0.0] * dimension_val,
133
- top_k=1,
134
- include_metadata=True
130
+ vector=[0.0] * dimension_val, top_k=1, include_metadata=True
135
131
  )
136
- if hasattr(sample_query, 'matches') and sample_query.matches: # type: ignore
132
+ if hasattr(sample_query, "matches") and sample_query.matches: # type: ignore
137
133
  metadata = sample_query.matches[0].metadata # type: ignore
138
134
  if metadata:
139
135
  metadata_fields = list(metadata.keys())
140
136
  except Exception:
141
137
  pass # Metadata fields will remain empty
142
-
138
+
143
139
  return {
144
140
  "name": name,
145
141
  "count": total_vector_count,
146
142
  "metadata_fields": metadata_fields,
147
143
  "vector_dimension": dimension,
148
144
  "distance_metric": str(metric).upper() if metric else "UNKNOWN",
149
- "host": str(index_description.host) if hasattr(index_description, 'host') else "N/A",
150
- "status": index_description.status.get('state', 'unknown') if hasattr(index_description.status, 'get') else str(index_description.status), # type: ignore
151
- "spec": str(index_description.spec) if hasattr(index_description, 'spec') else "N/A",
145
+ "host": str(index_description.host)
146
+ if hasattr(index_description, "host")
147
+ else "N/A",
148
+ "status": index_description.status.get("state", "unknown")
149
+ if hasattr(index_description.status, "get")
150
+ else str(index_description.status), # type: ignore
151
+ "spec": str(index_description.spec)
152
+ if hasattr(index_description, "spec")
153
+ else "N/A",
152
154
  }
153
155
  except Exception as e:
154
- print(f"Failed to get index info: {e}")
156
+ log_error("Failed to get index info: %s", e)
155
157
  return None
156
-
158
+
157
159
  def create_collection(self, name: str, vector_size: int, distance: str = "Cosine") -> bool:
158
160
  """
159
161
  Create a new index.
160
-
162
+
161
163
  Args:
162
164
  name: Index name
163
165
  vector_size: Dimension of vectors
164
166
  distance: Distance metric (Cosine, Euclidean, DotProduct)
165
-
167
+
166
168
  Returns:
167
169
  True if successful, False otherwise
168
170
  """
169
171
  if not self._client:
170
172
  return False
171
-
173
+
172
174
  try:
173
175
  # Map distance names to Pinecone metrics
174
176
  metric_map = {
@@ -178,33 +180,34 @@ class PineconeConnection(VectorDBConnection):
178
180
  "dot": "dotproduct",
179
181
  }
180
182
  metric = metric_map.get(distance.lower(), "cosine")
181
-
183
+
182
184
  # Create serverless index (default configuration)
183
185
  self._client.create_index(
184
186
  name=name,
185
187
  dimension=vector_size,
186
188
  metric=metric,
187
- spec=ServerlessSpec(
188
- cloud='aws',
189
- region='us-east-1'
190
- )
189
+ spec=ServerlessSpec(cloud="aws", region="us-east-1"),
191
190
  )
192
-
191
+
193
192
  # Wait for index to be ready
194
193
  max_wait = 60 # seconds
195
194
  start_time = time.time()
196
195
  while time.time() - start_time < max_wait:
197
196
  desc = self._client.describe_index(name)
198
- status = desc.status.get('state', 'unknown') if hasattr(desc.status, 'get') else str(desc.status) # type: ignore
199
- if status.lower() == 'ready':
197
+ status = (
198
+ desc.status.get("state", "unknown")
199
+ if hasattr(desc.status, "get")
200
+ else str(desc.status)
201
+ ) # type: ignore
202
+ if status.lower() == "ready":
200
203
  return True
201
204
  time.sleep(2)
202
-
205
+
203
206
  return False
204
207
  except Exception as e:
205
- print(f"Failed to create index: {e}")
208
+ log_error("Failed to create index: %s", e)
206
209
  return False
207
-
210
+
208
211
  def add_items(
209
212
  self,
210
213
  collection_name: str,
@@ -215,111 +218,107 @@ class PineconeConnection(VectorDBConnection):
215
218
  ) -> bool:
216
219
  """
217
220
  Add items to an index.
218
-
221
+
219
222
  Args:
220
223
  collection_name: Name of index
221
224
  documents: Document texts (stored in metadata)
222
225
  metadatas: Metadata for each vector
223
226
  ids: IDs for each vector
224
227
  embeddings: Pre-computed embeddings (required for Pinecone)
225
-
228
+
226
229
  Returns:
227
230
  True if successful, False otherwise
228
231
  """
229
232
  if not embeddings:
230
- print("Embeddings are required for Pinecone")
233
+ log_error("Embeddings are required for Pinecone")
231
234
  return False
232
-
235
+
233
236
  index = self._get_index(collection_name)
234
237
  if not index:
235
238
  return False
236
-
239
+
237
240
  try:
238
241
  # Generate IDs if not provided
239
242
  if not ids:
240
243
  ids = [f"vec_{i}" for i in range(len(embeddings))]
241
-
244
+
242
245
  # Prepare vectors for upsert
243
246
  vectors = []
244
247
  for i, embedding in enumerate(embeddings):
245
248
  metadata = {}
246
249
  if metadatas and i < len(metadatas):
247
250
  metadata = metadatas[i].copy()
248
-
251
+
249
252
  # Add document text to metadata
250
253
  if documents and i < len(documents):
251
- metadata['document'] = documents[i]
252
-
253
- vectors.append({
254
- 'id': ids[i],
255
- 'values': embedding,
256
- 'metadata': metadata
257
- })
258
-
254
+ metadata["document"] = documents[i]
255
+
256
+ vectors.append({"id": ids[i], "values": embedding, "metadata": metadata})
257
+
259
258
  # Upsert in batches of 100 (Pinecone limit)
260
259
  batch_size = 100
261
260
  for i in range(0, len(vectors), batch_size):
262
- batch = vectors[i:i + batch_size]
261
+ batch = vectors[i : i + batch_size]
263
262
  index.upsert(vectors=batch)
264
-
263
+
265
264
  return True
266
265
  except Exception as e:
267
- print(f"Failed to add items: {e}")
266
+ log_error("Failed to add items: %s", e)
268
267
  return False
269
-
268
+
270
269
  def get_items(self, name: str, ids: List[str]) -> Dict[str, Any]:
271
270
  """
272
271
  Retrieve items by IDs.
273
-
272
+
274
273
  Args:
275
274
  name: Index name
276
275
  ids: List of vector IDs
277
-
276
+
278
277
  Returns:
279
278
  Dictionary with documents and metadatas
280
279
  """
281
280
  index = self._get_index(name)
282
281
  if not index:
283
282
  return {"documents": [], "metadatas": []}
284
-
283
+
285
284
  try:
286
285
  # Fetch vectors
287
286
  result = index.fetch(ids=ids)
288
-
287
+
289
288
  documents = []
290
289
  metadatas = []
291
-
290
+
292
291
  for vid in ids:
293
292
  if vid in result.vectors:
294
293
  vector_data = result.vectors[vid]
295
294
  metadata = vector_data.metadata or {}
296
-
295
+
297
296
  # Extract document from metadata
298
- doc = metadata.pop('document', '')
297
+ doc = metadata.pop("document", "")
299
298
  documents.append(doc)
300
299
  metadatas.append(metadata)
301
300
  else:
302
- documents.append('')
301
+ documents.append("")
303
302
  metadatas.append({})
304
-
303
+
305
304
  return {"documents": documents, "metadatas": metadatas}
306
305
  except Exception as e:
307
- print(f"Failed to get items: {e}")
306
+ log_error("Failed to get items: %s", e)
308
307
  return {"documents": [], "metadatas": []}
309
-
308
+
310
309
  def delete_collection(self, name: str) -> bool:
311
310
  """
312
311
  Delete an index.
313
-
312
+
314
313
  Args:
315
314
  name: Index name
316
-
315
+
317
316
  Returns:
318
317
  True if successful, False otherwise
319
318
  """
320
319
  if not self._client:
321
320
  return False
322
-
321
+
323
322
  try:
324
323
  self._client.delete_index(name)
325
324
  if self._current_index_name == name:
@@ -327,29 +326,29 @@ class PineconeConnection(VectorDBConnection):
327
326
  self._current_index_name = None
328
327
  return True
329
328
  except Exception as e:
330
- print(f"Failed to delete index: {e}")
329
+ log_error("Failed to delete index: %s", e)
331
330
  return False
332
-
331
+
333
332
  def count_collection(self, name: str) -> int:
334
333
  """
335
334
  Return the number of vectors in the index.
336
-
335
+
337
336
  Args:
338
337
  name: Index name
339
-
338
+
340
339
  Returns:
341
340
  Number of vectors
342
341
  """
343
342
  index = self._get_index(name)
344
343
  if not index:
345
344
  return 0
346
-
345
+
347
346
  try:
348
347
  stats = index.describe_index_stats()
349
- return stats.get('total_vector_count', 0)
348
+ return stats.get("total_vector_count", 0)
350
349
  except Exception:
351
350
  return 0
352
-
351
+
353
352
  def _get_embedding_function_for_collection(self, collection_name: str):
354
353
  """
355
354
  Returns embedding function and model type for a given collection, matching ChromaDB/Qdrant API.
@@ -363,6 +362,7 @@ class PineconeConnection(VectorDBConnection):
363
362
 
364
363
  # Prefer user-configured model for this collection
365
364
  from vector_inspector.services.settings_service import SettingsService
365
+
366
366
  model = None
367
367
  model_type: str = "sentence-transformer"
368
368
  if hasattr(self, "connection_id") and collection_name:
@@ -370,12 +370,14 @@ class PineconeConnection(VectorDBConnection):
370
370
  cfg = settings.get_embedding_model(getattr(self, "connection_id", ""), collection_name)
371
371
  if cfg and cfg.get("model") and cfg.get("type"):
372
372
  from vector_inspector.core.embedding_utils import load_embedding_model
373
+
373
374
  model = load_embedding_model(cfg["model"], cfg["type"])
374
375
  model_type = str(cfg["type"]) or "sentence-transformer"
375
376
 
376
377
  # Fallback to dimension-based model if none configured
377
378
  if model is None:
378
379
  from vector_inspector.core.embedding_utils import get_embedding_model_for_dimension
380
+
379
381
  if dim_int is None:
380
382
  dim_int = 384 # default for MiniLM
381
383
  loaded_model, _, inferred_type = get_embedding_model_for_dimension(dim_int)
@@ -383,6 +385,7 @@ class PineconeConnection(VectorDBConnection):
383
385
  model_type = str(inferred_type) or "sentence-transformer"
384
386
 
385
387
  from vector_inspector.core.embedding_utils import encode_text
388
+
386
389
  def embedding_fn(text: str):
387
390
  return encode_text(text, model, model_type)
388
391
 
@@ -399,7 +402,7 @@ class PineconeConnection(VectorDBConnection):
399
402
  ) -> Optional[Dict[str, Any]]:
400
403
  """
401
404
  Query an index for similar vectors.
402
-
405
+
403
406
  Args:
404
407
  collection_name: Name of index
405
408
  query_texts: Text queries (will be embedded if provided)
@@ -418,13 +421,13 @@ class PineconeConnection(VectorDBConnection):
418
421
  query_texts = None
419
422
 
420
423
  if not query_embeddings:
421
- print("Query embeddings are required for Pinecone")
424
+ log_error("Query embeddings are required for Pinecone")
422
425
  return None
423
-
426
+
424
427
  index = self._get_index(collection_name)
425
428
  if not index:
426
429
  return None
427
-
430
+
428
431
  try:
429
432
  # Pinecone queries one vector at a time
430
433
  all_ids = []
@@ -432,54 +435,54 @@ class PineconeConnection(VectorDBConnection):
432
435
  all_documents = []
433
436
  all_metadatas = []
434
437
  all_embeddings = []
435
-
438
+
436
439
  for query_vector in query_embeddings:
437
440
  # Build filter if provided
438
441
  filter_dict = None
439
442
  if where:
440
443
  filter_dict = self._convert_filter(where)
441
-
444
+
442
445
  result = index.query(
443
446
  vector=query_vector,
444
447
  top_k=n_results,
445
448
  include_metadata=True,
446
449
  include_values=True,
447
- filter=filter_dict
450
+ filter=filter_dict,
448
451
  )
449
-
452
+
450
453
  # Extract results
451
454
  ids = []
452
455
  distances = []
453
456
  documents = []
454
457
  metadatas = []
455
458
  embeddings = []
456
-
457
- if hasattr(result, 'matches'):
459
+
460
+ if hasattr(result, "matches"):
458
461
  for match in result.matches: # type: ignore
459
462
  ids.append(match.id) # type: ignore
460
463
  # Convert similarity to distance for cosine metric
461
- score = getattr(match, 'score', None)
464
+ score = getattr(match, "score", None)
462
465
  if score is not None:
463
466
  distances.append(1.0 - score)
464
467
  else:
465
468
  distances.append(None)
466
-
469
+
467
470
  metadata = match.metadata or {} # type: ignore
468
- doc = metadata.pop('document', '')
471
+ doc = metadata.pop("document", "")
469
472
  documents.append(doc)
470
473
  metadatas.append(metadata)
471
-
472
- if hasattr(match, 'values') and match.values: # type: ignore
474
+
475
+ if hasattr(match, "values") and match.values: # type: ignore
473
476
  embeddings.append(match.values) # type: ignore
474
477
  else:
475
478
  embeddings.append([])
476
-
479
+
477
480
  all_ids.append(ids)
478
481
  all_distances.append(distances)
479
482
  all_documents.append(documents)
480
483
  all_metadatas.append(metadatas)
481
484
  all_embeddings.append(embeddings)
482
-
485
+
483
486
  return {
484
487
  "ids": all_ids,
485
488
  "distances": all_distances,
@@ -488,21 +491,21 @@ class PineconeConnection(VectorDBConnection):
488
491
  "embeddings": all_embeddings,
489
492
  }
490
493
  except Exception as e:
491
- print(f"Query failed: {e}")
492
494
  import traceback
493
- traceback.print_exc()
495
+
496
+ log_error("Query failed: %s\n%s", e, traceback.format_exc())
494
497
  return None
495
-
498
+
496
499
  def _convert_filter(self, where: Dict[str, Any]) -> Dict[str, Any]:
497
500
  """
498
501
  Convert generic filter to Pinecone filter format.
499
-
502
+
500
503
  Pinecone supports: $eq, $ne, $gt, $gte, $lt, $lte, $in, $nin
501
504
  """
502
505
  # Simple conversion - map field equality
503
506
  # For more complex filters, this would need expansion
504
507
  pinecone_filter = {}
505
-
508
+
506
509
  for key, value in where.items():
507
510
  if isinstance(value, dict):
508
511
  # Handle operator-based filters
@@ -510,9 +513,9 @@ class PineconeConnection(VectorDBConnection):
510
513
  else:
511
514
  # Simple equality
512
515
  pinecone_filter[key] = {"$eq": value}
513
-
516
+
514
517
  return pinecone_filter
515
-
518
+
516
519
  def get_all_items(
517
520
  self,
518
521
  collection_name: str,
@@ -522,100 +525,90 @@ class PineconeConnection(VectorDBConnection):
522
525
  ) -> Optional[Dict[str, Any]]:
523
526
  """
524
527
  Get all items from an index using pagination.
525
-
528
+
526
529
  Note: Uses Pinecone's list() method which returns a generator of ID lists.
527
530
  Offset-based pagination is simulated by skipping items.
528
-
531
+
529
532
  Args:
530
533
  collection_name: Name of index
531
534
  limit: Maximum number of items to return
532
535
  offset: Number of items to skip
533
536
  where: Metadata filter (not supported in list operation)
534
-
537
+
535
538
  Returns:
536
539
  Index items or None if failed
537
540
  """
538
541
  index = self._get_index(collection_name)
539
542
  if not index:
540
543
  return None
541
-
544
+
542
545
  try:
543
546
  ids_to_fetch = []
544
547
  items_collected = 0
545
548
  items_skipped = 0
546
549
  target_offset = offset or 0
547
550
  target_limit = limit or 100
548
-
551
+
549
552
  # list() returns a generator that yields lists of IDs
550
553
  for id_list in index.list(): # type: ignore
551
554
  if not id_list:
552
555
  continue
553
-
556
+
554
557
  # Handle offset by skipping items
555
558
  for vid in id_list:
556
559
  if items_skipped < target_offset:
557
560
  items_skipped += 1
558
561
  continue
559
-
562
+
560
563
  if items_collected < target_limit:
561
564
  ids_to_fetch.append(vid)
562
565
  items_collected += 1
563
566
  else:
564
567
  break
565
-
568
+
566
569
  # Stop if we have enough
567
570
  if items_collected >= target_limit:
568
571
  break
569
-
572
+
570
573
  # If no IDs found, return empty result
571
574
  if not ids_to_fetch:
572
- return {
573
- "ids": [],
574
- "documents": [],
575
- "metadatas": [],
576
- "embeddings": []
577
- }
578
-
575
+ return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
576
+
579
577
  # Fetch the actual vector data in batches (Pinecone fetch limit is 1000)
580
578
  batch_size = 1000
581
579
  all_ids = []
582
580
  all_documents = []
583
581
  all_metadatas = []
584
582
  all_embeddings = []
585
-
583
+
586
584
  for i in range(0, len(ids_to_fetch), batch_size):
587
- batch_ids = ids_to_fetch[i:i + batch_size]
585
+ batch_ids = ids_to_fetch[i : i + batch_size]
588
586
  fetch_result = index.fetch(ids=batch_ids)
589
-
587
+
590
588
  for vid in batch_ids:
591
589
  if vid in fetch_result.vectors:
592
590
  vector_data = fetch_result.vectors[vid]
593
591
  all_ids.append(vid)
594
-
592
+
595
593
  metadata = vector_data.metadata.copy() if vector_data.metadata else {}
596
- doc = metadata.pop('document', '')
594
+ doc = metadata.pop("document", "")
597
595
  all_documents.append(doc)
598
596
  all_metadatas.append(metadata)
599
597
  all_embeddings.append(vector_data.values)
600
-
598
+
601
599
  return {
602
600
  "ids": all_ids,
603
601
  "documents": all_documents,
604
602
  "metadatas": all_metadatas,
605
- "embeddings": all_embeddings
603
+ "embeddings": all_embeddings,
606
604
  }
607
-
605
+
608
606
  except Exception as e:
609
- print(f"Failed to get all items: {e}")
610
607
  import traceback
611
- traceback.print_exc()
612
- return {
613
- "ids": [],
614
- "documents": [],
615
- "metadatas": [],
616
- "embeddings": []
617
- }
618
-
608
+
609
+ log_error("Failed to get all items: %s\n%s", e, traceback.format_exc())
610
+ return {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
611
+
619
612
  def update_items(
620
613
  self,
621
614
  collection_name: str,
@@ -626,27 +619,27 @@ class PineconeConnection(VectorDBConnection):
626
619
  ) -> bool:
627
620
  """
628
621
  Update items in an index.
629
-
622
+
630
623
  Note: Pinecone updates via upsert (add_items can be used)
631
-
624
+
632
625
  Args:
633
626
  collection_name: Name of index
634
627
  ids: IDs of items to update
635
628
  documents: New document texts
636
629
  metadatas: New metadata
637
630
  embeddings: New embeddings
638
-
631
+
639
632
  Returns:
640
633
  True if successful, False otherwise
641
634
  """
642
635
  index = self._get_index(collection_name)
643
636
  if not index:
644
637
  return False
645
-
638
+
646
639
  try:
647
640
  # Fetch existing vectors to preserve data not being updated
648
641
  existing = index.fetch(ids=ids)
649
-
642
+
650
643
  vectors = []
651
644
  for i, vid in enumerate(ids):
652
645
  # Start with existing data
@@ -660,32 +653,28 @@ class PineconeConnection(VectorDBConnection):
660
653
  continue
661
654
  values = embeddings[i]
662
655
  metadata = {}
663
-
656
+
664
657
  # Update metadata
665
658
  if metadatas and i < len(metadatas):
666
659
  metadata.update(metadatas[i])
667
-
660
+
668
661
  # Update document
669
662
  if documents and i < len(documents):
670
- metadata['document'] = documents[i]
671
-
672
- vectors.append({
673
- 'id': vid,
674
- 'values': values,
675
- 'metadata': metadata
676
- })
677
-
663
+ metadata["document"] = documents[i]
664
+
665
+ vectors.append({"id": vid, "values": values, "metadata": metadata})
666
+
678
667
  # Upsert in batches
679
668
  batch_size = 100
680
669
  for i in range(0, len(vectors), batch_size):
681
- batch = vectors[i:i + batch_size]
670
+ batch = vectors[i : i + batch_size]
682
671
  index.upsert(vectors=batch)
683
-
672
+
684
673
  return True
685
674
  except Exception as e:
686
- print(f"Failed to update items: {e}")
675
+ log_error("Failed to update items: %s", e)
687
676
  return False
688
-
677
+
689
678
  def delete_items(
690
679
  self,
691
680
  collection_name: str,
@@ -694,19 +683,19 @@ class PineconeConnection(VectorDBConnection):
694
683
  ) -> bool:
695
684
  """
696
685
  Delete items from an index.
697
-
686
+
698
687
  Args:
699
688
  collection_name: Name of index
700
689
  ids: IDs of items to delete
701
690
  where: Metadata filter for items to delete
702
-
691
+
703
692
  Returns:
704
693
  True if successful, False otherwise
705
694
  """
706
695
  index = self._get_index(collection_name)
707
696
  if not index:
708
697
  return False
709
-
698
+
710
699
  try:
711
700
  if ids:
712
701
  # Delete by IDs
@@ -718,24 +707,21 @@ class PineconeConnection(VectorDBConnection):
718
707
  else:
719
708
  # Delete all (use with caution)
720
709
  index.delete(delete_all=True)
721
-
710
+
722
711
  return True
723
712
  except Exception as e:
724
- print(f"Failed to delete items: {e}")
713
+ log_error("Failed to delete items: %s", e)
725
714
  return False
726
-
715
+
727
716
  def get_connection_info(self) -> Dict[str, Any]:
728
717
  """
729
718
  Get information about the current connection.
730
-
719
+
731
720
  Returns:
732
721
  Dictionary with connection details
733
722
  """
734
- info = {
735
- "provider": "Pinecone",
736
- "connected": self.is_connected
737
- }
738
-
723
+ info = {"provider": "Pinecone", "connected": self.is_connected}
724
+
739
725
  if self.is_connected and self._client:
740
726
  try:
741
727
  # Get account/environment info if available
@@ -743,13 +729,13 @@ class PineconeConnection(VectorDBConnection):
743
729
  info["index_count"] = len(indexes)
744
730
  except Exception:
745
731
  pass
746
-
732
+
747
733
  return info
748
-
734
+
749
735
  def get_supported_filter_operators(self) -> List[Dict[str, Any]]:
750
736
  """
751
737
  Get filter operators supported by Pinecone.
752
-
738
+
753
739
  Returns:
754
740
  List of operator dictionaries
755
741
  """