vector-inspector 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,26 @@
2
2
 
3
3
  from typing import Optional, Dict, Any, List
4
4
  from PySide6.QtWidgets import (
5
- QWidget, QVBoxLayout, QHBoxLayout, QTableWidget,
6
- QTableWidgetItem, QPushButton, QLabel, QSpinBox,
7
- QLineEdit, QComboBox, QGroupBox, QHeaderView, QMessageBox, QDialog,
8
- QFileDialog, QMenu
5
+ QWidget,
6
+ QVBoxLayout,
7
+ QHBoxLayout,
8
+ QTableWidget,
9
+ QTableWidgetItem,
10
+ QPushButton,
11
+ QCheckBox,
12
+ QLabel,
13
+ QSpinBox,
14
+ QLineEdit,
15
+ QComboBox,
16
+ QGroupBox,
17
+ QHeaderView,
18
+ QMessageBox,
19
+ QDialog,
20
+ QFileDialog,
21
+ QMenu,
9
22
  )
10
23
  from PySide6.QtCore import Qt, QTimer, QThread, Signal
24
+ import math
11
25
 
12
26
  from vector_inspector.core.connections.base_connection import VectorDBConnection
13
27
  from vector_inspector.ui.components.item_dialog import ItemDialog
@@ -23,10 +37,10 @@ from vector_inspector.core.logging import log_info
23
37
 
24
38
  class DataLoadThread(QThread):
25
39
  """Background thread for loading collection data."""
26
-
40
+
27
41
  finished = Signal(dict)
28
42
  error = Signal(str)
29
-
43
+
30
44
  def __init__(self, connection, collection, page_size, offset, server_filter):
31
45
  super().__init__()
32
46
  self.connection = connection
@@ -34,15 +48,12 @@ class DataLoadThread(QThread):
34
48
  self.page_size = page_size
35
49
  self.offset = offset
36
50
  self.server_filter = server_filter
37
-
51
+
38
52
  def run(self):
39
53
  """Load data from database."""
40
54
  try:
41
55
  data = self.connection.get_all_items(
42
- self.collection,
43
- limit=self.page_size,
44
- offset=self.offset,
45
- where=self.server_filter
56
+ self.collection, limit=self.page_size, offset=self.offset, where=self.server_filter
46
57
  )
47
58
  if data:
48
59
  self.finished.emit(data)
@@ -54,7 +65,7 @@ class DataLoadThread(QThread):
54
65
 
55
66
  class MetadataView(QWidget):
56
67
  """View for browsing collection data and metadata."""
57
-
68
+
58
69
  def __init__(self, connection: VectorDBConnection, parent=None):
59
70
  super().__init__(parent)
60
71
  self.connection = connection
@@ -67,39 +78,41 @@ class MetadataView(QWidget):
67
78
  self.settings_service = SettingsService()
68
79
  self.load_thread: Optional[DataLoadThread] = None
69
80
  self.cache_manager = get_cache_manager()
70
-
81
+ # used to select a specific ID after an async load
82
+ self._select_id_after_load: Optional[str] = None
83
+
71
84
  # Debounce timer for filter changes
72
85
  self.filter_reload_timer = QTimer()
73
86
  self.filter_reload_timer.setSingleShot(True)
74
87
  self.filter_reload_timer.timeout.connect(self._reload_with_filters)
75
-
88
+
76
89
  self._setup_ui()
77
-
90
+
78
91
  def _setup_ui(self):
79
92
  """Setup widget UI."""
80
93
  layout = QVBoxLayout(self)
81
-
94
+
82
95
  # Controls
83
96
  controls_layout = QHBoxLayout()
84
-
97
+
85
98
  # Pagination controls
86
99
  controls_layout.addWidget(QLabel("Page:"))
87
-
100
+
88
101
  self.prev_button = QPushButton("◀ Previous")
89
102
  self.prev_button.clicked.connect(self._previous_page)
90
103
  self.prev_button.setEnabled(False)
91
104
  controls_layout.addWidget(self.prev_button)
92
-
105
+
93
106
  self.page_label = QLabel("0 / 0")
94
107
  controls_layout.addWidget(self.page_label)
95
-
108
+
96
109
  self.next_button = QPushButton("Next ▶")
97
110
  self.next_button.clicked.connect(self._next_page)
98
111
  self.next_button.setEnabled(False)
99
112
  controls_layout.addWidget(self.next_button)
100
-
113
+
101
114
  controls_layout.addWidget(QLabel(" Items per page:"))
102
-
115
+
103
116
  self.page_size_spin = QSpinBox()
104
117
  self.page_size_spin.setMinimum(10)
105
118
  self.page_size_spin.setMaximum(500)
@@ -107,24 +120,37 @@ class MetadataView(QWidget):
107
120
  self.page_size_spin.setSingleStep(10)
108
121
  self.page_size_spin.valueChanged.connect(self._on_page_size_changed)
109
122
  controls_layout.addWidget(self.page_size_spin)
110
-
123
+
111
124
  controls_layout.addStretch()
112
-
125
+
113
126
  # Refresh button
114
127
  self.refresh_button = QPushButton("🔄 Refresh")
115
128
  self.refresh_button.clicked.connect(self._refresh_data)
116
129
  self.refresh_button.setToolTip("Refresh data and clear cache")
117
130
  controls_layout.addWidget(self.refresh_button)
118
-
131
+
119
132
  # Add/Delete buttons
120
133
  self.add_button = QPushButton("Add Item")
121
134
  self.add_button.clicked.connect(self._add_item)
122
135
  controls_layout.addWidget(self.add_button)
123
-
136
+
124
137
  self.delete_button = QPushButton("Delete Selected")
125
138
  self.delete_button.clicked.connect(self._delete_selected)
126
139
  controls_layout.addWidget(self.delete_button)
127
-
140
+
141
+ # Checkbox: generate embeddings on edit
142
+ self.generate_on_edit_checkbox = QCheckBox("Generate embeddings on edit")
143
+ # Load persisted preference (default False)
144
+ try:
145
+ pref = bool(self.settings_service.get("generate_embeddings_on_edit", False))
146
+ except Exception:
147
+ pref = False
148
+ self.generate_on_edit_checkbox.setChecked(pref)
149
+ self.generate_on_edit_checkbox.toggled.connect(
150
+ lambda v: self.settings_service.set("generate_embeddings_on_edit", bool(v))
151
+ )
152
+ controls_layout.addWidget(self.generate_on_edit_checkbox)
153
+
128
154
  # Export button with menu
129
155
  self.export_button = QPushButton("Export...")
130
156
  self.export_button.setStyleSheet("QPushButton::menu-indicator { width: 0px; }")
@@ -134,7 +160,7 @@ class MetadataView(QWidget):
134
160
  export_menu.addAction("Export to Parquet", lambda: self._export_data("parquet"))
135
161
  self.export_button.setMenu(export_menu)
136
162
  controls_layout.addWidget(self.export_button)
137
-
163
+
138
164
  # Import button with menu
139
165
  self.import_button = QPushButton("Import...")
140
166
  self.import_button.setStyleSheet("QPushButton::menu-indicator { width: 0px; }")
@@ -144,26 +170,26 @@ class MetadataView(QWidget):
144
170
  import_menu.addAction("Import from Parquet", lambda: self._import_data("parquet"))
145
171
  self.import_button.setMenu(import_menu)
146
172
  controls_layout.addWidget(self.import_button)
147
-
173
+
148
174
  layout.addLayout(controls_layout)
149
-
175
+
150
176
  # Filter section
151
177
  filter_group = QGroupBox("Metadata Filters")
152
178
  filter_group.setCheckable(True)
153
179
  filter_group.setChecked(False)
154
180
  filter_group_layout = QVBoxLayout()
155
-
181
+
156
182
  self.filter_builder = FilterBuilder()
157
183
  # Remove auto-reload on filter changes - only reload when user clicks Refresh
158
184
  # self.filter_builder.filter_changed.connect(self._on_filter_changed)
159
185
  # But DO reload when user presses Enter or clicks away from value input
160
186
  self.filter_builder.apply_filters.connect(self._apply_filters)
161
187
  filter_group_layout.addWidget(self.filter_builder)
162
-
188
+
163
189
  filter_group.setLayout(filter_group_layout)
164
190
  layout.addWidget(filter_group)
165
191
  self.filter_group = filter_group
166
-
192
+
167
193
  # Data table
168
194
  self.table = QTableWidget()
169
195
  self.table.setSelectionBehavior(QTableWidget.SelectRows)
@@ -171,23 +197,27 @@ class MetadataView(QWidget):
171
197
  self.table.horizontalHeader().setStretchLastSection(True)
172
198
  self.table.doubleClicked.connect(self._on_row_double_clicked)
173
199
  layout.addWidget(self.table)
174
-
200
+
175
201
  # Status bar
176
202
  self.status_label = QLabel("No collection selected")
177
203
  self.status_label.setStyleSheet("color: gray;")
178
204
  layout.addWidget(self.status_label)
179
-
205
+
180
206
  def set_collection(self, collection_name: str, database_name: str = ""):
181
207
  """Set the current collection to display."""
182
208
  self.current_collection = collection_name
183
209
  # Always update database_name if provided (even if empty string on first call)
184
210
  if database_name: # Only update if non-empty
185
211
  self.current_database = database_name
186
-
212
+
187
213
  # Debug: Check cache status
188
- log_info("[MetadataView] Setting collection: db='%s', coll='%s'", self.current_database, collection_name)
214
+ log_info(
215
+ "[MetadataView] Setting collection: db='%s', coll='%s'",
216
+ self.current_database,
217
+ collection_name,
218
+ )
189
219
  log_info("[MetadataView] Cache enabled: %s", self.cache_manager.is_enabled())
190
-
220
+
191
221
  # Check cache first
192
222
  cached = self.cache_manager.get(self.current_database, self.current_collection)
193
223
  if cached and cached.data:
@@ -198,256 +228,411 @@ class MetadataView(QWidget):
198
228
  self._populate_table(cached.data)
199
229
  self._update_pagination_controls()
200
230
  self._update_filter_fields(cached.data)
201
-
231
+
202
232
  # Restore UI state
203
233
  if cached.scroll_position:
204
234
  self.table.verticalScrollBar().setValue(cached.scroll_position)
205
235
  if cached.search_query:
206
236
  # Restore filter state if applicable
207
237
  pass
208
-
209
- self.status_label.setText(f"✓ Loaded from cache - {len(cached.data.get('ids', []))} items")
238
+
239
+ self.status_label.setText(
240
+ f"✓ Loaded from cache - {len(cached.data.get('ids', []))} items"
241
+ )
210
242
  return
211
-
243
+
212
244
  log_info("[MetadataView] ✗ Cache MISS. Loading from database...")
213
245
  # Not in cache, load from database
214
246
  self.current_page = 0
215
-
247
+
216
248
  # Update filter builder with supported operators
217
249
  operators = self.connection.get_supported_filter_operators()
218
250
  self.filter_builder.set_operators(operators)
219
-
251
+
220
252
  self._load_data_internal()
221
-
253
+
222
254
  def _load_data(self):
223
255
  """Load data from current collection (with loading dialog)."""
224
256
  if not self.current_collection:
225
257
  self.status_label.setText("No collection selected")
226
258
  self.table.setRowCount(0)
227
259
  return
228
-
260
+
229
261
  self.loading_dialog.show_loading("Loading data from collection...")
230
262
  QApplication.processEvents()
231
263
  try:
232
264
  self._load_data_internal()
233
265
  finally:
234
266
  self.loading_dialog.hide_loading()
235
-
267
+
236
268
  def _load_data_internal(self):
237
269
  """Internal method to load data without managing loading dialog."""
238
270
  if not self.current_collection:
239
271
  self.status_label.setText("No collection selected")
240
272
  self.table.setRowCount(0)
241
273
  return
242
-
274
+
243
275
  # Cancel any existing load thread
244
276
  if self.load_thread and self.load_thread.isRunning():
245
277
  self.load_thread.quit()
246
278
  self.load_thread.wait()
247
-
279
+
248
280
  offset = self.current_page * self.page_size
249
-
281
+
250
282
  # Get filters split into server-side and client-side
251
283
  server_filter = None
252
284
  self.client_filters = []
253
285
  if self.filter_group.isChecked() and self.filter_builder.has_filters():
254
286
  server_filter, self.client_filters = self.filter_builder.get_filters_split()
255
-
287
+
288
+ # If there are client-side filters, fetch the entire server-filtered set
289
+ # so we can apply client filters across all items, then paginate client-side.
290
+ req_limit = self.page_size
291
+ req_offset = offset
292
+ if self.client_filters:
293
+ req_limit = None
294
+ req_offset = None
295
+
256
296
  # Start background thread to load data
257
297
  self.load_thread = DataLoadThread(
258
298
  self.connection,
259
299
  self.current_collection,
260
- self.page_size,
261
- offset,
262
- server_filter
300
+ req_limit,
301
+ req_offset,
302
+ server_filter,
263
303
  )
264
304
  self.load_thread.finished.connect(self._on_data_loaded)
265
305
  self.load_thread.error.connect(self._on_load_error)
266
306
  self.load_thread.start()
267
-
307
+
268
308
  def _on_data_loaded(self, data: Dict[str, Any]):
269
309
  """Handle data loaded from background thread."""
270
- # Apply client-side filters if any
271
- if self.client_filters and data:
272
- data = apply_client_side_filters(data, self.client_filters)
273
-
310
+ # If no data returned
274
311
  if not data:
275
312
  self.status_label.setText("No data after filtering")
276
313
  self.table.setRowCount(0)
277
314
  return
278
-
315
+
316
+ # Apply client-side filters across the full dataset if present
317
+ full_data = data
318
+ if self.client_filters:
319
+ full_data = apply_client_side_filters(data, self.client_filters)
320
+
321
+ if not full_data or not full_data.get("ids"):
322
+ self.status_label.setText("No data after filtering")
323
+ self.table.setRowCount(0)
324
+ return
325
+
326
+ # If client-side filtering was used, perform pagination locally
327
+ if self.client_filters:
328
+ total_count = len(full_data.get("ids", []))
329
+ start = self.current_page * self.page_size
330
+ end = start + self.page_size
331
+
332
+ page_data = {}
333
+ for key in ("ids", "documents", "metadatas", "embeddings"):
334
+ lst = full_data.get(key, [])
335
+ page_data[key] = lst[start:end]
336
+
337
+ # Keep the full filtered data and expose the current page
338
+ self.current_data_full = full_data
339
+ self.current_data = page_data
340
+
341
+ self._populate_table(page_data)
342
+ self._update_pagination_controls(total_count=total_count)
343
+
344
+ # Update filter fields based on the full filtered dataset
345
+ self._update_filter_fields(full_data)
346
+
347
+ # Save full filtered dataset to cache
348
+ if self.current_database and self.current_collection:
349
+ log_info(
350
+ "[MetadataView] Saving filtered full dataset to cache: db='%s', coll='%s'",
351
+ self.current_database,
352
+ self.current_collection,
353
+ )
354
+ cache_entry = CacheEntry(
355
+ data=full_data,
356
+ scroll_position=self.table.verticalScrollBar().value(),
357
+ search_query=(
358
+ getattr(self.filter_builder, "to_dict")()
359
+ if callable(getattr(self.filter_builder, "to_dict", None))
360
+ else ""
361
+ ),
362
+ )
363
+ self.cache_manager.set(self.current_database, self.current_collection, cache_entry)
364
+ return
365
+
366
+ # After normal server-paginated load, if we were instructed to select
367
+ # a particular ID after load, attempt to find and select it.
368
+ if hasattr(self, "_select_id_after_load") and self._select_id_after_load:
369
+ try:
370
+ sel_id = self._select_id_after_load
371
+ ids = self.current_data.get("ids", []) if self.current_data else []
372
+ if ids and sel_id in ids:
373
+ row_idx = ids.index(sel_id)
374
+ # select and scroll to the row
375
+ self.table.selectRow(row_idx)
376
+ self.table.scrollToItem(self.table.item(row_idx, 0))
377
+ # clear the flag
378
+ self._select_id_after_load = None
379
+ except Exception:
380
+ self._select_id_after_load = None
381
+
382
+ # No client-side filters: display server-paginated data as before
279
383
  self.current_data = data
280
384
  self._populate_table(data)
281
385
  self._update_pagination_controls()
282
-
386
+
283
387
  # Update filter builder with available metadata fields
284
388
  self._update_filter_fields(data)
285
-
389
+
286
390
  # Save to cache
287
391
  if self.current_database and self.current_collection:
288
- log_info("[MetadataView] Saving to cache: db='%s', coll='%s'", self.current_database, self.current_collection)
392
+ log_info(
393
+ "[MetadataView] Saving to cache: db='%s', coll='%s'",
394
+ self.current_database,
395
+ self.current_collection,
396
+ )
289
397
  cache_entry = CacheEntry(
290
398
  data=data,
291
399
  scroll_position=self.table.verticalScrollBar().value(),
292
- search_query=self.filter_builder.to_dict() if hasattr(self.filter_builder, 'to_dict') else ""
400
+ search_query=(
401
+ getattr(self.filter_builder, "to_dict")()
402
+ if callable(getattr(self.filter_builder, "to_dict", None))
403
+ else ""
404
+ ),
293
405
  )
294
406
  self.cache_manager.set(self.current_database, self.current_collection, cache_entry)
295
- log_info("[MetadataView] ✓ Saved to cache. Total entries: %d", len(self.cache_manager._cache))
407
+ log_info(
408
+ "[MetadataView] ✓ Saved to cache. Total entries: %d", len(self.cache_manager._cache)
409
+ )
296
410
  else:
297
- log_info("[MetadataView] ✗ NOT saving to cache - db='%s', coll='%s'", self.current_database, self.current_collection)
298
-
411
+ log_info(
412
+ "[MetadataView] ✗ NOT saving to cache - db='%s', coll='%s'",
413
+ self.current_database,
414
+ self.current_collection,
415
+ )
416
+
299
417
  def _on_load_error(self, error_msg: str):
300
418
  """Handle error from background thread."""
301
419
  self.status_label.setText(f"Failed to load data: {error_msg}")
302
420
  self.table.setRowCount(0)
303
-
421
+
304
422
  def _update_filter_fields(self, data: Dict[str, Any]):
305
423
  """Update filter builder with available metadata field names."""
306
424
  field_names = []
307
-
425
+
308
426
  # Add 'document' field if documents exist
309
427
  documents = data.get("documents", [])
310
428
  if documents and any(doc for doc in documents if doc):
311
429
  field_names.append("document")
312
-
430
+
313
431
  # Add metadata fields
314
432
  metadatas = data.get("metadatas", [])
315
433
  if metadatas and len(metadatas) > 0 and metadatas[0]:
316
434
  # Get all unique metadata keys from the first item
317
435
  metadata_keys = sorted(metadatas[0].keys())
318
436
  field_names.extend(metadata_keys)
319
-
437
+
320
438
  if field_names:
321
439
  self.filter_builder.set_available_fields(field_names)
322
-
440
+
323
441
  def _populate_table(self, data: Dict[str, Any]):
324
442
  """Populate table with data."""
325
443
  ids = data.get("ids", [])
326
444
  documents = data.get("documents", [])
327
445
  metadatas = data.get("metadatas", [])
328
-
446
+
329
447
  if not ids:
330
448
  self.table.setRowCount(0)
331
449
  self.status_label.setText("No data in collection")
332
450
  return
333
-
451
+
334
452
  # Determine columns
335
453
  columns = ["ID", "Document"]
336
454
  if metadatas and metadatas[0]:
337
455
  metadata_keys = list(metadatas[0].keys())
338
456
  columns.extend(metadata_keys)
339
-
457
+
340
458
  self.table.setColumnCount(len(columns))
341
459
  self.table.setHorizontalHeaderLabels(columns)
342
460
  self.table.setRowCount(len(ids))
343
-
461
+
344
462
  # Populate rows
345
463
  for row, (id_val, doc, meta) in enumerate(zip(ids, documents, metadatas)):
346
464
  # ID column
347
465
  self.table.setItem(row, 0, QTableWidgetItem(str(id_val)))
348
-
466
+
349
467
  # Document column
350
468
  doc_text = str(doc) if doc else ""
351
469
  if len(doc_text) > 100:
352
470
  doc_text = doc_text[:100] + "..."
353
471
  self.table.setItem(row, 1, QTableWidgetItem(doc_text))
354
-
472
+
355
473
  # Metadata columns
356
474
  if meta:
357
475
  for col_idx, key in enumerate(metadata_keys, start=2):
358
476
  value = meta.get(key, "")
359
477
  self.table.setItem(row, col_idx, QTableWidgetItem(str(value)))
360
-
478
+
361
479
  self.table.resizeColumnsToContents()
362
480
  self.status_label.setText(f"Showing {len(ids)} items")
363
-
364
- def _update_pagination_controls(self):
365
- """Update pagination button states."""
481
+
482
+ def _update_pagination_controls(self, total_count: int = None):
483
+ """Update pagination button states.
484
+
485
+ If `total_count` is provided, use it to compute total pages. Otherwise
486
+ fall back to best-effort behavior based on current page size and items.
487
+ """
366
488
  if not self.current_data:
367
489
  return
368
-
369
- item_count = len(self.current_data.get("ids", []))
370
- has_more = item_count == self.page_size
371
-
490
+
491
+ if total_count is not None:
492
+ total_pages = max(1, math.ceil(total_count / self.page_size))
493
+ has_more = (self.current_page + 1) < total_pages
494
+ self.page_label.setText(f"{self.current_page + 1} / {total_pages}")
495
+ else:
496
+ item_count = len(self.current_data.get("ids", []))
497
+ has_more = item_count == self.page_size
498
+ self.page_label.setText(f"{self.current_page + 1}")
499
+
372
500
  self.prev_button.setEnabled(self.current_page > 0)
373
501
  self.next_button.setEnabled(has_more)
374
-
375
- # Update page label (approximate since ChromaDB doesn't give total count easily)
376
- self.page_label.setText(f"{self.current_page + 1}")
377
-
502
+
378
503
  def _previous_page(self):
379
504
  """Go to previous page."""
380
505
  if self.current_page > 0:
381
506
  self.current_page -= 1
382
507
  self._load_data()
383
-
508
+
384
509
  def _next_page(self):
385
510
  """Go to next page."""
386
511
  self.current_page += 1
387
512
  self._load_data()
388
-
513
+
389
514
  def _on_page_size_changed(self, value: int):
390
515
  """Handle page size change."""
391
516
  self.page_size = value
392
517
  self.current_page = 0
393
518
  self._load_data()
394
-
519
+
395
520
  def _add_item(self):
396
521
  """Add a new item to the collection."""
397
522
  if not self.current_collection:
398
523
  QMessageBox.warning(self, "No Collection", "Please select a collection first.")
399
524
  return
400
-
525
+
401
526
  dialog = ItemDialog(self)
402
-
527
+
403
528
  if dialog.exec() == QDialog.Accepted:
404
529
  item_data = dialog.get_item_data()
405
530
  if not item_data:
406
531
  return
407
-
532
+
408
533
  # Add item to collection
409
534
  success = self.connection.add_items(
410
535
  self.current_collection,
411
536
  documents=[item_data["document"]],
412
537
  metadatas=[item_data["metadata"]] if item_data["metadata"] else None,
413
- ids=[item_data["id"]] if item_data["id"] else None
538
+ ids=[item_data["id"]] if item_data["id"] else None,
414
539
  )
415
-
540
+
416
541
  if success:
417
542
  # Invalidate cache after adding item
418
543
  if self.current_database and self.current_collection:
419
544
  self.cache_manager.invalidate(self.current_database, self.current_collection)
420
545
  QMessageBox.information(self, "Success", "Item added successfully.")
421
- self._load_data()
546
+ # Preserve UI position: update the current table row in-place
547
+ try:
548
+ # Remember scroll position
549
+ vpos = self.table.verticalScrollBar().value()
550
+
551
+ # Invalidate cache so future full reloads will fetch fresh data
552
+ if self.current_database and self.current_collection:
553
+ self.cache_manager.invalidate(
554
+ self.current_database, self.current_collection
555
+ )
556
+
557
+ # Update in-memory current_data and visible table cells for this row
558
+ if self.current_data:
559
+ try:
560
+ # Update documents list
561
+ if "documents" in self.current_data and row < len(
562
+ self.current_data["documents"]
563
+ ):
564
+ self.current_data["documents"][row] = (
565
+ updated_data["document"] if updated_data["document"] else ""
566
+ )
567
+
568
+ # Update metadatas list
569
+ if "metadatas" in self.current_data and row < len(
570
+ self.current_data["metadatas"]
571
+ ):
572
+ self.current_data["metadatas"][row] = (
573
+ updated_data["metadata"] if updated_data["metadata"] else {}
574
+ )
575
+
576
+ # Update table document cell
577
+ doc_text = (
578
+ str(self.current_data["documents"][row])
579
+ if self.current_data["documents"][row]
580
+ else ""
581
+ )
582
+ if len(doc_text) > 100:
583
+ doc_text = doc_text[:100] + "..."
584
+ self.table.setItem(row, 1, QTableWidgetItem(doc_text))
585
+
586
+ # Update metadata columns based on current header names
587
+ metadata_keys = []
588
+ for col in range(2, self.table.columnCount()):
589
+ hdr = self.table.horizontalHeaderItem(col)
590
+ if hdr:
591
+ metadata_keys.append(hdr.text())
592
+
593
+ if "metadatas" in self.current_data:
594
+ meta = self.current_data["metadatas"][row]
595
+ for col_idx, key in enumerate(metadata_keys, start=2):
596
+ value = meta.get(key, "")
597
+ self.table.setItem(row, col_idx, QTableWidgetItem(str(value)))
598
+
599
+ # Restore scroll and selection
600
+ self.table.verticalScrollBar().setValue(vpos)
601
+ self.table.selectRow(row)
602
+ except Exception:
603
+ pass
604
+ except Exception:
605
+ # Fallback to full reload if anything goes wrong
606
+ self._load_data()
422
607
  else:
423
608
  QMessageBox.warning(self, "Error", "Failed to add item.")
424
-
609
+
425
610
  def _delete_selected(self):
426
611
  """Delete selected items."""
427
612
  if not self.current_collection:
428
613
  QMessageBox.warning(self, "No Collection", "Please select a collection first.")
429
614
  return
430
-
615
+
431
616
  selected_rows = self.table.selectionModel().selectedRows()
432
617
  if not selected_rows:
433
618
  QMessageBox.warning(self, "No Selection", "Please select items to delete.")
434
619
  return
435
-
620
+
436
621
  # Get IDs of selected items
437
622
  ids_to_delete = []
438
623
  for row in selected_rows:
439
624
  id_item = self.table.item(row.row(), 0)
440
625
  if id_item:
441
626
  ids_to_delete.append(id_item.text())
442
-
627
+
443
628
  # Confirm deletion
444
629
  reply = QMessageBox.question(
445
630
  self,
446
631
  "Confirm Deletion",
447
632
  f"Delete {len(ids_to_delete)} item(s)?",
448
- QMessageBox.Yes | QMessageBox.No
633
+ QMessageBox.Yes | QMessageBox.No,
449
634
  )
450
-
635
+
451
636
  if reply == QMessageBox.Yes:
452
637
  success = self.connection.delete_items(self.current_collection, ids=ids_to_delete)
453
638
  if success:
@@ -458,208 +643,298 @@ class MetadataView(QWidget):
458
643
  self._load_data()
459
644
  else:
460
645
  QMessageBox.warning(self, "Error", "Failed to delete items.")
461
-
646
+
462
647
  def _on_filter_changed(self):
463
648
  """Handle filter changes - debounce and reload data."""
464
649
  if self.filter_group.isChecked():
465
650
  # Restart the timer - will only fire 500ms after last change
466
651
  self.filter_reload_timer.stop()
467
652
  self.filter_reload_timer.start(500) # 500ms debounce
468
-
653
+
469
654
  def _reload_with_filters(self):
470
655
  """Reload data with current filters (called after debounce)."""
471
656
  self.current_page = 0
472
657
  self._load_data()
473
-
658
+
474
659
  def _apply_filters(self):
475
660
  """Apply filters when user presses Enter or clicks away."""
476
661
  if self.filter_group.isChecked() and self.current_collection:
477
662
  self.current_page = 0
478
663
  self._load_data()
479
-
664
+
480
665
  def _refresh_data(self):
481
666
  """Refresh data and invalidate cache."""
482
667
  if self.current_database and self.current_collection:
483
668
  self.cache_manager.invalidate(self.current_database, self.current_collection)
484
669
  self.current_page = 0
485
670
  self._load_data()
486
-
671
+
487
672
  def _on_row_double_clicked(self, index):
488
673
  """Handle double-click on a row to edit item."""
489
674
  if not self.current_collection or not self.current_data:
490
675
  return
491
-
676
+
492
677
  row = index.row()
493
678
  if row < 0 or row >= self.table.rowCount():
494
679
  return
495
-
680
+
496
681
  # Get item data for this row
497
682
  ids = self.current_data.get("ids", [])
498
683
  documents = self.current_data.get("documents", [])
499
684
  metadatas = self.current_data.get("metadatas", [])
500
-
685
+
501
686
  if row >= len(ids):
502
687
  return
503
-
688
+
504
689
  item_data = {
505
690
  "id": ids[row],
506
691
  "document": documents[row] if row < len(documents) else "",
507
- "metadata": metadatas[row] if row < len(metadatas) else {}
692
+ "metadata": metadatas[row] if row < len(metadatas) else {},
508
693
  }
509
-
694
+
510
695
  # Open edit dialog
511
696
  dialog = ItemDialog(self, item_data=item_data)
512
-
697
+
513
698
  if dialog.exec() == QDialog.Accepted:
514
699
  updated_data = dialog.get_item_data()
515
700
  if not updated_data:
516
701
  return
517
-
702
+
703
+ # Decide whether to generate embeddings on edit or preserve existing
704
+ embeddings_arg = None
705
+ try:
706
+ generate_on_edit = bool(self.generate_on_edit_checkbox.isChecked())
707
+ except Exception:
708
+ generate_on_edit = False
709
+
710
+ if not generate_on_edit:
711
+ # Try to preserve existing embedding for this row if present
712
+ existing_embs = self.current_data.get("embeddings", []) if self.current_data else []
713
+ if row < len(existing_embs):
714
+ existing = existing_embs[row]
715
+ if existing:
716
+ embeddings_arg = [existing]
717
+
518
718
  # Update item in collection
519
- success = self.connection.update_items(
520
- self.current_collection,
521
- ids=[updated_data["id"]],
522
- documents=[updated_data["document"]] if updated_data["document"] else None,
523
- metadatas=[updated_data["metadata"]] if updated_data["metadata"] else None
524
- )
525
-
719
+ if embeddings_arg is None:
720
+ # No embeddings passed -> will trigger regeneration when update_items supports it
721
+ success = self.connection.update_items(
722
+ self.current_collection,
723
+ ids=[updated_data["id"]],
724
+ documents=[updated_data["document"]] if updated_data["document"] else None,
725
+ metadatas=[updated_data["metadata"]] if updated_data["metadata"] else None,
726
+ )
727
+ else:
728
+ # Pass existing embeddings to preserve them
729
+ success = self.connection.update_items(
730
+ self.current_collection,
731
+ ids=[updated_data["id"]],
732
+ documents=[updated_data["document"]] if updated_data["document"] else None,
733
+ metadatas=[updated_data["metadata"]] if updated_data["metadata"] else None,
734
+ embeddings=embeddings_arg,
735
+ )
736
+
526
737
  if success:
527
738
  # Invalidate cache after updating item
528
739
  if self.current_database and self.current_collection:
529
740
  self.cache_manager.invalidate(self.current_database, self.current_collection)
530
- QMessageBox.information(self, "Success", "Item updated successfully.")
741
+
742
+ # Show info about embedding regeneration/preservation when applicable
743
+ try:
744
+ generate_on_edit = bool(self.generate_on_edit_checkbox.isChecked())
745
+ except Exception:
746
+ generate_on_edit = False
747
+
748
+ regen_count = 0
749
+ try:
750
+ regen_count = int(getattr(self.connection, "_last_regenerated_count", 0) or 0)
751
+ except Exception:
752
+ regen_count = 0
753
+
754
+ if generate_on_edit:
755
+ if regen_count > 0:
756
+ QMessageBox.information(
757
+ self,
758
+ "Success",
759
+ f"Item updated and embeddings regenerated ({regen_count}).",
760
+ )
761
+ else:
762
+ QMessageBox.information(
763
+ self, "Success", "Item updated. No embeddings were regenerated."
764
+ )
765
+ else:
766
+ # embedding preservation mode
767
+ if regen_count == 0:
768
+ QMessageBox.information(
769
+ self, "Success", "Item updated and existing embedding preserved."
770
+ )
771
+ else:
772
+ QMessageBox.information(
773
+ self,
774
+ "Success",
775
+ "Item updated.", # Fallback message
776
+ )
777
+
778
+ # If embeddings were regenerated, server ordering may have changed.
779
+ # Locate the updated item on the server (respecting server-side filters),
780
+ # compute its page and load that page while selecting the row. This
781
+ # ensures the edited item becomes visible even if the backend moved it.
782
+ try:
783
+ server_filter = None
784
+ if self.filter_group.isChecked() and self.filter_builder.has_filters():
785
+ server_filter, _ = self.filter_builder.get_filters_split()
786
+
787
+ full = self.connection.get_all_items(
788
+ self.current_collection, limit=None, offset=None, where=server_filter
789
+ )
790
+ if full and full.get("ids"):
791
+ all_ids = full.get("ids", [])
792
+ updated_id = updated_data.get("id")
793
+ if updated_id in all_ids:
794
+ idx = all_ids.index(updated_id)
795
+ target_page = idx // self.page_size
796
+ # set selection flag and load target page
797
+ self._select_id_after_load = updated_id
798
+ self.current_page = target_page
799
+ self._load_data()
800
+ return
801
+ except Exception:
802
+ pass
803
+
804
+ # Fallback: reload current page so UI reflects server state
531
805
  self._load_data()
532
806
  else:
533
807
  QMessageBox.warning(self, "Error", "Failed to update item.")
534
-
808
+
535
809
  def _export_data(self, format_type: str):
536
810
  """Export current table data to file (visible rows or selected rows)."""
537
811
  if not self.current_collection:
538
812
  QMessageBox.warning(self, "No Collection", "Please select a collection first.")
539
813
  return
540
-
814
+
541
815
  if not self.current_data or not self.current_data.get("ids"):
542
816
  QMessageBox.warning(self, "No Data", "No data to export.")
543
817
  return
544
-
818
+
545
819
  # Check if there are selected rows
546
820
  selected_rows = self.table.selectionModel().selectedRows()
547
-
821
+
548
822
  if selected_rows:
549
823
  # Export only selected rows
550
- export_data = {
551
- "ids": [],
552
- "documents": [],
553
- "metadatas": [],
554
- "embeddings": []
555
- }
556
-
824
+ export_data = {"ids": [], "documents": [], "metadatas": [], "embeddings": []}
825
+
557
826
  for index in selected_rows:
558
827
  row = index.row()
559
828
  if row < len(self.current_data["ids"]):
560
829
  export_data["ids"].append(self.current_data["ids"][row])
561
- if "documents" in self.current_data and row < len(self.current_data["documents"]):
830
+ if "documents" in self.current_data and row < len(
831
+ self.current_data["documents"]
832
+ ):
562
833
  export_data["documents"].append(self.current_data["documents"][row])
563
- if "metadatas" in self.current_data and row < len(self.current_data["metadatas"]):
834
+ if "metadatas" in self.current_data and row < len(
835
+ self.current_data["metadatas"]
836
+ ):
564
837
  export_data["metadatas"].append(self.current_data["metadatas"][row])
565
- if "embeddings" in self.current_data and row < len(self.current_data["embeddings"]):
838
+ if "embeddings" in self.current_data and row < len(
839
+ self.current_data["embeddings"]
840
+ ):
566
841
  export_data["embeddings"].append(self.current_data["embeddings"][row])
567
842
  else:
568
843
  # Export all visible data from current table
569
844
  export_data = self.current_data
570
-
845
+
571
846
  # Select file path
572
847
  file_filters = {
573
848
  "json": "JSON Files (*.json)",
574
849
  "csv": "CSV Files (*.csv)",
575
- "parquet": "Parquet Files (*.parquet)"
850
+ "parquet": "Parquet Files (*.parquet)",
576
851
  }
577
-
852
+
578
853
  # Get last used directory from settings
579
854
  last_dir = self.settings_service.get("last_import_export_dir", "")
580
- default_path = f"{last_dir}/{self.current_collection}.{format_type}" if last_dir else f"{self.current_collection}.{format_type}"
581
-
855
+ default_path = (
856
+ f"{last_dir}/{self.current_collection}.{format_type}"
857
+ if last_dir
858
+ else f"{self.current_collection}.{format_type}"
859
+ )
860
+
582
861
  file_path, _ = QFileDialog.getSaveFileName(
583
- self,
584
- f"Export to {format_type.upper()}",
585
- default_path,
586
- file_filters[format_type]
862
+ self, f"Export to {format_type.upper()}", default_path, file_filters[format_type]
587
863
  )
588
-
864
+
589
865
  if not file_path:
590
866
  return
591
-
867
+
592
868
  # Export
593
869
  service = ImportExportService()
594
870
  success = False
595
-
871
+
596
872
  if format_type == "json":
597
873
  success = service.export_to_json(export_data, file_path)
598
874
  elif format_type == "csv":
599
875
  success = service.export_to_csv(export_data, file_path)
600
876
  elif format_type == "parquet":
601
877
  success = service.export_to_parquet(export_data, file_path)
602
-
878
+
603
879
  if success:
604
880
  # Save the directory for next time
605
881
  from pathlib import Path
882
+
606
883
  self.settings_service.set("last_import_export_dir", str(Path(file_path).parent))
607
-
884
+
608
885
  QMessageBox.information(
609
886
  self,
610
887
  "Export Successful",
611
- f"Exported {len(export_data['ids'])} items to {file_path}"
888
+ f"Exported {len(export_data['ids'])} items to {file_path}",
612
889
  )
613
890
  else:
614
891
  QMessageBox.warning(self, "Export Failed", "Failed to export data.")
615
-
892
+
616
893
  def _import_data(self, format_type: str):
617
894
  """Import data from file into collection."""
618
895
  if not self.current_collection:
619
896
  QMessageBox.warning(self, "No Collection", "Please select a collection first.")
620
897
  return
621
-
898
+
622
899
  # Select file to import
623
900
  file_filters = {
624
901
  "json": "JSON Files (*.json)",
625
902
  "csv": "CSV Files (*.csv)",
626
- "parquet": "Parquet Files (*.parquet)"
903
+ "parquet": "Parquet Files (*.parquet)",
627
904
  }
628
-
905
+
629
906
  # Get last used directory from settings
630
907
  last_dir = self.settings_service.get("last_import_export_dir", "")
631
-
908
+
632
909
  file_path, _ = QFileDialog.getOpenFileName(
633
- self,
634
- f"Import from {format_type.upper()}",
635
- last_dir,
636
- file_filters[format_type]
910
+ self, f"Import from {format_type.upper()}", last_dir, file_filters[format_type]
637
911
  )
638
-
912
+
639
913
  if not file_path:
640
914
  return
641
-
915
+
642
916
  # Import
643
917
  self.loading_dialog.show_loading("Importing data...")
644
918
  QApplication.processEvents()
645
-
919
+
646
920
  try:
647
921
  service = ImportExportService()
648
922
  imported_data = None
649
-
923
+
650
924
  if format_type == "json":
651
925
  imported_data = service.import_from_json(file_path)
652
926
  elif format_type == "csv":
653
927
  imported_data = service.import_from_csv(file_path)
654
928
  elif format_type == "parquet":
655
929
  imported_data = service.import_from_parquet(file_path)
656
-
930
+
657
931
  if not imported_data:
658
932
  QMessageBox.warning(self, "Import Failed", "Failed to parse import file.")
659
933
  return
660
-
934
+
661
935
  # Handle Qdrant-specific requirements (similar to backup/restore)
662
936
  from vector_inspector.core.connections.qdrant_connection import QdrantConnection
937
+
663
938
  if isinstance(self.connection, QdrantConnection):
664
939
  # Check if embeddings are missing and need to be generated
665
940
  if not imported_data.get("embeddings"):
@@ -667,20 +942,26 @@ class MetadataView(QWidget):
667
942
  QApplication.processEvents()
668
943
  try:
669
944
  from sentence_transformers import SentenceTransformer
945
+
670
946
  model = SentenceTransformer("all-MiniLM-L6-v2")
671
947
  documents = imported_data.get("documents", [])
672
- imported_data["embeddings"] = model.encode(documents, show_progress_bar=False).tolist()
948
+ imported_data["embeddings"] = model.encode(
949
+ documents, show_progress_bar=False
950
+ ).tolist()
673
951
  except Exception as e:
674
- QMessageBox.warning(self, "Import Failed",
675
- f"Qdrant requires embeddings. Failed to generate: {e}")
952
+ QMessageBox.warning(
953
+ self,
954
+ "Import Failed",
955
+ f"Qdrant requires embeddings. Failed to generate: {e}",
956
+ )
676
957
  return
677
-
958
+
678
959
  # Convert IDs to Qdrant-compatible format (integers or UUIDs)
679
960
  # Store original IDs in metadata
680
961
  original_ids = imported_data.get("ids", [])
681
962
  qdrant_ids = []
682
963
  metadatas = imported_data.get("metadatas", [])
683
-
964
+
684
965
  for i, orig_id in enumerate(original_ids):
685
966
  # Try to convert to integer, otherwise use index
686
967
  try:
@@ -692,9 +973,9 @@ class MetadataView(QWidget):
692
973
  except (ValueError, AttributeError):
693
974
  # Use index as ID if can't convert
694
975
  qdrant_id = i
695
-
976
+
696
977
  qdrant_ids.append(qdrant_id)
697
-
978
+
698
979
  # Store original ID in metadata
699
980
  if i < len(metadatas):
700
981
  if metadatas[i] is None:
@@ -702,36 +983,34 @@ class MetadataView(QWidget):
702
983
  metadatas[i]["original_id"] = orig_id
703
984
  else:
704
985
  metadatas.append({"original_id": orig_id})
705
-
986
+
706
987
  imported_data["ids"] = qdrant_ids
707
988
  imported_data["metadatas"] = metadatas
708
-
989
+
709
990
  # Add items to collection
710
991
  success = self.connection.add_items(
711
992
  self.current_collection,
712
993
  documents=imported_data["documents"],
713
994
  metadatas=imported_data.get("metadatas"),
714
995
  ids=imported_data.get("ids"),
715
- embeddings=imported_data.get("embeddings")
996
+ embeddings=imported_data.get("embeddings"),
716
997
  )
717
998
  finally:
718
999
  self.loading_dialog.hide_loading()
719
-
1000
+
720
1001
  if success:
721
1002
  # Invalidate cache after import
722
1003
  if self.current_database and self.current_collection:
723
1004
  self.cache_manager.invalidate(self.current_database, self.current_collection)
724
-
1005
+
725
1006
  # Save the directory for next time
726
1007
  from pathlib import Path
1008
+
727
1009
  self.settings_service.set("last_import_export_dir", str(Path(file_path).parent))
728
-
1010
+
729
1011
  QMessageBox.information(
730
- self,
731
- "Import Successful",
732
- f"Imported {len(imported_data['ids'])} items."
1012
+ self, "Import Successful", f"Imported {len(imported_data['ids'])} items."
733
1013
  )
734
1014
  self._load_data()
735
1015
  else:
736
1016
  QMessageBox.warning(self, "Import Failed", "Failed to import data.")
737
-