sqlshell 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlshell might be problematic. Click here for more details.

sqlshell/main.py CHANGED
@@ -18,18 +18,20 @@ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
18
18
  QCompleter, QFrame, QToolButton, QSizePolicy, QTabWidget,
19
19
  QStyleFactory, QToolBar, QStatusBar, QLineEdit, QMenu,
20
20
  QCheckBox, QWidgetAction, QMenuBar, QInputDialog, QProgressDialog,
21
- QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem)
21
+ QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem,
22
+ QComboBox)
22
23
  from PyQt6.QtCore import Qt, QAbstractTableModel, QRegularExpression, QRect, QSize, QStringListModel, QPropertyAnimation, QEasingCurve, QTimer, QPoint, QMimeData
23
24
  from PyQt6.QtGui import QFont, QColor, QSyntaxHighlighter, QTextCharFormat, QPainter, QTextFormat, QTextCursor, QIcon, QPalette, QLinearGradient, QBrush, QPixmap, QPolygon, QPainterPath, QDrag
24
25
  import numpy as np
25
26
  from datetime import datetime
27
+ import psutil
26
28
 
27
29
  from sqlshell import create_test_data
28
30
  from sqlshell.splash_screen import AnimatedSplashScreen
29
31
  from sqlshell.syntax_highlighter import SQLSyntaxHighlighter
30
32
  from sqlshell.editor import LineNumberArea, SQLEditor
31
33
  from sqlshell.ui import FilterHeader, BarChartDelegate
32
- from sqlshell.db import DatabaseManager
34
+ from sqlshell.db import DatabaseManager, ExportManager
33
35
  from sqlshell.query_tab import QueryTab
34
36
  from sqlshell.styles import (get_application_stylesheet, get_tab_corner_stylesheet,
35
37
  get_context_menu_stylesheet,
@@ -42,6 +44,7 @@ class SQLShell(QMainWindow):
42
44
  def __init__(self):
43
45
  super().__init__()
44
46
  self.db_manager = DatabaseManager()
47
+ self.export_manager = ExportManager(self.db_manager)
45
48
  self.current_df = None # Store the current DataFrame for filtering
46
49
  self.filter_widgets = [] # Store filter line edits
47
50
  self.current_project_file = None # Store the current project file path
@@ -188,6 +191,12 @@ class SQLShell(QMainWindow):
188
191
  tables_header.setStyleSheet(get_tables_header_stylesheet())
189
192
  left_layout.addWidget(tables_header)
190
193
 
194
+ # Tables info label
195
+ tables_info = QLabel("Right-click on tables to profile columns, analyze structure, and discover distributions. Select multiple tables to analyze foreign key relationships.")
196
+ tables_info.setWordWrap(True)
197
+ tables_info.setStyleSheet("color: #7FB3D5; font-size: 11px; margin-top: 2px; margin-bottom: 5px;")
198
+ left_layout.addWidget(tables_info)
199
+
191
200
  # Tables list with custom styling
192
201
  self.tables_list = DraggableTablesList(self)
193
202
  self.tables_list.itemClicked.connect(self.show_table_preview)
@@ -210,6 +219,39 @@ class SQLShell(QMainWindow):
210
219
  query_header.setObjectName("header_label")
211
220
  right_layout.addWidget(query_header)
212
221
 
222
+ # Create a drop area for tables above the tab widget
223
+ self.tab_drop_area = QFrame()
224
+ self.tab_drop_area.setFixedHeight(30)
225
+ self.tab_drop_area.setObjectName("tab_drop_area")
226
+
227
+ # Add a label with hint text
228
+ drop_area_layout = QHBoxLayout(self.tab_drop_area)
229
+ drop_area_layout.setContentsMargins(10, 0, 10, 0)
230
+ self.drop_hint_label = QLabel("Drag tables here to create new query tabs")
231
+ self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
232
+ self.drop_hint_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
233
+ drop_area_layout.addWidget(self.drop_hint_label)
234
+
235
+ self.tab_drop_area.setStyleSheet("""
236
+ #tab_drop_area {
237
+ background-color: #f8f9fa;
238
+ border: 1px dashed #BDC3C7;
239
+ border-radius: 4px;
240
+ margin: 0 0 5px 0;
241
+ }
242
+
243
+ #tab_drop_area:hover {
244
+ background-color: #E5F7FF;
245
+ border: 1px dashed #3498DB;
246
+ }
247
+ """)
248
+ self.tab_drop_area.setAcceptDrops(True)
249
+ self.tab_drop_area.dragEnterEvent = self.tab_area_drag_enter
250
+ self.tab_drop_area.dragMoveEvent = self.tab_area_drag_move
251
+ self.tab_drop_area.dragLeaveEvent = self.tab_area_drag_leave
252
+ self.tab_drop_area.dropEvent = self.tab_area_drop
253
+ right_layout.addWidget(self.tab_drop_area)
254
+
213
255
  # Create tab widget for multiple queries
214
256
  self.tab_widget = QTabWidget()
215
257
  self.tab_widget.setTabsClosable(True)
@@ -231,6 +273,100 @@ class SQLShell(QMainWindow):
231
273
  # Status bar
232
274
  self.statusBar().showMessage('Ready | Ctrl+Enter: Execute Query | Ctrl+K: Toggle Comment | Ctrl+T: New Tab | Ctrl+Shift+O: Quick Access Files')
233
275
 
276
+ # Methods for handling drag and drop on the tab drop area
277
def tab_area_drag_enter(self, event):
    """Handle a drag entering the tab drop area.

    A drag is accepted only when it originates from the tables list AND
    carries text (the ", "-separated table names). On acceptance the hint
    label announces what releasing will do and the drop area is highlighted
    and enlarged. Every other drag is explicitly ignored.

    Args:
        event: the drag-enter event delivered to the drop area.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm against the real file that highlight/accept are meant
    # to happen only when the drag carries text.
    if event.source() != self.tables_list:
        event.ignore()
        return

    mime_data = event.mimeData()
    if not mime_data.hasText():
        # Nothing the drop handler could turn into a tab: refuse explicitly
        # instead of leaving the event neither accepted nor ignored.
        event.ignore()
        return

    # Table names travel as a single ", "-joined string
    table_names = mime_data.text().split(", ")
    if len(table_names) == 1:
        hint = f"Release to create a new query tab for {table_names[0]}"
    else:
        hint = f"Release to create {len(table_names)} new query tabs"

    self.drop_hint_label.setText(hint)
    self.drop_hint_label.setStyleSheet("color: #3498db; font-size: 11px; font-weight: bold;")

    # Highlight the drop area
    self.tab_drop_area.setStyleSheet("""
        #tab_drop_area {
            background-color: #E5F7FF;
            border: 2px dashed #3498DB;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """)
    self.tab_drop_area.setFixedHeight(40)
    event.acceptProposedAction()
305
+
306
def tab_area_drag_move(self, event):
    """Keep accepting a drag while it moves over the tab drop area.

    Drags that started in the tables list are accepted with their proposed
    action; drags from any other source are ignored.
    """
    came_from_tables_list = event.source() == self.tables_list
    if not came_from_tables_list:
        event.ignore()
        return
    event.acceptProposedAction()
313
+
314
def tab_area_drag_leave(self, event):
    """Restore the idle look of the tab drop area when a drag leaves it.

    Puts back the idle stylesheet, the default hint text and hint colour,
    and the 30px height. The event itself is not touched.
    """
    idle_area_style = """
        #tab_drop_area {
            background-color: #f8f9fa;
            border: 1px dashed #BDC3C7;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """
    idle_hint_text = "Drag tables here to create new query tabs"
    idle_hint_style = "color: #95a5a6; font-size: 11px;"

    drop_area = self.tab_drop_area
    hint_label = self.drop_hint_label

    drop_area.setStyleSheet(idle_area_style)
    hint_label.setText(idle_hint_text)
    hint_label.setStyleSheet(idle_hint_style)
    drop_area.setFixedHeight(30)
    # Handlers are assigned onto the frame instance, so there is no parent
    # implementation to forward the event to.
329
+
330
def tab_area_drop(self, event):
    """Handle a drop on the tab drop area by opening one query tab per table.

    For a drop that originates from the tables list and carries text, the
    text is split on ", " into table names. Each table that is flagged as
    needing a reload is reloaded first (without prompting), then a new tab
    titled "Query <table>" is created and pre-filled with
    "SELECT * FROM <table>". The drop area is always returned to its idle
    look for drops coming from the tables list, and the event is always
    either accepted or ignored.

    Args:
        event: the drop event delivered to the drop area.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view; confirm against the real file.
    if event.source() != self.tables_list:
        event.ignore()
        return

    mime_data = event.mimeData()
    # Drop empty fragments so a stray separator cannot create a tab with
    # an incomplete "SELECT * FROM " query.
    table_names = (
        [name for name in mime_data.text().split(", ") if name]
        if mime_data.hasText()
        else []
    )

    for table_name in table_names:
        # Reload the table immediately, without asking, if it is stale
        if table_name in self.tables_list.tables_needing_reload:
            self.reload_selected_table(table_name)

        new_tab = self.add_tab(f"Query {table_name}")
        new_tab.set_query_text(f"SELECT * FROM {table_name}")

    if table_names:
        self.statusBar().showMessage(f"Created new tab{'s' if len(table_names) > 1 else ''} for {', '.join(table_names)}")

    # Restore the idle appearance on every path, so the area cannot stay
    # highlighted after a drop that produced no tabs.
    self.tab_drop_area.setStyleSheet("""
        #tab_drop_area {
            background-color: #f8f9fa;
            border: 1px dashed #BDC3C7;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """)
    self.drop_hint_label.setText("Drag tables here to create new query tabs")
    self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
    self.tab_drop_area.setFixedHeight(30)

    if table_names:
        event.acceptProposedAction()
    else:
        event.ignore()
369
+
234
370
  def create_tab_corner_widget(self):
235
371
  """Create a corner widget with a + button to add new tabs"""
236
372
  corner_widget = QWidget()
@@ -284,25 +420,126 @@ class SQLShell(QMainWindow):
284
420
  headers = [str(col) for col in df.columns]
285
421
  current_tab.results_table.setHorizontalHeaderLabels(headers)
286
422
 
287
- # Calculate chunk size (adjust based on available memory)
288
- CHUNK_SIZE = 1000
289
-
290
- # Process data in chunks to avoid memory issues with large datasets
291
- for chunk_start in range(0, row_count, CHUNK_SIZE):
292
- chunk_end = min(chunk_start + CHUNK_SIZE, row_count)
293
- chunk = df.iloc[chunk_start:chunk_end]
423
+ # Calculate dynamic chunk size based on available memory
424
+ import psutil
425
+ available_memory = psutil.virtual_memory().available
426
+ # Use 10% of available memory for chunking, with a minimum of 1000 rows
427
+ memory_per_row = df.memory_usage(deep=True).sum() / len(df)
428
+ CHUNK_SIZE = max(1000, min(10000, int(available_memory * 0.1 / memory_per_row)))
429
+
430
+ # Add pagination controls if dataset is large
431
+ if row_count > CHUNK_SIZE:
432
+ # Remove any existing pagination widgets
433
+ for i in reversed(range(current_tab.results_layout.count())):
434
+ item = current_tab.results_layout.itemAt(i)
435
+ widget = item.widget() if item is not None else None
436
+ if widget and widget.objectName() == "pagination_widget":
437
+ current_tab.results_layout.removeWidget(widget)
438
+ widget.setParent(None)
439
+ widget.deleteLater()
440
+
441
+ # Create pagination widget
442
+ pagination_widget = QWidget()
443
+ pagination_widget.setObjectName("pagination_widget")
444
+ pagination_layout = QHBoxLayout(pagination_widget)
445
+
446
+ # Add page size selector
447
+ page_size_label = QLabel("Rows per page:")
448
+ page_size_combo = QComboBox()
449
+ page_sizes = [1000, 5000, 10000, 50000, 100000]
450
+ page_size_combo.addItems([str(size) for size in page_sizes])
451
+ page_size_combo.setCurrentText(str(CHUNK_SIZE))
452
+
453
+ # Add navigation buttons
454
+ prev_btn = QPushButton("Previous")
455
+ next_btn = QPushButton("Next")
456
+ page_label = QLabel("Page 1")
457
+
458
+ # Add widgets to layout
459
+ pagination_layout.addWidget(page_size_label)
460
+ pagination_layout.addWidget(page_size_combo)
461
+ pagination_layout.addStretch()
462
+ pagination_layout.addWidget(prev_btn)
463
+ pagination_layout.addWidget(page_label)
464
+ pagination_layout.addWidget(next_btn)
465
+
466
+ # Add pagination widget to results layout
467
+ current_tab.results_layout.addWidget(pagination_widget)
468
+
469
+ # Store pagination state
470
+ current_tab.pagination_state = {
471
+ 'current_page': 0,
472
+ 'page_size': CHUNK_SIZE,
473
+ 'total_pages': (row_count + CHUNK_SIZE - 1) // CHUNK_SIZE,
474
+ 'page_label': page_label,
475
+ 'prev_btn': prev_btn,
476
+ 'next_btn': next_btn,
477
+ 'page_size_combo': page_size_combo
478
+ }
294
479
 
295
- # Add rows for this chunk
296
- current_tab.results_table.setRowCount(chunk_end)
480
+ # Connect pagination signals
481
+ def update_page_size(size):
482
+ current_tab.pagination_state['page_size'] = int(size)
483
+ current_tab.pagination_state['total_pages'] = (row_count + int(size) - 1) // int(size)
484
+ current_tab.pagination_state['current_page'] = 0
485
+ load_current_page()
486
+
487
+ def load_current_page():
488
+ state = current_tab.pagination_state
489
+ start_idx = state['current_page'] * state['page_size']
490
+ end_idx = min(start_idx + state['page_size'], row_count)
491
+
492
+ # Clear existing rows
493
+ current_tab.results_table.setRowCount(0)
494
+
495
+ # Load current page
496
+ chunk = df.iloc[start_idx:end_idx]
497
+ current_tab.results_table.setRowCount(len(chunk))
498
+
499
+ for row_idx, (_, row_data) in enumerate(chunk.iterrows()):
500
+ for col_idx, value in enumerate(row_data):
501
+ formatted_value = self.format_value(value)
502
+ item = QTableWidgetItem(formatted_value)
503
+ current_tab.results_table.setItem(row_idx, col_idx, item)
504
+
505
+ # Update pagination controls
506
+ state['page_label'].setText(f"Page {state['current_page'] + 1} of {state['total_pages']}")
507
+ state['prev_btn'].setEnabled(state['current_page'] > 0)
508
+ state['next_btn'].setEnabled(state['current_page'] < state['total_pages'] - 1)
509
+
510
+ # Process events to keep UI responsive
511
+ QApplication.processEvents()
512
+
513
+ def next_page():
514
+ if current_tab.pagination_state['current_page'] < current_tab.pagination_state['total_pages'] - 1:
515
+ current_tab.pagination_state['current_page'] += 1
516
+ load_current_page()
517
+
518
+ def prev_page():
519
+ if current_tab.pagination_state['current_page'] > 0:
520
+ current_tab.pagination_state['current_page'] -= 1
521
+ load_current_page()
522
+
523
+ # Connect signals
524
+ page_size_combo.currentTextChanged.connect(update_page_size)
525
+ next_btn.clicked.connect(next_page)
526
+ prev_btn.clicked.connect(prev_page)
527
+
528
+ # Load first page
529
+ load_current_page()
530
+ else:
531
+ # For smaller datasets, load all at once
532
+ current_tab.results_table.setRowCount(row_count)
297
533
 
298
- for row_idx, (_, row_data) in enumerate(chunk.iterrows(), start=chunk_start):
534
+ for row_idx, (_, row_data) in enumerate(df.iterrows()):
299
535
  for col_idx, value in enumerate(row_data):
300
536
  formatted_value = self.format_value(value)
301
537
  item = QTableWidgetItem(formatted_value)
302
538
  current_tab.results_table.setItem(row_idx, col_idx, item)
303
-
304
- # Process events to keep UI responsive
305
- QApplication.processEvents()
539
+
540
+ # Process events periodically to keep UI responsive
541
+ if row_idx % 1000 == 0:
542
+ QApplication.processEvents()
306
543
 
307
544
  # Optimize column widths
308
545
  current_tab.results_table.resizeColumnsToContents()
@@ -630,6 +867,31 @@ class SQLShell(QMainWindow):
630
867
  QMessageBox.warning(self, "Empty Query", "Please enter a SQL query to execute.")
631
868
  return
632
869
 
870
+ # Check if the query references any tables that need to be loaded
871
+ referenced_tables = self.extract_table_names_from_query(query)
872
+ tables_to_load = [table for table in referenced_tables if table in self.tables_list.tables_needing_reload]
873
+
874
+ # Load any tables that need to be loaded
875
+ if tables_to_load:
876
+ progress = QProgressDialog(f"Loading tables...", "Cancel", 0, len(tables_to_load), self)
877
+ progress.setWindowTitle("Loading Tables")
878
+ progress.setWindowModality(Qt.WindowModality.WindowModal)
879
+ progress.show()
880
+
881
+ for i, table_name in enumerate(tables_to_load):
882
+ if progress.wasCanceled():
883
+ self.statusBar().showMessage("Query canceled: table loading was interrupted")
884
+ return
885
+
886
+ progress.setLabelText(f"Loading table: {table_name}")
887
+ progress.setValue(i)
888
+ QApplication.processEvents()
889
+
890
+ self.reload_selected_table(table_name)
891
+
892
+ progress.setValue(len(tables_to_load))
893
+ progress.close()
894
+
633
895
  start_time = datetime.now()
634
896
 
635
897
  try:
@@ -666,6 +928,57 @@ class SQLShell(QMainWindow):
666
928
  QMessageBox.critical(self, "Unexpected Error",
667
929
  f"An unexpected error occurred:\n\n{str(e)}")
668
930
  self.statusBar().showMessage("Query execution failed")
931
+
932
def extract_table_names_from_query(self, query):
    """Extract the table names a SQL query refers to, using regex heuristics.

    This is a best-effort scan, not a SQL parser: it looks for identifiers
    that follow FROM, JOIN, UPDATE, INSERT INTO, DELETE FROM,
    CREATE/DROP/ALTER TABLE, MERGE INTO, and for CTE names after WITH.
    SQL comments are removed before matching.

    Args:
        query: the SQL text to scan.

    Returns:
        A set of lowercase names. A qualified reference such as
        ``schema.table`` contributes both ``"schema.table"`` and the bare
        ``"table"``.
    """
    import re

    # Strip comments to avoid matching patterns inside them. Substitute a
    # space rather than nothing so "FROM/* c */t" does not fuse into "FROMt".
    sql = re.sub(r'--.*?$', ' ', query, flags=re.MULTILINE)
    sql = re.sub(r'/\*.*?\*/', ' ', sql, flags=re.DOTALL)

    # One optionally quoted identifier, and a dotted chain of them. The
    # chain is what lets "schema.table" be captured as a whole.
    ident = r'["\[`]?\w+["\]`]?'
    name = rf'((?:{ident}\.)*{ident})'

    # Common SQL constructs that reference tables
    patterns = [
        rf'\bFROM\s+{name}',                                                          # FROM clause
        rf'\bJOIN\s+{name}',                                                          # JOIN clause
        rf'\bUPDATE\s+{name}',                                                        # UPDATE statement
        rf'\bINSERT\s+INTO\s+{name}',                                                 # INSERT statement
        rf'\bDELETE\s+FROM\s+{name}',                                                 # DELETE statement
        rf'\bCREATE\s+(?:TEMP|TEMPORARY)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?{name}',  # CREATE TABLE
        rf'\bDROP\s+TABLE\s+(?:IF\s+EXISTS\s+)?{name}',                               # DROP TABLE
        rf'\bALTER\s+TABLE\s+{name}',                                                 # ALTER TABLE
        r'\bWITH\s+(\w+)\s+AS',                                                       # Common Table Expressions
        rf'\bMERGE\s+INTO\s+{name}',                                                  # MERGE statement
    ]

    # Words that can follow FROM/JOIN/... without being a table name
    sql_keywords = {
        'SELECT', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'OFFSET',
        'UNION', 'INTERSECT', 'EXCEPT', 'WITH', 'AS', 'ON', 'USING',
    }

    tables = set()
    for pattern in patterns:
        # Match case-insensitively on the original text instead of
        # upper-casing it: an upper()/lower() round trip is not lossless
        # (e.g. "ß" would come back as "ss").
        for match in re.finditer(pattern, sql, flags=re.IGNORECASE):
            parts = [part.strip('"[]`\'') for part in match.group(1).split('.')]

            # Skip SQL keywords
            if len(parts) == 1 and parts[0].upper() in sql_keywords:
                continue

            # Lowercase for case-insensitive comparison by the caller
            tables.add('.'.join(parts).lower())
            # For schema.table also expose just the table part
            tables.add(parts[-1].lower())

    return tables
669
982
 
670
983
  def _update_query_history(self, query):
671
984
  """Update query history and track term usage for improved autocompletion"""
@@ -808,25 +1121,32 @@ class SQLShell(QMainWindow):
808
1121
  # Generate test data
809
1122
  sales_df = create_test_data.create_sales_data()
810
1123
  customer_df = create_test_data.create_customer_data()
1124
+ large_customer_df = create_test_data.create_large_customer_data()
811
1125
  product_df = create_test_data.create_product_data()
812
1126
  large_numbers_df = create_test_data.create_large_numbers_data()
1127
+ california_housing_df = create_test_data.create_california_housing_data()
813
1128
 
814
1129
  # Save test data to temporary directory
815
1130
  sales_path = os.path.join(temp_dir, 'sample_sales_data.xlsx')
816
1131
  customer_path = os.path.join(temp_dir, 'customer_data.parquet')
817
1132
  product_path = os.path.join(temp_dir, 'product_catalog.xlsx')
818
1133
  large_numbers_path = os.path.join(temp_dir, 'large_numbers.xlsx')
819
-
1134
+ large_customer_path = os.path.join(temp_dir, 'large_customer_data.parquet')
1135
+ california_housing_path = os.path.join(temp_dir, 'california_housing_data.parquet')
820
1136
  sales_df.to_excel(sales_path, index=False)
821
1137
  customer_df.to_parquet(customer_path, index=False)
822
1138
  product_df.to_excel(product_path, index=False)
823
1139
  large_numbers_df.to_excel(large_numbers_path, index=False)
824
-
1140
+ large_customer_df.to_parquet(large_customer_path, index=False)
1141
+ california_housing_df.to_parquet(california_housing_path, index=False)
1142
+
825
1143
  # Register the tables in the database manager
826
1144
  self.db_manager.register_dataframe(sales_df, 'sample_sales_data', sales_path)
827
1145
  self.db_manager.register_dataframe(product_df, 'product_catalog', product_path)
828
1146
  self.db_manager.register_dataframe(customer_df, 'customer_data', customer_path)
829
1147
  self.db_manager.register_dataframe(large_numbers_df, 'large_numbers', large_numbers_path)
1148
+ self.db_manager.register_dataframe(large_customer_df, 'large_customer_data', large_customer_path)
1149
+ self.db_manager.register_dataframe(california_housing_df, 'california_housing_data', california_housing_path)
830
1150
 
831
1151
  # Update UI
832
1152
  self.tables_list.clear()
@@ -893,28 +1213,14 @@ LIMIT 10
893
1213
  self.statusBar().showMessage('Exporting data to Excel...')
894
1214
 
895
1215
  # Convert table data to DataFrame
896
- df = self.get_table_data_as_dataframe()
897
- df.to_excel(file_name, index=False)
1216
+ df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1217
+ if df is None:
1218
+ raise Exception("Failed to convert table data to DataFrame")
898
1219
 
899
- # Generate table name from file name
900
- base_name = os.path.splitext(os.path.basename(file_name))[0]
901
- table_name = self.db_manager.sanitize_table_name(base_name)
1220
+ # Export using ExportManager
1221
+ table_name, metadata = self.export_manager.export_to_excel(df, file_name)
902
1222
 
903
- # Ensure unique table name
904
- original_name = table_name
905
- counter = 1
906
- while table_name in self.db_manager.loaded_tables:
907
- table_name = f"{original_name}_{counter}"
908
- counter += 1
909
-
910
- # Register the table in the database manager
911
- self.db_manager.register_dataframe(df, table_name, file_name)
912
-
913
- # Update tracking
914
- self.db_manager.loaded_tables[table_name] = file_name
915
- self.db_manager.table_columns[table_name] = df.columns.tolist()
916
-
917
- # Update UI using new method
1223
+ # Update UI
918
1224
  self.tables_list.add_table_item(table_name, os.path.basename(file_name))
919
1225
  self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
920
1226
 
@@ -951,28 +1257,14 @@ LIMIT 10
951
1257
  self.statusBar().showMessage('Exporting data to Parquet...')
952
1258
 
953
1259
  # Convert table data to DataFrame
954
- df = self.get_table_data_as_dataframe()
955
- df.to_parquet(file_name, index=False)
956
-
957
- # Generate table name from file name
958
- base_name = os.path.splitext(os.path.basename(file_name))[0]
959
- table_name = self.db_manager.sanitize_table_name(base_name)
960
-
961
- # Ensure unique table name
962
- original_name = table_name
963
- counter = 1
964
- while table_name in self.db_manager.loaded_tables:
965
- table_name = f"{original_name}_{counter}"
966
- counter += 1
1260
+ df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1261
+ if df is None:
1262
+ raise Exception("Failed to convert table data to DataFrame")
967
1263
 
968
- # Register the table in the database manager
969
- self.db_manager.register_dataframe(df, table_name, file_name)
1264
+ # Export using ExportManager
1265
+ table_name, metadata = self.export_manager.export_to_parquet(df, file_name)
970
1266
 
971
- # Update tracking
972
- self.db_manager.loaded_tables[table_name] = file_name
973
- self.db_manager.table_columns[table_name] = df.columns.tolist()
974
-
975
- # Update UI using new method
1267
+ # Update UI
976
1268
  self.tables_list.add_table_item(table_name, os.path.basename(file_name))
977
1269
  self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
978
1270
 
@@ -992,94 +1284,10 @@ LIMIT 10
992
1284
 
993
1285
  def get_table_data_as_dataframe(self):
994
1286
  """Helper function to convert table widget data to a DataFrame with proper data types"""
995
- # Get the current tab
996
1287
  current_tab = self.get_current_tab()
997
1288
  if not current_tab:
998
1289
  return pd.DataFrame()
999
-
1000
- headers = [current_tab.results_table.horizontalHeaderItem(i).text() for i in range(current_tab.results_table.columnCount())]
1001
- data = []
1002
- for row in range(current_tab.results_table.rowCount()):
1003
- row_data = []
1004
- for column in range(current_tab.results_table.columnCount()):
1005
- item = current_tab.results_table.item(row, column)
1006
- row_data.append(item.text() if item else '')
1007
- data.append(row_data)
1008
-
1009
- # Create DataFrame from raw string data
1010
- df_raw = pd.DataFrame(data, columns=headers)
1011
-
1012
- # Try to use the original dataframe's dtypes if available
1013
- if hasattr(current_tab, 'current_df') and current_tab.current_df is not None:
1014
- original_df = current_tab.current_df
1015
- # Since we might have filtered rows, we can't just return the original DataFrame
1016
- # But we can use its column types to convert our string data appropriately
1017
-
1018
- # Create a new DataFrame with appropriate types
1019
- df_typed = pd.DataFrame()
1020
-
1021
- for col in df_raw.columns:
1022
- if col in original_df.columns:
1023
- # Get the original column type
1024
- orig_type = original_df[col].dtype
1025
-
1026
- # Special handling for different data types
1027
- if pd.api.types.is_numeric_dtype(orig_type):
1028
- # Handle numeric columns (int or float)
1029
- try:
1030
- # First try to convert to numeric type
1031
- # Remove commas used for thousands separators
1032
- numeric_col = pd.to_numeric(df_raw[col].str.replace(',', '').replace('NULL', np.nan))
1033
- df_typed[col] = numeric_col
1034
- except:
1035
- # If that fails, keep the original string
1036
- df_typed[col] = df_raw[col]
1037
- elif pd.api.types.is_datetime64_dtype(orig_type):
1038
- # Handle datetime columns
1039
- try:
1040
- df_typed[col] = pd.to_datetime(df_raw[col].replace('NULL', np.nan))
1041
- except:
1042
- df_typed[col] = df_raw[col]
1043
- elif pd.api.types.is_bool_dtype(orig_type):
1044
- # Handle boolean columns
1045
- try:
1046
- df_typed[col] = df_raw[col].map({'True': True, 'False': False}).replace('NULL', np.nan)
1047
- except:
1048
- df_typed[col] = df_raw[col]
1049
- else:
1050
- # For other types, keep as is
1051
- df_typed[col] = df_raw[col]
1052
- else:
1053
- # For columns not in the original dataframe, infer type
1054
- df_typed[col] = df_raw[col]
1055
-
1056
- return df_typed
1057
-
1058
- else:
1059
- # If we don't have the original dataframe, try to infer types
1060
- # First replace 'NULL' with actual NaN
1061
- df_raw.replace('NULL', np.nan, inplace=True)
1062
-
1063
- # Try to convert each column to numeric if possible
1064
- for col in df_raw.columns:
1065
- try:
1066
- # First try to convert to numeric by removing commas
1067
- df_raw[col] = pd.to_numeric(df_raw[col].str.replace(',', ''))
1068
- except:
1069
- # If that fails, try to convert to datetime
1070
- try:
1071
- df_raw[col] = pd.to_datetime(df_raw[col])
1072
- except:
1073
- # If both numeric and datetime conversions fail,
1074
- # try boolean conversion for True/False strings
1075
- try:
1076
- if df_raw[col].dropna().isin(['True', 'False']).all():
1077
- df_raw[col] = df_raw[col].map({'True': True, 'False': False})
1078
- except:
1079
- # Otherwise, keep as is
1080
- pass
1081
-
1082
- return df_raw
1290
+ return self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1083
1291
 
1084
1292
  def keyPressEvent(self, event):
1085
1293
  """Handle global keyboard shortcuts"""
@@ -1203,6 +1411,30 @@ LIMIT 10
1203
1411
 
1204
1412
  def show_tables_context_menu(self, position):
1205
1413
  """Show context menu for tables list"""
1414
+ # Check if we have multiple selected items
1415
+ selected_items = self.tables_list.selectedItems()
1416
+ if len(selected_items) > 1:
1417
+ # Filter out any folder items from selection
1418
+ table_items = [item for item in selected_items if not self.tables_list.is_folder_item(item)]
1419
+
1420
+ if len(table_items) > 1:
1421
+ # Create context menu for multiple table selection
1422
+ context_menu = QMenu(self)
1423
+ context_menu.setStyleSheet(get_context_menu_stylesheet())
1424
+
1425
+ # Add foreign key analysis option
1426
+ analyze_fk_action = context_menu.addAction(f"Analyze Foreign Keys Between {len(table_items)} Tables")
1427
+ analyze_fk_action.setIcon(QIcon.fromTheme("system-search"))
1428
+
1429
+ # Show menu and get selected action
1430
+ action = context_menu.exec(self.tables_list.mapToGlobal(position))
1431
+
1432
+ if action == analyze_fk_action:
1433
+ self.analyze_foreign_keys_between_tables(table_items)
1434
+
1435
+ return
1436
+
1437
+ # Single item selection (original functionality)
1206
1438
  item = self.tables_list.itemAt(position)
1207
1439
 
1208
1440
  # If no item or it's a folder, let the tree widget handle it
@@ -1226,6 +1458,12 @@ LIMIT 10
1226
1458
  # Add menu actions
1227
1459
  select_from_action = context_menu.addAction("Select from")
1228
1460
  add_to_editor_action = context_menu.addAction("Just add to editor")
1461
+ select_from_new_tab_action = context_menu.addAction("Select From in New Tab")
1462
+
1463
+ # Add copy path actions
1464
+ context_menu.addSeparator()
1465
+ copy_path_action = context_menu.addAction("Copy Path")
1466
+ copy_relative_path_action = context_menu.addAction("Copy Relative Path")
1229
1467
 
1230
1468
  # Add entropy profiler action
1231
1469
  context_menu.addSeparator()
@@ -1236,6 +1474,10 @@ LIMIT 10
1236
1474
  profile_table_action = context_menu.addAction("Profile Table Structure")
1237
1475
  profile_table_action.setIcon(QIcon.fromTheme("edit-find"))
1238
1476
 
1477
+ # Add distributions profiler action
1478
+ profile_distributions_action = context_menu.addAction("Analyze Column Distributions")
1479
+ profile_distributions_action.setIcon(QIcon.fromTheme("accessories-calculator"))
1480
+
1239
1481
  # Check if table needs reloading and add appropriate action
1240
1482
  if table_name in self.tables_list.tables_needing_reload:
1241
1483
  reload_action = context_menu.addAction("Reload Table")
@@ -1286,6 +1528,11 @@ LIMIT 10
1286
1528
  cursor = current_tab.query_edit.textCursor()
1287
1529
  cursor.insertText(table_name)
1288
1530
  current_tab.query_edit.setFocus()
1531
+ elif action == select_from_new_tab_action:
1532
+ # Create a new tab with the selected table
1533
+ new_tab = self.add_tab(title=table_name)
1534
+ new_tab.set_query_text(f"SELECT * FROM {table_name}")
1535
+ new_tab.query_edit.setFocus()
1289
1536
  elif action == reload_action:
1290
1537
  self.reload_selected_table(table_name)
1291
1538
  elif action == analyze_entropy_action:
@@ -1294,6 +1541,9 @@ LIMIT 10
1294
1541
  elif action == profile_table_action:
1295
1542
  # Call the table profile method
1296
1543
  self.profile_table_structure(table_name)
1544
+ elif action == profile_distributions_action:
1545
+ # Call the distributions profile method
1546
+ self.profile_distributions(table_name)
1297
1547
  elif action == rename_action:
1298
1548
  # Show rename dialog
1299
1549
  new_name, ok = QInputDialog.getText(
@@ -1349,6 +1599,91 @@ LIMIT 10
1349
1599
  if target_folder:
1350
1600
  self.tables_list.move_item_to_folder(item, target_folder)
1351
1601
  self.statusBar().showMessage(f'Moved table "{table_name}" to folder "{target_folder.text(0)}"')
1602
+ elif action == copy_path_action:
1603
+ # Get the full path from the table source
1604
+ if table_name in self.db_manager.loaded_tables:
1605
+ path = self.db_manager.loaded_tables[table_name]
1606
+ if path != 'database': # Only copy if it's a file path
1607
+ QApplication.clipboard().setText(path)
1608
+ self.statusBar().showMessage(f"Copied full path to clipboard")
1609
+ elif action == copy_relative_path_action:
1610
+ # Get the relative path from the table source
1611
+ if table_name in self.db_manager.loaded_tables:
1612
+ path = self.db_manager.loaded_tables[table_name]
1613
+ if path != 'database': # Only copy if it's a file path
1614
+ try:
1615
+ rel_path = os.path.relpath(path)
1616
+ QApplication.clipboard().setText(rel_path)
1617
+ self.statusBar().showMessage(f"Copied relative path to clipboard")
1618
+ except ValueError:
1619
+ self.statusBar().showMessage("Could not determine relative path")
1620
+
1621
def analyze_foreign_keys_between_tables(self, table_items):
    """Analyze foreign key relationships between the selected tables.

    Args:
        table_items: Items selected in the tables list. Items for which
            ``tables_list.get_table_name_from_item`` returns a falsy value
            are ignored.

    Each table is read with ``SELECT *`` and reduced to a 10,000-row sample
    (fixed ``random_state``) when larger. Tables that are empty or fail to
    load are reported in a warning box and skipped. The resulting
    visualization is kept on ``self._fk_analysis_window``. Errors are shown
    in a message box and the status bar instead of being raised.
    """
    try:
        # Show a loading indicator
        table_count = len(table_items)
        self.statusBar().showMessage(f'Analyzing foreign key relationships between {table_count} tables...')

        # Resolve the selected items to table names
        table_names = []
        for item in table_items:
            table_name = self.tables_list.get_table_name_from_item(item)
            if table_name:
                table_names.append(table_name)

        if len(table_names) < 2:
            QMessageBox.warning(self, "Not Enough Tables",
                                "At least two tables are required for foreign key analysis.")
            return

        # Snapshot the stale tables first, then reload each one immediately
        tables_to_reload = [tn for tn in table_names if tn in self.tables_list.tables_needing_reload]
        for table_name in tables_to_reload:
            self.reload_selected_table(table_name)

        # Fetch data for each table. The names of the tables that actually
        # loaded are tracked alongside the dataframes so both lists stay
        # index-aligned even when a table is skipped.
        dfs = []
        loaded_names = []
        for table_name in table_names:
            try:
                query = f'SELECT * FROM "{table_name}"'
                df = self.db_manager.execute_query(query)

                if df is not None and not df.empty:
                    # Sample large tables to improve performance
                    if len(df) > 10000:
                        self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {len(df)} total)...')
                        df = df.sample(n=10000, random_state=42)
                    dfs.append(df)
                    loaded_names.append(table_name)
                else:
                    QMessageBox.warning(self, "Empty Table",
                                        f"Table '{table_name}' has no data and will be skipped.")
            except Exception as e:
                # Best effort: one unreadable table must not abort the whole analysis
                QMessageBox.warning(self, "Table Error",
                                    f"Error loading table '{table_name}': {str(e)}\nThis table will be skipped.")

        if len(dfs) < 2:
            QMessageBox.warning(self, "Not Enough Tables",
                                "At least two tables with data are required for foreign key analysis.")
            return

        # Imported lazily, only when the analysis is actually run
        from sqlshell.utils.profile_foreign_keys import visualize_foreign_keys

        # Create and show the visualization
        self.statusBar().showMessage(f'Analyzing foreign key relationships between {len(dfs)} tables...')
        vis = visualize_foreign_keys(dfs, loaded_names)

        # Store a reference to prevent garbage collection
        self._fk_analysis_window = vis

        self.statusBar().showMessage(f'Foreign key analysis complete for {len(dfs)} tables')

    except Exception as e:
        QMessageBox.critical(self, "Analysis Error", f"Error analyzing foreign keys:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing foreign keys: {str(e)}')
1352
1687
 
1353
1688
  def reload_selected_table(self, table_name=None):
1354
1689
  """Reload the data for a table from its source file"""
@@ -2616,6 +2951,20 @@ LIMIT 10
2616
2951
  self.showMaximized()
2617
2952
  self.was_maximized = True
2618
2953
 
2954
def get_selected_table(self):
    """Return the name of the single table selected in the tables list.

    Folder items in the selection are ignored. ``None`` is returned when
    the window has no ``tables_list`` attribute, or when the number of
    selected table items is anything other than exactly one.
    """
    if hasattr(self, 'tables_list'):
        chosen = [
            entry
            for entry in self.tables_list.selectedItems()
            if not self.tables_list.is_folder_item(entry)
        ]

        # Zero or several tables give no single answer
        if len(chosen) == 1:
            return self.tables_list.get_table_name_from_item(chosen[0])

    return None
2967
+
2619
2968
  def change_zoom(self, factor):
2620
2969
  """Change the zoom level of the application by adjusting font sizes"""
2621
2970
  try:
@@ -3195,6 +3544,12 @@ LIMIT 10
3195
3544
  df = self.db_manager.execute_query(query)
3196
3545
 
3197
3546
  if df is not None and not df.empty:
3547
+ # Sample the data if it's larger than 10,000 rows
3548
+ row_count = len(df)
3549
+ if row_count > 10000:
3550
+ self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {row_count} total)...')
3551
+ df = df.sample(n=10000, random_state=42)
3552
+
3198
3553
  # Import the key profiler
3199
3554
  from sqlshell.utils.profile_keys import visualize_profile
3200
3555
 
@@ -3205,7 +3560,10 @@ LIMIT 10
3205
3560
  # Store a reference to prevent garbage collection
3206
3561
  self._keys_profile_window = vis
3207
3562
 
3208
- self.statusBar().showMessage(f'Table structure profile generated for "{table_name}"')
3563
+ if row_count > 10000:
3564
+ self.statusBar().showMessage(f'Table structure profile generated for "{table_name}" (sampled 10,000 rows from {row_count})')
3565
+ else:
3566
+ self.statusBar().showMessage(f'Table structure profile generated for "{table_name}"')
3209
3567
  else:
3210
3568
  QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
3211
3569
  self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
@@ -3216,6 +3574,289 @@ LIMIT 10
3216
3574
  except Exception as e:
3217
3575
  QMessageBox.critical(self, "Profile Error", f"Error profiling table structure:\n\n{str(e)}")
3218
3576
  self.statusBar().showMessage(f'Error profiling table: {str(e)}')
3577
+
3578
def profile_distributions(self, table_name):
    """Open a column-distribution profile for one loaded table.

    The table is reloaded first when it is flagged in
    ``tables_list.tables_needing_reload``, then read with ``SELECT *`` and
    cut down to a 10,000-row sample (fixed ``random_state``) when larger.
    The visualization is kept on ``self._distributions_window``. Unknown or
    empty tables produce a warning box; any exception is shown in an error
    box and the status bar rather than raised.
    """
    try:
        self.statusBar().showMessage(f'Analyzing column distributions for "{table_name}"...')

        # Guard: the table must be known to the database manager
        if table_name not in self.db_manager.loaded_tables:
            QMessageBox.warning(self, "Table Not Found", f"Table '{table_name}' not found.")
            self.statusBar().showMessage(f'Table "{table_name}" not found')
            return

        # Refresh stale data before reading it
        if table_name in self.tables_list.tables_needing_reload:
            self.reload_selected_table(table_name)

        df = self.db_manager.execute_query(f'SELECT * FROM "{table_name}"')

        # Guard: nothing to profile
        if df is None or df.empty:
            QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
            self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
            return

        row_count = len(df)
        was_sampled = row_count > 10000
        if was_sampled:
            self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {row_count} total)...')
            df = df.sample(n=10000, random_state=42)

        # Imported lazily, only when a profile is actually requested
        from sqlshell.utils.profile_distributions import visualize_profile

        self.statusBar().showMessage(f'Generating distribution profile for "{table_name}"...')

        # Holding the window on self prevents it from being garbage collected
        self._distributions_window = visualize_profile(df)

        if was_sampled:
            self.statusBar().showMessage(f'Distribution profile generated for "{table_name}" (sampled 10,000 rows from {row_count})')
        else:
            self.statusBar().showMessage(f'Distribution profile generated for "{table_name}"')

    except Exception as e:
        QMessageBox.critical(self, "Profile Error", f"Error analyzing distributions:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing distributions: {str(e)}')
3626
+
3627
def explain_column(self, column_name):
    """Open a profile of ``column_name`` built from the current tab's results.

    Returns silently when there is no current tab or the tab has no result
    dataframe. Results with more than 100 rows are cut down to a 100-row
    sample (fixed ``random_state``) before being passed to the column
    profiler. An empty result produces a warning box; any exception is
    shown in an error box and the status bar rather than raised.
    """
    try:
        tab = self.get_current_tab()
        if not tab or tab.current_df is None:
            return

        self.statusBar().showMessage(f'Analyzing column "{column_name}"...')

        df = tab.current_df

        # Guard: nothing to profile
        if df is None or df.empty:
            QMessageBox.warning(self, "Empty Data", "No data available to analyze.")
            self.statusBar().showMessage(f'No data to analyze')
            return

        # Keep the profiler fast by working on a small fixed-seed sample
        row_count = len(df)
        was_sampled = row_count > 100
        if was_sampled:
            self.statusBar().showMessage(f'Sampling data (using 100 rows from {row_count} total)...')
            df = df.sample(n=100, random_state=42)

        # Imported lazily, only when a profile is actually requested
        from sqlshell.utils.profile_column import visualize_profile

        self.statusBar().showMessage(f'Generating column profile for "{column_name}"...')

        # The return value is intentionally not stored
        visualize_profile(df, column_name)

        if was_sampled:
            self.statusBar().showMessage(f'Column profile generated for "{column_name}" (sampled 100 rows from {row_count})')
        else:
            self.statusBar().showMessage(f'Column profile generated for "{column_name}"')

    except Exception as e:
        QMessageBox.critical(self, "Analysis Error", f"Error analyzing column:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing column: {str(e)}')
3668
+
3669
def encode_text(self, column_name):
    """Open the one-hot encoding visualizer for a column of the current tab.

    Returns silently when there is no current tab or the tab has no result
    dataframe. The tab's dataframe is copied and its row count is recorded
    on ``current_tab.original_df_rowcount``. When the data has more than
    1000 rows, the full copy is kept on
    ``current_tab._original_df_before_encoding`` and only a 1000-row sample
    (fixed ``random_state``) is passed to the visualizer. Otherwise any
    full dataframe left over from an earlier run is removed. The
    visualizer's ``encodingApplied`` signal is connected to
    ``apply_encoded_dataframe``. Errors are shown in a message box and the
    status bar rather than raised.
    """
    try:
        # Get the current tab
        current_tab = self.get_current_tab()
        if not current_tab or current_tab.current_df is None:
            return

        # Show a loading indicator
        self.statusBar().showMessage(f'Preparing one-hot encoding for "{column_name}"...')

        # Work on a copy so the tab's dataframe is never modified here
        full_df = current_tab.current_df.copy()
        df = full_df

        # Save original row count for reference
        current_tab.original_df_rowcount = len(full_df)

        if df is not None and not df.empty:
            # Sample the data if it's larger than 1000 rows for better performance
            row_count = len(df)
            if row_count > 1000:
                self.statusBar().showMessage(f'Sampling data (using 1000 rows from {row_count} total)...')

                # Store the full dataframe before sampling for later use
                current_tab._original_df_before_encoding = full_df

                # Sample the data
                df = df.sample(n=1000, random_state=42)
            elif hasattr(current_tab, '_original_df_before_encoding'):
                # No sampling this time: drop the full dataframe cached by an
                # earlier run so apply_encoded_dataframe cannot re-encode
                # that unrelated, stale data.
                del current_tab._original_df_before_encoding

            # Imported lazily, only when an encoding is actually requested
            from sqlshell.utils.profile_ohe import visualize_ohe

            # Create and show the visualization
            self.statusBar().showMessage(f'Generating one-hot encoding for "{column_name}"...')
            vis = visualize_ohe(df, column_name)

            # Connect to the encodingApplied signal
            vis.encodingApplied.connect(self.apply_encoded_dataframe)

            # Store a reference to prevent garbage collection
            self._ohe_window = vis

            if row_count > 1000:
                self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}" (sampled 1000 rows from {row_count})')
            else:
                self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}"')
        else:
            QMessageBox.warning(self, "Empty Data", "No data available to encode.")
            self.statusBar().showMessage(f'No data to encode')

    except Exception as e:
        QMessageBox.critical(self, "Encoding Error", f"Error generating one-hot encoding:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error generating one-hot encoding: {str(e)}')
3723
+
3724
def apply_encoded_dataframe(self, encoded_df):
    """Show a one-hot encoded dataframe in the current tab's results table.

    Args:
        encoded_df: Dataframe produced by the one-hot encoding visualizer.
            It may have been built from a sample of the tab's data.

    When the tab still holds the full pre-sampling dataframe
    (``_original_df_before_encoding``) and it has more rows than
    ``encoded_df``, the encoding is re-run on the full data with
    ``get_ohe``. If the encoded column cannot be identified, or the re-run
    fails, the sampled ``encoded_df`` is used instead. The chosen dataframe
    replaces ``current_tab.current_df`` and ``self.current_df``, and is shown
    via ``populate_table``. Results with at least 100 rows are also
    registered as a table named ``encoded_data_<unix timestamp>``. Errors
    are shown in a message box and the status bar rather than raised.
    """
    progress = None
    try:
        # Get the current tab
        current_tab = self.get_current_tab()
        if not current_tab:
            return

        # Show a loading indicator
        self.statusBar().showMessage('Applying one-hot encoding...')

        # Progress dialog for large datasets
        progress = QProgressDialog("Applying encoding...", "Cancel", 0, 100, self)
        progress.setWindowTitle("Processing")
        progress.setWindowModality(Qt.WindowModality.WindowModal)
        progress.setValue(10)

        # Work out whether encoded_df was built from a sample. A missing or
        # None cache both mean "no full data available".
        is_sampled = False
        full_df = getattr(current_tab, '_original_df_before_encoding', None)
        if full_df is not None:
            is_sampled = len(full_df) > len(encoded_df)
        elif hasattr(current_tab, 'original_df_rowcount'):
            # We know the original row count but don't have the data
            is_sampled = current_tab.original_df_rowcount > len(encoded_df)

        progress.setValue(20)
        QApplication.processEvents()

        # If we're working with a sample, apply the encoding to the full dataset
        if is_sampled and full_df is not None:
            self.statusBar().showMessage(f'Re-applying encoding to full dataset ({len(full_df)} rows)...')

            try:
                # Columns present only in encoded_df were added by the OHE process
                original_cols = set(current_tab.current_df.columns)
                ohe_cols = set(encoded_df.columns) - original_cols

                if ohe_cols:
                    # Import the encoding function to apply to full dataset
                    from sqlshell.utils.profile_ohe import get_ohe

                    # Identify the encoded column from the is_<col>/has_<col>
                    # prefixes. Several names can match by prefix (e.g. "a"
                    # and "ab" both match "is_ab_x"), so take the longest
                    # match, with the name as tie-breaker, instead of
                    # whichever the set happens to yield first.
                    candidates = [
                        col for col in original_cols
                        if any(c.startswith((f'is_{col}', f'has_{col}')) for c in ohe_cols)
                    ]
                    encoded_column = max(
                        candidates,
                        key=lambda col: (len(str(col)), str(col)),
                        default=None,
                    )

                    progress.setValue(40)
                    QApplication.processEvents()

                    if encoded_column is not None:
                        # Apply encoding to full dataset
                        self.statusBar().showMessage(f'Encoding column "{encoded_column}" on full dataset...')
                        full_encoded_df = get_ohe(full_df, encoded_column)

                        progress.setValue(80)
                        QApplication.processEvents()

                        # Use the full encoded dataframe instead
                        encoded_df = full_encoded_df
            except Exception as e:
                # Best effort: fall back to the provided (sampled) encoded_df
                print(f"Error applying encoding to full dataset: {e}")

        # Whichever dataframe was chosen becomes the tab's current data
        current_tab.current_df = encoded_df
        self.current_df = encoded_df  # Keep this for compatibility

        progress.setValue(90)
        QApplication.processEvents()

        # Populate the results table with the new dataframe
        self.populate_table(encoded_df)

        # Update results title to show this is encoded data
        current_tab.results_title.setText("ENCODED DATA")

        progress.setValue(100)
        progress.close()

        # Update status
        self.statusBar().showMessage(f'Applied one-hot encoding with {len(encoded_df.columns)} columns')

        # Check if we should register this as a temporary table
        if len(encoded_df) >= 100:  # Only worth registering as table if it's substantial
            try:
                # Generate a table name from the current Unix timestamp
                import time
                timestamp = int(time.time())
                table_name = f"encoded_data_{timestamp}"

                # Register as a temporary table in the database manager
                self.db_manager.register_dataframe(encoded_df, table_name, "query_result")

                # Add to tables list
                self.tables_list.add_table_item(table_name, "encoded data")

                # Update completer
                self.update_completer()

                # Notify user
                self.statusBar().showMessage(f'Applied one-hot encoding and registered as table "{table_name}"')
            except Exception as e:
                # Just log the error but continue - this is an optional enhancement
                print(f"Error registering encoded dataframe as table: {e}")

    except Exception as e:
        # Close the window-modal progress dialog first so it does not stay
        # open, half-finished, behind the error box.
        if progress is not None:
            progress.close()
        QMessageBox.critical(self, "Error", f"Failed to apply encoded dataframe:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error applying encoding: {str(e)}')
3853
+
3854
def get_current_query_tab(self):
    """Return the active tab when it is a query tab, otherwise ``None``.

    A tab counts as a query tab when it exposes a ``query_edit`` attribute.
    """
    tab = self.get_current_tab()
    if not tab or not hasattr(tab, 'query_edit'):
        return None
    return tab
3219
3860
 
3220
3861
  def main():
3221
3862
  # Parse command line arguments