sqlshell 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlshell might be problematic. Click here for more details.

sqlshell/main.py CHANGED
@@ -18,18 +18,20 @@ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
18
18
  QCompleter, QFrame, QToolButton, QSizePolicy, QTabWidget,
19
19
  QStyleFactory, QToolBar, QStatusBar, QLineEdit, QMenu,
20
20
  QCheckBox, QWidgetAction, QMenuBar, QInputDialog, QProgressDialog,
21
- QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem)
21
+ QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem,
22
+ QComboBox)
22
23
  from PyQt6.QtCore import Qt, QAbstractTableModel, QRegularExpression, QRect, QSize, QStringListModel, QPropertyAnimation, QEasingCurve, QTimer, QPoint, QMimeData
23
24
  from PyQt6.QtGui import QFont, QColor, QSyntaxHighlighter, QTextCharFormat, QPainter, QTextFormat, QTextCursor, QIcon, QPalette, QLinearGradient, QBrush, QPixmap, QPolygon, QPainterPath, QDrag
24
25
  import numpy as np
25
26
  from datetime import datetime
27
+ import psutil
26
28
 
27
29
  from sqlshell import create_test_data
28
30
  from sqlshell.splash_screen import AnimatedSplashScreen
29
31
  from sqlshell.syntax_highlighter import SQLSyntaxHighlighter
30
32
  from sqlshell.editor import LineNumberArea, SQLEditor
31
33
  from sqlshell.ui import FilterHeader, BarChartDelegate
32
- from sqlshell.db import DatabaseManager
34
+ from sqlshell.db import DatabaseManager, ExportManager
33
35
  from sqlshell.query_tab import QueryTab
34
36
  from sqlshell.styles import (get_application_stylesheet, get_tab_corner_stylesheet,
35
37
  get_context_menu_stylesheet,
@@ -42,6 +44,7 @@ class SQLShell(QMainWindow):
42
44
  def __init__(self):
43
45
  super().__init__()
44
46
  self.db_manager = DatabaseManager()
47
+ self.export_manager = ExportManager(self.db_manager)
45
48
  self.current_df = None # Store the current DataFrame for filtering
46
49
  self.filter_widgets = [] # Store filter line edits
47
50
  self.current_project_file = None # Store the current project file path
@@ -216,6 +219,39 @@ class SQLShell(QMainWindow):
216
219
  query_header.setObjectName("header_label")
217
220
  right_layout.addWidget(query_header)
218
221
 
222
+ # Create a drop area for tables above the tab widget
223
+ self.tab_drop_area = QFrame()
224
+ self.tab_drop_area.setFixedHeight(30)
225
+ self.tab_drop_area.setObjectName("tab_drop_area")
226
+
227
+ # Add a label with hint text
228
+ drop_area_layout = QHBoxLayout(self.tab_drop_area)
229
+ drop_area_layout.setContentsMargins(10, 0, 10, 0)
230
+ self.drop_hint_label = QLabel("Drag tables here to create new query tabs")
231
+ self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
232
+ self.drop_hint_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
233
+ drop_area_layout.addWidget(self.drop_hint_label)
234
+
235
+ self.tab_drop_area.setStyleSheet("""
236
+ #tab_drop_area {
237
+ background-color: #f8f9fa;
238
+ border: 1px dashed #BDC3C7;
239
+ border-radius: 4px;
240
+ margin: 0 0 5px 0;
241
+ }
242
+
243
+ #tab_drop_area:hover {
244
+ background-color: #E5F7FF;
245
+ border: 1px dashed #3498DB;
246
+ }
247
+ """)
248
+ self.tab_drop_area.setAcceptDrops(True)
249
+ self.tab_drop_area.dragEnterEvent = self.tab_area_drag_enter
250
+ self.tab_drop_area.dragMoveEvent = self.tab_area_drag_move
251
+ self.tab_drop_area.dragLeaveEvent = self.tab_area_drag_leave
252
+ self.tab_drop_area.dropEvent = self.tab_area_drop
253
+ right_layout.addWidget(self.tab_drop_area)
254
+
219
255
  # Create tab widget for multiple queries
220
256
  self.tab_widget = QTabWidget()
221
257
  self.tab_widget.setTabsClosable(True)
@@ -237,6 +273,100 @@ class SQLShell(QMainWindow):
237
273
  # Status bar
238
274
  self.statusBar().showMessage('Ready | Ctrl+Enter: Execute Query | Ctrl+K: Toggle Comment | Ctrl+T: New Tab | Ctrl+Shift+O: Quick Access Files')
239
275
 
276
+ # Methods for handling drag and drop on the tab drop area
277
+ def tab_area_drag_enter(self, event):
278
+ """Handle drag enter events on the tab drop area"""
279
+ # Accept only if from the tables list
280
+ if event.source() == self.tables_list:
281
+ # Extract table name(s) from the mime data
282
+ mime_data = event.mimeData()
283
+ if mime_data.hasText():
284
+ table_names = mime_data.text().split(", ")
285
+ if len(table_names) == 1:
286
+ self.drop_hint_label.setText(f"Release to create a new query tab for {table_names[0]}")
287
+ else:
288
+ self.drop_hint_label.setText(f"Release to create {len(table_names)} new query tabs")
289
+
290
+ self.drop_hint_label.setStyleSheet("color: #3498db; font-size: 11px; font-weight: bold;")
291
+
292
+ # Highlight the drop area
293
+ self.tab_drop_area.setStyleSheet("""
294
+ #tab_drop_area {
295
+ background-color: #E5F7FF;
296
+ border: 2px dashed #3498DB;
297
+ border-radius: 4px;
298
+ margin: 0 0 5px 0;
299
+ }
300
+ """)
301
+ self.tab_drop_area.setFixedHeight(40)
302
+ event.acceptProposedAction()
303
+ else:
304
+ event.ignore()
305
+
306
+ def tab_area_drag_move(self, event):
307
+ """Handle drag move events on the tab drop area"""
308
+ # Continue accepting drag moves
309
+ if event.source() == self.tables_list:
310
+ event.acceptProposedAction()
311
+ else:
312
+ event.ignore()
313
+
314
+ def tab_area_drag_leave(self, event):
315
+ """Handle drag leave events on the tab drop area"""
316
+ # Reset the drop area
317
+ self.tab_drop_area.setStyleSheet("""
318
+ #tab_drop_area {
319
+ background-color: #f8f9fa;
320
+ border: 1px dashed #BDC3C7;
321
+ border-radius: 4px;
322
+ margin: 0 0 5px 0;
323
+ }
324
+ """)
325
+ self.drop_hint_label.setText("Drag tables here to create new query tabs")
326
+ self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
327
+ self.tab_drop_area.setFixedHeight(30)
328
+ # No need to call a parent method
329
+
330
+ def tab_area_drop(self, event):
331
+ """Handle drop events on the tab drop area"""
332
+ # Process the drop to create a new tab with SELECT query
333
+ if event.source() == self.tables_list:
334
+ mime_data = event.mimeData()
335
+ if mime_data.hasText():
336
+ table_names = mime_data.text().split(", ")
337
+
338
+ for table_name in table_names:
339
+ # Check if this table needs to be reloaded first
340
+ if table_name in self.tables_list.tables_needing_reload:
341
+ # Reload the table immediately without asking
342
+ self.reload_selected_table(table_name)
343
+
344
+ # Generate a title for the tab
345
+ tab_title = f"Query {table_name}"
346
+ # Create a new tab
347
+ new_tab = self.add_tab(tab_title)
348
+ # Set the SQL query
349
+ new_tab.set_query_text(f"SELECT * FROM {table_name}")
350
+
351
+ self.statusBar().showMessage(f"Created new tab{'s' if len(table_names) > 1 else ''} for {', '.join(table_names)}")
352
+
353
+ # Reset the drop area appearance
354
+ self.tab_drop_area.setStyleSheet("""
355
+ #tab_drop_area {
356
+ background-color: #f8f9fa;
357
+ border: 1px dashed #BDC3C7;
358
+ border-radius: 4px;
359
+ margin: 0 0 5px 0;
360
+ }
361
+ """)
362
+ self.drop_hint_label.setText("Drag tables here to create new query tabs")
363
+ self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
364
+ self.tab_drop_area.setFixedHeight(30)
365
+
366
+ event.acceptProposedAction()
367
+ else:
368
+ event.ignore()
369
+
240
370
  def create_tab_corner_widget(self):
241
371
  """Create a corner widget with a + button to add new tabs"""
242
372
  corner_widget = QWidget()
@@ -290,25 +420,126 @@ class SQLShell(QMainWindow):
290
420
  headers = [str(col) for col in df.columns]
291
421
  current_tab.results_table.setHorizontalHeaderLabels(headers)
292
422
 
293
- # Calculate chunk size (adjust based on available memory)
294
- CHUNK_SIZE = 1000
295
-
296
- # Process data in chunks to avoid memory issues with large datasets
297
- for chunk_start in range(0, row_count, CHUNK_SIZE):
298
- chunk_end = min(chunk_start + CHUNK_SIZE, row_count)
299
- chunk = df.iloc[chunk_start:chunk_end]
423
+ # Calculate dynamic chunk size based on available memory
424
+ import psutil
425
+ available_memory = psutil.virtual_memory().available
426
+ # Use 10% of available memory for chunking, clamped to between 1000 and 10000 rows
427
+ memory_per_row = df.memory_usage(deep=True).sum() / len(df)
428
+ CHUNK_SIZE = max(1000, min(10000, int(available_memory * 0.1 / memory_per_row)))
429
+
430
+ # Add pagination controls if dataset is large
431
+ if row_count > CHUNK_SIZE:
432
+ # Remove any existing pagination widgets
433
+ for i in reversed(range(current_tab.results_layout.count())):
434
+ item = current_tab.results_layout.itemAt(i)
435
+ widget = item.widget() if item is not None else None
436
+ if widget and widget.objectName() == "pagination_widget":
437
+ current_tab.results_layout.removeWidget(widget)
438
+ widget.setParent(None)
439
+ widget.deleteLater()
440
+
441
+ # Create pagination widget
442
+ pagination_widget = QWidget()
443
+ pagination_widget.setObjectName("pagination_widget")
444
+ pagination_layout = QHBoxLayout(pagination_widget)
445
+
446
+ # Add page size selector
447
+ page_size_label = QLabel("Rows per page:")
448
+ page_size_combo = QComboBox()
449
+ page_sizes = [1000, 5000, 10000, 50000, 100000]
450
+ page_size_combo.addItems([str(size) for size in page_sizes])
451
+ page_size_combo.setCurrentText(str(CHUNK_SIZE))
452
+
453
+ # Add navigation buttons
454
+ prev_btn = QPushButton("Previous")
455
+ next_btn = QPushButton("Next")
456
+ page_label = QLabel("Page 1")
457
+
458
+ # Add widgets to layout
459
+ pagination_layout.addWidget(page_size_label)
460
+ pagination_layout.addWidget(page_size_combo)
461
+ pagination_layout.addStretch()
462
+ pagination_layout.addWidget(prev_btn)
463
+ pagination_layout.addWidget(page_label)
464
+ pagination_layout.addWidget(next_btn)
465
+
466
+ # Add pagination widget to results layout
467
+ current_tab.results_layout.addWidget(pagination_widget)
468
+
469
+ # Store pagination state
470
+ current_tab.pagination_state = {
471
+ 'current_page': 0,
472
+ 'page_size': CHUNK_SIZE,
473
+ 'total_pages': (row_count + CHUNK_SIZE - 1) // CHUNK_SIZE,
474
+ 'page_label': page_label,
475
+ 'prev_btn': prev_btn,
476
+ 'next_btn': next_btn,
477
+ 'page_size_combo': page_size_combo
478
+ }
300
479
 
301
- # Add rows for this chunk
302
- current_tab.results_table.setRowCount(chunk_end)
480
+ # Connect pagination signals
481
+ def update_page_size(size):
482
+ current_tab.pagination_state['page_size'] = int(size)
483
+ current_tab.pagination_state['total_pages'] = (row_count + int(size) - 1) // int(size)
484
+ current_tab.pagination_state['current_page'] = 0
485
+ load_current_page()
486
+
487
+ def load_current_page():
488
+ state = current_tab.pagination_state
489
+ start_idx = state['current_page'] * state['page_size']
490
+ end_idx = min(start_idx + state['page_size'], row_count)
491
+
492
+ # Clear existing rows
493
+ current_tab.results_table.setRowCount(0)
494
+
495
+ # Load current page
496
+ chunk = df.iloc[start_idx:end_idx]
497
+ current_tab.results_table.setRowCount(len(chunk))
498
+
499
+ for row_idx, (_, row_data) in enumerate(chunk.iterrows()):
500
+ for col_idx, value in enumerate(row_data):
501
+ formatted_value = self.format_value(value)
502
+ item = QTableWidgetItem(formatted_value)
503
+ current_tab.results_table.setItem(row_idx, col_idx, item)
504
+
505
+ # Update pagination controls
506
+ state['page_label'].setText(f"Page {state['current_page'] + 1} of {state['total_pages']}")
507
+ state['prev_btn'].setEnabled(state['current_page'] > 0)
508
+ state['next_btn'].setEnabled(state['current_page'] < state['total_pages'] - 1)
509
+
510
+ # Process events to keep UI responsive
511
+ QApplication.processEvents()
512
+
513
+ def next_page():
514
+ if current_tab.pagination_state['current_page'] < current_tab.pagination_state['total_pages'] - 1:
515
+ current_tab.pagination_state['current_page'] += 1
516
+ load_current_page()
517
+
518
+ def prev_page():
519
+ if current_tab.pagination_state['current_page'] > 0:
520
+ current_tab.pagination_state['current_page'] -= 1
521
+ load_current_page()
522
+
523
+ # Connect signals
524
+ page_size_combo.currentTextChanged.connect(update_page_size)
525
+ next_btn.clicked.connect(next_page)
526
+ prev_btn.clicked.connect(prev_page)
527
+
528
+ # Load first page
529
+ load_current_page()
530
+ else:
531
+ # For smaller datasets, load all at once
532
+ current_tab.results_table.setRowCount(row_count)
303
533
 
304
- for row_idx, (_, row_data) in enumerate(chunk.iterrows(), start=chunk_start):
534
+ for row_idx, (_, row_data) in enumerate(df.iterrows()):
305
535
  for col_idx, value in enumerate(row_data):
306
536
  formatted_value = self.format_value(value)
307
537
  item = QTableWidgetItem(formatted_value)
308
538
  current_tab.results_table.setItem(row_idx, col_idx, item)
309
-
310
- # Process events to keep UI responsive
311
- QApplication.processEvents()
539
+
540
+ # Process events periodically to keep UI responsive
541
+ if row_idx % 1000 == 0:
542
+ QApplication.processEvents()
312
543
 
313
544
  # Optimize column widths
314
545
  current_tab.results_table.resizeColumnsToContents()
@@ -636,6 +867,31 @@ class SQLShell(QMainWindow):
636
867
  QMessageBox.warning(self, "Empty Query", "Please enter a SQL query to execute.")
637
868
  return
638
869
 
870
+ # Check if the query references any tables that need to be loaded
871
+ referenced_tables = self.extract_table_names_from_query(query)
872
+ tables_to_load = [table for table in referenced_tables if table in self.tables_list.tables_needing_reload]
873
+
874
+ # Load any tables that need to be loaded
875
+ if tables_to_load:
876
+ progress = QProgressDialog(f"Loading tables...", "Cancel", 0, len(tables_to_load), self)
877
+ progress.setWindowTitle("Loading Tables")
878
+ progress.setWindowModality(Qt.WindowModality.WindowModal)
879
+ progress.show()
880
+
881
+ for i, table_name in enumerate(tables_to_load):
882
+ if progress.wasCanceled():
883
+ self.statusBar().showMessage("Query canceled: table loading was interrupted")
884
+ return
885
+
886
+ progress.setLabelText(f"Loading table: {table_name}")
887
+ progress.setValue(i)
888
+ QApplication.processEvents()
889
+
890
+ self.reload_selected_table(table_name)
891
+
892
+ progress.setValue(len(tables_to_load))
893
+ progress.close()
894
+
639
895
  start_time = datetime.now()
640
896
 
641
897
  try:
@@ -672,6 +928,57 @@ class SQLShell(QMainWindow):
672
928
  QMessageBox.critical(self, "Unexpected Error",
673
929
  f"An unexpected error occurred:\n\n{str(e)}")
674
930
  self.statusBar().showMessage("Query execution failed")
931
+
932
+ def extract_table_names_from_query(self, query):
933
+ """Extract table names from a SQL query using basic regex patterns"""
934
+ import re
935
+
936
+ # Convert to uppercase for easier pattern matching
937
+ query_upper = query.upper()
938
+
939
+ # Strip comments to avoid matching patterns inside comments
940
+ query_upper = re.sub(r'--.*?$', '', query_upper, flags=re.MULTILINE)
941
+ query_upper = re.sub(r'/\*.*?\*/', '', query_upper, flags=re.DOTALL)
942
+
943
+ # Common SQL patterns that reference tables
944
+ patterns = [
945
+ r'FROM\s+["\[]?(\w+)["\]]?', # FROM clause
946
+ r'JOIN\s+["\[]?(\w+)["\]]?', # JOIN clause
947
+ r'UPDATE\s+["\[]?(\w+)["\]]?', # UPDATE statement
948
+ r'INSERT\s+INTO\s+["\[]?(\w+)["\]]?', # INSERT statement
949
+ r'DELETE\s+FROM\s+["\[]?(\w+)["\]]?', # DELETE statement
950
+ r'CREATE\s+(?:TEMP|TEMPORARY)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?["\[]?(\w+)["\]]?', # CREATE TABLE
951
+ r'DROP\s+TABLE\s+(?:IF\s+EXISTS\s+)?["\[]?(\w+)["\]]?', # DROP TABLE
952
+ r'ALTER\s+TABLE\s+["\[]?(\w+)["\]]?', # ALTER TABLE
953
+ r'WITH\s+(\w+)\s+AS', # Common Table Expressions
954
+ r'MERGE\s+INTO\s+["\[]?(\w+)["\]]?' # MERGE statement
955
+ ]
956
+
957
+ tables = set()
958
+ for pattern in patterns:
959
+ matches = re.finditer(pattern, query_upper)
960
+ for match in matches:
961
+ # Get the table name from the matched group and strip any quotes
962
+ table_name = match.group(1).strip('"[]`\'')
963
+
964
+ # Skip SQL keywords
965
+ if table_name in ('SELECT', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'OFFSET',
966
+ 'UNION', 'INTERSECT', 'EXCEPT', 'WITH', 'AS', 'ON', 'USING'):
967
+ continue
968
+
969
+ # Add to our set of tables
970
+ tables.add(table_name.lower()) # Convert to lowercase for case-insensitive comparison
971
+
972
+ # Account for qualified table names (schema.table)
973
+ qualified_tables = set()
974
+ for table in tables:
975
+ if '.' in table:
976
+ qualified_tables.add(table.split('.')[-1]) # Add just the table part
977
+
978
+ tables.update(qualified_tables)
979
+
980
+ # Return all found table names in lowercase to match our table storage convention
981
+ return tables
675
982
 
676
983
  def _update_query_history(self, query):
677
984
  """Update query history and track term usage for improved autocompletion"""
@@ -906,28 +1213,14 @@ LIMIT 10
906
1213
  self.statusBar().showMessage('Exporting data to Excel...')
907
1214
 
908
1215
  # Convert table data to DataFrame
909
- df = self.get_table_data_as_dataframe()
910
- df.to_excel(file_name, index=False)
1216
+ df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1217
+ if df is None:
1218
+ raise Exception("Failed to convert table data to DataFrame")
911
1219
 
912
- # Generate table name from file name
913
- base_name = os.path.splitext(os.path.basename(file_name))[0]
914
- table_name = self.db_manager.sanitize_table_name(base_name)
1220
+ # Export using ExportManager
1221
+ table_name, metadata = self.export_manager.export_to_excel(df, file_name)
915
1222
 
916
- # Ensure unique table name
917
- original_name = table_name
918
- counter = 1
919
- while table_name in self.db_manager.loaded_tables:
920
- table_name = f"{original_name}_{counter}"
921
- counter += 1
922
-
923
- # Register the table in the database manager
924
- self.db_manager.register_dataframe(df, table_name, file_name)
925
-
926
- # Update tracking
927
- self.db_manager.loaded_tables[table_name] = file_name
928
- self.db_manager.table_columns[table_name] = df.columns.tolist()
929
-
930
- # Update UI using new method
1223
+ # Update UI
931
1224
  self.tables_list.add_table_item(table_name, os.path.basename(file_name))
932
1225
  self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
933
1226
 
@@ -964,28 +1257,14 @@ LIMIT 10
964
1257
  self.statusBar().showMessage('Exporting data to Parquet...')
965
1258
 
966
1259
  # Convert table data to DataFrame
967
- df = self.get_table_data_as_dataframe()
968
- df.to_parquet(file_name, index=False)
1260
+ df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1261
+ if df is None:
1262
+ raise Exception("Failed to convert table data to DataFrame")
969
1263
 
970
- # Generate table name from file name
971
- base_name = os.path.splitext(os.path.basename(file_name))[0]
972
- table_name = self.db_manager.sanitize_table_name(base_name)
1264
+ # Export using ExportManager
1265
+ table_name, metadata = self.export_manager.export_to_parquet(df, file_name)
973
1266
 
974
- # Ensure unique table name
975
- original_name = table_name
976
- counter = 1
977
- while table_name in self.db_manager.loaded_tables:
978
- table_name = f"{original_name}_{counter}"
979
- counter += 1
980
-
981
- # Register the table in the database manager
982
- self.db_manager.register_dataframe(df, table_name, file_name)
983
-
984
- # Update tracking
985
- self.db_manager.loaded_tables[table_name] = file_name
986
- self.db_manager.table_columns[table_name] = df.columns.tolist()
987
-
988
- # Update UI using new method
1267
+ # Update UI
989
1268
  self.tables_list.add_table_item(table_name, os.path.basename(file_name))
990
1269
  self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
991
1270
 
@@ -1005,94 +1284,10 @@ LIMIT 10
1005
1284
 
1006
1285
  def get_table_data_as_dataframe(self):
1007
1286
  """Helper function to convert table widget data to a DataFrame with proper data types"""
1008
- # Get the current tab
1009
1287
  current_tab = self.get_current_tab()
1010
1288
  if not current_tab:
1011
1289
  return pd.DataFrame()
1012
-
1013
- headers = [current_tab.results_table.horizontalHeaderItem(i).text() for i in range(current_tab.results_table.columnCount())]
1014
- data = []
1015
- for row in range(current_tab.results_table.rowCount()):
1016
- row_data = []
1017
- for column in range(current_tab.results_table.columnCount()):
1018
- item = current_tab.results_table.item(row, column)
1019
- row_data.append(item.text() if item else '')
1020
- data.append(row_data)
1021
-
1022
- # Create DataFrame from raw string data
1023
- df_raw = pd.DataFrame(data, columns=headers)
1024
-
1025
- # Try to use the original dataframe's dtypes if available
1026
- if hasattr(current_tab, 'current_df') and current_tab.current_df is not None:
1027
- original_df = current_tab.current_df
1028
- # Since we might have filtered rows, we can't just return the original DataFrame
1029
- # But we can use its column types to convert our string data appropriately
1030
-
1031
- # Create a new DataFrame with appropriate types
1032
- df_typed = pd.DataFrame()
1033
-
1034
- for col in df_raw.columns:
1035
- if col in original_df.columns:
1036
- # Get the original column type
1037
- orig_type = original_df[col].dtype
1038
-
1039
- # Special handling for different data types
1040
- if pd.api.types.is_numeric_dtype(orig_type):
1041
- # Handle numeric columns (int or float)
1042
- try:
1043
- # First try to convert to numeric type
1044
- # Remove commas used for thousands separators
1045
- numeric_col = pd.to_numeric(df_raw[col].str.replace(',', '').replace('NULL', np.nan))
1046
- df_typed[col] = numeric_col
1047
- except:
1048
- # If that fails, keep the original string
1049
- df_typed[col] = df_raw[col]
1050
- elif pd.api.types.is_datetime64_dtype(orig_type):
1051
- # Handle datetime columns
1052
- try:
1053
- df_typed[col] = pd.to_datetime(df_raw[col].replace('NULL', np.nan))
1054
- except:
1055
- df_typed[col] = df_raw[col]
1056
- elif pd.api.types.is_bool_dtype(orig_type):
1057
- # Handle boolean columns
1058
- try:
1059
- df_typed[col] = df_raw[col].map({'True': True, 'False': False}).replace('NULL', np.nan)
1060
- except:
1061
- df_typed[col] = df_raw[col]
1062
- else:
1063
- # For other types, keep as is
1064
- df_typed[col] = df_raw[col]
1065
- else:
1066
- # For columns not in the original dataframe, infer type
1067
- df_typed[col] = df_raw[col]
1068
-
1069
- return df_typed
1070
-
1071
- else:
1072
- # If we don't have the original dataframe, try to infer types
1073
- # First replace 'NULL' with actual NaN
1074
- df_raw.replace('NULL', np.nan, inplace=True)
1075
-
1076
- # Try to convert each column to numeric if possible
1077
- for col in df_raw.columns:
1078
- try:
1079
- # First try to convert to numeric by removing commas
1080
- df_raw[col] = pd.to_numeric(df_raw[col].str.replace(',', ''))
1081
- except:
1082
- # If that fails, try to convert to datetime
1083
- try:
1084
- df_raw[col] = pd.to_datetime(df_raw[col])
1085
- except:
1086
- # If both numeric and datetime conversions fail,
1087
- # try boolean conversion for True/False strings
1088
- try:
1089
- if df_raw[col].dropna().isin(['True', 'False']).all():
1090
- df_raw[col] = df_raw[col].map({'True': True, 'False': False})
1091
- except:
1092
- # Otherwise, keep as is
1093
- pass
1094
-
1095
- return df_raw
1290
+ return self.export_manager.convert_table_to_dataframe(current_tab.results_table)
1096
1291
 
1097
1292
  def keyPressEvent(self, event):
1098
1293
  """Handle global keyboard shortcuts"""
@@ -1263,6 +1458,12 @@ LIMIT 10
1263
1458
  # Add menu actions
1264
1459
  select_from_action = context_menu.addAction("Select from")
1265
1460
  add_to_editor_action = context_menu.addAction("Just add to editor")
1461
+ select_from_new_tab_action = context_menu.addAction("Select From in New Tab")
1462
+
1463
+ # Add copy path actions
1464
+ context_menu.addSeparator()
1465
+ copy_path_action = context_menu.addAction("Copy Path")
1466
+ copy_relative_path_action = context_menu.addAction("Copy Relative Path")
1266
1467
 
1267
1468
  # Add entropy profiler action
1268
1469
  context_menu.addSeparator()
@@ -1327,6 +1528,11 @@ LIMIT 10
1327
1528
  cursor = current_tab.query_edit.textCursor()
1328
1529
  cursor.insertText(table_name)
1329
1530
  current_tab.query_edit.setFocus()
1531
+ elif action == select_from_new_tab_action:
1532
+ # Create a new tab with the selected table
1533
+ new_tab = self.add_tab(title=table_name)
1534
+ new_tab.set_query_text(f"SELECT * FROM {table_name}")
1535
+ new_tab.query_edit.setFocus()
1330
1536
  elif action == reload_action:
1331
1537
  self.reload_selected_table(table_name)
1332
1538
  elif action == analyze_entropy_action:
@@ -1393,6 +1599,24 @@ LIMIT 10
1393
1599
  if target_folder:
1394
1600
  self.tables_list.move_item_to_folder(item, target_folder)
1395
1601
  self.statusBar().showMessage(f'Moved table "{table_name}" to folder "{target_folder.text(0)}"')
1602
+ elif action == copy_path_action:
1603
+ # Get the full path from the table source
1604
+ if table_name in self.db_manager.loaded_tables:
1605
+ path = self.db_manager.loaded_tables[table_name]
1606
+ if path != 'database': # Only copy if it's a file path
1607
+ QApplication.clipboard().setText(path)
1608
+ self.statusBar().showMessage(f"Copied full path to clipboard")
1609
+ elif action == copy_relative_path_action:
1610
+ # Get the relative path from the table source
1611
+ if table_name in self.db_manager.loaded_tables:
1612
+ path = self.db_manager.loaded_tables[table_name]
1613
+ if path != 'database': # Only copy if it's a file path
1614
+ try:
1615
+ rel_path = os.path.relpath(path)
1616
+ QApplication.clipboard().setText(rel_path)
1617
+ self.statusBar().showMessage(f"Copied relative path to clipboard")
1618
+ except ValueError:
1619
+ self.statusBar().showMessage("Could not determine relative path")
1396
1620
 
1397
1621
  def analyze_foreign_keys_between_tables(self, table_items):
1398
1622
  """Analyze foreign key relationships between selected tables"""
@@ -2727,6 +2951,20 @@ LIMIT 10
2727
2951
  self.showMaximized()
2728
2952
  self.was_maximized = True
2729
2953
 
2954
+ def get_selected_table(self):
2955
+ """Get the name of the currently selected table in the tables list"""
2956
+ if not hasattr(self, 'tables_list'):
2957
+ return None
2958
+
2959
+ selected_items = self.tables_list.selectedItems()
2960
+ # Filter out folders and use only single selections
2961
+ table_items = [item for item in selected_items if not self.tables_list.is_folder_item(item)]
2962
+
2963
+ if len(table_items) == 1: # Only use if exactly one table is selected
2964
+ return self.tables_list.get_table_name_from_item(table_items[0])
2965
+
2966
+ return None
2967
+
2730
2968
  def change_zoom(self, factor):
2731
2969
  """Change the zoom level of the application by adjusting font sizes"""
2732
2970
  try:
@@ -3428,6 +3666,198 @@ LIMIT 10
3428
3666
  QMessageBox.critical(self, "Analysis Error", f"Error analyzing column:\n\n{str(e)}")
3429
3667
  self.statusBar().showMessage(f'Error analyzing column: {str(e)}')
3430
3668
 
3669
+ def encode_text(self, column_name):
3670
+ """Generate one-hot encoding for a text column and visualize the results"""
3671
+ try:
3672
+ # Get the current tab
3673
+ current_tab = self.get_current_tab()
3674
+ if not current_tab or current_tab.current_df is None:
3675
+ return
3676
+
3677
+ # Show a loading indicator
3678
+ self.statusBar().showMessage(f'Preparing one-hot encoding for "{column_name}"...')
3679
+
3680
+ # Get the dataframe from the current tab
3681
+ full_df = current_tab.current_df.copy()
3682
+ df = full_df
3683
+
3684
+ # Save original row count for reference
3685
+ current_tab.original_df_rowcount = len(full_df)
3686
+
3687
+ if df is not None and not df.empty:
3688
+ # Sample the data if it's larger than 1000 rows for better performance
3689
+ row_count = len(df)
3690
+ if row_count > 1000:
3691
+ self.statusBar().showMessage(f'Sampling data (using 1000 rows from {row_count} total)...')
3692
+
3693
+ # Store the full dataframe before sampling for later use
3694
+ current_tab._original_df_before_encoding = full_df
3695
+
3696
+ # Sample the data
3697
+ df = df.sample(n=1000, random_state=42)
3698
+
3699
+ # Import the one-hot encoding visualizer
3700
+ from sqlshell.utils.profile_ohe import visualize_ohe
3701
+
3702
+ # Create and show the visualization
3703
+ self.statusBar().showMessage(f'Generating one-hot encoding for "{column_name}"...')
3704
+ vis = visualize_ohe(df, column_name)
3705
+
3706
+ # Connect to the encodingApplied signal
3707
+ vis.encodingApplied.connect(self.apply_encoded_dataframe)
3708
+
3709
+ # Store a reference to prevent garbage collection
3710
+ self._ohe_window = vis
3711
+
3712
+ if row_count > 1000:
3713
+ self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}" (sampled 1000 rows from {row_count})')
3714
+ else:
3715
+ self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}"')
3716
+ else:
3717
+ QMessageBox.warning(self, "Empty Data", "No data available to encode.")
3718
+ self.statusBar().showMessage(f'No data to encode')
3719
+
3720
+ except Exception as e:
3721
+ QMessageBox.critical(self, "Encoding Error", f"Error generating one-hot encoding:\n\n{str(e)}")
3722
+ self.statusBar().showMessage(f'Error generating one-hot encoding: {str(e)}')
3723
+
3724
+ def apply_encoded_dataframe(self, encoded_df):
3725
+ """Apply the encoded dataframe to the current tab's results table"""
3726
+ try:
3727
+ # Get the current tab
3728
+ current_tab = self.get_current_tab()
3729
+ if not current_tab:
3730
+ return
3731
+
3732
+ # Check if we're using a sampled version
3733
+ is_sampled = False
3734
+ full_df = None
3735
+
3736
+ # Show a loading indicator
3737
+ self.statusBar().showMessage(f'Applying one-hot encoding...')
3738
+
3739
+ # Progress dialog for large datasets
3740
+ progress = QProgressDialog("Applying encoding...", "Cancel", 0, 100, self)
3741
+ progress.setWindowTitle("Processing")
3742
+ progress.setWindowModality(Qt.WindowModality.WindowModal)
3743
+ progress.setValue(10)
3744
+
3745
+ # Check if this sample is smaller than the actual dataset
3746
+ if hasattr(current_tab, '_original_df_before_encoding'):
3747
+ # We have the original, full dataset stored
3748
+ full_df = current_tab._original_df_before_encoding
3749
+ is_sampled = len(full_df) > len(encoded_df)
3750
+ elif hasattr(current_tab, 'original_df_rowcount'):
3751
+ # We know the original row count but don't have the data
3752
+ is_sampled = current_tab.original_df_rowcount > len(encoded_df)
3753
+
3754
+ progress.setValue(20)
3755
+ QApplication.processEvents()
3756
+
3757
+ # If we're working with a sample, apply the encoding to the full dataset
3758
+ if is_sampled and full_df is not None:
3759
+ self.statusBar().showMessage(f'Re-applying encoding to full dataset ({len(full_df)} rows)...')
3760
+
3761
+ try:
3762
+ # Get the encoding columns (added by the OHE process)
3763
+ original_cols = set(current_tab.current_df.columns)
3764
+ ohe_cols = set(encoded_df.columns) - original_cols
3765
+
3766
+ if ohe_cols:
3767
+ # Import the encoding function to apply to full dataset
3768
+ from sqlshell.utils.profile_ohe import get_ohe
3769
+
3770
+ # Get the column that was encoded
3771
+ encoded_column = None
3772
+ for col in original_cols:
3773
+ if any(c.startswith(f'is_{col}') for c in ohe_cols) or any(c.startswith(f'has_{col}') for c in ohe_cols):
3774
+ encoded_column = col
3775
+ break
3776
+
3777
+ progress.setValue(40)
3778
+ QApplication.processEvents()
3779
+
3780
+ if encoded_column:
3781
+ # Apply encoding to full dataset
3782
+ self.statusBar().showMessage(f'Encoding column "{encoded_column}" on full dataset...')
3783
+ full_encoded_df = get_ohe(full_df, encoded_column)
3784
+
3785
+ progress.setValue(80)
3786
+ QApplication.processEvents()
3787
+
3788
+ # Update the current dataframe with the fully encoded one
3789
+ current_tab.current_df = full_encoded_df
3790
+ self.current_df = full_encoded_df # Keep this for compatibility
3791
+
3792
+ # Use the full encoded dataframe instead
3793
+ encoded_df = full_encoded_df
3794
+ else:
3795
+ # If we can't determine the encoded column, use the sampled version
3796
+ current_tab.current_df = encoded_df
3797
+ self.current_df = encoded_df # Keep this for compatibility
3798
+ else:
3799
+ # No encoding columns found, use the sampled version
3800
+ current_tab.current_df = encoded_df
3801
+ self.current_df = encoded_df # Keep this for compatibility
3802
+ except Exception as e:
3803
+ # If there's an error, fall back to the provided encoded_df
3804
+ print(f"Error applying encoding to full dataset: {e}")
3805
+ current_tab.current_df = encoded_df
3806
+ self.current_df = encoded_df # Keep this for compatibility
3807
+ else:
3808
+ # No sampling occurred, just use the provided encoded dataframe
3809
+ current_tab.current_df = encoded_df
3810
+ self.current_df = encoded_df # Keep this for compatibility
3811
+
3812
+ progress.setValue(90)
3813
+ QApplication.processEvents()
3814
+
3815
+ # Populate the results table with the new dataframe
3816
+ self.populate_table(encoded_df)
3817
+
3818
+ # Update results title to show this is encoded data
3819
+ current_tab.results_title.setText(f"ENCODED DATA")
3820
+
3821
+ progress.setValue(100)
3822
+ progress.close()
3823
+
3824
+ # Update status
3825
+ self.statusBar().showMessage(f'Applied one-hot encoding with {len(encoded_df.columns)} columns')
3826
+
3827
+ # Check if we should register this as a temporary table
3828
+ if len(encoded_df) >= 100: # Only worth registering as table if it's substantial
3829
+ try:
3830
+ # Generate a unique table name
3831
+ import time
3832
+ timestamp = int(time.time())
3833
+ table_name = f"encoded_data_{timestamp}"
3834
+
3835
+ # Register as a temporary table in the database manager
3836
+ self.db_manager.register_dataframe(encoded_df, table_name, "query_result")
3837
+
3838
+ # Add to tables list
3839
+ self.tables_list.add_table_item(table_name, "encoded data")
3840
+
3841
+ # Update completer
3842
+ self.update_completer()
3843
+
3844
+ # Notify user
3845
+ self.statusBar().showMessage(f'Applied one-hot encoding and registered as table "{table_name}"')
3846
+ except Exception as e:
3847
+ # Just log the error but continue - this is an optional enhancement
3848
+ print(f"Error registering encoded dataframe as table: {e}")
3849
+
3850
+ except Exception as e:
3851
+ QMessageBox.critical(self, "Error", f"Failed to apply encoded dataframe:\n\n{str(e)}")
3852
+ self.statusBar().showMessage(f'Error applying encoding: {str(e)}')
3853
+
3854
+ def get_current_query_tab(self):
3855
+ """Get the currently active tab if it's a query tab (has query_edit attribute)"""
3856
+ current_tab = self.get_current_tab()
3857
+ if current_tab and hasattr(current_tab, 'query_edit'):
3858
+ return current_tab
3859
+ return None
3860
+
3431
3861
  def main():
3432
3862
  # Parse command line arguments
3433
3863
  parser = argparse.ArgumentParser(description='SQL Shell - SQL Query Tool')