sqlshell 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlshell might be problematic. Click here for more details.
- sqlshell/README.md +5 -1
- sqlshell/__init__.py +35 -5
- sqlshell/create_test_data.py +29 -0
- sqlshell/db/__init__.py +2 -1
- sqlshell/db/database_manager.py +336 -23
- sqlshell/db/export_manager.py +188 -0
- sqlshell/editor_integration.py +127 -0
- sqlshell/execution_handler.py +421 -0
- sqlshell/main.py +784 -143
- sqlshell/query_tab.py +592 -7
- sqlshell/table_list.py +90 -1
- sqlshell/ui/filter_header.py +36 -1
- sqlshell/utils/profile_column.py +2515 -0
- sqlshell/utils/profile_distributions.py +613 -0
- sqlshell/utils/profile_foreign_keys.py +547 -0
- sqlshell/utils/profile_ohe.py +631 -0
- sqlshell-0.3.0.dist-info/METADATA +400 -0
- {sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/RECORD +21 -14
- {sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/WHEEL +1 -1
- sqlshell-0.2.2.dist-info/METADATA +0 -198
- {sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/entry_points.txt +0 -0
- {sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/top_level.txt +0 -0
sqlshell/main.py
CHANGED
|
@@ -18,18 +18,20 @@ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
|
|
|
18
18
|
QCompleter, QFrame, QToolButton, QSizePolicy, QTabWidget,
|
|
19
19
|
QStyleFactory, QToolBar, QStatusBar, QLineEdit, QMenu,
|
|
20
20
|
QCheckBox, QWidgetAction, QMenuBar, QInputDialog, QProgressDialog,
|
|
21
|
-
QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem
|
|
21
|
+
QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem,
|
|
22
|
+
QComboBox)
|
|
22
23
|
from PyQt6.QtCore import Qt, QAbstractTableModel, QRegularExpression, QRect, QSize, QStringListModel, QPropertyAnimation, QEasingCurve, QTimer, QPoint, QMimeData
|
|
23
24
|
from PyQt6.QtGui import QFont, QColor, QSyntaxHighlighter, QTextCharFormat, QPainter, QTextFormat, QTextCursor, QIcon, QPalette, QLinearGradient, QBrush, QPixmap, QPolygon, QPainterPath, QDrag
|
|
24
25
|
import numpy as np
|
|
25
26
|
from datetime import datetime
|
|
27
|
+
import psutil
|
|
26
28
|
|
|
27
29
|
from sqlshell import create_test_data
|
|
28
30
|
from sqlshell.splash_screen import AnimatedSplashScreen
|
|
29
31
|
from sqlshell.syntax_highlighter import SQLSyntaxHighlighter
|
|
30
32
|
from sqlshell.editor import LineNumberArea, SQLEditor
|
|
31
33
|
from sqlshell.ui import FilterHeader, BarChartDelegate
|
|
32
|
-
from sqlshell.db import DatabaseManager
|
|
34
|
+
from sqlshell.db import DatabaseManager, ExportManager
|
|
33
35
|
from sqlshell.query_tab import QueryTab
|
|
34
36
|
from sqlshell.styles import (get_application_stylesheet, get_tab_corner_stylesheet,
|
|
35
37
|
get_context_menu_stylesheet,
|
|
@@ -42,6 +44,7 @@ class SQLShell(QMainWindow):
|
|
|
42
44
|
def __init__(self):
|
|
43
45
|
super().__init__()
|
|
44
46
|
self.db_manager = DatabaseManager()
|
|
47
|
+
self.export_manager = ExportManager(self.db_manager)
|
|
45
48
|
self.current_df = None # Store the current DataFrame for filtering
|
|
46
49
|
self.filter_widgets = [] # Store filter line edits
|
|
47
50
|
self.current_project_file = None # Store the current project file path
|
|
@@ -188,6 +191,12 @@ class SQLShell(QMainWindow):
|
|
|
188
191
|
tables_header.setStyleSheet(get_tables_header_stylesheet())
|
|
189
192
|
left_layout.addWidget(tables_header)
|
|
190
193
|
|
|
194
|
+
# Tables info label
|
|
195
|
+
tables_info = QLabel("Right-click on tables to profile columns, analyze structure, and discover distributions. Select multiple tables to analyze foreign key relationships.")
|
|
196
|
+
tables_info.setWordWrap(True)
|
|
197
|
+
tables_info.setStyleSheet("color: #7FB3D5; font-size: 11px; margin-top: 2px; margin-bottom: 5px;")
|
|
198
|
+
left_layout.addWidget(tables_info)
|
|
199
|
+
|
|
191
200
|
# Tables list with custom styling
|
|
192
201
|
self.tables_list = DraggableTablesList(self)
|
|
193
202
|
self.tables_list.itemClicked.connect(self.show_table_preview)
|
|
@@ -210,6 +219,39 @@ class SQLShell(QMainWindow):
|
|
|
210
219
|
query_header.setObjectName("header_label")
|
|
211
220
|
right_layout.addWidget(query_header)
|
|
212
221
|
|
|
222
|
+
# Create a drop area for tables above the tab widget
|
|
223
|
+
self.tab_drop_area = QFrame()
|
|
224
|
+
self.tab_drop_area.setFixedHeight(30)
|
|
225
|
+
self.tab_drop_area.setObjectName("tab_drop_area")
|
|
226
|
+
|
|
227
|
+
# Add a label with hint text
|
|
228
|
+
drop_area_layout = QHBoxLayout(self.tab_drop_area)
|
|
229
|
+
drop_area_layout.setContentsMargins(10, 0, 10, 0)
|
|
230
|
+
self.drop_hint_label = QLabel("Drag tables here to create new query tabs")
|
|
231
|
+
self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
|
|
232
|
+
self.drop_hint_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
|
|
233
|
+
drop_area_layout.addWidget(self.drop_hint_label)
|
|
234
|
+
|
|
235
|
+
self.tab_drop_area.setStyleSheet("""
|
|
236
|
+
#tab_drop_area {
|
|
237
|
+
background-color: #f8f9fa;
|
|
238
|
+
border: 1px dashed #BDC3C7;
|
|
239
|
+
border-radius: 4px;
|
|
240
|
+
margin: 0 0 5px 0;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
#tab_drop_area:hover {
|
|
244
|
+
background-color: #E5F7FF;
|
|
245
|
+
border: 1px dashed #3498DB;
|
|
246
|
+
}
|
|
247
|
+
""")
|
|
248
|
+
self.tab_drop_area.setAcceptDrops(True)
|
|
249
|
+
self.tab_drop_area.dragEnterEvent = self.tab_area_drag_enter
|
|
250
|
+
self.tab_drop_area.dragMoveEvent = self.tab_area_drag_move
|
|
251
|
+
self.tab_drop_area.dragLeaveEvent = self.tab_area_drag_leave
|
|
252
|
+
self.tab_drop_area.dropEvent = self.tab_area_drop
|
|
253
|
+
right_layout.addWidget(self.tab_drop_area)
|
|
254
|
+
|
|
213
255
|
# Create tab widget for multiple queries
|
|
214
256
|
self.tab_widget = QTabWidget()
|
|
215
257
|
self.tab_widget.setTabsClosable(True)
|
|
@@ -231,6 +273,100 @@ class SQLShell(QMainWindow):
|
|
|
231
273
|
# Status bar
|
|
232
274
|
self.statusBar().showMessage('Ready | Ctrl+Enter: Execute Query | Ctrl+K: Toggle Comment | Ctrl+T: New Tab | Ctrl+Shift+O: Quick Access Files')
|
|
233
275
|
|
|
276
|
+
# Methods for handling drag and drop on the tab drop area
|
|
277
|
+
def tab_area_drag_enter(self, event):
    """React to a drag entering the tab drop area.

    Drags whose source is not the tables list are ignored. For drags from
    the tables list, the hint label is updated from the drag's text (a
    ", "-separated list of table names), the drop area is highlighted and
    enlarged, and the proposed action is accepted.
    """
    came_from_tables_list = event.source() == self.tables_list
    if not came_from_tables_list:
        event.ignore()
        return

    payload = event.mimeData()
    if payload.hasText():
        dragged = payload.text().split(", ")
        if len(dragged) == 1:
            hint = f"Release to create a new query tab for {dragged[0]}"
        else:
            hint = f"Release to create {len(dragged)} new query tabs"
        self.drop_hint_label.setText(hint)
        self.drop_hint_label.setStyleSheet("color: #3498db; font-size: 11px; font-weight: bold;")

    # NOTE(review): nesting was reconstructed from a flattened diff view —
    # confirm the highlight is meant to apply even when the drag has no text.
    highlighted_style = """
        #tab_drop_area {
            background-color: #E5F7FF;
            border: 2px dashed #3498DB;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """
    self.tab_drop_area.setStyleSheet(highlighted_style)
    self.tab_drop_area.setFixedHeight(40)
    event.acceptProposedAction()
|
|
305
|
+
|
|
306
|
+
def tab_area_drag_move(self, event):
    """Keep accepting a drag over the tab drop area while it comes from the tables list.

    Any drag with a different source is ignored.
    """
    came_from_tables_list = event.source() == self.tables_list
    if not came_from_tables_list:
        event.ignore()
        return
    event.acceptProposedAction()
|
|
313
|
+
|
|
314
|
+
def tab_area_drag_leave(self, event):
    """Restore the tab drop area to its idle look when a drag leaves it.

    Resets the frame stylesheet, the hint label's text and style, and the
    frame height. The event itself is not touched.
    """
    idle_style = """
        #tab_drop_area {
            background-color: #f8f9fa;
            border: 1px dashed #BDC3C7;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """
    self.tab_drop_area.setStyleSheet(idle_style)

    hint_label = self.drop_hint_label
    hint_label.setText("Drag tables here to create new query tabs")
    hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")

    self.tab_drop_area.setFixedHeight(30)
|
|
329
|
+
|
|
330
|
+
def tab_area_drop(self, event):
    """Create one query tab per table dropped onto the tab drop area.

    Drops whose source is not the tables list are ignored. Otherwise the
    drop text is split on ", " into table names; each table flagged in
    ``tables_needing_reload`` is reloaded first, then a new tab titled
    "Query <table>" is opened with a ``SELECT * FROM <table>`` query.
    Finally the drop area is restored to its idle look and the proposed
    action is accepted.
    """
    if not (event.source() == self.tables_list):
        event.ignore()
        return

    payload = event.mimeData()
    if payload.hasText():
        dropped_tables = payload.text().split(", ")

        for dropped in dropped_tables:
            # Reload stale tables immediately, without prompting.
            if dropped in self.tables_list.tables_needing_reload:
                self.reload_selected_table(dropped)

            created_tab = self.add_tab(f"Query {dropped}")
            created_tab.set_query_text(f"SELECT * FROM {dropped}")

        plural_suffix = 's' if len(dropped_tables) > 1 else ''
        self.statusBar().showMessage(
            f"Created new tab{plural_suffix} for {', '.join(dropped_tables)}"
        )

    # NOTE(review): nesting was reconstructed from a flattened diff view —
    # confirm the reset/accept is meant to run even when the drop has no text.
    idle_style = """
        #tab_drop_area {
            background-color: #f8f9fa;
            border: 1px dashed #BDC3C7;
            border-radius: 4px;
            margin: 0 0 5px 0;
        }
    """
    self.tab_drop_area.setStyleSheet(idle_style)
    self.drop_hint_label.setText("Drag tables here to create new query tabs")
    self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
    self.tab_drop_area.setFixedHeight(30)

    event.acceptProposedAction()
|
|
369
|
+
|
|
234
370
|
def create_tab_corner_widget(self):
|
|
235
371
|
"""Create a corner widget with a + button to add new tabs"""
|
|
236
372
|
corner_widget = QWidget()
|
|
@@ -284,25 +420,126 @@ class SQLShell(QMainWindow):
|
|
|
284
420
|
headers = [str(col) for col in df.columns]
|
|
285
421
|
current_tab.results_table.setHorizontalHeaderLabels(headers)
|
|
286
422
|
|
|
287
|
-
# Calculate chunk size
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
#
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
423
|
+
# Calculate dynamic chunk size based on available memory
|
|
424
|
+
import psutil
|
|
425
|
+
available_memory = psutil.virtual_memory().available
|
|
426
|
+
# Use 10% of available memory for chunking, with a minimum of 1000 rows
|
|
427
|
+
memory_per_row = df.memory_usage(deep=True).sum() / len(df)
|
|
428
|
+
CHUNK_SIZE = max(1000, min(10000, int(available_memory * 0.1 / memory_per_row)))
|
|
429
|
+
|
|
430
|
+
# Add pagination controls if dataset is large
|
|
431
|
+
if row_count > CHUNK_SIZE:
|
|
432
|
+
# Remove any existing pagination widgets
|
|
433
|
+
for i in reversed(range(current_tab.results_layout.count())):
|
|
434
|
+
item = current_tab.results_layout.itemAt(i)
|
|
435
|
+
widget = item.widget() if item is not None else None
|
|
436
|
+
if widget and widget.objectName() == "pagination_widget":
|
|
437
|
+
current_tab.results_layout.removeWidget(widget)
|
|
438
|
+
widget.setParent(None)
|
|
439
|
+
widget.deleteLater()
|
|
440
|
+
|
|
441
|
+
# Create pagination widget
|
|
442
|
+
pagination_widget = QWidget()
|
|
443
|
+
pagination_widget.setObjectName("pagination_widget")
|
|
444
|
+
pagination_layout = QHBoxLayout(pagination_widget)
|
|
445
|
+
|
|
446
|
+
# Add page size selector
|
|
447
|
+
page_size_label = QLabel("Rows per page:")
|
|
448
|
+
page_size_combo = QComboBox()
|
|
449
|
+
page_sizes = [1000, 5000, 10000, 50000, 100000]
|
|
450
|
+
page_size_combo.addItems([str(size) for size in page_sizes])
|
|
451
|
+
page_size_combo.setCurrentText(str(CHUNK_SIZE))
|
|
452
|
+
|
|
453
|
+
# Add navigation buttons
|
|
454
|
+
prev_btn = QPushButton("Previous")
|
|
455
|
+
next_btn = QPushButton("Next")
|
|
456
|
+
page_label = QLabel("Page 1")
|
|
457
|
+
|
|
458
|
+
# Add widgets to layout
|
|
459
|
+
pagination_layout.addWidget(page_size_label)
|
|
460
|
+
pagination_layout.addWidget(page_size_combo)
|
|
461
|
+
pagination_layout.addStretch()
|
|
462
|
+
pagination_layout.addWidget(prev_btn)
|
|
463
|
+
pagination_layout.addWidget(page_label)
|
|
464
|
+
pagination_layout.addWidget(next_btn)
|
|
465
|
+
|
|
466
|
+
# Add pagination widget to results layout
|
|
467
|
+
current_tab.results_layout.addWidget(pagination_widget)
|
|
468
|
+
|
|
469
|
+
# Store pagination state
|
|
470
|
+
current_tab.pagination_state = {
|
|
471
|
+
'current_page': 0,
|
|
472
|
+
'page_size': CHUNK_SIZE,
|
|
473
|
+
'total_pages': (row_count + CHUNK_SIZE - 1) // CHUNK_SIZE,
|
|
474
|
+
'page_label': page_label,
|
|
475
|
+
'prev_btn': prev_btn,
|
|
476
|
+
'next_btn': next_btn,
|
|
477
|
+
'page_size_combo': page_size_combo
|
|
478
|
+
}
|
|
294
479
|
|
|
295
|
-
#
|
|
296
|
-
|
|
480
|
+
# Connect pagination signals
|
|
481
|
+
def update_page_size(size):
|
|
482
|
+
current_tab.pagination_state['page_size'] = int(size)
|
|
483
|
+
current_tab.pagination_state['total_pages'] = (row_count + int(size) - 1) // int(size)
|
|
484
|
+
current_tab.pagination_state['current_page'] = 0
|
|
485
|
+
load_current_page()
|
|
486
|
+
|
|
487
|
+
def load_current_page():
|
|
488
|
+
state = current_tab.pagination_state
|
|
489
|
+
start_idx = state['current_page'] * state['page_size']
|
|
490
|
+
end_idx = min(start_idx + state['page_size'], row_count)
|
|
491
|
+
|
|
492
|
+
# Clear existing rows
|
|
493
|
+
current_tab.results_table.setRowCount(0)
|
|
494
|
+
|
|
495
|
+
# Load current page
|
|
496
|
+
chunk = df.iloc[start_idx:end_idx]
|
|
497
|
+
current_tab.results_table.setRowCount(len(chunk))
|
|
498
|
+
|
|
499
|
+
for row_idx, (_, row_data) in enumerate(chunk.iterrows()):
|
|
500
|
+
for col_idx, value in enumerate(row_data):
|
|
501
|
+
formatted_value = self.format_value(value)
|
|
502
|
+
item = QTableWidgetItem(formatted_value)
|
|
503
|
+
current_tab.results_table.setItem(row_idx, col_idx, item)
|
|
504
|
+
|
|
505
|
+
# Update pagination controls
|
|
506
|
+
state['page_label'].setText(f"Page {state['current_page'] + 1} of {state['total_pages']}")
|
|
507
|
+
state['prev_btn'].setEnabled(state['current_page'] > 0)
|
|
508
|
+
state['next_btn'].setEnabled(state['current_page'] < state['total_pages'] - 1)
|
|
509
|
+
|
|
510
|
+
# Process events to keep UI responsive
|
|
511
|
+
QApplication.processEvents()
|
|
512
|
+
|
|
513
|
+
def next_page():
|
|
514
|
+
if current_tab.pagination_state['current_page'] < current_tab.pagination_state['total_pages'] - 1:
|
|
515
|
+
current_tab.pagination_state['current_page'] += 1
|
|
516
|
+
load_current_page()
|
|
517
|
+
|
|
518
|
+
def prev_page():
|
|
519
|
+
if current_tab.pagination_state['current_page'] > 0:
|
|
520
|
+
current_tab.pagination_state['current_page'] -= 1
|
|
521
|
+
load_current_page()
|
|
522
|
+
|
|
523
|
+
# Connect signals
|
|
524
|
+
page_size_combo.currentTextChanged.connect(update_page_size)
|
|
525
|
+
next_btn.clicked.connect(next_page)
|
|
526
|
+
prev_btn.clicked.connect(prev_page)
|
|
527
|
+
|
|
528
|
+
# Load first page
|
|
529
|
+
load_current_page()
|
|
530
|
+
else:
|
|
531
|
+
# For smaller datasets, load all at once
|
|
532
|
+
current_tab.results_table.setRowCount(row_count)
|
|
297
533
|
|
|
298
|
-
for row_idx, (_, row_data) in enumerate(
|
|
534
|
+
for row_idx, (_, row_data) in enumerate(df.iterrows()):
|
|
299
535
|
for col_idx, value in enumerate(row_data):
|
|
300
536
|
formatted_value = self.format_value(value)
|
|
301
537
|
item = QTableWidgetItem(formatted_value)
|
|
302
538
|
current_tab.results_table.setItem(row_idx, col_idx, item)
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
539
|
+
|
|
540
|
+
# Process events periodically to keep UI responsive
|
|
541
|
+
if row_idx % 1000 == 0:
|
|
542
|
+
QApplication.processEvents()
|
|
306
543
|
|
|
307
544
|
# Optimize column widths
|
|
308
545
|
current_tab.results_table.resizeColumnsToContents()
|
|
@@ -630,6 +867,31 @@ class SQLShell(QMainWindow):
|
|
|
630
867
|
QMessageBox.warning(self, "Empty Query", "Please enter a SQL query to execute.")
|
|
631
868
|
return
|
|
632
869
|
|
|
870
|
+
# Check if the query references any tables that need to be loaded
|
|
871
|
+
referenced_tables = self.extract_table_names_from_query(query)
|
|
872
|
+
tables_to_load = [table for table in referenced_tables if table in self.tables_list.tables_needing_reload]
|
|
873
|
+
|
|
874
|
+
# Load any tables that need to be loaded
|
|
875
|
+
if tables_to_load:
|
|
876
|
+
progress = QProgressDialog(f"Loading tables...", "Cancel", 0, len(tables_to_load), self)
|
|
877
|
+
progress.setWindowTitle("Loading Tables")
|
|
878
|
+
progress.setWindowModality(Qt.WindowModality.WindowModal)
|
|
879
|
+
progress.show()
|
|
880
|
+
|
|
881
|
+
for i, table_name in enumerate(tables_to_load):
|
|
882
|
+
if progress.wasCanceled():
|
|
883
|
+
self.statusBar().showMessage("Query canceled: table loading was interrupted")
|
|
884
|
+
return
|
|
885
|
+
|
|
886
|
+
progress.setLabelText(f"Loading table: {table_name}")
|
|
887
|
+
progress.setValue(i)
|
|
888
|
+
QApplication.processEvents()
|
|
889
|
+
|
|
890
|
+
self.reload_selected_table(table_name)
|
|
891
|
+
|
|
892
|
+
progress.setValue(len(tables_to_load))
|
|
893
|
+
progress.close()
|
|
894
|
+
|
|
633
895
|
start_time = datetime.now()
|
|
634
896
|
|
|
635
897
|
try:
|
|
@@ -666,6 +928,57 @@ class SQLShell(QMainWindow):
|
|
|
666
928
|
QMessageBox.critical(self, "Unexpected Error",
|
|
667
929
|
f"An unexpected error occurred:\n\n{str(e)}")
|
|
668
930
|
self.statusBar().showMessage("Query execution failed")
|
|
931
|
+
|
|
932
|
+
def extract_table_names_from_query(self, query):
    """Extract table names referenced by a SQL query using regex heuristics.

    Comments are stripped first so names inside them are not reported.
    Identifiers may be bare or wrapped in double quotes, square brackets
    or backticks, and may be qualified (``schema.table``). For a qualified
    name both the full dotted name and its last component are returned.
    Names introduced by ``WITH <name> AS`` are reported as well.

    This is a heuristic scan, not a SQL parser: string literals are not
    skipped and only the first table of a comma-separated FROM list is seen.

    Args:
        query: SQL text to scan.

    Returns:
        A set of lower-cased table names.
    """
    import re

    # Upper-case once so keyword matching is case-insensitive.
    sql = query.upper()

    # Strip comments to avoid matching patterns inside them.
    sql = re.sub(r'--.*?$', '', sql, flags=re.MULTILINE)
    sql = re.sub(r'/\*.*?\*/', '', sql, flags=re.DOTALL)

    quote_chars = '"[]`\''
    # One identifier part, optionally quoted; a name is one or more parts
    # joined by dots so that schema-qualified tables are captured whole.
    part = r'["\[`]?\w+["\]`]?'
    name = rf'({part}(?:\.{part})*)'

    # Keyword prefixes that are directly followed by a table name.
    prefixes = (
        r'FROM',                                                        # FROM clause
        r'JOIN',                                                        # JOIN clause
        r'UPDATE',                                                      # UPDATE statement
        r'INSERT\s+INTO',                                               # INSERT statement
        r'DELETE\s+FROM',                                               # DELETE statement
        r'CREATE\s+(?:TEMP|TEMPORARY)?\s*TABLE(?:\s+IF\s+NOT\s+EXISTS)?',  # CREATE TABLE
        r'DROP\s+TABLE(?:\s+IF\s+EXISTS)?',                             # DROP TABLE
        r'ALTER\s+TABLE',                                               # ALTER TABLE
        r'MERGE\s+INTO',                                                # MERGE statement
    )
    # \b keeps identifiers that merely end in a keyword (e.g. DATEFROM)
    # from being treated as that keyword.
    patterns = [rf'\b{prefix}\s+{name}' for prefix in prefixes]
    patterns.append(r'\bWITH\s+(\w+)\s+AS')  # Common Table Expressions

    sql_keywords = {
        'SELECT', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'OFFSET',
        'UNION', 'INTERSECT', 'EXCEPT', 'WITH', 'AS', 'ON', 'USING',
    }

    tables = set()
    for pattern in patterns:
        for match in re.finditer(pattern, sql):
            parts = [piece.strip(quote_chars) for piece in match.group(1).split('.')]
            full_name = '.'.join(parts)

            # A keyword here means the regex ran into SQL syntax, not a table.
            if full_name in sql_keywords:
                continue

            # Lower-case for case-insensitive comparison by the caller.
            tables.add(full_name.lower())
            # For schema.table also report just the table part.
            tables.add(parts[-1].lower())

    return tables
|
|
669
982
|
|
|
670
983
|
def _update_query_history(self, query):
|
|
671
984
|
"""Update query history and track term usage for improved autocompletion"""
|
|
@@ -808,25 +1121,32 @@ class SQLShell(QMainWindow):
|
|
|
808
1121
|
# Generate test data
|
|
809
1122
|
sales_df = create_test_data.create_sales_data()
|
|
810
1123
|
customer_df = create_test_data.create_customer_data()
|
|
1124
|
+
large_customer_df = create_test_data.create_large_customer_data()
|
|
811
1125
|
product_df = create_test_data.create_product_data()
|
|
812
1126
|
large_numbers_df = create_test_data.create_large_numbers_data()
|
|
1127
|
+
california_housing_df = create_test_data.create_california_housing_data()
|
|
813
1128
|
|
|
814
1129
|
# Save test data to temporary directory
|
|
815
1130
|
sales_path = os.path.join(temp_dir, 'sample_sales_data.xlsx')
|
|
816
1131
|
customer_path = os.path.join(temp_dir, 'customer_data.parquet')
|
|
817
1132
|
product_path = os.path.join(temp_dir, 'product_catalog.xlsx')
|
|
818
1133
|
large_numbers_path = os.path.join(temp_dir, 'large_numbers.xlsx')
|
|
819
|
-
|
|
1134
|
+
large_customer_path = os.path.join(temp_dir, 'large_customer_data.parquet')
|
|
1135
|
+
california_housing_path = os.path.join(temp_dir, 'california_housing_data.parquet')
|
|
820
1136
|
sales_df.to_excel(sales_path, index=False)
|
|
821
1137
|
customer_df.to_parquet(customer_path, index=False)
|
|
822
1138
|
product_df.to_excel(product_path, index=False)
|
|
823
1139
|
large_numbers_df.to_excel(large_numbers_path, index=False)
|
|
824
|
-
|
|
1140
|
+
large_customer_df.to_parquet(large_customer_path, index=False)
|
|
1141
|
+
california_housing_df.to_parquet(california_housing_path, index=False)
|
|
1142
|
+
|
|
825
1143
|
# Register the tables in the database manager
|
|
826
1144
|
self.db_manager.register_dataframe(sales_df, 'sample_sales_data', sales_path)
|
|
827
1145
|
self.db_manager.register_dataframe(product_df, 'product_catalog', product_path)
|
|
828
1146
|
self.db_manager.register_dataframe(customer_df, 'customer_data', customer_path)
|
|
829
1147
|
self.db_manager.register_dataframe(large_numbers_df, 'large_numbers', large_numbers_path)
|
|
1148
|
+
self.db_manager.register_dataframe(large_customer_df, 'large_customer_data', large_customer_path)
|
|
1149
|
+
self.db_manager.register_dataframe(california_housing_df, 'california_housing_data', california_housing_path)
|
|
830
1150
|
|
|
831
1151
|
# Update UI
|
|
832
1152
|
self.tables_list.clear()
|
|
@@ -893,28 +1213,14 @@ LIMIT 10
|
|
|
893
1213
|
self.statusBar().showMessage('Exporting data to Excel...')
|
|
894
1214
|
|
|
895
1215
|
# Convert table data to DataFrame
|
|
896
|
-
df = self.
|
|
897
|
-
df
|
|
1216
|
+
df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
|
|
1217
|
+
if df is None:
|
|
1218
|
+
raise Exception("Failed to convert table data to DataFrame")
|
|
898
1219
|
|
|
899
|
-
#
|
|
900
|
-
|
|
901
|
-
table_name = self.db_manager.sanitize_table_name(base_name)
|
|
1220
|
+
# Export using ExportManager
|
|
1221
|
+
table_name, metadata = self.export_manager.export_to_excel(df, file_name)
|
|
902
1222
|
|
|
903
|
-
#
|
|
904
|
-
original_name = table_name
|
|
905
|
-
counter = 1
|
|
906
|
-
while table_name in self.db_manager.loaded_tables:
|
|
907
|
-
table_name = f"{original_name}_{counter}"
|
|
908
|
-
counter += 1
|
|
909
|
-
|
|
910
|
-
# Register the table in the database manager
|
|
911
|
-
self.db_manager.register_dataframe(df, table_name, file_name)
|
|
912
|
-
|
|
913
|
-
# Update tracking
|
|
914
|
-
self.db_manager.loaded_tables[table_name] = file_name
|
|
915
|
-
self.db_manager.table_columns[table_name] = df.columns.tolist()
|
|
916
|
-
|
|
917
|
-
# Update UI using new method
|
|
1223
|
+
# Update UI
|
|
918
1224
|
self.tables_list.add_table_item(table_name, os.path.basename(file_name))
|
|
919
1225
|
self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
|
|
920
1226
|
|
|
@@ -951,28 +1257,14 @@ LIMIT 10
|
|
|
951
1257
|
self.statusBar().showMessage('Exporting data to Parquet...')
|
|
952
1258
|
|
|
953
1259
|
# Convert table data to DataFrame
|
|
954
|
-
df = self.
|
|
955
|
-
df
|
|
956
|
-
|
|
957
|
-
# Generate table name from file name
|
|
958
|
-
base_name = os.path.splitext(os.path.basename(file_name))[0]
|
|
959
|
-
table_name = self.db_manager.sanitize_table_name(base_name)
|
|
960
|
-
|
|
961
|
-
# Ensure unique table name
|
|
962
|
-
original_name = table_name
|
|
963
|
-
counter = 1
|
|
964
|
-
while table_name in self.db_manager.loaded_tables:
|
|
965
|
-
table_name = f"{original_name}_{counter}"
|
|
966
|
-
counter += 1
|
|
1260
|
+
df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
|
|
1261
|
+
if df is None:
|
|
1262
|
+
raise Exception("Failed to convert table data to DataFrame")
|
|
967
1263
|
|
|
968
|
-
#
|
|
969
|
-
self.
|
|
1264
|
+
# Export using ExportManager
|
|
1265
|
+
table_name, metadata = self.export_manager.export_to_parquet(df, file_name)
|
|
970
1266
|
|
|
971
|
-
# Update
|
|
972
|
-
self.db_manager.loaded_tables[table_name] = file_name
|
|
973
|
-
self.db_manager.table_columns[table_name] = df.columns.tolist()
|
|
974
|
-
|
|
975
|
-
# Update UI using new method
|
|
1267
|
+
# Update UI
|
|
976
1268
|
self.tables_list.add_table_item(table_name, os.path.basename(file_name))
|
|
977
1269
|
self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
|
|
978
1270
|
|
|
@@ -992,94 +1284,10 @@ LIMIT 10
|
|
|
992
1284
|
|
|
993
1285
|
def get_table_data_as_dataframe(self):
|
|
994
1286
|
"""Helper function to convert table widget data to a DataFrame with proper data types"""
|
|
995
|
-
# Get the current tab
|
|
996
1287
|
current_tab = self.get_current_tab()
|
|
997
1288
|
if not current_tab:
|
|
998
1289
|
return pd.DataFrame()
|
|
999
|
-
|
|
1000
|
-
headers = [current_tab.results_table.horizontalHeaderItem(i).text() for i in range(current_tab.results_table.columnCount())]
|
|
1001
|
-
data = []
|
|
1002
|
-
for row in range(current_tab.results_table.rowCount()):
|
|
1003
|
-
row_data = []
|
|
1004
|
-
for column in range(current_tab.results_table.columnCount()):
|
|
1005
|
-
item = current_tab.results_table.item(row, column)
|
|
1006
|
-
row_data.append(item.text() if item else '')
|
|
1007
|
-
data.append(row_data)
|
|
1008
|
-
|
|
1009
|
-
# Create DataFrame from raw string data
|
|
1010
|
-
df_raw = pd.DataFrame(data, columns=headers)
|
|
1011
|
-
|
|
1012
|
-
# Try to use the original dataframe's dtypes if available
|
|
1013
|
-
if hasattr(current_tab, 'current_df') and current_tab.current_df is not None:
|
|
1014
|
-
original_df = current_tab.current_df
|
|
1015
|
-
# Since we might have filtered rows, we can't just return the original DataFrame
|
|
1016
|
-
# But we can use its column types to convert our string data appropriately
|
|
1017
|
-
|
|
1018
|
-
# Create a new DataFrame with appropriate types
|
|
1019
|
-
df_typed = pd.DataFrame()
|
|
1020
|
-
|
|
1021
|
-
for col in df_raw.columns:
|
|
1022
|
-
if col in original_df.columns:
|
|
1023
|
-
# Get the original column type
|
|
1024
|
-
orig_type = original_df[col].dtype
|
|
1025
|
-
|
|
1026
|
-
# Special handling for different data types
|
|
1027
|
-
if pd.api.types.is_numeric_dtype(orig_type):
|
|
1028
|
-
# Handle numeric columns (int or float)
|
|
1029
|
-
try:
|
|
1030
|
-
# First try to convert to numeric type
|
|
1031
|
-
# Remove commas used for thousands separators
|
|
1032
|
-
numeric_col = pd.to_numeric(df_raw[col].str.replace(',', '').replace('NULL', np.nan))
|
|
1033
|
-
df_typed[col] = numeric_col
|
|
1034
|
-
except:
|
|
1035
|
-
# If that fails, keep the original string
|
|
1036
|
-
df_typed[col] = df_raw[col]
|
|
1037
|
-
elif pd.api.types.is_datetime64_dtype(orig_type):
|
|
1038
|
-
# Handle datetime columns
|
|
1039
|
-
try:
|
|
1040
|
-
df_typed[col] = pd.to_datetime(df_raw[col].replace('NULL', np.nan))
|
|
1041
|
-
except:
|
|
1042
|
-
df_typed[col] = df_raw[col]
|
|
1043
|
-
elif pd.api.types.is_bool_dtype(orig_type):
|
|
1044
|
-
# Handle boolean columns
|
|
1045
|
-
try:
|
|
1046
|
-
df_typed[col] = df_raw[col].map({'True': True, 'False': False}).replace('NULL', np.nan)
|
|
1047
|
-
except:
|
|
1048
|
-
df_typed[col] = df_raw[col]
|
|
1049
|
-
else:
|
|
1050
|
-
# For other types, keep as is
|
|
1051
|
-
df_typed[col] = df_raw[col]
|
|
1052
|
-
else:
|
|
1053
|
-
# For columns not in the original dataframe, infer type
|
|
1054
|
-
df_typed[col] = df_raw[col]
|
|
1055
|
-
|
|
1056
|
-
return df_typed
|
|
1057
|
-
|
|
1058
|
-
else:
|
|
1059
|
-
# If we don't have the original dataframe, try to infer types
|
|
1060
|
-
# First replace 'NULL' with actual NaN
|
|
1061
|
-
df_raw.replace('NULL', np.nan, inplace=True)
|
|
1062
|
-
|
|
1063
|
-
# Try to convert each column to numeric if possible
|
|
1064
|
-
for col in df_raw.columns:
|
|
1065
|
-
try:
|
|
1066
|
-
# First try to convert to numeric by removing commas
|
|
1067
|
-
df_raw[col] = pd.to_numeric(df_raw[col].str.replace(',', ''))
|
|
1068
|
-
except:
|
|
1069
|
-
# If that fails, try to convert to datetime
|
|
1070
|
-
try:
|
|
1071
|
-
df_raw[col] = pd.to_datetime(df_raw[col])
|
|
1072
|
-
except:
|
|
1073
|
-
# If both numeric and datetime conversions fail,
|
|
1074
|
-
# try boolean conversion for True/False strings
|
|
1075
|
-
try:
|
|
1076
|
-
if df_raw[col].dropna().isin(['True', 'False']).all():
|
|
1077
|
-
df_raw[col] = df_raw[col].map({'True': True, 'False': False})
|
|
1078
|
-
except:
|
|
1079
|
-
# Otherwise, keep as is
|
|
1080
|
-
pass
|
|
1081
|
-
|
|
1082
|
-
return df_raw
|
|
1290
|
+
return self.export_manager.convert_table_to_dataframe(current_tab.results_table)
|
|
1083
1291
|
|
|
1084
1292
|
def keyPressEvent(self, event):
|
|
1085
1293
|
"""Handle global keyboard shortcuts"""
|
|
@@ -1203,6 +1411,30 @@ LIMIT 10
|
|
|
1203
1411
|
|
|
1204
1412
|
def show_tables_context_menu(self, position):
|
|
1205
1413
|
"""Show context menu for tables list"""
|
|
1414
|
+
# Check if we have multiple selected items
|
|
1415
|
+
selected_items = self.tables_list.selectedItems()
|
|
1416
|
+
if len(selected_items) > 1:
|
|
1417
|
+
# Filter out any folder items from selection
|
|
1418
|
+
table_items = [item for item in selected_items if not self.tables_list.is_folder_item(item)]
|
|
1419
|
+
|
|
1420
|
+
if len(table_items) > 1:
|
|
1421
|
+
# Create context menu for multiple table selection
|
|
1422
|
+
context_menu = QMenu(self)
|
|
1423
|
+
context_menu.setStyleSheet(get_context_menu_stylesheet())
|
|
1424
|
+
|
|
1425
|
+
# Add foreign key analysis option
|
|
1426
|
+
analyze_fk_action = context_menu.addAction(f"Analyze Foreign Keys Between {len(table_items)} Tables")
|
|
1427
|
+
analyze_fk_action.setIcon(QIcon.fromTheme("system-search"))
|
|
1428
|
+
|
|
1429
|
+
# Show menu and get selected action
|
|
1430
|
+
action = context_menu.exec(self.tables_list.mapToGlobal(position))
|
|
1431
|
+
|
|
1432
|
+
if action == analyze_fk_action:
|
|
1433
|
+
self.analyze_foreign_keys_between_tables(table_items)
|
|
1434
|
+
|
|
1435
|
+
return
|
|
1436
|
+
|
|
1437
|
+
# Single item selection (original functionality)
|
|
1206
1438
|
item = self.tables_list.itemAt(position)
|
|
1207
1439
|
|
|
1208
1440
|
# If no item or it's a folder, let the tree widget handle it
|
|
@@ -1226,6 +1458,12 @@ LIMIT 10
|
|
|
1226
1458
|
# Add menu actions
|
|
1227
1459
|
select_from_action = context_menu.addAction("Select from")
|
|
1228
1460
|
add_to_editor_action = context_menu.addAction("Just add to editor")
|
|
1461
|
+
select_from_new_tab_action = context_menu.addAction("Select From in New Tab")
|
|
1462
|
+
|
|
1463
|
+
# Add copy path actions
|
|
1464
|
+
context_menu.addSeparator()
|
|
1465
|
+
copy_path_action = context_menu.addAction("Copy Path")
|
|
1466
|
+
copy_relative_path_action = context_menu.addAction("Copy Relative Path")
|
|
1229
1467
|
|
|
1230
1468
|
# Add entropy profiler action
|
|
1231
1469
|
context_menu.addSeparator()
|
|
@@ -1236,6 +1474,10 @@ LIMIT 10
|
|
|
1236
1474
|
profile_table_action = context_menu.addAction("Profile Table Structure")
|
|
1237
1475
|
profile_table_action.setIcon(QIcon.fromTheme("edit-find"))
|
|
1238
1476
|
|
|
1477
|
+
# Add distributions profiler action
|
|
1478
|
+
profile_distributions_action = context_menu.addAction("Analyze Column Distributions")
|
|
1479
|
+
profile_distributions_action.setIcon(QIcon.fromTheme("accessories-calculator"))
|
|
1480
|
+
|
|
1239
1481
|
# Check if table needs reloading and add appropriate action
|
|
1240
1482
|
if table_name in self.tables_list.tables_needing_reload:
|
|
1241
1483
|
reload_action = context_menu.addAction("Reload Table")
|
|
@@ -1286,6 +1528,11 @@ LIMIT 10
|
|
|
1286
1528
|
cursor = current_tab.query_edit.textCursor()
|
|
1287
1529
|
cursor.insertText(table_name)
|
|
1288
1530
|
current_tab.query_edit.setFocus()
|
|
1531
|
+
elif action == select_from_new_tab_action:
|
|
1532
|
+
# Create a new tab with the selected table
|
|
1533
|
+
new_tab = self.add_tab(title=table_name)
|
|
1534
|
+
new_tab.set_query_text(f"SELECT * FROM {table_name}")
|
|
1535
|
+
new_tab.query_edit.setFocus()
|
|
1289
1536
|
elif action == reload_action:
|
|
1290
1537
|
self.reload_selected_table(table_name)
|
|
1291
1538
|
elif action == analyze_entropy_action:
|
|
@@ -1294,6 +1541,9 @@ LIMIT 10
|
|
|
1294
1541
|
elif action == profile_table_action:
|
|
1295
1542
|
# Call the table profile method
|
|
1296
1543
|
self.profile_table_structure(table_name)
|
|
1544
|
+
elif action == profile_distributions_action:
|
|
1545
|
+
# Call the distributions profile method
|
|
1546
|
+
self.profile_distributions(table_name)
|
|
1297
1547
|
elif action == rename_action:
|
|
1298
1548
|
# Show rename dialog
|
|
1299
1549
|
new_name, ok = QInputDialog.getText(
|
|
@@ -1349,6 +1599,91 @@ LIMIT 10
|
|
|
1349
1599
|
if target_folder:
|
|
1350
1600
|
self.tables_list.move_item_to_folder(item, target_folder)
|
|
1351
1601
|
self.statusBar().showMessage(f'Moved table "{table_name}" to folder "{target_folder.text(0)}"')
|
|
1602
|
+
elif action == copy_path_action:
|
|
1603
|
+
# Get the full path from the table source
|
|
1604
|
+
if table_name in self.db_manager.loaded_tables:
|
|
1605
|
+
path = self.db_manager.loaded_tables[table_name]
|
|
1606
|
+
if path != 'database': # Only copy if it's a file path
|
|
1607
|
+
QApplication.clipboard().setText(path)
|
|
1608
|
+
self.statusBar().showMessage(f"Copied full path to clipboard")
|
|
1609
|
+
elif action == copy_relative_path_action:
|
|
1610
|
+
# Get the relative path from the table source
|
|
1611
|
+
if table_name in self.db_manager.loaded_tables:
|
|
1612
|
+
path = self.db_manager.loaded_tables[table_name]
|
|
1613
|
+
if path != 'database': # Only copy if it's a file path
|
|
1614
|
+
try:
|
|
1615
|
+
rel_path = os.path.relpath(path)
|
|
1616
|
+
QApplication.clipboard().setText(rel_path)
|
|
1617
|
+
self.statusBar().showMessage(f"Copied relative path to clipboard")
|
|
1618
|
+
except ValueError:
|
|
1619
|
+
self.statusBar().showMessage("Could not determine relative path")
|
|
1620
|
+
|
|
1621
|
+
def analyze_foreign_keys_between_tables(self, table_items):
    """Analyze foreign key relationships between selected tables.

    Resolves each selected tree item to a table name, reloads the tables
    that are flagged in ``tables_list.tables_needing_reload``, loads every
    table into a DataFrame (tables above 10,000 rows are sampled with a
    fixed seed) and passes the result to ``visualize_foreign_keys``.

    Tables that are empty or fail to load are skipped with a warning. Only
    the names of the tables that were actually loaded are forwarded, so the
    name list always has the same length and order as the frame list.

    Args:
        table_items: Items selected in the tables list. Items that do not
            resolve to a table name are ignored.
    """
    try:
        # Show a loading indicator
        table_count = len(table_items)
        self.statusBar().showMessage(
            f'Analyzing foreign key relationships between {table_count} tables...')

        # Extract table names from selected items
        table_names = []
        for item in table_items:
            table_name = self.tables_list.get_table_name_from_item(item)
            if table_name:
                table_names.append(table_name)

        if len(table_names) < 2:
            QMessageBox.warning(self, "Not Enough Tables",
                                "At least two tables are required for foreign key analysis.")
            return

        # Snapshot the stale tables first: reloading may change the
        # "needs reload" collection while we iterate.
        tables_to_reload = [tn for tn in table_names
                            if tn in self.tables_list.tables_needing_reload]
        for table_name in tables_to_reload:
            self.reload_selected_table(table_name)

        # Fetch data for each table, keeping frames and names in lockstep
        dfs = []
        loaded_names = []
        for table_name in table_names:
            try:
                query = f'SELECT * FROM "{table_name}"'
                df = self.db_manager.execute_query(query)

                if df is None or df.empty:
                    QMessageBox.warning(self, "Empty Table",
                                        f"Table '{table_name}' has no data and will be skipped.")
                    continue

                # Sample large tables to improve performance
                if len(df) > 10000:
                    self.statusBar().showMessage(
                        f'Sampling {table_name} (using 10,000 rows from {len(df)} total)...')
                    df = df.sample(n=10000, random_state=42)

                dfs.append(df)
                loaded_names.append(table_name)
            except Exception as e:
                QMessageBox.warning(self, "Table Error",
                                    f"Error loading table '{table_name}': {str(e)}\nThis table will be skipped.")

        if len(dfs) < 2:
            QMessageBox.warning(self, "Not Enough Tables",
                                "At least two tables with data are required for foreign key analysis.")
            return

        # Import the foreign key analyzer
        from sqlshell.utils.profile_foreign_keys import visualize_foreign_keys

        # Create and show the visualization.
        # NOTE(review): the visualizer presumably pairs frames and names by
        # position - confirm against profile_foreign_keys. Passing the
        # unfiltered name list would mislabel frames after a skipped table.
        self.statusBar().showMessage(
            f'Analyzing foreign key relationships between {len(dfs)} tables...')
        vis = visualize_foreign_keys(dfs, loaded_names)

        # Store a reference to prevent garbage collection
        self._fk_analysis_window = vis

        self.statusBar().showMessage(f'Foreign key analysis complete for {len(dfs)} tables')

    except Exception as e:
        QMessageBox.critical(self, "Analysis Error", f"Error analyzing foreign keys:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing foreign keys: {str(e)}')
|
|
1352
1687
|
|
|
1353
1688
|
def reload_selected_table(self, table_name=None):
|
|
1354
1689
|
"""Reload the data for a table from its source file"""
|
|
@@ -2616,6 +2951,20 @@ LIMIT 10
|
|
|
2616
2951
|
self.showMaximized()
|
|
2617
2952
|
self.was_maximized = True
|
|
2618
2953
|
|
|
2954
|
+
def get_selected_table(self):
    """Return the name of the one table selected in the tables list.

    Folder items are ignored. Returns None when the window has no
    ``tables_list`` attribute, or when the selection does not contain
    exactly one table item.
    """
    if hasattr(self, 'tables_list'):
        tree = self.tables_list

        # Keep only real table entries from the current selection
        chosen = []
        for entry in tree.selectedItems():
            if tree.is_folder_item(entry):
                continue
            chosen.append(entry)

        # A multi-selection (or none at all) is deliberately not resolved
        if len(chosen) == 1:
            return tree.get_table_name_from_item(chosen[0])

    return None
|
|
2967
|
+
|
|
2619
2968
|
def change_zoom(self, factor):
|
|
2620
2969
|
"""Change the zoom level of the application by adjusting font sizes"""
|
|
2621
2970
|
try:
|
|
@@ -3195,6 +3544,12 @@ LIMIT 10
|
|
|
3195
3544
|
df = self.db_manager.execute_query(query)
|
|
3196
3545
|
|
|
3197
3546
|
if df is not None and not df.empty:
|
|
3547
|
+
# Sample the data if it's larger than 10,000 rows
|
|
3548
|
+
row_count = len(df)
|
|
3549
|
+
if row_count > 10000:
|
|
3550
|
+
self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {row_count} total)...')
|
|
3551
|
+
df = df.sample(n=10000, random_state=42)
|
|
3552
|
+
|
|
3198
3553
|
# Import the key profiler
|
|
3199
3554
|
from sqlshell.utils.profile_keys import visualize_profile
|
|
3200
3555
|
|
|
@@ -3205,7 +3560,10 @@ LIMIT 10
|
|
|
3205
3560
|
# Store a reference to prevent garbage collection
|
|
3206
3561
|
self._keys_profile_window = vis
|
|
3207
3562
|
|
|
3208
|
-
|
|
3563
|
+
if row_count > 10000:
|
|
3564
|
+
self.statusBar().showMessage(f'Table structure profile generated for "{table_name}" (sampled 10,000 rows from {row_count})')
|
|
3565
|
+
else:
|
|
3566
|
+
self.statusBar().showMessage(f'Table structure profile generated for "{table_name}"')
|
|
3209
3567
|
else:
|
|
3210
3568
|
QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
|
|
3211
3569
|
self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
|
|
@@ -3216,6 +3574,289 @@ LIMIT 10
|
|
|
3216
3574
|
except Exception as e:
|
|
3217
3575
|
QMessageBox.critical(self, "Profile Error", f"Error profiling table structure:\n\n{str(e)}")
|
|
3218
3576
|
self.statusBar().showMessage(f'Error profiling table: {str(e)}')
|
|
3577
|
+
|
|
3578
|
+
def profile_distributions(self, table_name):
    """Profile the column distributions of a loaded table.

    The table is reloaded first when it is flagged as needing a reload,
    then read in full with a ``SELECT *`` query. Results above 10,000 rows
    are sampled with a fixed seed before being handed to the distribution
    visualizer. The returned window is kept on ``self._distributions_window``.

    Args:
        table_name: Name of the table to analyze; it must be present in
            ``db_manager.loaded_tables``.
    """
    try:
        # Loading indicator
        self.statusBar().showMessage(f'Analyzing column distributions for "{table_name}"...')

        # Unknown table: report and stop
        if table_name not in self.db_manager.loaded_tables:
            QMessageBox.warning(self, "Table Not Found", f"Table '{table_name}' not found.")
            self.statusBar().showMessage(f'Table "{table_name}" not found')
            return

        # Refresh stale data before reading it
        if table_name in self.tables_list.tables_needing_reload:
            self.reload_selected_table(table_name)

        frame = self.db_manager.execute_query(f'SELECT * FROM "{table_name}"')

        # Nothing to profile: report and stop
        if frame is None or frame.empty:
            QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
            self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
            return

        # Cap the amount of data that goes into the profiler
        total_rows = len(frame)
        was_sampled = total_rows > 10000
        if was_sampled:
            self.statusBar().showMessage(
                f'Sampling {table_name} (using 10,000 rows from {total_rows} total)...')
            frame = frame.sample(n=10000, random_state=42)

        # Imported lazily, only when a profile is actually requested
        from sqlshell.utils.profile_distributions import visualize_profile

        self.statusBar().showMessage(f'Generating distribution profile for "{table_name}"...')

        # Keep a reference so the window is not garbage collected
        self._distributions_window = visualize_profile(frame)

        if was_sampled:
            self.statusBar().showMessage(
                f'Distribution profile generated for "{table_name}" (sampled 10,000 rows from {total_rows})')
        else:
            self.statusBar().showMessage(f'Distribution profile generated for "{table_name}"')

    except Exception as e:
        QMessageBox.critical(self, "Profile Error", f"Error analyzing distributions:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing distributions: {str(e)}')
|
|
3626
|
+
|
|
3627
|
+
def explain_column(self, column_name):
    """Profile one column of the current tab's result set.

    Does nothing when there is no current tab or the tab holds no
    dataframe. Results above 100 rows are sampled with a fixed seed before
    being passed to the column profiler.

    Args:
        column_name: Name of the column to analyze.
    """
    try:
        tab = self.get_current_tab()
        if not tab or tab.current_df is None:
            return

        # Loading indicator
        self.statusBar().showMessage(f'Analyzing column "{column_name}"...')

        data = tab.current_df
        if data is None or data.empty:
            QMessageBox.warning(self, "Empty Data", "No data available to analyze.")
            self.statusBar().showMessage('No data to analyze')
            return

        # Very small sample keeps the profiler fast
        total_rows = len(data)
        was_sampled = total_rows > 100
        if was_sampled:
            self.statusBar().showMessage(f'Sampling data (using 100 rows from {total_rows} total)...')
            data = data.sample(n=100, random_state=42)

        # Imported lazily, only when a profile is actually requested
        from sqlshell.utils.profile_column import visualize_profile

        self.statusBar().showMessage(f'Generating column profile for "{column_name}"...')

        # The return value is intentionally not stored here
        visualize_profile(data, column_name)

        if was_sampled:
            self.statusBar().showMessage(
                f'Column profile generated for "{column_name}" (sampled 100 rows from {total_rows})')
        else:
            self.statusBar().showMessage(f'Column profile generated for "{column_name}"')

    except Exception as e:
        QMessageBox.critical(self, "Analysis Error", f"Error analyzing column:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error analyzing column: {str(e)}')
|
|
3668
|
+
|
|
3669
|
+
def encode_text(self, column_name):
    """Generate one-hot encoding for a text column and visualize the results.

    Works on a copy of the current tab's dataframe. The copy's row count is
    recorded on the tab as ``original_df_rowcount``. When the copy has more
    than 1000 rows, a fixed-seed sample of 1000 rows is sent to the
    visualizer and the full copy is stashed on the tab as
    ``_original_df_before_encoding`` so that ``apply_encoded_dataframe`` can
    re-run the encoding on every row. When no sampling happens, any stash
    left over from an earlier call is removed, so a later apply cannot pick
    up a stale, unrelated dataframe.

    Args:
        column_name: Name of the column to encode.
    """
    try:
        # Get the current tab
        current_tab = self.get_current_tab()
        if not current_tab or current_tab.current_df is None:
            return

        # Show a loading indicator
        self.statusBar().showMessage(f'Preparing one-hot encoding for "{column_name}"...')

        # Work on a copy so the tab's dataframe is never mutated here
        full_df = current_tab.current_df.copy()

        # Save original row count for reference
        row_count = len(full_df)
        current_tab.original_df_rowcount = row_count

        if full_df.empty:
            QMessageBox.warning(self, "Empty Data", "No data available to encode.")
            self.statusBar().showMessage('No data to encode')
            return

        if row_count > 1000:
            self.statusBar().showMessage(f'Sampling data (using 1000 rows from {row_count} total)...')

            # Store the full dataframe before sampling for later use
            current_tab._original_df_before_encoding = full_df

            # Sample the data
            df = full_df.sample(n=1000, random_state=42)
        else:
            # No sampling: drop a stash left by a previous, larger result.
            # The attribute is removed (not set to None) because the apply
            # step tests for it with hasattr().
            try:
                del current_tab._original_df_before_encoding
            except AttributeError:
                pass
            df = full_df

        # Import the one-hot encoding visualizer
        from sqlshell.utils.profile_ohe import visualize_ohe

        # Create and show the visualization
        self.statusBar().showMessage(f'Generating one-hot encoding for "{column_name}"...')
        vis = visualize_ohe(df, column_name)

        # Connect to the encodingApplied signal
        vis.encodingApplied.connect(self.apply_encoded_dataframe)

        # Store a reference to prevent garbage collection
        self._ohe_window = vis

        if row_count > 1000:
            self.statusBar().showMessage(
                f'One-hot encoding generated for "{column_name}" (sampled 1000 rows from {row_count})')
        else:
            self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}"')

    except Exception as e:
        QMessageBox.critical(self, "Encoding Error", f"Error generating one-hot encoding:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error generating one-hot encoding: {str(e)}')
|
|
3723
|
+
|
|
3724
|
+
def apply_encoded_dataframe(self, encoded_df):
    """Apply the encoded dataframe to the current tab's results table.

    When the tab carries a full dataframe in ``_original_df_before_encoding``
    that has more rows than ``encoded_df``, the encoding is re-run on that
    full dataframe with ``get_ohe``. If the encoded column cannot be
    identified, or re-encoding fails, ``encoded_df`` is used as given.

    The chosen dataframe becomes the tab's ``current_df`` (and
    ``self.current_df``) and is shown in the results table. When it has at
    least 100 rows it is also registered with the database manager as a
    table named ``encoded_data_<unix timestamp>``.

    The progress dialog is closed on every exit path, including errors, so
    a failure cannot leave a window-modal dialog behind.

    Args:
        encoded_df: Dataframe emitted by the one-hot encoding window.
    """
    try:
        # Get the current tab
        current_tab = self.get_current_tab()
        if not current_tab:
            return

        # Show a loading indicator
        self.statusBar().showMessage('Applying one-hot encoding...')

        # Progress dialog for large datasets
        progress = QProgressDialog("Applying encoding...", "Cancel", 0, 100, self)
        try:
            progress.setWindowTitle("Processing")
            progress.setWindowModality(Qt.WindowModality.WindowModal)
            progress.setValue(10)

            # Full re-encoding is only possible when the unsampled frame was
            # stashed on the tab and is larger than what we were given.
            full_df = getattr(current_tab, '_original_df_before_encoding', None)
            needs_full_encoding = full_df is not None and len(full_df) > len(encoded_df)

            progress.setValue(20)
            QApplication.processEvents()

            # Default: use the dataframe exactly as provided
            result_df = encoded_df

            if needs_full_encoding:
                self.statusBar().showMessage(
                    f'Re-applying encoding to full dataset ({len(full_df)} rows)...')

                try:
                    # Columns added by the OHE process, in dataframe order
                    original_cols = list(current_tab.current_df.columns)
                    known_cols = set(original_cols)
                    ohe_cols = [c for c in encoded_df.columns if c not in known_cols]

                    if ohe_cols:
                        # Import the encoding function to apply to full dataset
                        from sqlshell.utils.profile_ohe import get_ohe

                        # Find the source column from the generated names.
                        # Walking the columns in dataframe order keeps the
                        # choice stable between runs when several names match.
                        encoded_column = None
                        for col in original_cols:
                            prefixes = (f'is_{col}', f'has_{col}')
                            if any(c.startswith(prefixes) for c in ohe_cols):
                                encoded_column = col
                                break

                        progress.setValue(40)
                        QApplication.processEvents()

                        if encoded_column is not None:
                            self.statusBar().showMessage(
                                f'Encoding column "{encoded_column}" on full dataset...')
                            result_df = get_ohe(full_df, encoded_column)

                            progress.setValue(80)
                            QApplication.processEvents()
                except Exception as e:
                    # Best effort: fall back to the provided encoded_df
                    print(f"Error applying encoding to full dataset: {e}")
                    result_df = encoded_df

            current_tab.current_df = result_df
            self.current_df = result_df  # Keep this for compatibility

            progress.setValue(90)
            QApplication.processEvents()

            # Populate the results table with the new dataframe
            self.populate_table(result_df)

            # Update results title to show this is encoded data
            current_tab.results_title.setText("ENCODED DATA")

            progress.setValue(100)
        finally:
            # Always dismiss the dialog, before any error box is shown
            progress.close()

        # Update status
        self.statusBar().showMessage(
            f'Applied one-hot encoding with {len(result_df.columns)} columns')

        # Only worth registering as a table if it's substantial
        if len(result_df) >= 100:
            try:
                # Generate a table name from the current time
                import time
                table_name = f"encoded_data_{int(time.time())}"

                # Register as a temporary table in the database manager
                self.db_manager.register_dataframe(result_df, table_name, "query_result")

                # Add to tables list
                self.tables_list.add_table_item(table_name, "encoded data")

                # Update completer
                self.update_completer()

                # Notify user
                self.statusBar().showMessage(
                    f'Applied one-hot encoding and registered as table "{table_name}"')
            except Exception as e:
                # Just log the error but continue - this is an optional enhancement
                print(f"Error registering encoded dataframe as table: {e}")

    except Exception as e:
        QMessageBox.critical(self, "Error", f"Failed to apply encoded dataframe:\n\n{str(e)}")
        self.statusBar().showMessage(f'Error applying encoding: {str(e)}')
|
|
3853
|
+
|
|
3854
|
+
def get_current_query_tab(self):
    """Return the active tab when it exposes ``query_edit``, otherwise None."""
    tab = self.get_current_tab()
    # A tab counts as a query tab only if it is truthy and has an editor
    is_query_tab = bool(tab) and hasattr(tab, 'query_edit')
    return tab if is_query_tab else None
|
|
3219
3860
|
|
|
3220
3861
|
def main():
|
|
3221
3862
|
# Parse command line arguments
|