PyPI - sqlshell - Versions diffs - 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

sqlshell 0.2.2 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlshell might be problematic. Click here for more details.

Files changed (22) hide show

sqlshell/README.md +5 -1
sqlshell/__init__.py +35 -5
sqlshell/create_test_data.py +29 -0
sqlshell/db/__init__.py +2 -1
sqlshell/db/database_manager.py +336 -23
sqlshell/db/export_manager.py +188 -0
sqlshell/editor_integration.py +127 -0
sqlshell/execution_handler.py +421 -0
sqlshell/main.py +784 -143
sqlshell/query_tab.py +592 -7
sqlshell/table_list.py +90 -1
sqlshell/ui/filter_header.py +36 -1
sqlshell/utils/profile_column.py +2515 -0
sqlshell/utils/profile_distributions.py +613 -0
sqlshell/utils/profile_foreign_keys.py +547 -0
sqlshell/utils/profile_ohe.py +631 -0
sqlshell-0.3.0.dist-info/METADATA +400 -0
{sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/RECORD +21 -14
{sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/WHEEL +1 -1
sqlshell-0.2.2.dist-info/METADATA +0 -198
{sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/entry_points.txt +0 -0
{sqlshell-0.2.2.dist-info → sqlshell-0.3.0.dist-info}/top_level.txt +0 -0

sqlshell/main.py CHANGED Viewed

@@ -18,18 +18,20 @@ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
                            QCompleter, QFrame, QToolButton, QSizePolicy, QTabWidget,
                            QStyleFactory, QToolBar, QStatusBar, QLineEdit, QMenu,
                            QCheckBox, QWidgetAction, QMenuBar, QInputDialog, QProgressDialog,
-                           QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem)
+                           QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem,
+                           QComboBox)
 from PyQt6.QtCore import Qt, QAbstractTableModel, QRegularExpression, QRect, QSize, QStringListModel, QPropertyAnimation, QEasingCurve, QTimer, QPoint, QMimeData
 from PyQt6.QtGui import QFont, QColor, QSyntaxHighlighter, QTextCharFormat, QPainter, QTextFormat, QTextCursor, QIcon, QPalette, QLinearGradient, QBrush, QPixmap, QPolygon, QPainterPath, QDrag
 import numpy as np
 from datetime import datetime
+import psutil
 from sqlshell import create_test_data
 from sqlshell.splash_screen import AnimatedSplashScreen
 from sqlshell.syntax_highlighter import SQLSyntaxHighlighter
 from sqlshell.editor import LineNumberArea, SQLEditor
 from sqlshell.ui import FilterHeader, BarChartDelegate
-from sqlshell.db import DatabaseManager
+from sqlshell.db import DatabaseManager, ExportManager
 from sqlshell.query_tab import QueryTab
 from sqlshell.styles import (get_application_stylesheet, get_tab_corner_stylesheet,
                            get_context_menu_stylesheet,
@@ -42,6 +44,7 @@ class SQLShell(QMainWindow):
     def __init__(self):
         super().__init__()
         self.db_manager = DatabaseManager()
+        self.export_manager = ExportManager(self.db_manager)
         self.current_df = None  # Store the current DataFrame for filtering
         self.filter_widgets = []  # Store filter line edits
         self.current_project_file = None  # Store the current project file path
@@ -188,6 +191,12 @@ class SQLShell(QMainWindow):
         tables_header.setStyleSheet(get_tables_header_stylesheet())
         left_layout.addWidget(tables_header)
+        # Tables info label
+        tables_info = QLabel("Right-click on tables to profile columns, analyze structure, and discover distributions. Select multiple tables to analyze foreign key relationships.")
+        tables_info.setWordWrap(True)
+        tables_info.setStyleSheet("color: #7FB3D5; font-size: 11px; margin-top: 2px; margin-bottom: 5px;")
+        left_layout.addWidget(tables_info)
         # Tables list with custom styling
         self.tables_list = DraggableTablesList(self)
         self.tables_list.itemClicked.connect(self.show_table_preview)
@@ -210,6 +219,39 @@ class SQLShell(QMainWindow):
         query_header.setObjectName("header_label")
         right_layout.addWidget(query_header)
+        # Create a drop area for tables above the tab widget
+        self.tab_drop_area = QFrame()
+        self.tab_drop_area.setFixedHeight(30)
+        self.tab_drop_area.setObjectName("tab_drop_area")
+        # Add a label with hint text
+        drop_area_layout = QHBoxLayout(self.tab_drop_area)
+        drop_area_layout.setContentsMargins(10, 0, 10, 0)
+        self.drop_hint_label = QLabel("Drag tables here to create new query tabs")
+        self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
+        self.drop_hint_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
+        drop_area_layout.addWidget(self.drop_hint_label)
+        self.tab_drop_area.setStyleSheet("""
+            #tab_drop_area {
+                background-color: #f8f9fa;
+                border: 1px dashed #BDC3C7;
+                border-radius: 4px;
+                margin: 0 0 5px 0;
+            }
+            #tab_drop_area:hover {
+                background-color: #E5F7FF;
+                border: 1px dashed #3498DB;
+            }
+        """)
+        self.tab_drop_area.setAcceptDrops(True)
+        self.tab_drop_area.dragEnterEvent = self.tab_area_drag_enter
+        self.tab_drop_area.dragMoveEvent = self.tab_area_drag_move
+        self.tab_drop_area.dragLeaveEvent = self.tab_area_drag_leave
+        self.tab_drop_area.dropEvent = self.tab_area_drop
+        right_layout.addWidget(self.tab_drop_area)
         # Create tab widget for multiple queries
         self.tab_widget = QTabWidget()
         self.tab_widget.setTabsClosable(True)
@@ -231,6 +273,100 @@ class SQLShell(QMainWindow):
         # Status bar
         self.statusBar().showMessage('Ready | Ctrl+Enter: Execute Query | Ctrl+K: Toggle Comment | Ctrl+T: New Tab | Ctrl+Shift+O: Quick Access Files')
+    # Methods for handling drag and drop on the tab drop area
+    def tab_area_drag_enter(self, event):
+        """React to a drag entering the tab drop area.
+
+        Drags that do not originate from the tables list are ignored. For
+        accepted drags the hint label announces what a release will create
+        (when the payload carries text) and the drop area is highlighted.
+        """
+        if event.source() != self.tables_list:
+            # Foreign drag source: refuse it and leave the widget untouched
+            event.ignore()
+        else:
+            # The payload text is a ", "-separated list of table names
+            payload = event.mimeData()
+            if payload.hasText():
+                dragged = payload.text().split(", ")
+                hint = (f"Release to create a new query tab for {dragged[0]}"
+                        if len(dragged) == 1
+                        else f"Release to create {len(dragged)} new query tabs")
+                self.drop_hint_label.setText(hint)
+                self.drop_hint_label.setStyleSheet("color: #3498db; font-size: 11px; font-weight: bold;")
+            # Switch the frame to its highlighted, slightly taller look
+            self.tab_drop_area.setStyleSheet("""
+                #tab_drop_area {
+                    background-color: #E5F7FF;
+                    border: 2px dashed #3498DB;
+                    border-radius: 4px;
+                    margin: 0 0 5px 0;
+                }
+            """)
+            self.tab_drop_area.setFixedHeight(40)
+            event.acceptProposedAction()
+    def tab_area_drag_move(self, event):
+        """Keep accepting a drag over the drop area while it comes from the tables list."""
+        if event.source() != self.tables_list:
+            # Anything not started in the tables list is refused
+            event.ignore()
+            return
+        event.acceptProposedAction()
+    def tab_area_drag_leave(self, event):
+        """Return the tab drop area to its idle look when a drag leaves it.
+
+        The event itself is not inspected; only the hint label and the
+        frame's height and stylesheet are reset.
+        """
+        # Restore the idle hint text and its muted colour
+        self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
+        self.drop_hint_label.setText("Drag tables here to create new query tabs")
+        # Shrink the frame back to its resting height
+        self.tab_drop_area.setFixedHeight(30)
+        # Swap the highlight for the idle dashed border
+        self.tab_drop_area.setStyleSheet("""
+            #tab_drop_area {
+                background-color: #f8f9fa;
+                border: 1px dashed #BDC3C7;
+                border-radius: 4px;
+                margin: 0 0 5px 0;
+            }
+        """)
+        # The handler is assigned onto a plain QFrame, so no base-class call is made here
+    def tab_area_drop(self, event):
+        """Create one query tab per table dropped on the tab drop area.
+
+        Only drops that originate from the tables list are handled; any
+        other source is ignored. The payload text is read as a
+        ", "-separated list of table names. For every name a new tab titled
+        "Query <name>" is opened and pre-filled with "SELECT * FROM <name>".
+        Tables listed in ``tables_list.tables_needing_reload`` are reloaded
+        first, without prompting.
+
+        Args:
+            event: The drop event delivered to the drop area.
+        """
+        if event.source() != self.tables_list:
+            event.ignore()
+            return
+        mime_data = event.mimeData()
+        # Drop empty fragments so an empty payload cannot open a tab named
+        # "Query " containing an incomplete statement
+        if mime_data.hasText():
+            table_names = [name for name in mime_data.text().split(", ") if name]
+        else:
+            table_names = []
+        for table_name in table_names:
+            # Reload lazily-loaded tables immediately, without asking
+            if table_name in self.tables_list.tables_needing_reload:
+                self.reload_selected_table(table_name)
+            new_tab = self.add_tab(f"Query {table_name}")
+            new_tab.set_query_text(f"SELECT * FROM {table_name}")
+        if table_names:
+            self.statusBar().showMessage(f"Created new tab{'s' if len(table_names) > 1 else ''} for {', '.join(table_names)}")
+        # Restore the idle appearance on every accepted drop. Previously this
+        # only happened when the payload had text, so a text-less drop left
+        # the area highlighted. The reset is shared with the drag-leave handler.
+        self.tab_area_drag_leave(event)
+        event.acceptProposedAction()
     def create_tab_corner_widget(self):
         """Create a corner widget with a + button to add new tabs"""
         corner_widget = QWidget()
@@ -284,25 +420,126 @@ class SQLShell(QMainWindow):
             headers = [str(col) for col in df.columns]
             current_tab.results_table.setHorizontalHeaderLabels(headers)
-            # Calculate chunk size (adjust based on available memory)
-            CHUNK_SIZE = 1000
-            # Process data in chunks to avoid memory issues with large datasets
-            for chunk_start in range(0, row_count, CHUNK_SIZE):
-                chunk_end = min(chunk_start + CHUNK_SIZE, row_count)
-                chunk = df.iloc[chunk_start:chunk_end]
+            # Calculate dynamic chunk size based on available memory
+            import psutil
+            available_memory = psutil.virtual_memory().available
+            # Use 10% of available memory for chunking, with a minimum of 1000 rows
+            memory_per_row = df.memory_usage(deep=True).sum() / len(df)
+            CHUNK_SIZE = max(1000, min(10000, int(available_memory * 0.1 / memory_per_row)))
+            # Add pagination controls if dataset is large
+            if row_count > CHUNK_SIZE:
+                # Remove any existing pagination widgets
+                for i in reversed(range(current_tab.results_layout.count())):
+                    item = current_tab.results_layout.itemAt(i)
+                    widget = item.widget() if item is not None else None
+                    if widget and widget.objectName() == "pagination_widget":
+                        current_tab.results_layout.removeWidget(widget)
+                        widget.setParent(None)
+                        widget.deleteLater()
+                # Create pagination widget
+                pagination_widget = QWidget()
+                pagination_widget.setObjectName("pagination_widget")
+                pagination_layout = QHBoxLayout(pagination_widget)
+                # Add page size selector
+                page_size_label = QLabel("Rows per page:")
+                page_size_combo = QComboBox()
+                page_sizes = [1000, 5000, 10000, 50000, 100000]
+                page_size_combo.addItems([str(size) for size in page_sizes])
+                page_size_combo.setCurrentText(str(CHUNK_SIZE))
+                # Add navigation buttons
+                prev_btn = QPushButton("Previous")
+                next_btn = QPushButton("Next")
+                page_label = QLabel("Page 1")
+                # Add widgets to layout
+                pagination_layout.addWidget(page_size_label)
+                pagination_layout.addWidget(page_size_combo)
+                pagination_layout.addStretch()
+                pagination_layout.addWidget(prev_btn)
+                pagination_layout.addWidget(page_label)
+                pagination_layout.addWidget(next_btn)
+                # Add pagination widget to results layout
+                current_tab.results_layout.addWidget(pagination_widget)
+                # Store pagination state
+                current_tab.pagination_state = {
+                    'current_page': 0,
+                    'page_size': CHUNK_SIZE,
+                    'total_pages': (row_count + CHUNK_SIZE - 1) // CHUNK_SIZE,
+                    'page_label': page_label,
+                    'prev_btn': prev_btn,
+                    'next_btn': next_btn,
+                    'page_size_combo': page_size_combo
+                }
-                # Add rows for this chunk
-                current_tab.results_table.setRowCount(chunk_end)
+                # Connect pagination signals
+                def update_page_size(size):
+                    current_tab.pagination_state['page_size'] = int(size)
+                    current_tab.pagination_state['total_pages'] = (row_count + int(size) - 1) // int(size)
+                    current_tab.pagination_state['current_page'] = 0
+                    load_current_page()
+                def load_current_page():
+                    state = current_tab.pagination_state
+                    start_idx = state['current_page'] * state['page_size']
+                    end_idx = min(start_idx + state['page_size'], row_count)
+                    # Clear existing rows
+                    current_tab.results_table.setRowCount(0)
+                    # Load current page
+                    chunk = df.iloc[start_idx:end_idx]
+                    current_tab.results_table.setRowCount(len(chunk))
+                    for row_idx, (_, row_data) in enumerate(chunk.iterrows()):
+                        for col_idx, value in enumerate(row_data):
+                            formatted_value = self.format_value(value)
+                            item = QTableWidgetItem(formatted_value)
+                            current_tab.results_table.setItem(row_idx, col_idx, item)
+                    # Update pagination controls
+                    state['page_label'].setText(f"Page {state['current_page'] + 1} of {state['total_pages']}")
+                    state['prev_btn'].setEnabled(state['current_page'] > 0)
+                    state['next_btn'].setEnabled(state['current_page'] < state['total_pages'] - 1)
+                    # Process events to keep UI responsive
+                    QApplication.processEvents()
+                def next_page():
+                    if current_tab.pagination_state['current_page'] < current_tab.pagination_state['total_pages'] - 1:
+                        current_tab.pagination_state['current_page'] += 1
+                        load_current_page()
+                def prev_page():
+                    if current_tab.pagination_state['current_page'] > 0:
+                        current_tab.pagination_state['current_page'] -= 1
+                        load_current_page()
+                # Connect signals
+                page_size_combo.currentTextChanged.connect(update_page_size)
+                next_btn.clicked.connect(next_page)
+                prev_btn.clicked.connect(prev_page)
+                # Load first page
+                load_current_page()
+            else:
+                # For smaller datasets, load all at once
+                current_tab.results_table.setRowCount(row_count)
-                for row_idx, (_, row_data) in enumerate(chunk.iterrows(), start=chunk_start):
+                for row_idx, (_, row_data) in enumerate(df.iterrows()):
                     for col_idx, value in enumerate(row_data):
                         formatted_value = self.format_value(value)
                         item = QTableWidgetItem(formatted_value)
                         current_tab.results_table.setItem(row_idx, col_idx, item)
-                # Process events to keep UI responsive
-                QApplication.processEvents()
+                    # Process events periodically to keep UI responsive
+                    if row_idx % 1000 == 0:
+                        QApplication.processEvents()
             # Optimize column widths
             current_tab.results_table.resizeColumnsToContents()
@@ -630,6 +867,31 @@ class SQLShell(QMainWindow):
                 QMessageBox.warning(self, "Empty Query", "Please enter a SQL query to execute.")
                 return
+            # Check if the query references any tables that need to be loaded
+            referenced_tables = self.extract_table_names_from_query(query)
+            tables_to_load = [table for table in referenced_tables if table in self.tables_list.tables_needing_reload]
+            # Load any tables that need to be loaded
+            if tables_to_load:
+                progress = QProgressDialog(f"Loading tables...", "Cancel", 0, len(tables_to_load), self)
+                progress.setWindowTitle("Loading Tables")
+                progress.setWindowModality(Qt.WindowModality.WindowModal)
+                progress.show()
+                for i, table_name in enumerate(tables_to_load):
+                    if progress.wasCanceled():
+                        self.statusBar().showMessage("Query canceled: table loading was interrupted")
+                        return
+                    progress.setLabelText(f"Loading table: {table_name}")
+                    progress.setValue(i)
+                    QApplication.processEvents()
+                    self.reload_selected_table(table_name)
+                progress.setValue(len(tables_to_load))
+                progress.close()
             start_time = datetime.now()
             try:
@@ -666,6 +928,57 @@ class SQLShell(QMainWindow):
             QMessageBox.critical(self, "Unexpected Error",
                 f"An unexpected error occurred:\n\n{str(e)}")
             self.statusBar().showMessage("Query execution failed")
+    def extract_table_names_from_query(self, query):
+        """Extract table names referenced by a SQL query using basic regex patterns.
+
+        This is a heuristic scan, not a SQL parser: it looks for identifiers
+        that follow table-introducing keywords (FROM, JOIN, UPDATE, INSERT
+        INTO, DELETE FROM, CREATE/DROP/ALTER TABLE, MERGE INTO) and for CTE
+        names (WITH <name> AS). Comments are removed before matching.
+
+        Args:
+            query: SQL text to scan.
+
+        Returns:
+            A set of lowercase names. A qualified reference such as
+            ``schema.table`` contributes both ``schema.table`` and ``table``.
+        """
+        import re
+        # Uppercase once so the keyword patterns can stay case-sensitive
+        query_upper = query.upper()
+        # Strip comments to avoid matching patterns inside comments
+        query_upper = re.sub(r'--.*?$', '', query_upper, flags=re.MULTILINE)
+        query_upper = re.sub(r'/\*.*?\*/', '', query_upper, flags=re.DOTALL)
+        # Optionally quoted identifier. Dots are allowed inside the capture so
+        # schema-qualified names are seen whole; a plain \w+ capture stops at
+        # the first dot and would return only the schema part.
+        identifier = r'["\[`]?([\w.]+)["\]`]?'
+        # \b keeps identifiers such as VALID_FROM from matching as a keyword
+        patterns = [
+            r'\bFROM\s+' + identifier,                            # FROM clause
+            r'\bJOIN\s+' + identifier,                            # JOIN clause
+            r'\bUPDATE\s+' + identifier,                          # UPDATE statement
+            r'\bINSERT\s+INTO\s+' + identifier,                   # INSERT statement
+            r'\bDELETE\s+FROM\s+' + identifier,                   # DELETE statement
+            r'\bCREATE\s+(?:TEMP|TEMPORARY)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?' + identifier,  # CREATE TABLE
+            r'\bDROP\s+TABLE\s+(?:IF\s+EXISTS\s+)?' + identifier,  # DROP TABLE
+            r'\bALTER\s+TABLE\s+' + identifier,                   # ALTER TABLE
+            r'\bWITH\s+(\w+)\s+AS',                               # Common Table Expressions
+            r'\bMERGE\s+INTO\s+' + identifier,                    # MERGE statement
+        ]
+        # Keywords that can directly follow FROM/JOIN/... and are not table names
+        sql_keywords = frozenset((
+            'SELECT', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'OFFSET',
+            'UNION', 'INTERSECT', 'EXCEPT', 'WITH', 'AS', 'ON', 'USING',
+        ))
+        tables = set()
+        for pattern in patterns:
+            for match in re.finditer(pattern, query_upper):
+                # Trim stray dots (e.g. a trailing "t.") left by the permissive capture
+                table_name = match.group(1).strip('.')
+                if not table_name or table_name in sql_keywords:
+                    continue
+                # Lowercase for case-insensitive comparison by the caller
+                table_name = table_name.lower()
+                tables.add(table_name)
+                # For schema.table also record the bare table part
+                tables.add(table_name.rsplit('.', 1)[-1])
+        return tables
     def _update_query_history(self, query):
         """Update query history and track term usage for improved autocompletion"""
@@ -808,25 +1121,32 @@ class SQLShell(QMainWindow):
             # Generate test data
             sales_df = create_test_data.create_sales_data()
             customer_df = create_test_data.create_customer_data()
+            large_customer_df = create_test_data.create_large_customer_data()
             product_df = create_test_data.create_product_data()
             large_numbers_df = create_test_data.create_large_numbers_data()
+            california_housing_df = create_test_data.create_california_housing_data()
             # Save test data to temporary directory
             sales_path = os.path.join(temp_dir, 'sample_sales_data.xlsx')
             customer_path = os.path.join(temp_dir, 'customer_data.parquet')
             product_path = os.path.join(temp_dir, 'product_catalog.xlsx')
             large_numbers_path = os.path.join(temp_dir, 'large_numbers.xlsx')
+            large_customer_path = os.path.join(temp_dir, 'large_customer_data.parquet')
+            california_housing_path = os.path.join(temp_dir, 'california_housing_data.parquet')
             sales_df.to_excel(sales_path, index=False)
             customer_df.to_parquet(customer_path, index=False)
             product_df.to_excel(product_path, index=False)
             large_numbers_df.to_excel(large_numbers_path, index=False)
+            large_customer_df.to_parquet(large_customer_path, index=False)
+            california_housing_df.to_parquet(california_housing_path, index=False)
             # Register the tables in the database manager
             self.db_manager.register_dataframe(sales_df, 'sample_sales_data', sales_path)
             self.db_manager.register_dataframe(product_df, 'product_catalog', product_path)
             self.db_manager.register_dataframe(customer_df, 'customer_data', customer_path)
             self.db_manager.register_dataframe(large_numbers_df, 'large_numbers', large_numbers_path)
+            self.db_manager.register_dataframe(large_customer_df, 'large_customer_data', large_customer_path)
+            self.db_manager.register_dataframe(california_housing_df, 'california_housing_data', california_housing_path)
             # Update UI
             self.tables_list.clear()
@@ -893,28 +1213,14 @@ LIMIT 10
             self.statusBar().showMessage('Exporting data to Excel...')
             # Convert table data to DataFrame
-            df = self.get_table_data_as_dataframe()
-            df.to_excel(file_name, index=False)
+            df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
+            if df is None:
+                raise Exception("Failed to convert table data to DataFrame")
-            # Generate table name from file name
-            base_name = os.path.splitext(os.path.basename(file_name))[0]
-            table_name = self.db_manager.sanitize_table_name(base_name)
+            # Export using ExportManager
+            table_name, metadata = self.export_manager.export_to_excel(df, file_name)
-            # Ensure unique table name
-            original_name = table_name
-            counter = 1
-            while table_name in self.db_manager.loaded_tables:
-                table_name = f"{original_name}_{counter}"
-                counter += 1
-            # Register the table in the database manager
-            self.db_manager.register_dataframe(df, table_name, file_name)
-            # Update tracking
-            self.db_manager.loaded_tables[table_name] = file_name
-            self.db_manager.table_columns[table_name] = df.columns.tolist()
-            # Update UI using new method
+            # Update UI
             self.tables_list.add_table_item(table_name, os.path.basename(file_name))
             self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
@@ -951,28 +1257,14 @@ LIMIT 10
             self.statusBar().showMessage('Exporting data to Parquet...')
             # Convert table data to DataFrame
-            df = self.get_table_data_as_dataframe()
-            df.to_parquet(file_name, index=False)
-            # Generate table name from file name
-            base_name = os.path.splitext(os.path.basename(file_name))[0]
-            table_name = self.db_manager.sanitize_table_name(base_name)
-            # Ensure unique table name
-            original_name = table_name
-            counter = 1
-            while table_name in self.db_manager.loaded_tables:
-                table_name = f"{original_name}_{counter}"
-                counter += 1
+            df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
+            if df is None:
+                raise Exception("Failed to convert table data to DataFrame")
-            # Register the table in the database manager
-            self.db_manager.register_dataframe(df, table_name, file_name)
+            # Export using ExportManager
+            table_name, metadata = self.export_manager.export_to_parquet(df, file_name)
-            # Update tracking
-            self.db_manager.loaded_tables[table_name] = file_name
-            self.db_manager.table_columns[table_name] = df.columns.tolist()
-            # Update UI using new method
+            # Update UI
             self.tables_list.add_table_item(table_name, os.path.basename(file_name))
             self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
@@ -992,94 +1284,10 @@ LIMIT 10
     def get_table_data_as_dataframe(self):
         """Helper function to convert table widget data to a DataFrame with proper data types"""
-        # Get the current tab
         current_tab = self.get_current_tab()
         if not current_tab:
             return pd.DataFrame()
-        headers = [current_tab.results_table.horizontalHeaderItem(i).text() for i in range(current_tab.results_table.columnCount())]
-        data = []
-        for row in range(current_tab.results_table.rowCount()):
-            row_data = []
-            for column in range(current_tab.results_table.columnCount()):
-                item = current_tab.results_table.item(row, column)
-                row_data.append(item.text() if item else '')
-            data.append(row_data)
-        # Create DataFrame from raw string data
-        df_raw = pd.DataFrame(data, columns=headers)
-        # Try to use the original dataframe's dtypes if available
-        if hasattr(current_tab, 'current_df') and current_tab.current_df is not None:
-            original_df = current_tab.current_df
-            # Since we might have filtered rows, we can't just return the original DataFrame
-            # But we can use its column types to convert our string data appropriately
-            # Create a new DataFrame with appropriate types
-            df_typed = pd.DataFrame()
-            for col in df_raw.columns:
-                if col in original_df.columns:
-                    # Get the original column type
-                    orig_type = original_df[col].dtype
-                    # Special handling for different data types
-                    if pd.api.types.is_numeric_dtype(orig_type):
-                        # Handle numeric columns (int or float)
-                        try:
-                            # First try to convert to numeric type
-                            # Remove commas used for thousands separators
-                            numeric_col = pd.to_numeric(df_raw[col].str.replace(',', '').replace('NULL', np.nan))
-                            df_typed[col] = numeric_col
-                        except:
-                            # If that fails, keep the original string
-                            df_typed[col] = df_raw[col]
-                    elif pd.api.types.is_datetime64_dtype(orig_type):
-                        # Handle datetime columns
-                        try:
-                            df_typed[col] = pd.to_datetime(df_raw[col].replace('NULL', np.nan))
-                        except:
-                            df_typed[col] = df_raw[col]
-                    elif pd.api.types.is_bool_dtype(orig_type):
-                        # Handle boolean columns
-                        try:
-                            df_typed[col] = df_raw[col].map({'True': True, 'False': False}).replace('NULL', np.nan)
-                        except:
-                            df_typed[col] = df_raw[col]
-                    else:
-                        # For other types, keep as is
-                        df_typed[col] = df_raw[col]
-                else:
-                    # For columns not in the original dataframe, infer type
-                    df_typed[col] = df_raw[col]
-            return df_typed
-        else:
-            # If we don't have the original dataframe, try to infer types
-            # First replace 'NULL' with actual NaN
-            df_raw.replace('NULL', np.nan, inplace=True)
-            # Try to convert each column to numeric if possible
-            for col in df_raw.columns:
-                try:
-                    # First try to convert to numeric by removing commas
-                    df_raw[col] = pd.to_numeric(df_raw[col].str.replace(',', ''))
-                except:
-                    # If that fails, try to convert to datetime
-                    try:
-                        df_raw[col] = pd.to_datetime(df_raw[col])
-                    except:
-                        # If both numeric and datetime conversions fail,
-                        # try boolean conversion for True/False strings
-                        try:
-                            if df_raw[col].dropna().isin(['True', 'False']).all():
-                                df_raw[col] = df_raw[col].map({'True': True, 'False': False})
-                        except:
-                            # Otherwise, keep as is
-                            pass
-            return df_raw
+        return self.export_manager.convert_table_to_dataframe(current_tab.results_table)
     def keyPressEvent(self, event):
         """Handle global keyboard shortcuts"""
@@ -1203,6 +1411,30 @@ LIMIT 10
     def show_tables_context_menu(self, position):
         """Show context menu for tables list"""
+        # Check if we have multiple selected items
+        selected_items = self.tables_list.selectedItems()
+        if len(selected_items) > 1:
+            # Filter out any folder items from selection
+            table_items = [item for item in selected_items if not self.tables_list.is_folder_item(item)]
+            if len(table_items) > 1:
+                # Create context menu for multiple table selection
+                context_menu = QMenu(self)
+                context_menu.setStyleSheet(get_context_menu_stylesheet())
+                # Add foreign key analysis option
+                analyze_fk_action = context_menu.addAction(f"Analyze Foreign Keys Between {len(table_items)} Tables")
+                analyze_fk_action.setIcon(QIcon.fromTheme("system-search"))
+                # Show menu and get selected action
+                action = context_menu.exec(self.tables_list.mapToGlobal(position))
+                if action == analyze_fk_action:
+                    self.analyze_foreign_keys_between_tables(table_items)
+                return
+        # Single item selection (original functionality)
         item = self.tables_list.itemAt(position)
         # If no item or it's a folder, let the tree widget handle it
@@ -1226,6 +1458,12 @@ LIMIT 10
         # Add menu actions
         select_from_action = context_menu.addAction("Select from")
         add_to_editor_action = context_menu.addAction("Just add to editor")
+        select_from_new_tab_action = context_menu.addAction("Select From in New Tab")
+        # Add copy path actions
+        context_menu.addSeparator()
+        copy_path_action = context_menu.addAction("Copy Path")
+        copy_relative_path_action = context_menu.addAction("Copy Relative Path")
         # Add entropy profiler action
         context_menu.addSeparator()
@@ -1236,6 +1474,10 @@ LIMIT 10
         profile_table_action = context_menu.addAction("Profile Table Structure")
         profile_table_action.setIcon(QIcon.fromTheme("edit-find"))
+        # Add distributions profiler action
+        profile_distributions_action = context_menu.addAction("Analyze Column Distributions")
+        profile_distributions_action.setIcon(QIcon.fromTheme("accessories-calculator"))
         # Check if table needs reloading and add appropriate action
         if table_name in self.tables_list.tables_needing_reload:
             reload_action = context_menu.addAction("Reload Table")
@@ -1286,6 +1528,11 @@ LIMIT 10
             cursor = current_tab.query_edit.textCursor()
             cursor.insertText(table_name)
             current_tab.query_edit.setFocus()
+        elif action == select_from_new_tab_action:
+            # Create a new tab with the selected table
+            new_tab = self.add_tab(title=table_name)
+            new_tab.set_query_text(f"SELECT * FROM {table_name}")
+            new_tab.query_edit.setFocus()
         elif action == reload_action:
             self.reload_selected_table(table_name)
         elif action == analyze_entropy_action:
@@ -1294,6 +1541,9 @@ LIMIT 10
         elif action == profile_table_action:
             # Call the table profile method
             self.profile_table_structure(table_name)
+        elif action == profile_distributions_action:
+            # Call the distributions profile method
+            self.profile_distributions(table_name)
         elif action == rename_action:
             # Show rename dialog
             new_name, ok = QInputDialog.getText(
@@ -1349,6 +1599,91 @@ LIMIT 10
             if target_folder:
                 self.tables_list.move_item_to_folder(item, target_folder)
                 self.statusBar().showMessage(f'Moved table "{table_name}" to folder "{target_folder.text(0)}"')
+        elif action == copy_path_action:
+            # Get the full path from the table source
+            if table_name in self.db_manager.loaded_tables:
+                path = self.db_manager.loaded_tables[table_name]
+                if path != 'database':  # Only copy if it's a file path
+                    QApplication.clipboard().setText(path)
+                    self.statusBar().showMessage(f"Copied full path to clipboard")
+        elif action == copy_relative_path_action:
+            # Get the relative path from the table source
+            if table_name in self.db_manager.loaded_tables:
+                path = self.db_manager.loaded_tables[table_name]
+                if path != 'database':  # Only copy if it's a file path
+                    try:
+                        rel_path = os.path.relpath(path)
+                        QApplication.clipboard().setText(rel_path)
+                        self.statusBar().showMessage(f"Copied relative path to clipboard")
+                    except ValueError:
+                        self.statusBar().showMessage("Could not determine relative path")
+    def analyze_foreign_keys_between_tables(self, table_items):
+        """Analyze foreign key relationships between the selected tables.
+
+        Each selected table is reloaded first if it is flagged in
+        ``tables_list.tables_needing_reload``, then fetched through
+        ``db_manager.execute_query``. Tables with more than 10,000 rows are
+        sampled down to 10,000 rows with a fixed random seed. Empty tables and
+        tables that fail to load are skipped with a warning.
+
+        Args:
+            table_items: Items from ``self.tables_list`` to analyze.
+
+        Returns:
+            None. Problems are reported via message boxes and the status bar
+            instead of being raised.
+        """
+        try:
+            # Show a loading indicator
+            table_count = len(table_items)
+            self.statusBar().showMessage(f'Analyzing foreign key relationships between {table_count} tables...')
+            # Extract table names from selected items, ignoring items without a name
+            table_names = []
+            for item in table_items:
+                table_name = self.tables_list.get_table_name_from_item(item)
+                if table_name:
+                    table_names.append(table_name)
+            if len(table_names) < 2:
+                QMessageBox.warning(self, "Not Enough Tables",
+                                    "At least two tables are required for foreign key analysis.")
+                return
+            # Check if any tables need to be reloaded
+            tables_to_reload = [tn for tn in table_names if tn in self.tables_list.tables_needing_reload]
+            for table_name in tables_to_reload:
+                # Reload the table immediately
+                self.reload_selected_table(table_name)
+            # Fetch data for each table. Names are collected in lockstep with
+            # the dataframes so that a skipped table cannot shift the labels
+            # of the tables that follow it.
+            dfs = []
+            loaded_names = []
+            for table_name in table_names:
+                try:
+                    # Get the data as a dataframe
+                    query = f'SELECT * FROM "{table_name}"'
+                    df = self.db_manager.execute_query(query)
+                    if df is not None and not df.empty:
+                        # Sample large tables to improve performance
+                        if len(df) > 10000:
+                            self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {len(df)} total)...')
+                            df = df.sample(n=10000, random_state=42)
+                        dfs.append(df)
+                        loaded_names.append(table_name)
+                    else:
+                        QMessageBox.warning(self, "Empty Table",
+                                            f"Table '{table_name}' has no data and will be skipped.")
+                except Exception as e:
+                    QMessageBox.warning(self, "Table Error",
+                                       f"Error loading table '{table_name}': {str(e)}\nThis table will be skipped.")
+            if len(dfs) < 2:
+                QMessageBox.warning(self, "Not Enough Tables",
+                                   "At least two tables with data are required for foreign key analysis.")
+                return
+            # Import the foreign key analyzer
+            from sqlshell.utils.profile_foreign_keys import visualize_foreign_keys
+            # Create and show the visualization
+            self.statusBar().showMessage(f'Analyzing foreign key relationships between {len(dfs)} tables...')
+            # Pass only the names of the tables that were actually loaded
+            vis = visualize_foreign_keys(dfs, loaded_names)
+            # Store a reference to prevent garbage collection
+            self._fk_analysis_window = vis
+            self.statusBar().showMessage(f'Foreign key analysis complete for {len(dfs)} tables')
+        except Exception as e:
+            QMessageBox.critical(self, "Analysis Error", f"Error analyzing foreign keys:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error analyzing foreign keys: {str(e)}')
     def reload_selected_table(self, table_name=None):
         """Reload the data for a table from its source file"""
@@ -2616,6 +2951,20 @@ LIMIT 10
             self.showMaximized()
             self.was_maximized = True
+    def get_selected_table(self):
+        """Return the name of the single selected table, or None.
+
+        None is returned when the window has no ``tables_list`` attribute, or
+        when the selection (with folder items ignored) does not contain
+        exactly one table.
+        """
+        if hasattr(self, 'tables_list'):
+            # Folders are not tables; drop them before counting the selection
+            chosen = [
+                entry
+                for entry in self.tables_list.selectedItems()
+                if not self.tables_list.is_folder_item(entry)
+            ]
+            # An ambiguous (multi-table) or empty selection yields no table
+            if len(chosen) == 1:
+                return self.tables_list.get_table_name_from_item(chosen[0])
+        return None
     def change_zoom(self, factor):
         """Change the zoom level of the application by adjusting font sizes"""
         try:
@@ -3195,6 +3544,12 @@ LIMIT 10
                 df = self.db_manager.execute_query(query)
                 if df is not None and not df.empty:
+                    # Sample the data if it's larger than 10,000 rows
+                    row_count = len(df)
+                    if row_count > 10000:
+                        self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {row_count} total)...')
+                        df = df.sample(n=10000, random_state=42)
                     # Import the key profiler
                     from sqlshell.utils.profile_keys import visualize_profile
@@ -3205,7 +3560,10 @@ LIMIT 10
                     # Store a reference to prevent garbage collection
                     self._keys_profile_window = vis
-                    self.statusBar().showMessage(f'Table structure profile generated for "{table_name}"')
+                    if row_count > 10000:
+                        self.statusBar().showMessage(f'Table structure profile generated for "{table_name}" (sampled 10,000 rows from {row_count})')
+                    else:
+                        self.statusBar().showMessage(f'Table structure profile generated for "{table_name}"')
                 else:
                     QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
                     self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
@@ -3216,6 +3574,289 @@ LIMIT 10
         except Exception as e:
             QMessageBox.critical(self, "Profile Error", f"Error profiling table structure:\n\n{str(e)}")
             self.statusBar().showMessage(f'Error profiling table: {str(e)}')
+    def profile_distributions(self, table_name):
+        """Open a column-distribution profile for the given loaded table.
+
+        The table is reloaded first when it is flagged as needing a reload,
+        then read in full; results above 10,000 rows are sampled down to
+        10,000 rows with a fixed seed before being handed to the profiler.
+        Any failure is reported through a message box and the status bar.
+        """
+        try:
+            # Show a loading indicator
+            self.statusBar().showMessage(f'Analyzing column distributions for "{table_name}"...')
+            # Guard: the table must be known to the database manager
+            if table_name not in self.db_manager.loaded_tables:
+                QMessageBox.warning(self, "Table Not Found", f"Table '{table_name}' not found.")
+                self.statusBar().showMessage(f'Table "{table_name}" not found')
+                return
+            # Refresh stale data before reading it
+            if table_name in self.tables_list.tables_needing_reload:
+                self.reload_selected_table(table_name)
+            df = self.db_manager.execute_query(f'SELECT * FROM "{table_name}"')
+            # Guard: nothing to profile
+            if df is None or df.empty:
+                QMessageBox.warning(self, "Empty Table", f"Table '{table_name}' has no data to analyze.")
+                self.statusBar().showMessage(f'Table "{table_name}" is empty - cannot analyze')
+                return
+            row_count = len(df)
+            was_sampled = row_count > 10000
+            if was_sampled:
+                self.statusBar().showMessage(f'Sampling {table_name} (using 10,000 rows from {row_count} total)...')
+                df = df.sample(n=10000, random_state=42)
+            # Import the distribution profiler
+            from sqlshell.utils.profile_distributions import visualize_profile
+            self.statusBar().showMessage(f'Generating distribution profile for "{table_name}"...')
+            # Keep the returned object on self so it is not garbage collected
+            self._distributions_window = visualize_profile(df)
+            if was_sampled:
+                self.statusBar().showMessage(f'Distribution profile generated for "{table_name}" (sampled 10,000 rows from {row_count})')
+            else:
+                self.statusBar().showMessage(f'Distribution profile generated for "{table_name}"')
+        except Exception as e:
+            QMessageBox.critical(self, "Profile Error", f"Error analyzing distributions:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error analyzing distributions: {str(e)}')
+    def explain_column(self, column_name):
+        """Profile one column of the current tab's dataframe.
+
+        Does nothing when there is no current tab or the tab holds no
+        dataframe. Dataframes above 100 rows are sampled down to 100 rows
+        with a fixed seed before profiling. Failures are reported through a
+        message box and the status bar.
+        """
+        try:
+            current_tab = self.get_current_tab()
+            # Nothing to analyze without a tab that holds a dataframe
+            if not (current_tab and current_tab.current_df is not None):
+                return
+            # Show a loading indicator
+            self.statusBar().showMessage(f'Analyzing column "{column_name}"...')
+            df = current_tab.current_df
+            if df is None or df.empty:
+                QMessageBox.warning(self, "Empty Data", "No data available to analyze.")
+                self.statusBar().showMessage(f'No data to analyze')
+                return
+            row_count = len(df)
+            was_sampled = row_count > 100
+            if was_sampled:
+                # Small fixed-seed sample keeps the profiler fast
+                self.statusBar().showMessage(f'Sampling data (using 100 rows from {row_count} total)...')
+                df = df.sample(n=100, random_state=42)
+            # Import the column profiler
+            from sqlshell.utils.profile_column import visualize_profile
+            self.statusBar().showMessage(f'Generating column profile for "{column_name}"...')
+            # The return value is deliberately not stored.
+            # NOTE(review): presumably the profiler keeps its own window alive - confirm
+            visualize_profile(df, column_name)
+            if was_sampled:
+                self.statusBar().showMessage(f'Column profile generated for "{column_name}" (sampled 100 rows from {row_count})')
+            else:
+                self.statusBar().showMessage(f'Column profile generated for "{column_name}"')
+        except Exception as e:
+            QMessageBox.critical(self, "Analysis Error", f"Error analyzing column:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error analyzing column: {str(e)}')
+    def encode_text(self, column_name):
+        """Generate one-hot encoding for a text column and visualize the results.
+
+        Works on a copy of the current tab's dataframe. Dataframes above
+        1000 rows are sampled down to 1000 rows with a fixed seed for the
+        visualization, and the full copy is stored on the tab as
+        ``_original_df_before_encoding`` for later use. The visualizer's
+        ``encodingApplied`` signal is connected to
+        ``apply_encoded_dataframe``.
+
+        Args:
+            column_name: Name of the column to encode.
+
+        Returns:
+            None. Does nothing when there is no current tab or the tab holds
+            no dataframe; failures are reported through a message box and the
+            status bar.
+        """
+        try:
+            # Get the current tab
+            current_tab = self.get_current_tab()
+            if not current_tab or current_tab.current_df is None:
+                return
+            # Show a loading indicator
+            self.statusBar().showMessage(f'Preparing one-hot encoding for "{column_name}"...')
+            # Work on a snapshot so later changes to the tab's dataframe do not affect it
+            full_df = current_tab.current_df.copy()
+            df = full_df
+            # Save original row count for reference
+            current_tab.original_df_rowcount = len(full_df)
+            if not df.empty:
+                # Sample the data if it's larger than 1000 rows for better performance
+                row_count = len(df)
+                if row_count > 1000:
+                    self.statusBar().showMessage(f'Sampling data (using 1000 rows from {row_count} total)...')
+                    # Store the full dataframe before sampling for later use
+                    current_tab._original_df_before_encoding = full_df
+                    # Sample the data
+                    df = df.sample(n=1000, random_state=42)
+                else:
+                    # No sampling this time: remove any snapshot left behind by
+                    # an earlier, larger encode on this tab. Otherwise the
+                    # apply step would mistake that unrelated dataframe for
+                    # the "full dataset" of this encoding.
+                    try:
+                        del current_tab._original_df_before_encoding
+                    except AttributeError:
+                        # Nothing stored on this tab - nothing to clean up
+                        pass
+                # Import the one-hot encoding visualizer
+                from sqlshell.utils.profile_ohe import visualize_ohe
+                # Create and show the visualization
+                self.statusBar().showMessage(f'Generating one-hot encoding for "{column_name}"...')
+                vis = visualize_ohe(df, column_name)
+                # Connect to the encodingApplied signal
+                vis.encodingApplied.connect(self.apply_encoded_dataframe)
+                # Store a reference to prevent garbage collection
+                self._ohe_window = vis
+                if row_count > 1000:
+                    self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}" (sampled 1000 rows from {row_count})')
+                else:
+                    self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}"')
+            else:
+                QMessageBox.warning(self, "Empty Data", "No data available to encode.")
+                self.statusBar().showMessage(f'No data to encode')
+        except Exception as e:
+            QMessageBox.critical(self, "Encoding Error", f"Error generating one-hot encoding:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error generating one-hot encoding: {str(e)}')
+    def apply_encoded_dataframe(self, encoded_df):
+        """Apply the encoded dataframe to the current tab's results table.
+
+        When the tab still holds the full dataframe from before sampling
+        (``_original_df_before_encoding``) and it has more rows than
+        ``encoded_df``, the encoding is re-applied to that full dataframe via
+        ``get_ohe``. If the encoded column cannot be determined, or the
+        re-encoding fails, ``encoded_df`` is used as given. The resulting
+        dataframe replaces the tab's ``current_df``, is shown in the results
+        table and, when it has at least 100 rows, is also registered with the
+        database manager under a timestamped table name.
+
+        Args:
+            encoded_df: Dataframe containing the one-hot encoded columns.
+
+        Returns:
+            None. Does nothing when there is no current tab; failures are
+            reported through a message box and the status bar.
+        """
+        # Assigned once the dialog exists so the error handler knows whether to close it
+        progress = None
+        try:
+            # Get the current tab
+            current_tab = self.get_current_tab()
+            if not current_tab:
+                return
+            # Check if we're using a sampled version
+            is_sampled = False
+            full_df = None
+            # Show a loading indicator
+            self.statusBar().showMessage('Applying one-hot encoding...')
+            # Progress dialog for large datasets
+            progress = QProgressDialog("Applying encoding...", "Cancel", 0, 100, self)
+            progress.setWindowTitle("Processing")
+            progress.setWindowModality(Qt.WindowModality.WindowModal)
+            progress.setValue(10)
+            # Check if this sample is smaller than the actual dataset
+            if hasattr(current_tab, '_original_df_before_encoding'):
+                # We have the original, full dataset stored
+                full_df = current_tab._original_df_before_encoding
+                is_sampled = len(full_df) > len(encoded_df)
+            elif hasattr(current_tab, 'original_df_rowcount'):
+                # We know the original row count but don't have the data
+                is_sampled = current_tab.original_df_rowcount > len(encoded_df)
+            progress.setValue(20)
+            QApplication.processEvents()
+            # Default to the dataframe we were given; it is only replaced when
+            # re-encoding the full dataset succeeds.
+            result_df = encoded_df
+            if is_sampled and full_df is not None:
+                self.statusBar().showMessage(f'Re-applying encoding to full dataset ({len(full_df)} rows)...')
+                try:
+                    # Get the encoding columns (added by the OHE process)
+                    original_cols = set(current_tab.current_df.columns)
+                    ohe_cols = set(encoded_df.columns) - original_cols
+                    if ohe_cols:
+                        # Import the encoding function to apply to full dataset
+                        from sqlshell.utils.profile_ohe import get_ohe
+                        # Find the source column by matching the "is_<col>" /
+                        # "has_<col>" prefixes of the added columns
+                        encoded_column = None
+                        for col in original_cols:
+                            if any(c.startswith((f'is_{col}', f'has_{col}')) for c in ohe_cols):
+                                encoded_column = col
+                                break
+                        progress.setValue(40)
+                        QApplication.processEvents()
+                        if encoded_column:
+                            # Apply encoding to full dataset
+                            self.statusBar().showMessage(f'Encoding column "{encoded_column}" on full dataset...')
+                            result_df = get_ohe(full_df, encoded_column)
+                            progress.setValue(80)
+                            QApplication.processEvents()
+                except Exception as e:
+                    # If there's an error, fall back to the provided encoded_df
+                    print(f"Error applying encoding to full dataset: {e}")
+                    result_df = encoded_df
+            # Update the current dataframe with the result
+            current_tab.current_df = result_df
+            self.current_df = result_df  # Keep this for compatibility
+            progress.setValue(90)
+            QApplication.processEvents()
+            # Populate the results table with the new dataframe
+            self.populate_table(result_df)
+            # Update results title to show this is encoded data
+            current_tab.results_title.setText("ENCODED DATA")
+            progress.setValue(100)
+            progress.close()
+            # Update status
+            self.statusBar().showMessage(f'Applied one-hot encoding with {len(result_df.columns)} columns')
+            # Check if we should register this as a temporary table
+            if len(result_df) >= 100:  # Only worth registering as table if it's substantial
+                try:
+                    # Generate a unique table name
+                    import time
+                    timestamp = int(time.time())
+                    table_name = f"encoded_data_{timestamp}"
+                    # Register as a temporary table in the database manager
+                    self.db_manager.register_dataframe(result_df, table_name, "query_result")
+                    # Add to tables list
+                    self.tables_list.add_table_item(table_name, "encoded data")
+                    # Update completer
+                    self.update_completer()
+                    # Notify user
+                    self.statusBar().showMessage(f'Applied one-hot encoding and registered as table "{table_name}"')
+                except Exception as e:
+                    # Just log the error but continue - this is an optional enhancement
+                    print(f"Error registering encoded dataframe as table: {e}")
+        except Exception as e:
+            # Dismiss the window-modal progress dialog before reporting the
+            # error; otherwise it would stay open after a failure.
+            if progress is not None:
+                progress.close()
+            QMessageBox.critical(self, "Error", f"Failed to apply encoded dataframe:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error applying encoding: {str(e)}')
+    def get_current_query_tab(self):
+        """Return the active tab when it exposes ``query_edit``, otherwise None."""
+        tab = self.get_current_tab()
+        # Only tabs carrying a query editor count as query tabs
+        is_query_tab = bool(tab) and hasattr(tab, 'query_edit')
+        return tab if is_query_tab else None
 def main():
     # Parse command line arguments

sqlshell 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

sqlshell 0.2.2py3-none-any.whl → 0.3.0py3-none-any.whl