PyPI - sqlshell - Versions diffs - 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

sqlshell 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlshell might be problematic. Click here for more details.

Files changed (17)

sqlshell/__init__.py +34 -4
sqlshell/db/__init__.py +2 -1
sqlshell/db/database_manager.py +336 -23
sqlshell/db/export_manager.py +188 -0
sqlshell/editor_integration.py +127 -0
sqlshell/execution_handler.py +421 -0
sqlshell/main.py +570 -140
sqlshell/query_tab.py +592 -7
sqlshell/ui/filter_header.py +22 -1
sqlshell/utils/profile_column.py +1586 -170
sqlshell/utils/profile_foreign_keys.py +103 -11
sqlshell/utils/profile_ohe.py +631 -0
{sqlshell-0.2.3.dist-info → sqlshell-0.3.0.dist-info}/METADATA +126 -7
{sqlshell-0.2.3.dist-info → sqlshell-0.3.0.dist-info}/RECORD +17 -13
{sqlshell-0.2.3.dist-info → sqlshell-0.3.0.dist-info}/WHEEL +1 -1
{sqlshell-0.2.3.dist-info → sqlshell-0.3.0.dist-info}/entry_points.txt +0 -0
{sqlshell-0.2.3.dist-info → sqlshell-0.3.0.dist-info}/top_level.txt +0 -0

sqlshell/main.py CHANGED Viewed

@@ -18,18 +18,20 @@ from PyQt6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
                            QCompleter, QFrame, QToolButton, QSizePolicy, QTabWidget,
                            QStyleFactory, QToolBar, QStatusBar, QLineEdit, QMenu,
                            QCheckBox, QWidgetAction, QMenuBar, QInputDialog, QProgressDialog,
-                           QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem)
+                           QListWidgetItem, QDialog, QGraphicsDropShadowEffect, QTreeWidgetItem,
+                           QComboBox)
 from PyQt6.QtCore import Qt, QAbstractTableModel, QRegularExpression, QRect, QSize, QStringListModel, QPropertyAnimation, QEasingCurve, QTimer, QPoint, QMimeData
 from PyQt6.QtGui import QFont, QColor, QSyntaxHighlighter, QTextCharFormat, QPainter, QTextFormat, QTextCursor, QIcon, QPalette, QLinearGradient, QBrush, QPixmap, QPolygon, QPainterPath, QDrag
 import numpy as np
 from datetime import datetime
+import psutil
 from sqlshell import create_test_data
 from sqlshell.splash_screen import AnimatedSplashScreen
 from sqlshell.syntax_highlighter import SQLSyntaxHighlighter
 from sqlshell.editor import LineNumberArea, SQLEditor
 from sqlshell.ui import FilterHeader, BarChartDelegate
-from sqlshell.db import DatabaseManager
+from sqlshell.db import DatabaseManager, ExportManager
 from sqlshell.query_tab import QueryTab
 from sqlshell.styles import (get_application_stylesheet, get_tab_corner_stylesheet,
                            get_context_menu_stylesheet,
@@ -42,6 +44,7 @@ class SQLShell(QMainWindow):
     def __init__(self):
         super().__init__()
         self.db_manager = DatabaseManager()
+        self.export_manager = ExportManager(self.db_manager)
         self.current_df = None  # Store the current DataFrame for filtering
         self.filter_widgets = []  # Store filter line edits
         self.current_project_file = None  # Store the current project file path
@@ -216,6 +219,39 @@ class SQLShell(QMainWindow):
         query_header.setObjectName("header_label")
         right_layout.addWidget(query_header)
+        # Create a drop area for tables above the tab widget
+        self.tab_drop_area = QFrame()
+        self.tab_drop_area.setFixedHeight(30)
+        self.tab_drop_area.setObjectName("tab_drop_area")
+        # Add a label with hint text
+        drop_area_layout = QHBoxLayout(self.tab_drop_area)
+        drop_area_layout.setContentsMargins(10, 0, 10, 0)
+        self.drop_hint_label = QLabel("Drag tables here to create new query tabs")
+        self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
+        self.drop_hint_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
+        drop_area_layout.addWidget(self.drop_hint_label)
+        self.tab_drop_area.setStyleSheet("""
+            #tab_drop_area {
+                background-color: #f8f9fa;
+                border: 1px dashed #BDC3C7;
+                border-radius: 4px;
+                margin: 0 0 5px 0;
+            }
+            #tab_drop_area:hover {
+                background-color: #E5F7FF;
+                border: 1px dashed #3498DB;
+            }
+        """)
+        self.tab_drop_area.setAcceptDrops(True)
+        self.tab_drop_area.dragEnterEvent = self.tab_area_drag_enter
+        self.tab_drop_area.dragMoveEvent = self.tab_area_drag_move
+        self.tab_drop_area.dragLeaveEvent = self.tab_area_drag_leave
+        self.tab_drop_area.dropEvent = self.tab_area_drop
+        right_layout.addWidget(self.tab_drop_area)
         # Create tab widget for multiple queries
         self.tab_widget = QTabWidget()
         self.tab_widget.setTabsClosable(True)
@@ -237,6 +273,100 @@ class SQLShell(QMainWindow):
         # Status bar
         self.statusBar().showMessage('Ready | Ctrl+Enter: Execute Query | Ctrl+K: Toggle Comment | Ctrl+T: New Tab | Ctrl+Shift+O: Quick Access Files')
+    # Methods for handling drag and drop on the tab drop area
+    def tab_area_drag_enter(self, event):
+        """Handle drag enter events on the tab drop area"""
+        # Accept only if from the tables list
+        if event.source() == self.tables_list:
+            # Extract table name(s) from the mime data
+            mime_data = event.mimeData()
+            if mime_data.hasText():
+                table_names = mime_data.text().split(", ")
+                if len(table_names) == 1:
+                    self.drop_hint_label.setText(f"Release to create a new query tab for {table_names[0]}")
+                else:
+                    self.drop_hint_label.setText(f"Release to create {len(table_names)} new query tabs")
+                self.drop_hint_label.setStyleSheet("color: #3498db; font-size: 11px; font-weight: bold;")
+            # Highlight the drop area
+            self.tab_drop_area.setStyleSheet("""
+                #tab_drop_area {
+                    background-color: #E5F7FF;
+                    border: 2px dashed #3498DB;
+                    border-radius: 4px;
+                    margin: 0 0 5px 0;
+                }
+            """)
+            self.tab_drop_area.setFixedHeight(40)
+            event.acceptProposedAction()
+        else:
+            event.ignore()
+    def tab_area_drag_move(self, event):
+        """Handle drag move events on the tab drop area"""
+        # Continue accepting drag moves
+        if event.source() == self.tables_list:
+            event.acceptProposedAction()
+        else:
+            event.ignore()
+    def tab_area_drag_leave(self, event):
+        """Handle drag leave events on the tab drop area"""
+        # Reset the drop area
+        self.tab_drop_area.setStyleSheet("""
+            #tab_drop_area {
+                background-color: #f8f9fa;
+                border: 1px dashed #BDC3C7;
+                border-radius: 4px;
+                margin: 0 0 5px 0;
+            }
+        """)
+        self.drop_hint_label.setText("Drag tables here to create new query tabs")
+        self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
+        self.tab_drop_area.setFixedHeight(30)
+        # No need to call a parent method
+    def tab_area_drop(self, event):
+        """Handle drop events on the tab drop area"""
+        # Process the drop to create a new tab with SELECT query
+        if event.source() == self.tables_list:
+            mime_data = event.mimeData()
+            if mime_data.hasText():
+                table_names = mime_data.text().split(", ")
+                for table_name in table_names:
+                    # Check if this table needs to be reloaded first
+                    if table_name in self.tables_list.tables_needing_reload:
+                        # Reload the table immediately without asking
+                        self.reload_selected_table(table_name)
+                    # Generate a title for the tab
+                    tab_title = f"Query {table_name}"
+                    # Create a new tab
+                    new_tab = self.add_tab(tab_title)
+                    # Set the SQL query
+                    new_tab.set_query_text(f"SELECT * FROM {table_name}")
+                self.statusBar().showMessage(f"Created new tab{'s' if len(table_names) > 1 else ''} for {', '.join(table_names)}")
+                # Reset the drop area appearance
+                self.tab_drop_area.setStyleSheet("""
+                    #tab_drop_area {
+                        background-color: #f8f9fa;
+                        border: 1px dashed #BDC3C7;
+                        border-radius: 4px;
+                        margin: 0 0 5px 0;
+                    }
+                """)
+                self.drop_hint_label.setText("Drag tables here to create new query tabs")
+                self.drop_hint_label.setStyleSheet("color: #95a5a6; font-size: 11px;")
+                self.tab_drop_area.setFixedHeight(30)
+            event.acceptProposedAction()
+        else:
+            event.ignore()
     def create_tab_corner_widget(self):
         """Create a corner widget with a + button to add new tabs"""
         corner_widget = QWidget()
@@ -290,25 +420,126 @@ class SQLShell(QMainWindow):
             headers = [str(col) for col in df.columns]
             current_tab.results_table.setHorizontalHeaderLabels(headers)
-            # Calculate chunk size (adjust based on available memory)
-            CHUNK_SIZE = 1000
-            # Process data in chunks to avoid memory issues with large datasets
-            for chunk_start in range(0, row_count, CHUNK_SIZE):
-                chunk_end = min(chunk_start + CHUNK_SIZE, row_count)
-                chunk = df.iloc[chunk_start:chunk_end]
+            # Calculate dynamic chunk size based on available memory
+            import psutil
+            available_memory = psutil.virtual_memory().available
+            # Use 10% of available memory for chunking, with a minimum of 1000 rows
+            memory_per_row = df.memory_usage(deep=True).sum() / len(df)
+            CHUNK_SIZE = max(1000, min(10000, int(available_memory * 0.1 / memory_per_row)))
+            # Add pagination controls if dataset is large
+            if row_count > CHUNK_SIZE:
+                # Remove any existing pagination widgets
+                for i in reversed(range(current_tab.results_layout.count())):
+                    item = current_tab.results_layout.itemAt(i)
+                    widget = item.widget() if item is not None else None
+                    if widget and widget.objectName() == "pagination_widget":
+                        current_tab.results_layout.removeWidget(widget)
+                        widget.setParent(None)
+                        widget.deleteLater()
+                # Create pagination widget
+                pagination_widget = QWidget()
+                pagination_widget.setObjectName("pagination_widget")
+                pagination_layout = QHBoxLayout(pagination_widget)
+                # Add page size selector
+                page_size_label = QLabel("Rows per page:")
+                page_size_combo = QComboBox()
+                page_sizes = [1000, 5000, 10000, 50000, 100000]
+                page_size_combo.addItems([str(size) for size in page_sizes])
+                page_size_combo.setCurrentText(str(CHUNK_SIZE))
+                # Add navigation buttons
+                prev_btn = QPushButton("Previous")
+                next_btn = QPushButton("Next")
+                page_label = QLabel("Page 1")
+                # Add widgets to layout
+                pagination_layout.addWidget(page_size_label)
+                pagination_layout.addWidget(page_size_combo)
+                pagination_layout.addStretch()
+                pagination_layout.addWidget(prev_btn)
+                pagination_layout.addWidget(page_label)
+                pagination_layout.addWidget(next_btn)
+                # Add pagination widget to results layout
+                current_tab.results_layout.addWidget(pagination_widget)
+                # Store pagination state
+                current_tab.pagination_state = {
+                    'current_page': 0,
+                    'page_size': CHUNK_SIZE,
+                    'total_pages': (row_count + CHUNK_SIZE - 1) // CHUNK_SIZE,
+                    'page_label': page_label,
+                    'prev_btn': prev_btn,
+                    'next_btn': next_btn,
+                    'page_size_combo': page_size_combo
+                }
-                # Add rows for this chunk
-                current_tab.results_table.setRowCount(chunk_end)
+                # Connect pagination signals
+                def update_page_size(size):
+                    current_tab.pagination_state['page_size'] = int(size)
+                    current_tab.pagination_state['total_pages'] = (row_count + int(size) - 1) // int(size)
+                    current_tab.pagination_state['current_page'] = 0
+                    load_current_page()
+                def load_current_page():
+                    state = current_tab.pagination_state
+                    start_idx = state['current_page'] * state['page_size']
+                    end_idx = min(start_idx + state['page_size'], row_count)
+                    # Clear existing rows
+                    current_tab.results_table.setRowCount(0)
+                    # Load current page
+                    chunk = df.iloc[start_idx:end_idx]
+                    current_tab.results_table.setRowCount(len(chunk))
+                    for row_idx, (_, row_data) in enumerate(chunk.iterrows()):
+                        for col_idx, value in enumerate(row_data):
+                            formatted_value = self.format_value(value)
+                            item = QTableWidgetItem(formatted_value)
+                            current_tab.results_table.setItem(row_idx, col_idx, item)
+                    # Update pagination controls
+                    state['page_label'].setText(f"Page {state['current_page'] + 1} of {state['total_pages']}")
+                    state['prev_btn'].setEnabled(state['current_page'] > 0)
+                    state['next_btn'].setEnabled(state['current_page'] < state['total_pages'] - 1)
+                    # Process events to keep UI responsive
+                    QApplication.processEvents()
+                def next_page():
+                    if current_tab.pagination_state['current_page'] < current_tab.pagination_state['total_pages'] - 1:
+                        current_tab.pagination_state['current_page'] += 1
+                        load_current_page()
+                def prev_page():
+                    if current_tab.pagination_state['current_page'] > 0:
+                        current_tab.pagination_state['current_page'] -= 1
+                        load_current_page()
+                # Connect signals
+                page_size_combo.currentTextChanged.connect(update_page_size)
+                next_btn.clicked.connect(next_page)
+                prev_btn.clicked.connect(prev_page)
+                # Load first page
+                load_current_page()
+            else:
+                # For smaller datasets, load all at once
+                current_tab.results_table.setRowCount(row_count)
-                for row_idx, (_, row_data) in enumerate(chunk.iterrows(), start=chunk_start):
+                for row_idx, (_, row_data) in enumerate(df.iterrows()):
                     for col_idx, value in enumerate(row_data):
                         formatted_value = self.format_value(value)
                         item = QTableWidgetItem(formatted_value)
                         current_tab.results_table.setItem(row_idx, col_idx, item)
-                # Process events to keep UI responsive
-                QApplication.processEvents()
+                    # Process events periodically to keep UI responsive
+                    if row_idx % 1000 == 0:
+                        QApplication.processEvents()
             # Optimize column widths
             current_tab.results_table.resizeColumnsToContents()
@@ -636,6 +867,31 @@ class SQLShell(QMainWindow):
                 QMessageBox.warning(self, "Empty Query", "Please enter a SQL query to execute.")
                 return
+            # Check if the query references any tables that need to be loaded
+            referenced_tables = self.extract_table_names_from_query(query)
+            tables_to_load = [table for table in referenced_tables if table in self.tables_list.tables_needing_reload]
+            # Load any tables that need to be loaded
+            if tables_to_load:
+                progress = QProgressDialog(f"Loading tables...", "Cancel", 0, len(tables_to_load), self)
+                progress.setWindowTitle("Loading Tables")
+                progress.setWindowModality(Qt.WindowModality.WindowModal)
+                progress.show()
+                for i, table_name in enumerate(tables_to_load):
+                    if progress.wasCanceled():
+                        self.statusBar().showMessage("Query canceled: table loading was interrupted")
+                        return
+                    progress.setLabelText(f"Loading table: {table_name}")
+                    progress.setValue(i)
+                    QApplication.processEvents()
+                    self.reload_selected_table(table_name)
+                progress.setValue(len(tables_to_load))
+                progress.close()
             start_time = datetime.now()
             try:
@@ -672,6 +928,57 @@ class SQLShell(QMainWindow):
             QMessageBox.critical(self, "Unexpected Error",
                 f"An unexpected error occurred:\n\n{str(e)}")
             self.statusBar().showMessage("Query execution failed")
+    def extract_table_names_from_query(self, query):
+        """Extract table names from a SQL query using basic regex patterns"""
+        import re
+        # Convert to uppercase for easier pattern matching
+        query_upper = query.upper()
+        # Strip comments to avoid matching patterns inside comments
+        query_upper = re.sub(r'--.*?$', '', query_upper, flags=re.MULTILINE)
+        query_upper = re.sub(r'/\*.*?\*/', '', query_upper, flags=re.DOTALL)
+        # Common SQL patterns that reference tables
+        patterns = [
+            r'FROM\s+["\[]?(\w+)["\]]?',                         # FROM clause
+            r'JOIN\s+["\[]?(\w+)["\]]?',                         # JOIN clause
+            r'UPDATE\s+["\[]?(\w+)["\]]?',                       # UPDATE statement
+            r'INSERT\s+INTO\s+["\[]?(\w+)["\]]?',                # INSERT statement
+            r'DELETE\s+FROM\s+["\[]?(\w+)["\]]?',                # DELETE statement
+            r'CREATE\s+(?:TEMP|TEMPORARY)?\s*TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?["\[]?(\w+)["\]]?', # CREATE TABLE
+            r'DROP\s+TABLE\s+(?:IF\s+EXISTS\s+)?["\[]?(\w+)["\]]?',  # DROP TABLE
+            r'ALTER\s+TABLE\s+["\[]?(\w+)["\]]?',                # ALTER TABLE
+            r'WITH\s+(\w+)\s+AS',                                # Common Table Expressions
+            r'MERGE\s+INTO\s+["\[]?(\w+)["\]]?'                  # MERGE statement
+        ]
+        tables = set()
+        for pattern in patterns:
+            matches = re.finditer(pattern, query_upper)
+            for match in matches:
+                # Get the table name from the matched group and strip any quotes
+                table_name = match.group(1).strip('"[]`\'')
+                # Skip SQL keywords
+                if table_name in ('SELECT', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'OFFSET',
+                                 'UNION', 'INTERSECT', 'EXCEPT', 'WITH', 'AS', 'ON', 'USING'):
+                    continue
+                # Add to our set of tables
+                tables.add(table_name.lower())  # Convert to lowercase for case-insensitive comparison
+        # Account for qualified table names (schema.table)
+        qualified_tables = set()
+        for table in tables:
+            if '.' in table:
+                qualified_tables.add(table.split('.')[-1])  # Add just the table part
+        tables.update(qualified_tables)
+        # Return all found table names in lowercase to match our table storage convention
+        return tables
     def _update_query_history(self, query):
         """Update query history and track term usage for improved autocompletion"""
@@ -906,28 +1213,14 @@ LIMIT 10
             self.statusBar().showMessage('Exporting data to Excel...')
             # Convert table data to DataFrame
-            df = self.get_table_data_as_dataframe()
-            df.to_excel(file_name, index=False)
+            df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
+            if df is None:
+                raise Exception("Failed to convert table data to DataFrame")
-            # Generate table name from file name
-            base_name = os.path.splitext(os.path.basename(file_name))[0]
-            table_name = self.db_manager.sanitize_table_name(base_name)
+            # Export using ExportManager
+            table_name, metadata = self.export_manager.export_to_excel(df, file_name)
-            # Ensure unique table name
-            original_name = table_name
-            counter = 1
-            while table_name in self.db_manager.loaded_tables:
-                table_name = f"{original_name}_{counter}"
-                counter += 1
-            # Register the table in the database manager
-            self.db_manager.register_dataframe(df, table_name, file_name)
-            # Update tracking
-            self.db_manager.loaded_tables[table_name] = file_name
-            self.db_manager.table_columns[table_name] = df.columns.tolist()
-            # Update UI using new method
+            # Update UI
             self.tables_list.add_table_item(table_name, os.path.basename(file_name))
             self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
@@ -964,28 +1257,14 @@ LIMIT 10
             self.statusBar().showMessage('Exporting data to Parquet...')
             # Convert table data to DataFrame
-            df = self.get_table_data_as_dataframe()
-            df.to_parquet(file_name, index=False)
+            df = self.export_manager.convert_table_to_dataframe(current_tab.results_table)
+            if df is None:
+                raise Exception("Failed to convert table data to DataFrame")
-            # Generate table name from file name
-            base_name = os.path.splitext(os.path.basename(file_name))[0]
-            table_name = self.db_manager.sanitize_table_name(base_name)
+            # Export using ExportManager
+            table_name, metadata = self.export_manager.export_to_parquet(df, file_name)
-            # Ensure unique table name
-            original_name = table_name
-            counter = 1
-            while table_name in self.db_manager.loaded_tables:
-                table_name = f"{original_name}_{counter}"
-                counter += 1
-            # Register the table in the database manager
-            self.db_manager.register_dataframe(df, table_name, file_name)
-            # Update tracking
-            self.db_manager.loaded_tables[table_name] = file_name
-            self.db_manager.table_columns[table_name] = df.columns.tolist()
-            # Update UI using new method
+            # Update UI
             self.tables_list.add_table_item(table_name, os.path.basename(file_name))
             self.statusBar().showMessage(f'Data exported to {file_name} and loaded as table "{table_name}"')
@@ -1005,94 +1284,10 @@ LIMIT 10
     def get_table_data_as_dataframe(self):
         """Helper function to convert table widget data to a DataFrame with proper data types"""
-        # Get the current tab
         current_tab = self.get_current_tab()
         if not current_tab:
             return pd.DataFrame()
-        headers = [current_tab.results_table.horizontalHeaderItem(i).text() for i in range(current_tab.results_table.columnCount())]
-        data = []
-        for row in range(current_tab.results_table.rowCount()):
-            row_data = []
-            for column in range(current_tab.results_table.columnCount()):
-                item = current_tab.results_table.item(row, column)
-                row_data.append(item.text() if item else '')
-            data.append(row_data)
-        # Create DataFrame from raw string data
-        df_raw = pd.DataFrame(data, columns=headers)
-        # Try to use the original dataframe's dtypes if available
-        if hasattr(current_tab, 'current_df') and current_tab.current_df is not None:
-            original_df = current_tab.current_df
-            # Since we might have filtered rows, we can't just return the original DataFrame
-            # But we can use its column types to convert our string data appropriately
-            # Create a new DataFrame with appropriate types
-            df_typed = pd.DataFrame()
-            for col in df_raw.columns:
-                if col in original_df.columns:
-                    # Get the original column type
-                    orig_type = original_df[col].dtype
-                    # Special handling for different data types
-                    if pd.api.types.is_numeric_dtype(orig_type):
-                        # Handle numeric columns (int or float)
-                        try:
-                            # First try to convert to numeric type
-                            # Remove commas used for thousands separators
-                            numeric_col = pd.to_numeric(df_raw[col].str.replace(',', '').replace('NULL', np.nan))
-                            df_typed[col] = numeric_col
-                        except:
-                            # If that fails, keep the original string
-                            df_typed[col] = df_raw[col]
-                    elif pd.api.types.is_datetime64_dtype(orig_type):
-                        # Handle datetime columns
-                        try:
-                            df_typed[col] = pd.to_datetime(df_raw[col].replace('NULL', np.nan))
-                        except:
-                            df_typed[col] = df_raw[col]
-                    elif pd.api.types.is_bool_dtype(orig_type):
-                        # Handle boolean columns
-                        try:
-                            df_typed[col] = df_raw[col].map({'True': True, 'False': False}).replace('NULL', np.nan)
-                        except:
-                            df_typed[col] = df_raw[col]
-                    else:
-                        # For other types, keep as is
-                        df_typed[col] = df_raw[col]
-                else:
-                    # For columns not in the original dataframe, infer type
-                    df_typed[col] = df_raw[col]
-            return df_typed
-        else:
-            # If we don't have the original dataframe, try to infer types
-            # First replace 'NULL' with actual NaN
-            df_raw.replace('NULL', np.nan, inplace=True)
-            # Try to convert each column to numeric if possible
-            for col in df_raw.columns:
-                try:
-                    # First try to convert to numeric by removing commas
-                    df_raw[col] = pd.to_numeric(df_raw[col].str.replace(',', ''))
-                except:
-                    # If that fails, try to convert to datetime
-                    try:
-                        df_raw[col] = pd.to_datetime(df_raw[col])
-                    except:
-                        # If both numeric and datetime conversions fail,
-                        # try boolean conversion for True/False strings
-                        try:
-                            if df_raw[col].dropna().isin(['True', 'False']).all():
-                                df_raw[col] = df_raw[col].map({'True': True, 'False': False})
-                        except:
-                            # Otherwise, keep as is
-                            pass
-            return df_raw
+        return self.export_manager.convert_table_to_dataframe(current_tab.results_table)
     def keyPressEvent(self, event):
         """Handle global keyboard shortcuts"""
@@ -1263,6 +1458,12 @@ LIMIT 10
         # Add menu actions
         select_from_action = context_menu.addAction("Select from")
         add_to_editor_action = context_menu.addAction("Just add to editor")
+        select_from_new_tab_action = context_menu.addAction("Select From in New Tab")
+        # Add copy path actions
+        context_menu.addSeparator()
+        copy_path_action = context_menu.addAction("Copy Path")
+        copy_relative_path_action = context_menu.addAction("Copy Relative Path")
         # Add entropy profiler action
         context_menu.addSeparator()
@@ -1327,6 +1528,11 @@ LIMIT 10
             cursor = current_tab.query_edit.textCursor()
             cursor.insertText(table_name)
             current_tab.query_edit.setFocus()
+        elif action == select_from_new_tab_action:
+            # Create a new tab with the selected table
+            new_tab = self.add_tab(title=table_name)
+            new_tab.set_query_text(f"SELECT * FROM {table_name}")
+            new_tab.query_edit.setFocus()
         elif action == reload_action:
             self.reload_selected_table(table_name)
         elif action == analyze_entropy_action:
@@ -1393,6 +1599,24 @@ LIMIT 10
             if target_folder:
                 self.tables_list.move_item_to_folder(item, target_folder)
                 self.statusBar().showMessage(f'Moved table "{table_name}" to folder "{target_folder.text(0)}"')
+        elif action == copy_path_action:
+            # Get the full path from the table source
+            if table_name in self.db_manager.loaded_tables:
+                path = self.db_manager.loaded_tables[table_name]
+                if path != 'database':  # Only copy if it's a file path
+                    QApplication.clipboard().setText(path)
+                    self.statusBar().showMessage(f"Copied full path to clipboard")
+        elif action == copy_relative_path_action:
+            # Get the relative path from the table source
+            if table_name in self.db_manager.loaded_tables:
+                path = self.db_manager.loaded_tables[table_name]
+                if path != 'database':  # Only copy if it's a file path
+                    try:
+                        rel_path = os.path.relpath(path)
+                        QApplication.clipboard().setText(rel_path)
+                        self.statusBar().showMessage(f"Copied relative path to clipboard")
+                    except ValueError:
+                        self.statusBar().showMessage("Could not determine relative path")
     def analyze_foreign_keys_between_tables(self, table_items):
         """Analyze foreign key relationships between selected tables"""
@@ -2727,6 +2951,20 @@ LIMIT 10
             self.showMaximized()
             self.was_maximized = True
+    def get_selected_table(self):
+        """Get the name of the currently selected table in the tables list"""
+        if not hasattr(self, 'tables_list'):
+            return None
+        selected_items = self.tables_list.selectedItems()
+        # Filter out folders and use only single selections
+        table_items = [item for item in selected_items if not self.tables_list.is_folder_item(item)]
+        if len(table_items) == 1:  # Only use if exactly one table is selected
+            return self.tables_list.get_table_name_from_item(table_items[0])
+        return None
     def change_zoom(self, factor):
         """Change the zoom level of the application by adjusting font sizes"""
         try:
@@ -3428,6 +3666,198 @@ LIMIT 10
             QMessageBox.critical(self, "Analysis Error", f"Error analyzing column:\n\n{str(e)}")
             self.statusBar().showMessage(f'Error analyzing column: {str(e)}')
+    def encode_text(self, column_name):
+        """Generate one-hot encoding for a text column and visualize the results"""
+        try:
+            # Get the current tab
+            current_tab = self.get_current_tab()
+            if not current_tab or current_tab.current_df is None:
+                return
+            # Show a loading indicator
+            self.statusBar().showMessage(f'Preparing one-hot encoding for "{column_name}"...')
+            # Get the dataframe from the current tab
+            full_df = current_tab.current_df.copy()
+            df = full_df
+            # Save original row count for reference
+            current_tab.original_df_rowcount = len(full_df)
+            if df is not None and not df.empty:
+                # Sample the data if it's larger than 1000 rows for better performance
+                row_count = len(df)
+                if row_count > 1000:
+                    self.statusBar().showMessage(f'Sampling data (using 1000 rows from {row_count} total)...')
+                    # Store the full dataframe before sampling for later use
+                    current_tab._original_df_before_encoding = full_df
+                    # Sample the data
+                    df = df.sample(n=1000, random_state=42)
+                # Import the one-hot encoding visualizer
+                from sqlshell.utils.profile_ohe import visualize_ohe
+                # Create and show the visualization
+                self.statusBar().showMessage(f'Generating one-hot encoding for "{column_name}"...')
+                vis = visualize_ohe(df, column_name)
+                # Connect to the encodingApplied signal
+                vis.encodingApplied.connect(self.apply_encoded_dataframe)
+                # Store a reference to prevent garbage collection
+                self._ohe_window = vis
+                if row_count > 1000:
+                    self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}" (sampled 1000 rows from {row_count})')
+                else:
+                    self.statusBar().showMessage(f'One-hot encoding generated for "{column_name}"')
+            else:
+                QMessageBox.warning(self, "Empty Data", "No data available to encode.")
+                self.statusBar().showMessage(f'No data to encode')
+        except Exception as e:
+            QMessageBox.critical(self, "Encoding Error", f"Error generating one-hot encoding:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error generating one-hot encoding: {str(e)}')
+    def apply_encoded_dataframe(self, encoded_df):
+        """Apply the encoded dataframe to the current tab's results table"""
+        try:
+            # Get the current tab
+            current_tab = self.get_current_tab()
+            if not current_tab:
+                return
+            # Check if we're using a sampled version
+            is_sampled = False
+            full_df = None
+            # Show a loading indicator
+            self.statusBar().showMessage(f'Applying one-hot encoding...')
+            # Progress dialog for large datasets
+            progress = QProgressDialog("Applying encoding...", "Cancel", 0, 100, self)
+            progress.setWindowTitle("Processing")
+            progress.setWindowModality(Qt.WindowModality.WindowModal)
+            progress.setValue(10)
+            # Check if this sample is smaller than the actual dataset
+            if hasattr(current_tab, '_original_df_before_encoding'):
+                # We have the original, full dataset stored
+                full_df = current_tab._original_df_before_encoding
+                is_sampled = len(full_df) > len(encoded_df)
+            elif hasattr(current_tab, 'original_df_rowcount'):
+                # We know the original row count but don't have the data
+                is_sampled = current_tab.original_df_rowcount > len(encoded_df)
+            progress.setValue(20)
+            QApplication.processEvents()
+            # If we're working with a sample, apply the encoding to the full dataset
+            if is_sampled and full_df is not None:
+                self.statusBar().showMessage(f'Re-applying encoding to full dataset ({len(full_df)} rows)...')
+                try:
+                    # Get the encoding columns (added by the OHE process)
+                    original_cols = set(current_tab.current_df.columns)
+                    ohe_cols = set(encoded_df.columns) - original_cols
+                    if ohe_cols:
+                        # Import the encoding function to apply to full dataset
+                        from sqlshell.utils.profile_ohe import get_ohe
+                        # Get the column that was encoded
+                        encoded_column = None
+                        for col in original_cols:
+                            if any(c.startswith(f'is_{col}') for c in ohe_cols) or any(c.startswith(f'has_{col}') for c in ohe_cols):
+                                encoded_column = col
+                                break
+                        progress.setValue(40)
+                        QApplication.processEvents()
+                        if encoded_column:
+                            # Apply encoding to full dataset
+                            self.statusBar().showMessage(f'Encoding column "{encoded_column}" on full dataset...')
+                            full_encoded_df = get_ohe(full_df, encoded_column)
+                            progress.setValue(80)
+                            QApplication.processEvents()
+                            # Update the current dataframe with the fully encoded one
+                            current_tab.current_df = full_encoded_df
+                            self.current_df = full_encoded_df  # Keep this for compatibility
+                            # Use the full encoded dataframe instead
+                            encoded_df = full_encoded_df
+                        else:
+                            # If we can't determine the encoded column, use the sampled version
+                            current_tab.current_df = encoded_df
+                            self.current_df = encoded_df  # Keep this for compatibility
+                    else:
+                        # No encoding columns found, use the sampled version
+                        current_tab.current_df = encoded_df
+                        self.current_df = encoded_df  # Keep this for compatibility
+                except Exception as e:
+                    # If there's an error, fall back to the provided encoded_df
+                    print(f"Error applying encoding to full dataset: {e}")
+                    current_tab.current_df = encoded_df
+                    self.current_df = encoded_df  # Keep this for compatibility
+            else:
+                # No sampling occurred, just use the provided encoded dataframe
+                current_tab.current_df = encoded_df
+                self.current_df = encoded_df  # Keep this for compatibility
+            progress.setValue(90)
+            QApplication.processEvents()
+            # Populate the results table with the new dataframe
+            self.populate_table(encoded_df)
+            # Update results title to show this is encoded data
+            current_tab.results_title.setText(f"ENCODED DATA")
+            progress.setValue(100)
+            progress.close()
+            # Update status
+            self.statusBar().showMessage(f'Applied one-hot encoding with {len(encoded_df.columns)} columns')
+            # Check if we should register this as a temporary table
+            if len(encoded_df) >= 100:  # Only worth registering as table if it's substantial
+                try:
+                    # Generate a unique table name
+                    import time
+                    timestamp = int(time.time())
+                    table_name = f"encoded_data_{timestamp}"
+                    # Register as a temporary table in the database manager
+                    self.db_manager.register_dataframe(encoded_df, table_name, "query_result")
+                    # Add to tables list
+                    self.tables_list.add_table_item(table_name, "encoded data")
+                    # Update completer
+                    self.update_completer()
+                    # Notify user
+                    self.statusBar().showMessage(f'Applied one-hot encoding and registered as table "{table_name}"')
+                except Exception as e:
+                    # Just log the error but continue - this is an optional enhancement
+                    print(f"Error registering encoded dataframe as table: {e}")
+        except Exception as e:
+            QMessageBox.critical(self, "Error", f"Failed to apply encoded dataframe:\n\n{str(e)}")
+            self.statusBar().showMessage(f'Error applying encoding: {str(e)}')
+    def get_current_query_tab(self):
+        """Get the currently active tab if it's a query tab (has query_edit attribute)"""
+        current_tab = self.get_current_tab()
+        if current_tab and hasattr(current_tab, 'query_edit'):
+            return current_tab
+        return None
 def main():
     # Parse command line arguments
     parser = argparse.ArgumentParser(description='SQL Shell - SQL Query Tool')

sqlshell 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

sqlshell 0.2.3py3-none-any.whl → 0.3.0py3-none-any.whl