PyPI - sqlshell - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

sqlshell 0.1.9__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlshell might be problematic. Click here for more details.

Files changed (24)

sqlshell/LICENSE +21 -0
sqlshell/MANIFEST.in +6 -0
sqlshell/README.md +59 -0
sqlshell/__init__.py +3 -1
sqlshell/context_suggester.py +765 -0
sqlshell/create_test_data.py +106 -30
sqlshell/db/database_manager.py +152 -6
sqlshell/editor.py +68 -11
sqlshell/main.py +1566 -656
sqlshell/menus.py +171 -0
sqlshell/query_tab.py +32 -3
sqlshell/styles.py +257 -0
sqlshell/suggester_integration.py +275 -0
sqlshell/table_list.py +907 -0
sqlshell/utils/__init__.py +8 -0
sqlshell/utils/profile_entropy.py +347 -0
sqlshell/utils/profile_keys.py +356 -0
sqlshell-0.2.1.dist-info/METADATA +198 -0
{sqlshell-0.1.9.dist-info → sqlshell-0.2.1.dist-info}/RECORD +22 -12
{sqlshell-0.1.9.dist-info → sqlshell-0.2.1.dist-info}/WHEEL +1 -1
sqlshell/setup.py +0 -42
sqlshell-0.1.9.dist-info/METADATA +0 -122
{sqlshell-0.1.9.dist-info → sqlshell-0.2.1.dist-info}/entry_points.txt +0 -0
{sqlshell-0.1.9.dist-info → sqlshell-0.2.1.dist-info}/top_level.txt +0 -0

sqlshell/create_test_data.py CHANGED

@@ -1,50 +1,126 @@
 import pandas as pd
 import numpy as np
 from datetime import datetime, timedelta
+import os
+# Set random seed for reproducibility
+np.random.seed(42)
+# Define output directory
+OUTPUT_DIR = 'test_data'
+os.makedirs(OUTPUT_DIR, exist_ok=True)
 def create_sales_data(num_records=1000):
-    """Create sample sales data"""
-    # Generate random dates within the last year
+    # Generate dates for the last 365 days
     end_date = datetime.now()
     start_date = end_date - timedelta(days=365)
-    dates = pd.date_range(start=start_date, end=end_date, periods=num_records)
+    dates = [start_date + timedelta(days=x) for x in range(366)]
+    random_dates = np.random.choice(dates, num_records)
+    # Create product data
+    products = ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer']
+    product_prices = {
+        'Laptop': (800, 2000),
+        'Smartphone': (400, 1200),
+        'Tablet': (200, 800),
+        'Monitor': (150, 500),
+        'Keyboard': (20, 150),
+        'Mouse': (10, 80),
+        'Headphones': (30, 300),
+        'Printer': (100, 400)
+    }
     # Generate random data
     data = {
-        'orderid': range(1, num_records + 1),
-        'orderdate': dates,
-        'customerid': np.random.randint(1, 101, num_records),
-        'productid': np.random.randint(1, 51, num_records),
-        'quantity': np.random.randint(1, 11, num_records),
-        'unitprice': np.random.uniform(10.0, 1000.0, num_records).round(2)
+        'OrderID': range(1, num_records + 1),
+        'Date': random_dates,
+        'ProductID': np.random.randint(1, len(products) + 1, num_records),  # Changed to ProductID for joining
+        'Quantity': np.random.randint(1, 11, num_records),
+        'CustomerID': np.random.randint(1, 201, num_records),
+        'Region': np.random.choice(['North', 'South', 'East', 'West'], num_records)
     }
-    return pd.DataFrame(data)
-def create_customer_data(num_customers=100):
-    """Create sample customer data"""
-    # Generate random customer data
+    # Calculate prices based on product
+    product_list = [products[pid-1] for pid in data['ProductID']]
+    data['Price'] = [np.random.uniform(product_prices[p][0], product_prices[p][1])
+                     for p in product_list]
+    data['TotalAmount'] = [price * qty for price, qty in zip(data['Price'], data['Quantity'])]
+    # Create DataFrame
+    df = pd.DataFrame(data)
+    # Round numerical columns
+    df['Price'] = df['Price'].round(2)
+    df['TotalAmount'] = df['TotalAmount'].round(2)
+    # Sort by Date
+    return df.sort_values('Date')
+def create_customer_data(num_customers=200):
+    # Generate customer data
     data = {
-        'customerid': range(1, num_customers + 1),
-        'customername': [f"Customer {i}" for i in range(1, num_customers + 1)],
-        'email': [f"customer{i}@example.com" for i in range(1, num_customers + 1)],
-        'country': np.random.choice(['USA', 'UK', 'Canada', 'Australia', 'Germany'], num_customers),
-        'joindate': pd.date_range(start='2020-01-01', periods=num_customers).tolist()
+        'CustomerID': range(1, num_customers + 1),
+        'FirstName': [f'Customer{i}' for i in range(1, num_customers + 1)],
+        'LastName': [f'Lastname{i}' for i in range(1, num_customers + 1)],
+        'Email': [f'customer{i}@example.com' for i in range(1, num_customers + 1)],
+        'JoinDate': [datetime.now() - timedelta(days=np.random.randint(1, 1000))
+                     for _ in range(num_customers)],
+        'CustomerType': np.random.choice(['Regular', 'Premium', 'VIP'], num_customers),
+        'CreditScore': np.random.randint(300, 851, num_customers)
     }
     return pd.DataFrame(data)
-def create_product_data(num_products=50):
-    """Create sample product data"""
-    categories = ['Electronics', 'Books', 'Clothing', 'Home & Garden', 'Sports']
+def create_product_data():
+    # Create detailed product information
+    products = {
+        'ProductID': range(1, 9),
+        'ProductName': ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer'],
+        'Category': ['Computers', 'Mobile', 'Mobile', 'Accessories', 'Accessories', 'Accessories', 'Audio', 'Peripherals'],
+        'Brand': ['TechPro', 'MobileX', 'TabletCo', 'ViewMax', 'TypeMaster', 'ClickPro', 'SoundMax', 'PrintPro'],
+        'StockQuantity': np.random.randint(50, 500, 8),
+        'MinPrice': [800, 400, 200, 150, 20, 10, 30, 100],
+        'MaxPrice': [2000, 1200, 800, 500, 150, 80, 300, 400],
+        'Weight_kg': [2.5, 0.2, 0.5, 3.0, 0.8, 0.1, 0.3, 5.0],
+        'WarrantyMonths': [24, 12, 12, 36, 12, 12, 24, 12]
+    }
+    return pd.DataFrame(products)
+def create_large_numbers_data(num_records=100):
+    """Create a dataset with very large numbers for testing and visualization."""
-    # Generate random product data
+    # Generate random IDs
+    ids = range(1, num_records + 1)
+    # Create different columns with large numbers
     data = {
-        'productid': range(1, num_products + 1),
-        'productname': [f"Product {i}" for i in range(1, num_products + 1)],
-        'category': np.random.choice(categories, num_products),
-        'baseprice': np.random.uniform(5.0, 500.0, num_products).round(2),
-        'instock': np.random.choice([True, False], num_products, p=[0.8, 0.2])
+        'ID': ids,
+        'Date': pd.date_range(start='2023-01-01', periods=num_records),
+        'SmallValue': np.random.randint(1, 1000, num_records),
+        'MediumValue': np.random.randint(10000, 9999999, num_records),
+        'LargeValue': [int(str(np.random.randint(1, 999)) + str(np.random.randint(0, 9999999)).zfill(7) +
+                          str(np.random.randint(0, 9999)).zfill(4)) for _ in range(num_records)],
+        'VeryLargeValue': [int(str(np.random.randint(100, 999)) + str(np.random.randint(1000000, 9999999)) +
+                             str(np.random.randint(1000000, 9999999))) for _ in range(num_records)],
+        'MassiveValue': [int('1' + ''.join([str(np.random.randint(0, 10)) for _ in range(15)])) for _ in range(num_records)],
+        'Category': np.random.choice(['A', 'B', 'C', 'D', 'E'], num_records),
+        'IsActive': np.random.choice([True, False], num_records, p=[0.8, 0.2])
     }
-    return pd.DataFrame(data)
+    # Create exponential values for scientific notation
+    data['ExponentialValue'] = [float(f"{np.random.randint(1, 10)}.{np.random.randint(1, 100):02d}e{np.random.randint(10, 20)}")
+                              for _ in range(num_records)]
+    # Create monetary values (with decimals)
+    data['Revenue'] = [np.random.randint(1000000, 9999999999) + np.random.random() for _ in range(num_records)]
+    data['Budget'] = [np.random.randint(10000000, 999999999) + np.random.random() for _ in range(num_records)]
+    # Create DataFrame
+    df = pd.DataFrame(data)
+    # Round monetary values to 2 decimal places
+    df['Revenue'] = df['Revenue'].round(2)
+    df['Budget'] = df['Budget'].round(2)
+    return df

sqlshell/db/database_manager.py CHANGED

@@ -2,6 +2,7 @@ import os
 import sqlite3
 import pandas as pd
 import duckdb
+from pathlib import Path
 class DatabaseManager:
     """
@@ -47,13 +48,14 @@ class DatabaseManager:
                 self.connection_type = None
                 self.database_path = None  # Clear the database path
-    def open_database(self, filename):
+    def open_database(self, filename, load_all_tables=True):
         """
         Open a database connection to the specified file.
         Detects whether it's a SQLite or DuckDB database.
         Args:
             filename: Path to the database file
+            load_all_tables: Whether to automatically load all tables from the database
         Returns:
             True if successful, False otherwise
@@ -64,6 +66,10 @@ class DatabaseManager:
         # Close any existing connection
         self.close_connection()
+        # Clear any existing loaded tables
+        self.loaded_tables = {}
+        self.table_columns = {}
         try:
             if self.is_sqlite_db(filename):
                 self.conn = sqlite3.connect(filename)
@@ -75,8 +81,9 @@ class DatabaseManager:
             # Store the database path
             self.database_path = os.path.abspath(filename)
-            # Load tables from the database
-            self.load_database_tables()
+            # Load tables from the database if requested
+            if load_all_tables:
+                self.load_database_tables()
             return True
         except (sqlite3.Error, duckdb.Error) as e:
             self.conn = None
@@ -212,7 +219,7 @@ class DatabaseManager:
         Load data from a file into the database.
         Args:
-            file_path: Path to the data file (Excel, CSV, Parquet)
+            file_path: Path to the data file (Excel, CSV, Parquet, Delta)
         Returns:
             Tuple of (table_name, DataFrame) for the loaded data
@@ -221,8 +228,23 @@ class DatabaseManager:
             ValueError: If the file format is unsupported or there's an error
         """
         try:
+            # Check if this is a Delta table (folder with _delta_log)
+            delta_path = Path(file_path)
+            is_delta_table = (delta_path.is_dir() and
+                             (delta_path / '_delta_log').exists()) or file_path.endswith('.delta')
             # Read the file into a DataFrame, using optimized loading strategies
-            if file_path.endswith(('.xlsx', '.xls')):
+            if is_delta_table:
+                # Read as Delta table using deltalake library
+                try:
+                    # Load the Delta table
+                    import deltalake
+                    delta_table = deltalake.DeltaTable(file_path)
+                    # Convert to pandas DataFrame
+                    df = delta_table.to_pandas()
+                except Exception as e:
+                    raise ValueError(f"Error loading Delta table: {str(e)}")
+            elif file_path.endswith(('.xlsx', '.xls')):
                 # Try to use a streaming approach for Excel files
                 try:
                     # For Excel files, we first check if it's a large file
@@ -279,6 +301,11 @@ class DatabaseManager:
             # Generate table name from file name
             base_name = os.path.splitext(os.path.basename(file_path))[0]
+            # For directories like Delta tables, use the directory name
+            if os.path.isdir(file_path):
+                base_name = os.path.basename(file_path)
             table_name = self.sanitize_table_name(base_name)
             # Ensure unique table name
@@ -374,6 +401,68 @@ class DatabaseManager:
         except Exception as e:
             raise Exception(f"Error previewing table: {str(e)}")
+    def reload_table(self, table_name):
+        """
+        Reload a table's data from its source file.
+        Args:
+            table_name: Name of the table to reload
+        Returns:
+            Tuple of (bool, message) indicating success/failure and a message
+        Raises:
+            ValueError: If the table cannot be reloaded
+        """
+        if not table_name in self.loaded_tables:
+            return False, f"Table '{table_name}' not found"
+        file_path = self.loaded_tables[table_name]
+        # Check if this is a file-based table
+        if file_path in ['database', 'query_result']:
+            return False, f"Cannot reload '{table_name}' because it's not a file-based table"
+        try:
+            # Check if the file still exists
+            if not os.path.exists(file_path):
+                return False, f"Source file '{file_path}' no longer exists"
+            # Store the original table name
+            original_name = table_name
+            # Remove the existing table
+            self.remove_table(table_name)
+            # Check if this is a Delta table
+            delta_path = Path(file_path)
+            is_delta_table = (delta_path.is_dir() and
+                             (delta_path / '_delta_log').exists()) or file_path.endswith('.delta')
+            # Load the file with the original table name
+            df = None
+            if is_delta_table:
+                # Read as Delta table
+                import deltalake
+                delta_table = deltalake.DeltaTable(file_path)
+                df = delta_table.to_pandas()
+            elif file_path.endswith(('.xlsx', '.xls')):
+                df = pd.read_excel(file_path)
+            elif file_path.endswith('.csv'):
+                df = pd.read_csv(file_path)
+            elif file_path.endswith('.parquet'):
+                df = pd.read_parquet(file_path)
+            else:
+                return False, "Unsupported file format"
+            # Register the dataframe with the original name
+            self.register_dataframe(df, original_name, file_path)
+            return True, f"Table '{table_name}' reloaded successfully"
+        except Exception as e:
+            return False, f"Error reloading table: {str(e)}"
     def rename_table(self, old_name, new_name):
         """
         Rename a table in the database.
@@ -688,4 +777,61 @@ class DatabaseManager:
                     column_data_types[col_name] = data_type
         except Exception:
             # Ignore errors in type detection - this is just for enhancement
-            pass
+            pass
+    def load_specific_table(self, table_name):
+        """
+        Load metadata for a specific table from the database.
+        This is used when we know which tables we want to load rather than loading all tables.
+        Args:
+            table_name: Name of the table to load
+        Returns:
+            Boolean indicating if the table was found and loaded
+        """
+        if not self.is_connected():
+            return False
+        try:
+            if self.connection_type == 'sqlite':
+                # Check if the table exists in SQLite
+                cursor = self.conn.cursor()
+                cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table_name,))
+                result = cursor.fetchone()
+                if result:
+                    # Get column names for the table
+                    try:
+                        column_query = f"PRAGMA table_info({table_name})"
+                        columns = cursor.execute(column_query).fetchall()
+                        self.table_columns[table_name] = [col[1] for col in columns]  # Column name is at index 1
+                    except Exception:
+                        self.table_columns[table_name] = []
+                    # Register the table
+                    self.loaded_tables[table_name] = 'database'
+                    return True
+            else:  # duckdb
+                # Check if the table exists in DuckDB
+                query = f"SELECT table_name FROM information_schema.tables WHERE table_name='{table_name}' AND table_schema='main'"
+                result = self.conn.execute(query).fetchdf()
+                if not result.empty:
+                    # Get column names for the table
+                    try:
+                        column_query = f"SELECT column_name FROM information_schema.columns WHERE table_name='{table_name}' AND table_schema='main'"
+                        columns = self.conn.execute(column_query).fetchdf()
+                        self.table_columns[table_name] = columns['column_name'].tolist()
+                    except Exception:
+                        self.table_columns[table_name] = []
+                    # Register the table
+                    self.loaded_tables[table_name] = 'database'
+                    return True
+            return False
+        except Exception:
+            return False

sqlshell/editor.py CHANGED

@@ -496,15 +496,68 @@ class SQLEditor(QPlainTextEdit):
         popup = self.completer.popup()
         popup.setCurrentIndex(self.completer.completionModel().index(0, 0))
-        # Calculate position for the popup
-        cr = self.cursorRect()
-        cr.setWidth(self.completer.popup().sizeHintForColumn(0) +
-                   self.completer.popup().verticalScrollBar().sizeHint().width())
-        # Show the popup
-        self.completer.complete(cr)
+        try:
+            # Calculate position for the popup
+            cr = self.cursorRect()
+            # Ensure cursorRect is valid
+            if not cr.isValid() or cr.x() < 0 or cr.y() < 0:
+                # Try to recompute using the text cursor
+                cursor = self.textCursor()
+                cr = self.cursorRect(cursor)
+                # If still invalid, use a default position
+                if not cr.isValid() or cr.x() < 0 or cr.y() < 0:
+                    pos = self.mapToGlobal(self.pos())
+                    cr = QRect(pos.x() + 10, pos.y() + 10, 10, self.fontMetrics().height())
+            # Calculate width for the popup that fits the content
+            suggested_width = popup.sizeHintForColumn(0) + popup.verticalScrollBar().sizeHint().width()
+            # Ensure minimum width
+            popup_width = max(suggested_width, 200)
+            cr.setWidth(popup_width)
+            # Show the popup at the correct position
+            self.completer.complete(cr)
+        except Exception as e:
+            # In case of any error, try a more direct approach
+            print(f"Error positioning completion popup: {e}")
+            try:
+                cursor_pos = self.mapToGlobal(self.cursorRect().bottomLeft())
+                popup.move(cursor_pos)
+                popup.show()
+            except:
+                # Last resort - if all else fails, hide the popup to avoid showing it in the wrong place
+                popup.hide()
     def keyPressEvent(self, event):
+        # Check for Ctrl+Enter first, which should take precedence over other behaviors
+        if event.key() == Qt.Key.Key_Return and (event.modifiers() & Qt.KeyboardModifier.ControlModifier):
+            # If autocomplete popup is showing, hide it
+            if self.completer and self.completer.popup().isVisible():
+                self.completer.popup().hide()
+            # Cancel any pending autocomplete timers
+            if hasattr(self, '_completion_timer') and self._completion_timer.isActive():
+                self._completion_timer.stop()
+            # Let the main window handle query execution
+            # Important: We need to emit event to parent to trigger execution
+            # and prevent it from being treated as an autocomplete selection
+            event.accept()  # Mark the event as handled
+            # Find the parent SQLShell instance and call its execute_query method
+            parent = self
+            while parent is not None:
+                if hasattr(parent, 'execute_query'):
+                    parent.execute_query()
+                    return
+                parent = parent.parent()
+            # If we couldn't find the execute_query method, pass the event up
+            super().keyPressEvent(event)
+            return
         # Handle completer popup navigation
         if self.completer and self.completer.popup().isVisible():
             # Handle Tab key to complete the current selection
@@ -816,11 +869,15 @@ class SQLEditor(QPlainTextEdit):
             # Get table name from dropped text
             text = event.mimeData().text()
-            # Extract actual table name (if it includes parentheses)
-            if " (" in text:
-                table_name = text.split(" (")[0]
+            # Try to extract table name from custom mime data if available
+            if event.mimeData().hasFormat('application/x-sqlshell-tablename'):
+                table_name = bytes(event.mimeData().data('application/x-sqlshell-tablename')).decode()
             else:
-                table_name = text
+                # Extract actual table name (if it includes parentheses)
+                if " (" in text:
+                    table_name = text.split(" (")[0]
+                else:
+                    table_name = text
             # Get current cursor position and surrounding text
             cursor = self.textCursor()

sqlshell 0.1.9__py3-none-any.whl → 0.2.1__py3-none-any.whl

Potentially problematic release.

sqlshell 0.1.9__py3-none-any.whl → 0.2.1__py3-none-any.whl