sqlshell 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. sqlshell/__init__.py +84 -0
  2. sqlshell/__main__.py +4926 -0
  3. sqlshell/ai_autocomplete.py +392 -0
  4. sqlshell/ai_settings_dialog.py +337 -0
  5. sqlshell/context_suggester.py +768 -0
  6. sqlshell/create_test_data.py +152 -0
  7. sqlshell/data/create_test_data.py +137 -0
  8. sqlshell/db/__init__.py +6 -0
  9. sqlshell/db/database_manager.py +1318 -0
  10. sqlshell/db/export_manager.py +188 -0
  11. sqlshell/editor.py +1166 -0
  12. sqlshell/editor_integration.py +127 -0
  13. sqlshell/execution_handler.py +421 -0
  14. sqlshell/menus.py +262 -0
  15. sqlshell/notification_manager.py +370 -0
  16. sqlshell/query_tab.py +904 -0
  17. sqlshell/resources/__init__.py +1 -0
  18. sqlshell/resources/icon.png +0 -0
  19. sqlshell/resources/logo_large.png +0 -0
  20. sqlshell/resources/logo_medium.png +0 -0
  21. sqlshell/resources/logo_small.png +0 -0
  22. sqlshell/resources/splash_screen.gif +0 -0
  23. sqlshell/space_invaders.py +501 -0
  24. sqlshell/splash_screen.py +405 -0
  25. sqlshell/sqlshell/__init__.py +5 -0
  26. sqlshell/sqlshell/create_test_data.py +118 -0
  27. sqlshell/sqlshell/create_test_databases.py +96 -0
  28. sqlshell/sqlshell_demo.png +0 -0
  29. sqlshell/styles.py +257 -0
  30. sqlshell/suggester_integration.py +330 -0
  31. sqlshell/syntax_highlighter.py +124 -0
  32. sqlshell/table_list.py +996 -0
  33. sqlshell/ui/__init__.py +6 -0
  34. sqlshell/ui/bar_chart_delegate.py +49 -0
  35. sqlshell/ui/filter_header.py +469 -0
  36. sqlshell/utils/__init__.py +16 -0
  37. sqlshell/utils/profile_cn2.py +1661 -0
  38. sqlshell/utils/profile_column.py +2635 -0
  39. sqlshell/utils/profile_distributions.py +616 -0
  40. sqlshell/utils/profile_entropy.py +347 -0
  41. sqlshell/utils/profile_foreign_keys.py +779 -0
  42. sqlshell/utils/profile_keys.py +2834 -0
  43. sqlshell/utils/profile_ohe.py +934 -0
  44. sqlshell/utils/profile_ohe_advanced.py +754 -0
  45. sqlshell/utils/profile_ohe_comparison.py +237 -0
  46. sqlshell/utils/profile_prediction.py +926 -0
  47. sqlshell/utils/profile_similarity.py +876 -0
  48. sqlshell/utils/search_in_df.py +90 -0
  49. sqlshell/widgets.py +400 -0
  50. sqlshell-0.4.4.dist-info/METADATA +441 -0
  51. sqlshell-0.4.4.dist-info/RECORD +54 -0
  52. sqlshell-0.4.4.dist-info/WHEEL +5 -0
  53. sqlshell-0.4.4.dist-info/entry_points.txt +2 -0
  54. sqlshell-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,152 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime, timedelta
4
+ import os
5
+
6
# Seed NumPy's global random generator at import time so the np.random draws
# made by the generators in this module are reproducible.
# Values derived from datetime.now() still change from run to run.
np.random.seed(42)
8
+
9
def create_california_housing_data(output_file='california_housing_data.parquet'):
    """Download the California housing dataset and store it as Parquet.

    The CSV is fetched over the network on every call, written to
    ``output_file`` and also handed back to the caller.

    Args:
        output_file: Destination path of the Parquet file.

    Returns:
        The downloaded dataset as a ``pandas.DataFrame``.
    """
    source_url = 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv'
    housing = pd.read_csv(source_url)

    # Persist a local copy before returning the frame.
    housing.to_parquet(output_file)
    return housing
17
+
18
def create_large_customer_data(num_customers=1_000_000, chunk_size=100_000, output_file='large_customer_data.parquet'):
    """Create a large synthetic customer dataset in memory.

    Args:
        num_customers: Number of customer rows to generate.
        chunk_size: Currently unused; kept so existing callers keep working.
        output_file: Currently unused; this function writes nothing to disk.

    Returns:
        A ``pandas.DataFrame`` with the columns CustomerID, FirstName,
        LastName, Email, JoinDate, CustomerType and CreditScore.
    """
    customer_ids = range(1, num_customers + 1)

    # Take a single reference timestamp: calling datetime.now() once per row
    # is wasted work and gives every JoinDate a slightly different
    # sub-second component.
    now = datetime.now()

    data = {
        'CustomerID': customer_ids,
        'FirstName': [f'Customer{i}' for i in customer_ids],
        'LastName': [f'Lastname{i}' for i in customer_ids],
        'Email': [f'customer{i}@example.com' for i in customer_ids],
        # Join dates fall between 1 and 999 days before ``now``.
        'JoinDate': [now - timedelta(days=np.random.randint(1, 1000))
                     for _ in customer_ids],
        'CustomerType': np.random.choice(['Regular', 'Premium', 'VIP'], num_customers),
        # randint's upper bound is exclusive, so scores range over 300..850.
        'CreditScore': np.random.randint(300, 851, num_customers),
    }

    return pd.DataFrame(data)
36
+
37
+
38
def create_sales_data(num_records=1000):
    """Generate random sales orders dated within the last 365 days.

    Args:
        num_records: Number of order rows to generate.

    Returns:
        A ``pandas.DataFrame`` sorted by ``Date`` with the columns OrderID,
        Date, ProductID, Quantity, CustomerID, Region, Price and TotalAmount.
        ``TotalAmount`` always equals the stored (rounded) ``Price`` times
        ``Quantity``, rounded to 2 decimals.
    """
    # Candidate order dates: one per day from 365 days ago up to now.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = [start_date + timedelta(days=offset) for offset in range(366)]
    random_dates = np.random.choice(dates, num_records)

    # (min, max) unit price per product. A product's position in this
    # mapping plus one is its ProductID.
    product_prices = {
        'Laptop': (800, 2000),
        'Smartphone': (400, 1200),
        'Tablet': (200, 800),
        'Monitor': (150, 500),
        'Keyboard': (20, 150),
        'Mouse': (10, 80),
        'Headphones': (30, 300),
        'Printer': (100, 400),
    }
    products = list(product_prices)

    data = {
        'OrderID': range(1, num_records + 1),
        'Date': random_dates,
        'ProductID': np.random.randint(1, len(products) + 1, num_records),
        'Quantity': np.random.randint(1, 11, num_records),
        'CustomerID': np.random.randint(1, 201, num_records),
        'Region': np.random.choice(['North', 'South', 'East', 'West'], num_records),
    }

    # Draw one unit price per order from the price range of its product.
    price_ranges = [product_prices[products[pid - 1]] for pid in data['ProductID']]
    # An explicit float dtype keeps the column numeric even when there are no rows.
    data['Price'] = np.array(
        [np.random.uniform(low, high) for low, high in price_ranges],
        dtype=float,
    )

    df = pd.DataFrame(data)

    # Round the price first and derive the total from the rounded value, so
    # the stored Price * Quantity always matches the stored TotalAmount.
    df['Price'] = df['Price'].round(2)
    df['TotalAmount'] = (df['Price'] * df['Quantity']).round(2)

    return df.sort_values('Date')
83
+
84
def create_customer_data(num_customers=200):
    """Create a synthetic customer table.

    Args:
        num_customers: Number of customer rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the columns CustomerID, FirstName,
        LastName, Email, JoinDate, CustomerType and CreditScore.
    """
    customer_ids = range(1, num_customers + 1)

    # Take a single reference timestamp: calling datetime.now() once per row
    # is wasted work and gives every JoinDate a slightly different
    # sub-second component.
    now = datetime.now()

    data = {
        'CustomerID': customer_ids,
        'FirstName': [f'Customer{i}' for i in customer_ids],
        'LastName': [f'Lastname{i}' for i in customer_ids],
        'Email': [f'customer{i}@example.com' for i in customer_ids],
        # Join dates fall between 1 and 999 days before ``now``.
        'JoinDate': [now - timedelta(days=np.random.randint(1, 1000))
                     for _ in customer_ids],
        'CustomerType': np.random.choice(['Regular', 'Premium', 'VIP'], num_customers),
        # randint's upper bound is exclusive, so scores range over 300..850.
        'CreditScore': np.random.randint(300, 851, num_customers),
    }

    return pd.DataFrame(data)
98
+
99
def create_product_data():
    """Create the product catalog table.

    Returns:
        A ``pandas.DataFrame`` with one row per product and the columns
        ProductID, ProductName, Category, Brand, StockQuantity, MinPrice,
        MaxPrice, Weight_kg and WarrantyMonths. ``StockQuantity`` is drawn
        at random; every other column is a fixed list.
    """
    product_names = ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer']
    # Derive the row count from the name list instead of repeating it as a
    # literal, so the ID range and the random column cannot drift apart.
    num_products = len(product_names)

    products = {
        'ProductID': range(1, num_products + 1),
        'ProductName': product_names,
        'Category': ['Computers', 'Mobile', 'Mobile', 'Accessories', 'Accessories', 'Accessories', 'Audio', 'Peripherals'],
        'Brand': ['TechPro', 'MobileX', 'TabletCo', 'ViewMax', 'TypeMaster', 'ClickPro', 'SoundMax', 'PrintPro'],
        # randint's upper bound is exclusive, so stock ranges over 50..499.
        'StockQuantity': np.random.randint(50, 500, num_products),
        'MinPrice': [800, 400, 200, 150, 20, 10, 30, 100],
        'MaxPrice': [2000, 1200, 800, 500, 150, 80, 300, 400],
        'Weight_kg': [2.5, 0.2, 0.5, 3.0, 0.8, 0.1, 0.3, 5.0],
        'WarrantyMonths': [24, 12, 12, 36, 12, 12, 24, 12],
    }

    return pd.DataFrame(products)
114
+
115
def create_large_numbers_data(num_records=100):
    """Build a table of very large numbers for testing and visualization.

    Args:
        num_records: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the columns ID, Date, SmallValue,
        MediumValue, LargeValue, VeryLargeValue, MassiveValue, Category,
        IsActive, ExponentialValue, Revenue and Budget.
    """
    rand = np.random.randint
    rows = range(num_records)

    def large_value():
        # Up to 3 leading digits, then a 7-digit and a 4-digit zero-padded part.
        head = str(rand(1, 999))
        middle = str(rand(0, 9999999)).zfill(7)
        tail = str(rand(0, 9999)).zfill(4)
        return int(head + middle + tail)

    def very_large_value():
        # 3 + 7 + 7 digits glued together.
        head = str(rand(100, 999))
        middle = str(rand(1000000, 9999999))
        tail = str(rand(1000000, 9999999))
        return int(head + middle + tail)

    def massive_value():
        # A leading '1' followed by 15 random digits.
        digits = ''.join(str(rand(0, 10)) for _ in range(15))
        return int('1' + digits)

    def exponential_value():
        # Parsed from text such as "3.07e15" so the value suits scientific notation.
        whole = rand(1, 10)
        fraction = rand(1, 100)
        exponent = rand(10, 20)
        return float(f"{whole}.{fraction:02d}e{exponent}")

    def money(low, high):
        # dtype=np.int64 avoids int32 overflow on Windows; the added
        # random() supplies the decimal part.
        return rand(low, high, dtype=np.int64) + np.random.random()

    # The columns are generated in this exact order so that the sequence of
    # draws from the global random generator is unchanged.
    small = rand(1, 1000, num_records)
    medium = rand(10000, 9999999, num_records)
    large = [large_value() for _ in rows]
    very_large = [very_large_value() for _ in rows]
    massive = [massive_value() for _ in rows]
    category = np.random.choice(['A', 'B', 'C', 'D', 'E'], num_records)
    is_active = np.random.choice([True, False], num_records, p=[0.8, 0.2])
    exponential = [exponential_value() for _ in rows]
    revenue = [money(1000000, 9999999999) for _ in rows]
    budget = [money(10000000, 999999999) for _ in rows]

    frame = pd.DataFrame({
        'ID': range(1, num_records + 1),
        'Date': pd.date_range(start='2023-01-01', periods=num_records),
        'SmallValue': small,
        'MediumValue': medium,
        'LargeValue': large,
        'VeryLargeValue': very_large,
        'MassiveValue': massive,
        'Category': category,
        'IsActive': is_active,
        'ExponentialValue': exponential,
        'Revenue': revenue,
        'Budget': budget,
    })

    # Monetary columns keep two decimal places.
    for column in ('Revenue', 'Budget'):
        frame[column] = frame[column].round(2)

    return frame
@@ -0,0 +1,137 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime, timedelta
4
+ import os
5
+
6
# Seed NumPy's global random generator at import time so the np.random draws
# made by the generators in this module are reproducible.
np.random.seed(42)

# Directory, relative to the current working directory, that receives the
# generated files.
# NOTE: it is created as soon as this module is imported, not only when the
# module is run as a script.
OUTPUT_DIR = 'test_data'
os.makedirs(OUTPUT_DIR, exist_ok=True)
12
+
13
def create_sales_data(num_records=1000):
    """Generate random sales orders dated within the last 365 days.

    Args:
        num_records: Number of order rows to generate.

    Returns:
        A ``pandas.DataFrame`` sorted by ``Date`` with the columns OrderID,
        Date, ProductID, Quantity, CustomerID, Region, Price and TotalAmount.
        ``TotalAmount`` always equals the stored (rounded) ``Price`` times
        ``Quantity``, rounded to 2 decimals.
    """
    # Candidate order dates: one per day from 365 days ago up to now.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = [start_date + timedelta(days=offset) for offset in range(366)]
    random_dates = np.random.choice(dates, num_records)

    # (min, max) unit price per product. A product's position in this
    # mapping plus one is its ProductID.
    product_prices = {
        'Laptop': (800, 2000),
        'Smartphone': (400, 1200),
        'Tablet': (200, 800),
        'Monitor': (150, 500),
        'Keyboard': (20, 150),
        'Mouse': (10, 80),
        'Headphones': (30, 300),
        'Printer': (100, 400),
    }
    products = list(product_prices)

    data = {
        'OrderID': range(1, num_records + 1),
        'Date': random_dates,
        'ProductID': np.random.randint(1, len(products) + 1, num_records),
        'Quantity': np.random.randint(1, 11, num_records),
        'CustomerID': np.random.randint(1, 201, num_records),
        'Region': np.random.choice(['North', 'South', 'East', 'West'], num_records),
    }

    # Draw one unit price per order from the price range of its product.
    price_ranges = [product_prices[products[pid - 1]] for pid in data['ProductID']]
    # An explicit float dtype keeps the column numeric even when there are no rows.
    data['Price'] = np.array(
        [np.random.uniform(low, high) for low, high in price_ranges],
        dtype=float,
    )

    df = pd.DataFrame(data)

    # Round the price first and derive the total from the rounded value, so
    # the stored Price * Quantity always matches the stored TotalAmount.
    df['Price'] = df['Price'].round(2)
    df['TotalAmount'] = (df['Price'] * df['Quantity']).round(2)

    return df.sort_values('Date')
58
+
59
def create_customer_data(num_customers=200):
    """Create a synthetic customer table.

    Args:
        num_customers: Number of customer rows to generate.

    Returns:
        A ``pandas.DataFrame`` with the columns CustomerID, FirstName,
        LastName, Email, JoinDate, CustomerType and CreditScore.
    """
    customer_ids = range(1, num_customers + 1)

    # Take a single reference timestamp: calling datetime.now() once per row
    # is wasted work and gives every JoinDate a slightly different
    # sub-second component.
    now = datetime.now()

    data = {
        'CustomerID': customer_ids,
        'FirstName': [f'Customer{i}' for i in customer_ids],
        'LastName': [f'Lastname{i}' for i in customer_ids],
        'Email': [f'customer{i}@example.com' for i in customer_ids],
        # Join dates fall between 1 and 999 days before ``now``.
        'JoinDate': [now - timedelta(days=np.random.randint(1, 1000))
                     for _ in customer_ids],
        'CustomerType': np.random.choice(['Regular', 'Premium', 'VIP'], num_customers),
        # randint's upper bound is exclusive, so scores range over 300..850.
        'CreditScore': np.random.randint(300, 851, num_customers),
    }

    return pd.DataFrame(data)
73
+
74
def create_product_data():
    """Create the product catalog table.

    Returns:
        A ``pandas.DataFrame`` with one row per product and the columns
        ProductID, ProductName, Category, Brand, StockQuantity, MinPrice,
        MaxPrice, Weight_kg and WarrantyMonths. ``StockQuantity`` is drawn
        at random; every other column is a fixed list.
    """
    product_names = ['Laptop', 'Smartphone', 'Tablet', 'Monitor', 'Keyboard', 'Mouse', 'Headphones', 'Printer']
    # Derive the row count from the name list instead of repeating it as a
    # literal, so the ID range and the random column cannot drift apart.
    num_products = len(product_names)

    products = {
        'ProductID': range(1, num_products + 1),
        'ProductName': product_names,
        'Category': ['Computers', 'Mobile', 'Mobile', 'Accessories', 'Accessories', 'Accessories', 'Audio', 'Peripherals'],
        'Brand': ['TechPro', 'MobileX', 'TabletCo', 'ViewMax', 'TypeMaster', 'ClickPro', 'SoundMax', 'PrintPro'],
        # randint's upper bound is exclusive, so stock ranges over 50..499.
        'StockQuantity': np.random.randint(50, 500, num_products),
        'MinPrice': [800, 400, 200, 150, 20, 10, 30, 100],
        'MaxPrice': [2000, 1200, 800, 500, 150, 80, 300, 400],
        'Weight_kg': [2.5, 0.2, 0.5, 3.0, 0.8, 0.1, 0.3, 5.0],
        'WarrantyMonths': [24, 12, 12, 36, 12, 12, 24, 12],
    }

    return pd.DataFrame(products)
89
+
90
# Script entry point: generate the three sample tables, write them under
# OUTPUT_DIR and print example SQL that joins them.
if __name__ == '__main__':
    # Create and save sales data (Excel workbook)
    sales_df = create_sales_data()
    sales_output = os.path.join(OUTPUT_DIR, 'sample_sales_data.xlsx')
    sales_df.to_excel(sales_output, index=False)
    print(f"Created sales data in '{sales_output}'")
    print(f"Number of sales records: {len(sales_df)}")

    # Create and save customer data as parquet
    customer_df = create_customer_data()
    customer_output = os.path.join(OUTPUT_DIR, 'customer_data.parquet')
    customer_df.to_parquet(customer_output, index=False)
    print(f"\nCreated customer data in '{customer_output}'")
    print(f"Number of customers: {len(customer_df)}")

    # Create and save product data (Excel workbook)
    product_df = create_product_data()
    product_output = os.path.join(OUTPUT_DIR, 'product_catalog.xlsx')
    product_df.to_excel(product_output, index=False)
    print(f"\nCreated product catalog in '{product_output}'")
    print(f"Number of products: {len(product_df)}")

    # Print sample queries; the table names in the SQL match the file names
    # written above, minus their extensions.
    print("\nSample SQL queries for joining the data:")
    print("""
-- Join sales with customer data
SELECT s.*, c.FirstName, c.LastName, c.CustomerType
FROM test_data.sample_sales_data s
JOIN test_data.customer_data c ON s.CustomerID = c.CustomerID;

-- Join sales with product data
SELECT s.*, p.ProductName, p.Category, p.Brand
FROM test_data.sample_sales_data s
JOIN test_data.product_catalog p ON s.ProductID = p.ProductID;

-- Three-way join with aggregation
SELECT
p.Category,
c.CustomerType,
COUNT(*) as NumOrders,
SUM(s.TotalAmount) as TotalRevenue,
AVG(s.Quantity) as AvgQuantity
FROM test_data.sample_sales_data s
JOIN test_data.customer_data c ON s.CustomerID = c.CustomerID
JOIN test_data.product_catalog p ON s.ProductID = p.ProductID
GROUP BY p.Category, c.CustomerType
ORDER BY p.Category, c.CustomerType;
""")
@@ -0,0 +1,6 @@
1
"""Database management components for SQLShell application.

Re-exports ``DatabaseManager`` and ``ExportManager`` so they can be imported
directly from ``sqlshell.db``.
"""

from sqlshell.db.database_manager import DatabaseManager
from sqlshell.db.export_manager import ExportManager

# Names exported by ``from sqlshell.db import *``.
__all__ = ['DatabaseManager', 'ExportManager']