sqlshell 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. sqlshell/__init__.py +84 -0
  2. sqlshell/__main__.py +4926 -0
  3. sqlshell/ai_autocomplete.py +392 -0
  4. sqlshell/ai_settings_dialog.py +337 -0
  5. sqlshell/context_suggester.py +768 -0
  6. sqlshell/create_test_data.py +152 -0
  7. sqlshell/data/create_test_data.py +137 -0
  8. sqlshell/db/__init__.py +6 -0
  9. sqlshell/db/database_manager.py +1318 -0
  10. sqlshell/db/export_manager.py +188 -0
  11. sqlshell/editor.py +1166 -0
  12. sqlshell/editor_integration.py +127 -0
  13. sqlshell/execution_handler.py +421 -0
  14. sqlshell/menus.py +262 -0
  15. sqlshell/notification_manager.py +370 -0
  16. sqlshell/query_tab.py +904 -0
  17. sqlshell/resources/__init__.py +1 -0
  18. sqlshell/resources/icon.png +0 -0
  19. sqlshell/resources/logo_large.png +0 -0
  20. sqlshell/resources/logo_medium.png +0 -0
  21. sqlshell/resources/logo_small.png +0 -0
  22. sqlshell/resources/splash_screen.gif +0 -0
  23. sqlshell/space_invaders.py +501 -0
  24. sqlshell/splash_screen.py +405 -0
  25. sqlshell/sqlshell/__init__.py +5 -0
  26. sqlshell/sqlshell/create_test_data.py +118 -0
  27. sqlshell/sqlshell/create_test_databases.py +96 -0
  28. sqlshell/sqlshell_demo.png +0 -0
  29. sqlshell/styles.py +257 -0
  30. sqlshell/suggester_integration.py +330 -0
  31. sqlshell/syntax_highlighter.py +124 -0
  32. sqlshell/table_list.py +996 -0
  33. sqlshell/ui/__init__.py +6 -0
  34. sqlshell/ui/bar_chart_delegate.py +49 -0
  35. sqlshell/ui/filter_header.py +469 -0
  36. sqlshell/utils/__init__.py +16 -0
  37. sqlshell/utils/profile_cn2.py +1661 -0
  38. sqlshell/utils/profile_column.py +2635 -0
  39. sqlshell/utils/profile_distributions.py +616 -0
  40. sqlshell/utils/profile_entropy.py +347 -0
  41. sqlshell/utils/profile_foreign_keys.py +779 -0
  42. sqlshell/utils/profile_keys.py +2834 -0
  43. sqlshell/utils/profile_ohe.py +934 -0
  44. sqlshell/utils/profile_ohe_advanced.py +754 -0
  45. sqlshell/utils/profile_ohe_comparison.py +237 -0
  46. sqlshell/utils/profile_prediction.py +926 -0
  47. sqlshell/utils/profile_similarity.py +876 -0
  48. sqlshell/utils/search_in_df.py +90 -0
  49. sqlshell/widgets.py +400 -0
  50. sqlshell-0.4.4.dist-info/METADATA +441 -0
  51. sqlshell-0.4.4.dist-info/RECORD +54 -0
  52. sqlshell-0.4.4.dist-info/WHEEL +5 -0
  53. sqlshell-0.4.4.dist-info/entry_points.txt +2 -0
  54. sqlshell-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,90 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import Union, Optional
4
+ import re
5
+
6
+
7
def search(dataframe: pd.DataFrame, text: str, case_sensitive: bool = False, regex: bool = False) -> pd.DataFrame:
    """
    Return the rows of a DataFrame in which any cell contains the search text.

    Every column is cast to ``str`` before matching, so numeric and other
    non-string cells are searched by their string representation.

    Args:
        dataframe: The pandas DataFrame to search in
        text: The text (or regular expression) to search for
        case_sensitive: Whether the search should be case-sensitive (default: False)
        regex: Whether to treat the search text as a regular expression (default: False).
            If the pattern does not compile, it is searched for literally instead.

    Returns:
        DataFrame containing only the rows that have a match in any column.
        The input DataFrame itself is returned when it is empty or ``text`` is empty.
    """
    if dataframe.empty or not text:
        return dataframe

    # Validate the pattern once, up front, instead of once per column.
    flags = 0 if case_sensitive else re.IGNORECASE
    use_regex = regex
    if use_regex:
        try:
            re.compile(text, flags)
        except re.error:
            # Invalid regex: fall back to a literal search.
            use_regex = False

    # Only the literal needle is lower-cased. Lower-casing a regex would
    # rewrite escapes such as \D, \S, \W into \d, \s, \w and invert them.
    literal = text if case_sensitive else text.lower()

    mask = pd.Series(False, index=dataframe.index)

    # Iterate by position: label-based access returns a DataFrame (which has
    # no .str accessor) when column names are duplicated.
    for position in range(dataframe.shape[1]):
        col_str = dataframe.iloc[:, position].astype(str)

        if use_regex:
            # Case-insensitivity is handled by the regex flag alone.
            column_mask = col_str.str.contains(text, regex=True, na=False, flags=flags)
        else:
            if not case_sensitive:
                col_str = col_str.str.lower()
            # Literal string search (no regex engine involved)
            column_mask = col_str.str.contains(literal, regex=False, na=False)

        # A row matches when any of its columns matches.
        mask = mask | column_mask

    return dataframe[mask]
56
+
57
+
58
def search_optimized(dataframe: pd.DataFrame, text: str, case_sensitive: bool = False) -> pd.DataFrame:
    """
    Single-pass variant of search: literal matching only, no regex support.

    Each row is flattened into one string (all cells cast to ``str`` and
    joined with a separator) and the search text is looked up once per row
    instead of once per cell.

    Args:
        dataframe: The pandas DataFrame to search in
        text: The text to search for
        case_sensitive: Whether the search should be case-sensitive (default: False)

    Returns:
        DataFrame containing only the rows that have a match in any column.
        The input DataFrame itself is returned when it is empty or ``text`` is empty.
    """
    if dataframe.empty or not text:
        return dataframe

    # Convert search text based on case sensitivity
    search_text = text if case_sensitive else text.lower()

    # Pick a separator that does not occur in the search text. A match can then
    # never include a separator, so it cannot straddle two neighbouring cells
    # (a fixed '|' made searching for "|" match every multi-column row).
    # The character must also survive lower-casing of the joined row.
    separator = next(
        ch for ch in map(chr, range(0x1F, 0x110000))
        if ch not in search_text and ch == ch.lower()
    )

    def join_row(row):
        # map(str, ...) guards against any value that is still not a str after
        # the cast below (e.g. a preserved missing value), which would make
        # str.join raise.
        joined = separator.join(map(str, row))
        return joined if case_sensitive else joined.lower()

    combined = dataframe.astype(str).apply(join_row, axis=1)

    # Search in the combined string
    mask = combined.str.contains(search_text, regex=False, na=False)

    return dataframe[mask]
sqlshell/widgets.py ADDED
@@ -0,0 +1,400 @@
1
+ from PyQt6.QtWidgets import QTableWidget, QApplication, QMenu, QMessageBox
2
+ from PyQt6.QtCore import Qt, QEvent
3
+ from PyQt6.QtGui import QKeyEvent, QAction, QIcon
4
+ import pandas as pd
5
+ import numpy as np
6
+
7
+
8
class CopyableTableWidget(QTableWidget):
    """Custom QTableWidget that supports copying data to clipboard with Ctrl+C.

    Also provides a right-click context menu with copy, row-count and
    table-analysis actions.

    The widget reads an optional ``_parent_tab`` attribute that is not assigned
    anywhere in this class (presumably the owning query tab sets it after
    construction -- verify against the caller). From that object it reads
    ``current_df``, ``pagination_state``, ``preview_table_name``,
    ``is_preview_mode``, ``query_edit`` and ``parent`` when they exist.
    """

    # One entry per analysis action:
    # (label with a table name, label without one, icon theme name,
    #  main-window method taking a table name, main-window method taking none)
    _ANALYSIS_ACTIONS = (
        ("Analyze Column Importance", "Analyze Column Importance", "system-search",
         "analyze_table_entropy", "analyze_current_data_entropy"),
        ("Profile Table Structure", "Profile Data Structure", "edit-find",
         "profile_table_structure", "profile_current_data_structure"),
        ("Analyze Column Distributions", "Analyze Column Distributions", "accessories-calculator",
         "profile_distributions", "profile_current_data_distributions"),
        ("Analyze Row Similarity", "Analyze Row Similarity", "applications-utilities",
         "profile_similarity", "profile_current_data_similarity"),
    )

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setFocusPolicy(Qt.FocusPolicy.StrongFocus)
        self.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.customContextMenuRequested.connect(self.show_context_menu)

    def keyPressEvent(self, event: QKeyEvent):
        """Handle key press events, specifically Ctrl+C for copying"""
        if event.key() == Qt.Key.Key_C and event.modifiers() & Qt.KeyboardModifier.ControlModifier:
            self.copy_selection_to_clipboard()
            return

        # For other keys, use the default behavior
        super().keyPressEvent(event)

    @staticmethod
    def _resolve_parent(obj):
        """Return the parent of ``obj``, or None if it has none.

        ``parent`` may be either a plain attribute or a callable accessor
        (Qt's ``parent()``); both forms are supported so that callers do not
        end up holding a bound method instead of the parent object.
        """
        parent = getattr(obj, 'parent', None)
        return parent() if callable(parent) else parent

    def _get_current_table_name(self):
        """Get the current table name from the results context.

        Tries, in order: the tab's ``preview_table_name``, the ``_query_source``
        attribute of the tab's DataFrame, and finally the first FROM/JOIN
        target in the query text if it is one of the main window's
        ``db_manager.loaded_tables``. Returns None when nothing is found or
        any lookup fails.
        """
        try:
            parent_tab = getattr(self, '_parent_tab', None)
            if parent_tab is None:
                return None

            # First check if we're in preview mode with a known table name
            if hasattr(parent_tab, 'preview_table_name') and parent_tab.preview_table_name:
                return parent_tab.preview_table_name

            # Otherwise try to get from the DataFrame's _query_source attribute
            if hasattr(parent_tab, 'current_df') and parent_tab.current_df is not None:
                if hasattr(parent_tab.current_df, '_query_source'):
                    return getattr(parent_tab.current_df, '_query_source')

            # As a fallback, try to extract table name from the query text
            if hasattr(parent_tab, 'query_edit'):
                query_text = parent_tab.query_edit.toPlainText().strip()
                if query_text:
                    import re
                    # Look for FROM or JOIN clauses; only the first hit is considered
                    pattern = r'(?:FROM|JOIN)\s+([a-zA-Z0-9_]+)'
                    matches = re.findall(pattern, query_text, re.IGNORECASE)
                    if matches:
                        table_name = matches[0]
                        # Verify this table exists in the main window's loaded tables
                        main_window = self._get_main_window()
                        if main_window and hasattr(main_window, 'db_manager'):
                            if table_name in main_window.db_manager.loaded_tables:
                                return table_name

            return None
        except Exception:
            # Best effort: the menu simply falls back to DataFrame-based analysis.
            return None

    def _get_main_window(self):
        """Get a reference to the main window (the parent of ``_parent_tab``), or None."""
        try:
            parent_tab = getattr(self, '_parent_tab', None)
            if not parent_tab:
                return None
            return self._resolve_parent(parent_tab)
        except Exception:
            return None

    def _add_analysis_action(self, menu, label, icon_name, method_name, table_name=None):
        """Add one analysis entry to ``menu`` that calls ``method_name`` on the main window."""
        action = menu.addAction(label)
        action.setIcon(QIcon.fromTheme(icon_name))
        # method_name/table_name are parameters of this call, so each lambda
        # captures its own values (no late-binding surprise across entries).
        action.triggered.connect(
            lambda: self._call_main_window_method(method_name, table_name)
        )
        return action

    def show_context_menu(self, position):
        """Show context menu with copy options and table analysis actions"""
        menu = QMenu(self)

        # "Copy Selection" is only offered when something is selected
        if self.selectionModel().selection():
            copy_selection_action = QAction("Copy Selection (Ctrl+C)", self)
            copy_selection_action.triggered.connect(self.copy_selection_to_clipboard)
            menu.addAction(copy_selection_action)

            menu.addSeparator()

        copy_all_action = QAction("Copy All Data", self)
        copy_all_action.triggered.connect(self.copy_all_to_clipboard)
        menu.addAction(copy_all_action)

        # Add count rows action if we have data
        parent_tab = getattr(self, '_parent_tab', None)
        if parent_tab and hasattr(parent_tab, 'current_df') and parent_tab.current_df is not None:
            menu.addSeparator()
            count_rows_action = QAction("Count Rows", self)
            count_rows_action.triggered.connect(self._show_row_count)
            menu.addAction(count_rows_action)

        table_name = self._get_current_table_name()
        main_window = self._get_main_window()

        # Show analysis menu if we have either a table name OR current data
        has_data = (parent_tab and hasattr(parent_tab, 'current_df') and
                    parent_tab.current_df is not None and not parent_tab.current_df.empty)

        if main_window and (table_name or has_data):
            menu.addSeparator()

            analysis_menu = menu.addMenu("Table Analysis")
            analysis_menu.setIcon(QIcon.fromTheme("system-search"))

            # With a table name the table-based methods are used; otherwise the
            # methods that analyse the current query result.
            for table_label, data_label, icon_name, table_method, data_method in self._ANALYSIS_ACTIONS:
                if table_name:
                    self._add_analysis_action(analysis_menu, table_label, icon_name, table_method, table_name)
                else:
                    self._add_analysis_action(analysis_menu, data_label, icon_name, data_method)

        # Only show menu if we have actions
        if menu.actions():
            menu.exec(self.mapToGlobal(position))

    def _call_main_window_method(self, method_name, table_name=None):
        """Call a method on the main window with optional table name.

        Does nothing when there is no main window or it lacks ``method_name``.
        """
        main_window = self._get_main_window()
        if main_window and hasattr(main_window, method_name):
            method = getattr(main_window, method_name)
            if table_name is not None:
                method(table_name)
            else:
                method()

    def _show_row_count(self):
        """Show the row count in a message box"""
        parent_tab = getattr(self, '_parent_tab', None)
        if not parent_tab:
            return

        has_current_df = hasattr(parent_tab, 'current_df') and parent_tab.current_df is not None

        # Check if we're in preview mode - if so, get the full table count
        if (hasattr(parent_tab, 'is_preview_mode') and parent_tab.is_preview_mode and
                hasattr(parent_tab, 'preview_table_name') and parent_tab.preview_table_name):
            # Get the main window to access the database manager
            main_window = self._get_main_window()
            if main_window and hasattr(main_window, 'db_manager'):
                try:
                    # Get the full table to count all rows
                    full_df = main_window.db_manager.get_full_table(parent_tab.preview_table_name)
                    row_count = len(full_df)
                    QMessageBox.information(self, "Row Count", f"Total rows: {row_count:,}")
                except Exception as e:
                    # Fall back to preview count if we can't get full table
                    if has_current_df:
                        row_count = len(parent_tab.current_df)
                        QMessageBox.information(self, "Row Count", f"Preview rows: {row_count:,}\n(Error getting full count: {str(e)})")
        elif has_current_df:
            # Not in preview mode, just show the current dataframe count
            row_count = len(parent_tab.current_df)
            QMessageBox.information(self, "Row Count", f"Total rows: {row_count:,}")

    @staticmethod
    def _raw_cell_text(df, row_idx, col):
        """Return the unformatted text of ``df.iloc[row_idx, col]``.

        Returns "NULL" for missing values and None when the indices fall
        outside the DataFrame.
        """
        if row_idx < len(df) and col < len(df.columns):
            raw_value = df.iloc[row_idx, col]

            # Handle NaN/NULL values
            if pd.isna(raw_value):
                return "NULL"

            # str() gives the raw representation for numeric and other types alike
            return str(raw_value)
        return None

    def _get_unformatted_value(self, row, col):
        """Get the unformatted value from the original DataFrame if available.

        Looks at ``current_df`` on the parent tab first (adjusting the row for
        ``pagination_state``), then on the tab's own parent. Falls back to the
        displayed text of the table item, or "" when the cell has no item.
        """
        try:
            # Prefer the direct reference; otherwise use the Qt parent
            parent_tab = getattr(self, '_parent_tab', None)
            if parent_tab is None:
                parent_tab = self.parent()

            if parent_tab and hasattr(parent_tab, 'current_df') and parent_tab.current_df is not None:
                # Calculate the actual DataFrame row index, accounting for pagination
                actual_row_idx = row
                if hasattr(parent_tab, 'pagination_state') and parent_tab.pagination_state:
                    state = parent_tab.pagination_state
                    actual_row_idx = state['current_page'] * state['page_size'] + row

                text = self._raw_cell_text(parent_tab.current_df, actual_row_idx, col)
                if text is not None:
                    return text

            # Alternative: the tab's parent (main window) might have current_df.
            # No pagination offset is applied on this path.
            if parent_tab:
                owner = self._resolve_parent(parent_tab)
                if hasattr(owner, 'current_df') and owner.current_df is not None:
                    text = self._raw_cell_text(owner.current_df, row, col)
                    if text is not None:
                        return text

        except Exception:
            # Deliberate best effort: if anything fails, use the formatted text below
            pass

        # Fallback: use the formatted text from the table item
        item = self.item(row, col)
        return item.text() if item else ""

    def _header_texts(self, columns):
        """Return the header text for each column index, or "Column_<n>" when unset."""
        texts = []
        for col in columns:
            header_item = self.horizontalHeaderItem(col)
            texts.append(header_item.text() if header_item else f"Column_{col}")
        return texts

    def _show_status_message(self, message):
        """Show ``message`` on the status bar of the parent or grandparent, if either has one."""
        parent = self.parent()
        if hasattr(parent, 'statusBar'):
            parent.statusBar().showMessage(message)
        elif hasattr(parent, 'parent') and hasattr(parent.parent(), 'statusBar'):
            parent.parent().statusBar().showMessage(message)

    def copy_selection_to_clipboard(self):
        """Copy selected cells to clipboard in tab-separated format.

        The bounding rectangle of all selected ranges is copied. A header line
        is included when the rectangle starts at row 0 or entire columns are
        selected. With no selection, all data is copied instead.
        """
        selection = self.selectionModel().selection()

        if not selection:
            # If no selection, copy all visible data
            self.copy_all_to_clipboard()
            return

        # Find the bounds of the selection
        min_row = float('inf')
        max_row = -1
        min_col = float('inf')
        max_col = -1

        for range_ in selection:
            min_row = min(min_row, range_.top())
            max_row = max(max_row, range_.bottom())
            min_col = min(min_col, range_.left())
            max_col = max(max_col, range_.right())

        # Build the data to copy
        copied_data = []

        # Add headers if copying from the first row or if entire columns are selected
        if min_row == 0 or self.are_entire_columns_selected():
            copied_data.append('\t'.join(self._header_texts(range(min_col, max_col + 1))))

        # Add data rows, never reading past the table's current size
        last_row = min(max_row, self.rowCount() - 1)
        last_col = min(max_col, self.columnCount() - 1)
        for row in range(min_row, last_row + 1):
            # Use unformatted value when possible
            row_data = [self._get_unformatted_value(row, col) for col in range(min_col, last_col + 1)]
            copied_data.append('\t'.join(row_data))

        # Join all rows with newlines and copy to clipboard
        QApplication.clipboard().setText('\n'.join(copied_data))

        row_count = max_row - min_row + 1
        col_count = max_col - min_col + 1
        self._show_status_message(f"Copied {row_count} rows × {col_count} columns to clipboard")

    def copy_all_to_clipboard(self):
        """Copy all table data, preceded by a header line, to clipboard"""
        if self.rowCount() == 0 or self.columnCount() == 0:
            return

        all_columns = range(self.columnCount())

        # Add headers
        copied_data = ['\t'.join(self._header_texts(all_columns))]

        # Add all data rows
        for row in range(self.rowCount()):
            # Use unformatted value when possible
            copied_data.append('\t'.join(self._get_unformatted_value(row, col) for col in all_columns))

        # Join all rows with newlines and copy to clipboard
        QApplication.clipboard().setText('\n'.join(copied_data))

        self._show_status_message(f"Copied all {self.rowCount()} rows × {self.columnCount()} columns to clipboard")

    def are_entire_columns_selected(self):
        """Check if any selected range spans from the first row to the last row"""
        selection = self.selectionModel().selection()
        if not selection:
            return False

        last_row = self.rowCount() - 1
        return any(range_.top() == 0 and range_.bottom() == last_row for range_ in selection)