sqlshell 0.1.9-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sqlshell might be problematic.
- sqlshell/LICENSE +21 -0
- sqlshell/MANIFEST.in +6 -0
- sqlshell/README.md +59 -0
- sqlshell/context_suggester.py +765 -0
- sqlshell/create_test_data.py +106 -30
- sqlshell/db/database_manager.py +152 -6
- sqlshell/editor.py +68 -11
- sqlshell/main.py +1566 -656
- sqlshell/menus.py +171 -0
- sqlshell/query_tab.py +32 -3
- sqlshell/styles.py +257 -0
- sqlshell/suggester_integration.py +275 -0
- sqlshell/table_list.py +907 -0
- sqlshell/utils/__init__.py +8 -0
- sqlshell/utils/profile_entropy.py +347 -0
- sqlshell/utils/profile_keys.py +356 -0
- sqlshell-0.2.0.dist-info/METADATA +198 -0
- {sqlshell-0.1.9.dist-info → sqlshell-0.2.0.dist-info}/RECORD +21 -11
- {sqlshell-0.1.9.dist-info → sqlshell-0.2.0.dist-info}/WHEEL +1 -1
- sqlshell/setup.py +0 -42
- sqlshell-0.1.9.dist-info/METADATA +0 -122
- {sqlshell-0.1.9.dist-info → sqlshell-0.2.0.dist-info}/entry_points.txt +0 -0
- {sqlshell-0.1.9.dist-info → sqlshell-0.2.0.dist-info}/top_level.txt +0 -0
sqlshell/utils/profile_keys.py (new file)
@@ -0,0 +1,356 @@
+import sys
+import itertools
+import pandas as pd
+import random
+from PyQt6.QtWidgets import (
+    QApplication, QWidget, QVBoxLayout, QLabel, QTableWidget, QTableWidgetItem, QHeaderView, QTabWidget, QMainWindow
+)
+from PyQt6.QtCore import Qt
+
+
+def find_functional_dependencies(df: pd.DataFrame, max_lhs_size: int = 2):
+    """
+    Discover all functional dependencies X -> A in the DataFrame for |X| <= max_lhs_size.
+    Returns a list of tuples (lhs, rhs).
+    """
+    fds = []
+    cols = list(df.columns)
+    n_rows = len(df)
+
+    for size in range(1, max_lhs_size + 1):
+        for lhs in itertools.combinations(cols, size):
+            # for each potential dependent attribute not in lhs
+            lhs_df = df[list(lhs)]
+            # group by lhs and count distinct values of each other column
+            grouped = df.groupby(list(lhs))
+            for rhs in cols:
+                if rhs in lhs:
+                    continue
+                # Check if for each group, rhs has only one distinct value
+                distinct_counts = grouped[rhs].nunique(dropna=False)
+                if (distinct_counts <= 1).all():
+                    fds.append((lhs, rhs))
+    return fds
+
+
+def propose_normalized_tables(cols, candidate_keys, fds):
+    """
+    Propose a set of normalized tables based on functional dependencies.
+    Uses a simplified approach to create 3NF tables.
+
+    Parameters:
+    - cols: list of all columns
+    - candidate_keys: list of candidate keys
+    - fds: list of functional dependencies as (lhs, rhs) tuples
+
+    Returns:
+    - List of proposed tables as (table_name, primary_key, attributes) tuples
+    """
+    # Start with a set of all attributes
+    all_attrs = set(cols)
+    proposed_tables = []
+
+    # Group FDs by their determinants (LHS)
+    determinant_groups = {}
+    for lhs, rhs in fds:
+        lhs_key = tuple(sorted(lhs))
+        if lhs_key not in determinant_groups:
+            determinant_groups[lhs_key] = []
+        determinant_groups[lhs_key].append(rhs)
+
+    # Create tables for each determinant group
+    table_counter = 1
+    for lhs, rhs_list in determinant_groups.items():
+        table_attrs = set(lhs) | set(rhs_list)
+        if table_attrs:  # Skip empty tables
+            table_name = f"Table_{table_counter}"
+            primary_key = ", ".join(lhs)
+            attributes = list(table_attrs)
+            proposed_tables.append((table_name, primary_key, attributes))
+            table_counter += 1
+
+    # Create a table for any remaining attributes not in any FD
+    # or create a table with a candidate key if none exists yet
+    used_attrs = set()
+    for _, _, attrs in proposed_tables:
+        used_attrs.update(attrs)
+
+    remaining_attrs = all_attrs - used_attrs
+    if remaining_attrs:
+        # If we have a candidate key, use it for remaining attributes
+        for key in candidate_keys:
+            key_set = set(key)
+            if key_set & remaining_attrs:  # If key has overlap with remaining attrs
+                table_name = f"Table_{table_counter}"
+                primary_key = ", ".join(key)
+                attributes = list(remaining_attrs | key_set)
+                proposed_tables.append((table_name, primary_key, attributes))
+                break
+        else:  # No suitable candidate key
+            table_name = f"Table_{table_counter}"
+            primary_key = "id (suggested)"
+            attributes = list(remaining_attrs)
+            proposed_tables.append((table_name, primary_key, attributes))
+
+    return proposed_tables
+
+
+def profile(df: pd.DataFrame, max_combination_size: int = 2, max_lhs_size: int = 2):
+    """
+    Analyze a pandas DataFrame to suggest candidate keys and discover functional dependencies.
+
+    Parameters:
+    - df: pandas.DataFrame to analyze.
+    - max_combination_size: max size of column combos to test for keys.
+    - max_lhs_size: max size of LHS in discovered FDs.
+
+    Returns:
+    - Tuple of (fd_results, key_results, n_rows, cols, max_combination_size, max_lhs_size, normalized_tables)
+    """
+    n_rows = len(df)
+    cols = list(df.columns)
+
+    # Discover functional dependencies
+    fds = find_functional_dependencies(df, max_lhs_size)
+
+    # Prepare FD results
+    fd_results = [(", ".join(lhs), rhs) for lhs, rhs in fds]
+
+    # Profile keys (by uniqueness)
+    all_keys = []
+    for size in range(1, max_combination_size + 1):
+        for combo in itertools.combinations(cols, size):
+            unique_count = df.drop_duplicates(subset=combo).shape[0]
+            unique_ratio = unique_count / n_rows
+            is_key = unique_count == n_rows
+            if is_key:
+                all_keys.append(combo)
+
+    # Distinguish between candidate keys and superkeys
+    candidate_keys = []
+    superkeys = []
+
+    for key in all_keys:
+        is_candidate = True
+        # Check if any proper subset of this key is also a key
+        for i in range(1, len(key)):
+            for subset in itertools.combinations(key, i):
+                if subset in all_keys:
+                    is_candidate = False
+                    break
+            if not is_candidate:
+                break
+
+        if is_candidate:
+            candidate_keys.append(key)
+        else:
+            superkeys.append(key)
+
+    # Prepare results for all keys (both candidate keys and superkeys)
+    results = []
+    for size in range(1, max_combination_size + 1):
+        for combo in itertools.combinations(cols, size):
+            unique_count = df.drop_duplicates(subset=combo).shape[0]
+            unique_ratio = unique_count / n_rows
+            is_key = combo in all_keys
+            is_candidate = combo in candidate_keys
+            is_superkey = combo in superkeys
+
+            # Use icons for different key types
+            key_type = ""
+            if is_candidate:
+                key_type = "★ Candidate Key"  # Star for candidate keys
+            elif is_superkey:
+                key_type = "⊃ Superkey"  # Superset symbol for superkeys
+
+            results.append((combo, unique_count, unique_ratio, is_key, key_type))
+
+    results.sort(key=lambda x: (not x[3], -x[2], len(x[0])))
+    key_results = [(", ".join(c), u, f"{u/n_rows:.2%}", k)
+                   for c, u, _, _, k in results]
+
+    # Propose normalized tables
+    normalized_tables = propose_normalized_tables(cols, candidate_keys, fds)
+
+    return fd_results, key_results, n_rows, cols, max_combination_size, max_lhs_size, normalized_tables
+
+
+def visualize_profile(df: pd.DataFrame, max_combination_size: int = 2, max_lhs_size: int = 2):
+    """
+    Create a visual representation of the key profile for a dataframe.
+
+    Parameters:
+    - df: pandas.DataFrame to analyze.
+    - max_combination_size: max size of column combos to test for keys.
+    - max_lhs_size: max size of LHS in discovered FDs.
+
+    Returns:
+    - QMainWindow: The visualization window
+    """
+    # Get profile results
+    fd_results, key_results, n_rows, cols, max_combination_size, max_lhs_size, normalized_tables = profile(
+        df, max_combination_size, max_lhs_size
+    )
+
+    # Create main window
+    window = QMainWindow()
+    window.setWindowTitle("Table Profile: Keys & Dependencies")
+    window.resize(900, 700)
+
+    # Create central widget and layout
+    central_widget = QWidget()
+    window.setCentralWidget(central_widget)
+    layout = QVBoxLayout(central_widget)
+
+    # Add header
+    header = QLabel(f"Analyzed {n_rows} rows × {len(cols)} columns; key combos up to size {max_combination_size}, FDs up to LHS size {max_lhs_size}")
+    header.setAlignment(Qt.AlignmentFlag.AlignCenter)
+    header.setStyleSheet("font-size: 14pt; font-weight: bold; margin: 10px;")
+    layout.addWidget(header)
+
+    # Add description
+    description = QLabel(
+        "This profile helps identify candidate keys and functional dependencies in your data. "
+        "★ Candidate keys are minimal combinations of columns that uniquely identify rows. "
+        "⊃ Superkeys are non-minimal column sets that uniquely identify rows. "
+        "Functional dependencies indicate when one column's values determine another's."
+    )
+    description.setAlignment(Qt.AlignmentFlag.AlignCenter)
+    description.setWordWrap(True)
+    description.setStyleSheet("margin-bottom: 10px;")
+    layout.addWidget(description)
+
+    # Add key for icons
+    icons_key = QLabel("Key: ★ = Minimal Candidate Key | ⊃ = Non-minimal Superkey")
+    icons_key.setAlignment(Qt.AlignmentFlag.AlignCenter)
+    icons_key.setStyleSheet("font-style: italic; margin-bottom: 15px;")
+    layout.addWidget(icons_key)
+
+    # Create tabs
+    tabs = QTabWidget()
+
+    # Tab for Candidate Keys
+    key_tab = QWidget()
+    key_layout = QVBoxLayout()
+
+    key_header = QLabel("Keys (Column Combinations that Uniquely Identify Rows)")
+    key_header.setStyleSheet("font-weight: bold;")
+    key_layout.addWidget(key_header)
+
+    key_table = QTableWidget(len(key_results), 4)
+    key_table.setHorizontalHeaderLabels(["Columns", "Unique Count", "Uniqueness Ratio", "Key Type"])
+    key_table.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeMode.Stretch)
+    for row, (cols_str, count, ratio, key_type) in enumerate(key_results):
+        key_table.setItem(row, 0, QTableWidgetItem(cols_str))
+        key_table.setItem(row, 1, QTableWidgetItem(str(count)))
+        key_table.setItem(row, 2, QTableWidgetItem(ratio))
+
+        # Create item with appropriate styling
+        type_item = QTableWidgetItem(key_type)
+        if "Candidate Key" in key_type:
+            type_item.setForeground(Qt.GlobalColor.darkGreen)
+        elif "Superkey" in key_type:
+            type_item.setForeground(Qt.GlobalColor.darkBlue)
+        key_table.setItem(row, 3, type_item)
+
+    key_layout.addWidget(key_table)
+    key_tab.setLayout(key_layout)
+    tabs.addTab(key_tab, "Keys")
+
+    # Tab for FDs
+    fd_tab = QWidget()
+    fd_layout = QVBoxLayout()
+
+    fd_header = QLabel("Functional Dependencies (When Values in One Set of Columns Determine Another Column)")
+    fd_header.setStyleSheet("font-weight: bold;")
+    fd_layout.addWidget(fd_header)
+
+    fd_table = QTableWidget(len(fd_results), 2)
+    fd_table.setHorizontalHeaderLabels(["Determinant (LHS)", "Dependent (RHS)"])
+    fd_table.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeMode.Stretch)
+    for i, (lhs, rhs) in enumerate(fd_results):
+        lhs_item = QTableWidgetItem(lhs)
+        lhs_item.setFlags(lhs_item.flags() ^ Qt.ItemFlag.ItemIsEditable)
+        fd_table.setItem(i, 0, lhs_item)
+        fd_table.setItem(i, 1, QTableWidgetItem(rhs))
+    fd_layout.addWidget(fd_table)
+    fd_tab.setLayout(fd_layout)
+    tabs.addTab(fd_tab, "Functional Dependencies")
+
+    # Tab for Normalized Tables
+    norm_tab = QWidget()
+    norm_layout = QVBoxLayout()
+
+    norm_header = QLabel("Proposed Normalized Tables (Based on Functional Dependencies)")
+    norm_header.setStyleSheet("font-weight: bold;")
+    norm_layout.addWidget(norm_header)
+
+    norm_description = QLabel(
+        "These tables represent a proposed normalized schema based on the discovered functional dependencies. "
+        "Each table includes attributes that are functionally dependent on its primary key. "
+        "This is an approximate 3NF decomposition and may need further refinement."
+    )
+    norm_description.setWordWrap(True)
+    norm_description.setStyleSheet("margin-bottom: 10px;")
+    norm_layout.addWidget(norm_description)
+
+    norm_table = QTableWidget(len(normalized_tables), 3)
+    norm_table.setHorizontalHeaderLabels(["Table Name", "Primary Key", "Attributes"])
+    norm_table.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeMode.Stretch)
+    for i, (table_name, primary_key, attributes) in enumerate(normalized_tables):
+        norm_table.setItem(i, 0, QTableWidgetItem(table_name))
+
+        pk_item = QTableWidgetItem(primary_key)
+        pk_item.setForeground(Qt.GlobalColor.darkGreen)
+        norm_table.setItem(i, 1, pk_item)
+
+        norm_table.setItem(i, 2, QTableWidgetItem(", ".join(attributes)))
+
+    norm_layout.addWidget(norm_table)
+    norm_tab.setLayout(norm_layout)
+    tabs.addTab(norm_tab, "Normalized Tables")
+
+    layout.addWidget(tabs)
+
+    # Show the window
+    window.show()
+    return window
+
+
+def test_profile_keys(test_size=100):
+    # Generate a dataframe with some realistic examples of a customer-product-order relationship
+    # Create customer data
+    customer_ids = list(range(1, 21))  # 20 customers
+    customer_names = ["John", "Jane", "Alice", "Bob", "Charlie", "Diana", "Edward", "Fiona", "George", "Hannah"]
+
+    # Create product data
+    product_names = ["Apple", "Banana", "Orange", "Grape", "Mango", "Strawberry", "Blueberry", "Kiwi", "Pineapple", "Watermelon"]
+    product_groups = ["Fruit"] * len(product_names)
+
+    # Generate random orders
+    random.seed(42)  # For reproducibility
+    df_data = {
+        "customer_id": [random.choice(customer_ids) for _ in range(test_size)],
+        "customer_name": [customer_names[i % len(customer_names)] for i in range(test_size)],
+        "product_name": [random.choice(product_names) for _ in range(test_size)],
+        "product_group": ["Fruit" for _ in range(test_size)],
+        "order_date": [pd.Timestamp("2021-01-01") + pd.Timedelta(days=random.randint(0, 30)) for _ in range(test_size)],
+        "order_amount": [random.randint(100, 1000) for _ in range(test_size)]
+    }
+
+    # Ensure consistent relationships
+    for i in range(test_size):
+        # Ensure customer_name is consistently associated with customer_id
+        customer_idx = df_data["customer_id"][i] % len(customer_names)
+        df_data["customer_name"][i] = customer_names[customer_idx]
+
+    df = pd.DataFrame(df_data)
+
+    # Create and show visualization
+    app = QApplication(sys.argv)
+    window = visualize_profile(df, max_combination_size=3, max_lhs_size=2)
+    sys.exit(app.exec())
+
+# Only run the test function when script is executed directly
+if __name__ == "__main__":
+    test_profile_keys()
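A minimal usage sketch of the new key-profiling helpers (illustrative only, not code shipped in the wheel; the five-row DataFrame is made up, and the import path `sqlshell.utils.profile_keys` is taken from the RECORD entries further below):

```python
import pandas as pd

# Import path assumed from the wheel's RECORD; the helpers are defined in the
# new module shown above.
from sqlshell.utils.profile_keys import find_functional_dependencies, profile

# Made-up example data: customer_id determines customer_name, and the pair
# (customer_id, product_name) uniquely identifies every row.
df = pd.DataFrame({
    "customer_id":   [1, 1, 2, 2, 3],
    "customer_name": ["Ann", "Ann", "Bo", "Bo", "Cy"],
    "product_name":  ["Apple", "Kiwi", "Apple", "Mango", "Kiwi"],
})

# FD discovery: (('customer_id',), 'customer_name') should appear in the list,
# because every customer_id group contains exactly one customer_name.
fds = find_functional_dependencies(df, max_lhs_size=2)

# Full profile: fd_results are (lhs, rhs) strings, key_results rank column
# combinations by uniqueness and tag candidate keys (★) and superkeys (⊃),
# and normalized_tables is the proposed 3NF-style decomposition.
fd_results, key_results, n_rows, cols, *_, normalized_tables = profile(
    df, max_combination_size=2, max_lhs_size=2
)
print(fds)
print(key_results[:3])
print(normalized_tables)
```

`visualize_profile(df)` wraps the same results in a QMainWindow with Keys, Functional Dependencies, and Normalized Tables tabs, so it needs a running QApplication (as in `test_profile_keys` above).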
sqlshell-0.2.0.dist-info/METADATA (new file)
@@ -0,0 +1,198 @@
+Metadata-Version: 2.4
+Name: sqlshell
+Version: 0.2.0
+Summary: A powerful SQL shell with GUI interface for data analysis
+Author: SQLShell Team
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/oyvinrog/SQLShell
+Keywords: sql,data analysis,gui,duckdb
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: pandas>=2.0.0
+Requires-Dist: numpy>=1.24.0
+Requires-Dist: PyQt6>=6.4.0
+Requires-Dist: duckdb>=0.9.0
+Requires-Dist: openpyxl>=3.1.0
+Requires-Dist: pyarrow>=14.0.1
+Requires-Dist: fastparquet>=2023.10.1
+Requires-Dist: xlrd>=2.0.1
+Requires-Dist: deltalake
+Requires-Dist: Pillow>=10.0.0
+
+# SQLShell
+
+<div align="center">
+
+<img src="sqlshell_logo.png" alt="SQLShell Logo" width="180" height="auto">
+
+**A powerful SQL shell with GUI interface for data analysis**
+
+<img src="sqlshell_demo.png" alt="SQLShell Interface" width="80%" height="auto">
+
+</div>
+
+## 🚀 Key Features
+
+- **Interactive SQL Interface** - Rich syntax highlighting for enhanced query writing
+- **Context-Aware Suggestions** - Intelligent SQL autocompletion based on query context and schema
+- **DuckDB Integration** - Powerful analytical queries powered by DuckDB
+- **Multi-Format Support** - Import and query Excel (.xlsx, .xls), CSV, and Parquet files effortlessly
+- **Modern UI** - Clean, tabular results display with intuitive controls
+- **Table Preview** - Quick view of imported data tables
+- **Test Data Generation** - Built-in sample data for testing and learning
+- **Multiple Views** - Support for multiple concurrent table views
+- **Productivity Tools** - Streamlined workflow with keyboard shortcuts (e.g., Ctrl+Enter for query execution)
+
+## 📦 Installation
+
+### Using pip (Recommended)
+
+```bash
+pip install sqlshell
+```
+
+### Linux Setup with Virtual Environment
+
+```bash
+# Create and activate virtual environment
+python3 -m venv ~/.venv/sqlshell
+source ~/.venv/sqlshell/bin/activate
+
+# Install SQLShell
+pip install sqlshell
+
+# Configure shell alias
+echo 'alias sqls="~/.venv/sqlshell/bin/sqls"' >> ~/.bashrc  # or ~/.zshrc for Zsh
+source ~/.bashrc  # or source ~/.zshrc
+```
+
+### Development Installation
+
+```bash
+git clone https://github.com/oyvinrog/SQLShell.git
+cd SQLShell
+pip install -e .
+```
+
+## 🎯 Getting Started
+
+1. **Launch the Application**
+   ```bash
+   sqls
+   ```
+
+   If the `sqls` command doesn't work (e.g., "access denied" on Windows), you can use this alternative:
+   ```bash
+   python -c "import sqlshell; sqlshell.start()"
+   ```
+
+2. **Database Connection**
+   - SQLShell automatically connects to a local DuckDB database named 'pool.db'
+
+3. **Working with Data Files**
+   - Click "Load Files" to select your Excel, CSV, or Parquet files
+   - File contents are loaded as queryable SQL tables
+   - Query using standard SQL syntax
+
+4. **Query Execution**
+   - Enter SQL in the editor
+   - Execute using Ctrl+Enter or the "Execute" button
+   - View results in the structured output panel
+
+5. **Test Data**
+   - Load sample test data using the "Test" button for quick experimentation
+
+6. **Using Context-Aware Suggestions**
+   - Press Ctrl+Space to manually trigger suggestions
+   - Suggestions appear automatically as you type
+   - Context-specific suggestions based on your query position:
+     - After SELECT: columns and functions
+     - After FROM/JOIN: tables with join conditions
+     - After WHERE: columns with appropriate operators
+     - Inside functions: relevant column suggestions
+
+## 📝 Query Examples
+
+### Basic Join Operation
+```sql
+SELECT *
+FROM sample_sales_data cd
+INNER JOIN product_catalog pc ON pc.productid = cd.productid
+LIMIT 3;
+```
+
+### Multi-Statement Queries
+```sql
+-- Create a temporary view
+CREATE OR REPLACE TEMPORARY VIEW test_v AS
+SELECT *
+FROM sample_sales_data cd
+INNER JOIN product_catalog pc ON pc.productid = cd.productid;
+
+-- Query the view
+SELECT DISTINCT productid
+FROM test_v;
+```
+
+## 💡 Pro Tips
+
+- Use temporary views for complex query organization
+- Leverage keyboard shortcuts for efficient workflow
+- Explore the multi-format support for various data sources
+- Create multiple tabs for parallel query development
+- The context-aware suggestions learn from your query patterns
+- Type `table_name.` to see all columns for a specific table
+- After JOIN keyword, the system suggests relevant tables and join conditions
+
+## 📊 Column Profiler
+
+The Column Profiler provides quick statistical insights into your table columns:
+
+<img src="column_profiler.png" alt="Column Profiler" width="80%" height="auto">
+
+### Using the Column Profiler
+
+1. **Access the Profiler**
+   - Right-click on any table in the schema browser
+   - Select "Profile Table" from the context menu
+
+2. **View Column Statistics**
+   - Instantly see key metrics for each column:
+     - Data type
+     - Non-null count and percentage
+     - Unique values count
+     - Mean, median, min, and max values (for numeric columns)
+     - Most frequent values and their counts
+     - Distribution visualization
+
+3. **Benefits**
+   - Quickly understand data distribution
+   - Identify outliers and data quality issues
+   - Make informed decisions about query conditions
+   - Assess column cardinality for join operations
+
+The Column Profiler is an invaluable tool for exploratory data analysis, helping you gain insights before writing complex queries.
+
+## 📋 Requirements
+
+- Python 3.8 or higher
+- Dependencies (automatically installed):
+  - PyQt6 ≥ 6.4.0
+  - DuckDB ≥ 0.9.0
+  - Pandas ≥ 2.0.0
+  - NumPy ≥ 1.24.0
+  - openpyxl ≥ 3.1.0 (Excel support)
+  - pyarrow ≥ 14.0.1 (Parquet support)
+  - fastparquet ≥ 2023.10.1 (Alternative parquet engine)
+  - xlrd ≥ 2.0.1 (Support for older .xls files)
+
+## 📄 License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
{sqlshell-0.1.9.dist-info → sqlshell-0.2.0.dist-info}/RECORD
@@ -1,15 +1,22 @@
+sqlshell/LICENSE,sha256=YFVzvqHDVzBVtEZoKwcHhashVdNy4P7tDEQ561jAdyo,1070
+sqlshell/MANIFEST.in,sha256=UautKSW4Kzjsy1Ti05-P58qRgM4ct4mmG3aserBGaX0,144
+sqlshell/README.md,sha256=UoWQzdsYThrOoajT40iOtpI73g5ANB7w12vll0eH0Ck,1357
 sqlshell/__init__.py,sha256=GAZ3g4YsExb-aFyN0a77whBxRRk4XMGJYakvpeKbxdg,164
-sqlshell/
-sqlshell/
-sqlshell/
-sqlshell/
-sqlshell/
+sqlshell/context_suggester.py,sha256=OdfSBqwKWtf6yGj-_cNjf9RZG9cc70TWZ_7ieAVKJqk,33970
+sqlshell/create_test_data.py,sha256=uJSFyqm8zYWpyERPd29iMu-EbtvKiwK5WV0N-LibNOc,5385
+sqlshell/editor.py,sha256=iWSYUtsNCud7HWZrcqD9Ef7FEa0nt7ekeUHV6CmCgao,39635
+sqlshell/main.py,sha256=vt7s6dXmY6iw-Y3jr2YtPNMwwvdqPu1zqNRNidtIRN0,150575
+sqlshell/menus.py,sha256=hiT1CXXnsRKkai7oJlPi94du_GKtIhl5X5LOGvqcOqs,5684
+sqlshell/query_tab.py,sha256=9Yu_7MRikZXrQ003N2HOvjFEBdiz_J9ZLWgTzXPiumc,8404
 sqlshell/splash_screen.py,sha256=K0Ku_nXJWmWSnVEh2OttIthRZcnUoY_tmjIAWIWLm7Y,17604
 sqlshell/sqlshell_demo.png,sha256=dPp9J1FVqQVfrh-gekosuha2Jw2p2--wxbOmt2kr7fg,133550
+sqlshell/styles.py,sha256=EGA_Ow-XerPEQgj82ts3fnqkEPMcjSlJPblbPu9L__s,7135
+sqlshell/suggester_integration.py,sha256=w3fKuSq5ex5OHxSBzZunyq3mbGvX06-7nxgLClnK5Kw,13232
 sqlshell/syntax_highlighter.py,sha256=mPwsD8N4XzAUx0IgwlelyfjUhe0xmH0Ug3UI9hTcHz0,5861
+sqlshell/table_list.py,sha256=ET9UGxhRL2j42GC6WqocotvQIOUzRskH9BnmciVX_As,37670
 sqlshell/data/create_test_data.py,sha256=sUTcf50V8-bVwYV2VNTLK65c-iHiU4wb99By67I10zM,5404
 sqlshell/db/__init__.py,sha256=AJGRkywFCnJliwfOBvtE_ISXjdESkRea7lBFM5KjuTU,152
-sqlshell/db/database_manager.py,sha256=
+sqlshell/db/database_manager.py,sha256=DRPoRYgY9DthD1YvLuMeo-aRfaAAcdAKZKXMPDmAcsg,35722
 sqlshell/resources/__init__.py,sha256=VLTJ_5pUHhctRiV8UZDvG-jnsjgT6JQvW-ZPzIJqBIY,44
 sqlshell/resources/create_icon.py,sha256=O7idVEKwmSXxLUsbeRn6zcYVQLPSdJi98nGamTgXiM4,4905
 sqlshell/resources/create_splash.py,sha256=t1KK43Y0pHKGcdRkbnZgV6_y1c1C0THHQl5_fmpC2gQ,3347
@@ -24,8 +31,11 @@ sqlshell/sqlshell/create_test_databases.py,sha256=oqryFJJahqLFsAjBFM4r9Fe1ea7djD
 sqlshell/ui/__init__.py,sha256=2CsTDAvRZJ99gkjs3-rdwkxyGVAKXX6ueOhPdP1VXQc,206
 sqlshell/ui/bar_chart_delegate.py,sha256=tbtIt2ZqPIcYWNJzpONpYa0CYURkLdjkg23TI7TmOKY,1881
 sqlshell/ui/filter_header.py,sha256=c4Mg1J1yTUfrnT9C-xDWHhcauRsgU3WNfvVInv1J814,16074
-sqlshell
-sqlshell
-sqlshell
-sqlshell-0.
-sqlshell-0.
+sqlshell/utils/__init__.py,sha256=iPKvOsKcfnV7xvhQVOz8BiQ4kbFZ7PGUW8vg0vyMqvk,225
+sqlshell/utils/profile_entropy.py,sha256=pJTcXlBkSEPzL3Fvxizf5gBkw6IsUjfa9Y6MjAqF1do,13238
+sqlshell/utils/profile_keys.py,sha256=ajdBTqvZVmAlVaY-kDmv_D8D3sKASG75PLTzf8Y8nX4,14170
+sqlshell-0.2.0.dist-info/METADATA,sha256=EihrTD_KfJTWcYr24WnHEpMBfbYNK6NRJHVhCeU8vQQ,6101
+sqlshell-0.2.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+sqlshell-0.2.0.dist-info/entry_points.txt,sha256=Kd0fOvyOW7UiTgTVY7abVOmDIH2Y2nawGTp5kVadac4,44
+sqlshell-0.2.0.dist-info/top_level.txt,sha256=ahwsMFhvAqI97ZkT2xvHL5iZCO1p13mNiUOFkdSFwms,9
+sqlshell-0.2.0.dist-info/RECORD,,
sqlshell/setup.py (DELETED)
@@ -1,42 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name="sqlshell",
-    version="0.1.1",
-    packages=find_packages(),
-    install_requires=[
-        'pandas>=2.0.0',
-        'numpy>=1.24.0',
-        'PyQt6>=6.4.0',
-        'duckdb>=0.9.0',
-        'openpyxl>=3.1.0',
-        'pyarrow>=14.0.1',
-        'fastparquet>=2023.10.1',
-        'xlrd>=2.0.1'
-    ],
-    entry_points={
-        'console_scripts': [
-            'sqls=sqlshell.main:main',
-        ],
-    },
-    author="SQLShell Team",
-    description="A powerful SQL shell with GUI interface for data analysis",
-    long_description=open('README.md', encoding='utf-8').read(),
-    long_description_content_type="text/markdown",
-    keywords="sql, data analysis, gui, duckdb",
-    url="https://github.com/yourusername/sqlshell",
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Developers",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-    ],
-    python_requires=">=3.8",
-    include_package_data=True,
-    package_data={
-        'sqlshell': ['*.db'],
-    },
-)