sqlshell 0.1.8-py3-none-any.whl → 0.1.9-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of sqlshell has been flagged as possibly problematic. See the registry's advisory for details.
- sqlshell/__init__.py +1 -1
- sqlshell/db/__init__.py +5 -0
- sqlshell/db/database_manager.py +691 -0
- sqlshell/editor.py +546 -45
- sqlshell/main.py +1472 -889
- sqlshell/query_tab.py +172 -0
- sqlshell/resources/create_icon.py +106 -28
- sqlshell/resources/create_splash.py +41 -11
- sqlshell/resources/icon.png +0 -0
- sqlshell/resources/logo_large.png +0 -0
- sqlshell/resources/logo_medium.png +0 -0
- sqlshell/resources/logo_small.png +0 -0
- sqlshell/setup.py +1 -1
- sqlshell/splash_screen.py +276 -48
- sqlshell/ui/__init__.py +6 -0
- sqlshell/ui/bar_chart_delegate.py +49 -0
- sqlshell/ui/filter_header.py +403 -0
- {sqlshell-0.1.8.dist-info → sqlshell-0.1.9.dist-info}/METADATA +8 -6
- sqlshell-0.1.9.dist-info/RECORD +31 -0
- {sqlshell-0.1.8.dist-info → sqlshell-0.1.9.dist-info}/WHEEL +1 -1
- sqlshell-0.1.8.dist-info/RECORD +0 -21
- {sqlshell-0.1.8.dist-info → sqlshell-0.1.9.dist-info}/entry_points.txt +0 -0
- {sqlshell-0.1.8.dist-info → sqlshell-0.1.9.dist-info}/top_level.txt +0 -0
sqlshell/db/database_manager.py (new file)
@@ -0,0 +1,691 @@
+import os
+import sqlite3
+import pandas as pd
+import duckdb
+
+class DatabaseManager:
+    """
+    Manages database connections and operations for SQLShell.
+    Handles both SQLite and DuckDB connections.
+    """
+
+    def __init__(self):
+        """Initialize the database manager with no active connection."""
+        self.conn = None
+        self.connection_type = None
+        self.loaded_tables = {}  # Maps table_name to file_path or 'database'/'query_result'
+        self.table_columns = {}  # Maps table_name to list of column names
+        self.database_path = None  # Track the path to the current database file
+
+    def is_connected(self):
+        """Check if there is an active database connection."""
+        return self.conn is not None
+
+    def get_connection_info(self):
+        """Get information about the current connection."""
+        if not self.is_connected():
+            return "No database connected"
+
+        if self.connection_type == "sqlite":
+            return "Connected to: SQLite database"
+        elif self.connection_type == "duckdb":
+            return "Connected to: DuckDB database"
+        return "Connected to: Unknown database type"
+
+    def close_connection(self):
+        """Close the current database connection if one exists."""
+        if self.conn:
+            try:
+                if self.connection_type == "duckdb":
+                    self.conn.close()
+                else:  # sqlite
+                    self.conn.close()
+            except Exception:
+                pass  # Ignore errors when closing
+            finally:
+                self.conn = None
+                self.connection_type = None
+                self.database_path = None  # Clear the database path
+
+    def open_database(self, filename):
+        """
+        Open a database connection to the specified file.
+        Detects whether it's a SQLite or DuckDB database.
+
+        Args:
+            filename: Path to the database file
+
+        Returns:
+            True if successful, False otherwise
+
+        Raises:
+            Exception: If there's an error opening the database
+        """
+        # Close any existing connection
+        self.close_connection()
+
+        try:
+            if self.is_sqlite_db(filename):
+                self.conn = sqlite3.connect(filename)
+                self.connection_type = "sqlite"
+            else:
+                self.conn = duckdb.connect(filename)
+                self.connection_type = "duckdb"
+
+            # Store the database path
+            self.database_path = os.path.abspath(filename)
+
+            # Load tables from the database
+            self.load_database_tables()
+            return True
+        except (sqlite3.Error, duckdb.Error) as e:
+            self.conn = None
+            self.connection_type = None
+            self.database_path = None
+            raise Exception(f"Failed to open database: {str(e)}")
+
+    def create_memory_connection(self):
+        """Create an in-memory DuckDB connection."""
+        self.close_connection()
+        self.conn = duckdb.connect(':memory:')
+        self.connection_type = 'duckdb'
+        self.database_path = None  # No file path for in-memory database
+        return "Connected to: in-memory DuckDB"
+
+    def is_sqlite_db(self, filename):
+        """
+        Check if the file is a SQLite database by examining its header.
+
+        Args:
+            filename: Path to the database file
+
+        Returns:
+            Boolean indicating if the file is a SQLite database
+        """
+        try:
+            with open(filename, 'rb') as f:
+                header = f.read(16)
+                return header[:16] == b'SQLite format 3\x00'
+        except:
+            return False
+
+    def load_database_tables(self):
+        """
+        Load all tables from the current database connection.
+
+        Returns:
+            A list of table names loaded
+        """
+        try:
+            if not self.is_connected():
+                return []
+
+            table_names = []
+
+            if self.connection_type == 'sqlite':
+                query = "SELECT name FROM sqlite_master WHERE type='table'"
+                cursor = self.conn.cursor()
+                tables = cursor.execute(query).fetchall()
+
+                for (table_name,) in tables:
+                    self.loaded_tables[table_name] = 'database'
+                    table_names.append(table_name)
+
+                    # Get column names for each table
+                    try:
+                        column_query = f"PRAGMA table_info({table_name})"
+                        columns = cursor.execute(column_query).fetchall()
+                        self.table_columns[table_name] = [col[1] for col in columns]  # Column name is at index 1
+                    except Exception:
+                        self.table_columns[table_name] = []
+
+            else:  # duckdb
+                query = "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
+                result = self.conn.execute(query).fetchdf()
+
+                for table_name in result['table_name']:
+                    self.loaded_tables[table_name] = 'database'
+                    table_names.append(table_name)
+
+                    # Get column names for each table
+                    try:
+                        column_query = f"SELECT column_name FROM information_schema.columns WHERE table_name='{table_name}' AND table_schema='main'"
+                        columns = self.conn.execute(column_query).fetchdf()
+                        self.table_columns[table_name] = columns['column_name'].tolist()
+                    except Exception:
+                        self.table_columns[table_name] = []
+
+            return table_names
+
+        except Exception as e:
+            raise Exception(f'Error loading tables: {str(e)}')
+
+    def execute_query(self, query):
+        """
+        Execute a SQL query against the current database connection.
+
+        Args:
+            query: SQL query string to execute
+
+        Returns:
+            Pandas DataFrame with the query results
+
+        Raises:
+            Exception: If there's an error executing the query
+        """
+        if not query.strip():
+            raise ValueError("Empty query")
+
+        if not self.is_connected():
+            raise ValueError("No database connection")
+
+        try:
+            if self.connection_type == "duckdb":
+                result = self.conn.execute(query).fetchdf()
+            else:  # sqlite
+                result = pd.read_sql_query(query, self.conn)
+
+            return result
+        except (duckdb.Error, sqlite3.Error) as e:
+            error_msg = str(e).lower()
+            if "syntax error" in error_msg:
+                raise SyntaxError(f"SQL syntax error: {str(e)}")
+            elif "no such table" in error_msg:
+                # Extract the table name from the error message when possible
+                import re
+                table_match = re.search(r"'([^']+)'", str(e))
+                table_name = table_match.group(1) if table_match else "unknown"
+
+                # Check if this table is in our loaded_tables dict but came from a database
+                if table_name in self.loaded_tables and self.loaded_tables[table_name] == 'database':
+                    raise ValueError(f"Table '{table_name}' was part of a database but is not accessible. "
+                                     f"Please reconnect to the original database using the 'Open Database' button.")
+                else:
+                    raise ValueError(f"Table not found: {str(e)}")
+            elif "no such column" in error_msg:
+                raise ValueError(f"Column not found: {str(e)}")
+            else:
+                raise Exception(f"Database error: {str(e)}")
+
+    def load_file(self, file_path):
+        """
+        Load data from a file into the database.
+
+        Args:
+            file_path: Path to the data file (Excel, CSV, Parquet)
+
+        Returns:
+            Tuple of (table_name, DataFrame) for the loaded data
+
+        Raises:
+            ValueError: If the file format is unsupported or there's an error
+        """
+        try:
+            # Read the file into a DataFrame, using optimized loading strategies
+            if file_path.endswith(('.xlsx', '.xls')):
+                # Try to use a streaming approach for Excel files
+                try:
+                    # For Excel files, we first check if it's a large file
+                    # If it's large, we may want to show only a subset
+                    excel_file = pd.ExcelFile(file_path)
+                    sheet_name = excel_file.sheet_names[0]  # Default to first sheet
+
+                    # Read the first row to get column names
+                    df_preview = pd.read_excel(excel_file, sheet_name=sheet_name, nrows=5)
+
+                    # If the file is very large, use chunksize
+                    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+
+                    if file_size > 50:  # If file is larger than 50MB
+                        # Use a limited subset for large files to avoid memory issues
+                        df = pd.read_excel(excel_file, sheet_name=sheet_name, nrows=100000)  # Cap at 100k rows
+                    else:
+                        # For smaller files, read everything
+                        df = pd.read_excel(excel_file, sheet_name=sheet_name)
+                except Exception:
+                    # Fallback to standard reading method
+                    df = pd.read_excel(file_path)
+            elif file_path.endswith('.csv'):
+                # For CSV files, we can use chunking for large files
+                try:
+                    # Check if it's a large file
+                    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+
+                    if file_size > 50:  # If file is larger than 50MB
+                        # Read the first chunk to get column types
+                        df_preview = pd.read_csv(file_path, nrows=1000)
+
+                        # Use optimized dtypes for better memory usage
+                        dtypes = {col: df_preview[col].dtype for col in df_preview.columns}
+
+                        # Read again with chunk processing, combining up to 100k rows
+                        chunks = []
+                        for chunk in pd.read_csv(file_path, dtype=dtypes, chunksize=10000):
+                            chunks.append(chunk)
+                            if len(chunks) * 10000 >= 100000:  # Cap at 100k rows
+                                break
+
+                        df = pd.concat(chunks, ignore_index=True)
+                    else:
+                        # For smaller files, read everything at once
+                        df = pd.read_csv(file_path)
+                except Exception:
+                    # Fallback to standard reading method
+                    df = pd.read_csv(file_path)
+            elif file_path.endswith('.parquet'):
+                df = pd.read_parquet(file_path)
+            else:
+                raise ValueError("Unsupported file format")
+
+            # Generate table name from file name
+            base_name = os.path.splitext(os.path.basename(file_path))[0]
+            table_name = self.sanitize_table_name(base_name)
+
+            # Ensure unique table name
+            original_name = table_name
+            counter = 1
+            while table_name in self.loaded_tables:
+                table_name = f"{original_name}_{counter}"
+                counter += 1
+
+            # Register the table in the database
+            if not self.is_connected():
+                self.create_memory_connection()
+
+            # Handle table creation based on database type
+            if self.connection_type == 'sqlite':
+                # For SQLite, create a table from the DataFrame
+                # For large dataframes, use a chunked approach to avoid memory issues
+                if len(df) > 10000:
+                    # Create the table with the first chunk
+                    df.iloc[:1000].to_sql(table_name, self.conn, index=False, if_exists='replace')
+
+                    # Append the rest in chunks
+                    chunk_size = 5000
+                    for i in range(1000, len(df), chunk_size):
+                        end = min(i + chunk_size, len(df))
+                        df.iloc[i:end].to_sql(table_name, self.conn, index=False, if_exists='append')
+                else:
+                    # For smaller dataframes, do it in one go
+                    df.to_sql(table_name, self.conn, index=False, if_exists='replace')
+            else:
+                # For DuckDB, register the DataFrame as a view
+                self.conn.register(table_name, df)
+
+            # Store information about the table
+            self.loaded_tables[table_name] = file_path
+            self.table_columns[table_name] = df.columns.tolist()
+
+            return table_name, df
+
+        except MemoryError:
+            raise ValueError("Not enough memory to load this file. Try using a smaller file or increasing available memory.")
+        except Exception as e:
+            raise ValueError(f"Error loading file: {str(e)}")
+
+    def remove_table(self, table_name):
+        """
+        Remove a table from the database.
+
+        Args:
+            table_name: Name of the table to remove
+
+        Returns:
+            Boolean indicating success
+        """
+        if not table_name in self.loaded_tables:
+            return False
+
+        try:
+            # Remove from database
+            if self.connection_type == 'sqlite':
+                self.conn.execute(f'DROP TABLE IF EXISTS "{table_name}"')
+            else:  # duckdb
+                self.conn.execute(f'DROP VIEW IF EXISTS {table_name}')
+
+            # Remove from tracking
+            del self.loaded_tables[table_name]
+            if table_name in self.table_columns:
+                del self.table_columns[table_name]
+
+            return True
+        except Exception:
+            return False
+
+    def get_table_preview(self, table_name, limit=5):
+        """
+        Get a preview of the data in a table.
+
+        Args:
+            table_name: Name of the table to preview
+            limit: Number of rows to preview
+
+        Returns:
+            Pandas DataFrame with the preview data
+        """
+        if not table_name in self.loaded_tables:
+            raise ValueError(f"Table '{table_name}' not found")
+
+        try:
+            if self.connection_type == 'sqlite':
+                return pd.read_sql_query(f'SELECT * FROM "{table_name}" LIMIT {limit}', self.conn)
+            else:
+                return self.conn.execute(f'SELECT * FROM {table_name} LIMIT {limit}').fetchdf()
+        except Exception as e:
+            raise Exception(f"Error previewing table: {str(e)}")
+
+    def rename_table(self, old_name, new_name):
+        """
+        Rename a table in the database.
+
+        Args:
+            old_name: Current name of the table
+            new_name: New name for the table
+
+        Returns:
+            Boolean indicating success
+        """
+        if not old_name in self.loaded_tables:
+            return False
+
+        try:
+            # Sanitize the new name
+            new_name = self.sanitize_table_name(new_name)
+
+            # Check if new name already exists
+            if new_name in self.loaded_tables and new_name != old_name:
+                raise ValueError(f"Table '{new_name}' already exists")
+
+            # Rename in database
+            if self.connection_type == 'sqlite':
+                self.conn.execute(f'ALTER TABLE "{old_name}" RENAME TO "{new_name}"')
+            else:  # duckdb
+                # For DuckDB, we need to:
+                # 1. Get the data from the old view/table
+                df = self.conn.execute(f'SELECT * FROM {old_name}').fetchdf()
+                # 2. Drop the old view
+                self.conn.execute(f'DROP VIEW IF EXISTS {old_name}')
+                # 3. Register the data under the new name
+                self.conn.register(new_name, df)
+
+            # Update tracking
+            self.loaded_tables[new_name] = self.loaded_tables.pop(old_name)
+            self.table_columns[new_name] = self.table_columns.pop(old_name)
+
+            return True
+
+        except Exception as e:
+            raise Exception(f"Failed to rename table: {str(e)}")
+
+    def sanitize_table_name(self, name):
+        """
+        Sanitize a table name to be valid in SQL.
+
+        Args:
+            name: The proposed table name
+
+        Returns:
+            A sanitized table name
+        """
+        import re
+        name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
+        # Ensure it starts with a letter
+        if not name or not name[0].isalpha():
+            name = 'table_' + name
+        return name.lower()
+
+    def register_dataframe(self, df, table_name, source='query_result'):
+        """
+        Register a DataFrame as a table in the database.
+
+        Args:
+            df: Pandas DataFrame to register
+            table_name: Name for the table
+            source: Source of the data (for tracking)
+
+        Returns:
+            The table name used (may be different if there was a conflict)
+        """
+        # Sanitize and ensure unique name
+        table_name = self.sanitize_table_name(table_name)
+        original_name = table_name
+        counter = 1
+        while table_name in self.loaded_tables:
+            table_name = f"{original_name}_{counter}"
+            counter += 1
+
+        # Register in database
+        if self.connection_type == 'sqlite':
+            df.to_sql(table_name, self.conn, index=False, if_exists='replace')
+        else:  # duckdb
+            self.conn.register(table_name, df)
+
+        # Track the table
+        self.loaded_tables[table_name] = source
+        self.table_columns[table_name] = df.columns.tolist()
+
+        return table_name
+
+    def get_all_table_columns(self):
+        """
+        Get all table and column names for autocompletion.
+
+        Returns:
+            List of completion words (table names and column names)
+        """
+        # Start with table names
+        completion_words = set(self.loaded_tables.keys())
+
+        # Track column data types for smarter autocompletion
+        column_data_types = {}  # {table.column: data_type}
+
+        # Detect potential table relationships for JOIN suggestions
+        potential_relationships = []  # [(table1, column1, table2, column2)]
+
+        # Add column names with and without table prefixes, up to reasonable limits
+        MAX_COLUMNS_PER_TABLE = 100  # Limit columns to prevent memory issues
+        MAX_TABLES_WITH_COLUMNS = 20  # Limit the number of tables to process
+
+        # Sort tables by name to ensure consistent behavior
+        table_items = sorted(list(self.table_columns.items()))
+
+        # Process only a limited number of tables
+        for table, columns in table_items[:MAX_TABLES_WITH_COLUMNS]:
+            # Add each column name by itself
+            for col in columns[:MAX_COLUMNS_PER_TABLE]:
+                completion_words.add(col)
+
+            # Add qualified column names (table.column)
+            for col in columns[:MAX_COLUMNS_PER_TABLE]:
+                completion_words.add(f"{table}.{col}")
+
+            # Try to infer table relationships based on column naming
+            self._detect_relationships(table, columns, potential_relationships)
+
+            # Try to infer column data types when possible
+            if self.is_connected():
+                try:
+                    self._detect_column_types(table, column_data_types)
+                except Exception:
+                    pass
+
+        # Add common SQL functions and aggregations with context-aware completions
+        sql_functions = [
+            # Aggregation functions with completed parentheses
+            "COUNT(*)", "COUNT(DISTINCT ", "SUM(", "AVG(", "MIN(", "MAX(",
+
+            # String functions
+            "CONCAT(", "SUBSTR(", "LOWER(", "UPPER(", "TRIM(", "REPLACE(", "LENGTH(",
+            "REGEXP_REPLACE(", "REGEXP_EXTRACT(", "REGEXP_MATCH(",
+
+            # Date/time functions
+            "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "NOW()",
+            "EXTRACT(", "DATE_TRUNC(", "DATE_PART(", "DATEADD(", "DATEDIFF(",
+
+            # Type conversion
+            "CAST( AS ", "CONVERT(", "TRY_CAST( AS ", "FORMAT(",
+
+            # Conditional functions
+            "COALESCE(", "NULLIF(", "GREATEST(", "LEAST(", "IFF(", "IFNULL(",
+
+            # Window functions
+            "ROW_NUMBER() OVER (", "RANK() OVER (", "DENSE_RANK() OVER (",
+            "LEAD( OVER (", "LAG( OVER (", "FIRST_VALUE( OVER (", "LAST_VALUE( OVER ("
+        ]
+
+        # Add common SQL patterns with context awareness
+        sql_patterns = [
+            # Basic query patterns
+            "SELECT * FROM ", "SELECT COUNT(*) FROM ",
+            "SELECT DISTINCT ", "GROUP BY ", "ORDER BY ", "HAVING ",
+            "LIMIT ", "OFFSET ", "WHERE ",
+
+            # JOIN patterns - complete with ON and common join points
+            "INNER JOIN ", "LEFT JOIN ", "RIGHT JOIN ", "FULL OUTER JOIN ",
+            "LEFT OUTER JOIN ", "RIGHT OUTER JOIN ", "CROSS JOIN ",
+
+            # Advanced patterns
+            "WITH _ AS (", "CASE WHEN _ THEN _ ELSE _ END",
+            "OVER (PARTITION BY _ ORDER BY _)",
+            "EXISTS (SELECT 1 FROM _ WHERE _)",
+            "NOT EXISTS (SELECT 1 FROM _ WHERE _)",
+
+            # Common operator patterns
+            "BETWEEN _ AND _", "IN (", "NOT IN (", "IS NULL", "IS NOT NULL",
+            "LIKE '%_%'", "NOT LIKE ", "ILIKE ",
+
+            # Data manipulation patterns
+            "INSERT INTO _ VALUES (", "INSERT INTO _ (_) VALUES (_)",
+            "UPDATE _ SET _ = _ WHERE _", "DELETE FROM _ WHERE _"
+        ]
+
+        # Add table relationships as suggested JOIN patterns
+        for table1, col1, table2, col2 in potential_relationships:
+            join_pattern = f"JOIN {table2} ON {table1}.{col1} = {table2}.{col2}"
+            completion_words.add(join_pattern)
+
+            # Also add the reverse relationship
+            join_pattern_rev = f"JOIN {table1} ON {table2}.{col2} = {table1}.{col1}"
+            completion_words.add(join_pattern_rev)
+
+        # Add all SQL extras to the completion words
+        completion_words.update(sql_functions)
+        completion_words.update(sql_patterns)
+
+        # Add common data-specific comparison patterns based on column types
+        for col_name, data_type in column_data_types.items():
+            if 'INT' in data_type.upper() or 'NUM' in data_type.upper() or 'FLOAT' in data_type.upper():
+                # Numeric columns
+                completion_words.add(f"{col_name} > ")
+                completion_words.add(f"{col_name} < ")
+                completion_words.add(f"{col_name} >= ")
+                completion_words.add(f"{col_name} <= ")
+                completion_words.add(f"{col_name} BETWEEN ")
+            elif 'DATE' in data_type.upper() or 'TIME' in data_type.upper():
+                # Date/time columns
+                completion_words.add(f"{col_name} > CURRENT_DATE")
+                completion_words.add(f"{col_name} < CURRENT_DATE")
+                completion_words.add(f"{col_name} BETWEEN CURRENT_DATE - INTERVAL ")
+                completion_words.add(f"EXTRACT(YEAR FROM {col_name})")
+                completion_words.add(f"DATE_TRUNC('month', {col_name})")
+            elif 'CHAR' in data_type.upper() or 'TEXT' in data_type.upper() or 'VARCHAR' in data_type.upper():
+                # String columns
+                completion_words.add(f"{col_name} LIKE '%")
+                completion_words.add(f"{col_name} ILIKE '%")
+                completion_words.add(f"LOWER({col_name}) = ")
+                completion_words.add(f"UPPER({col_name}) = ")
+
+        # Convert set back to list and sort for better usability
+        completion_list = list(completion_words)
+        completion_list.sort(key=lambda x: (not x.isupper(), x))  # Prioritize SQL keywords
+
+        return completion_list
+
+    def _detect_relationships(self, table, columns, potential_relationships):
+        """
+        Detect potential relationships between tables based on column naming patterns.
+
+        Args:
+            table: Current table name
+            columns: List of column names in this table
+            potential_relationships: List to populate with detected relationships
+        """
+        # Look for columns that might be foreign keys (common patterns)
+        for col in columns:
+            # Common ID patterns: table_id, tableId, TableID, etc.
+            if col.lower().endswith('_id') or col.lower().endswith('id'):
+                # Extract potential table name from column name
+                if col.lower().endswith('_id'):
+                    potential_table = col[:-3]  # Remove '_id'
+                else:
+                    # Try to extract tablename from camelCase or PascalCase
+                    potential_table = col[:-2]  # Remove 'Id'
+
+                # Normalize to lowercase for comparison
+                potential_table = potential_table.lower()
+
+                # Check if this potential table exists in our loaded tables
+                for existing_table in self.loaded_tables.keys():
+                    # Normalize for comparison
+                    existing_lower = existing_table.lower()
+
+                    # If we find a matching table, it's likely a relationship
+                    if existing_lower == potential_table or existing_lower.endswith(f"_{potential_table}"):
+                        # Add this relationship
+                        # We assume the target column in the referenced table is 'id'
+                        potential_relationships.append((table, col, existing_table, 'id'))
+                        break
+
+            # Also detect columns with same name across tables (potential join points)
+            for other_table, other_columns in self.table_columns.items():
+                if other_table != table and col in other_columns:
+                    # Same column name in different tables - potential join point
+                    potential_relationships.append((table, col, other_table, col))
+
+    def _detect_column_types(self, table, column_data_types):
+        """
+        Detect column data types for a table to enable smarter autocompletion.
+
+        Args:
+            table: Table name to analyze
+            column_data_types: Dictionary to populate with column data types
+        """
+        if not self.is_connected():
+            return
+
+        try:
+            if self.connection_type == 'sqlite':
+                # Get column info from SQLite
+                cursor = self.conn.cursor()
+                cursor.execute(f"PRAGMA table_info({table})")
+                columns_info = cursor.fetchall()
+
+                for column_info in columns_info:
+                    col_name = column_info[1]  # Column name is at index 1
+                    data_type = column_info[2]  # Data type is at index 2
+
+                    # Store as table.column: data_type for qualified lookups
+                    column_data_types[f"{table}.{col_name}"] = data_type
+                    # Also store just column: data_type for unqualified lookups
+                    column_data_types[col_name] = data_type
+
+            elif self.connection_type == 'duckdb':
+                # Get column info from DuckDB
+                query = f"""
+                    SELECT column_name, data_type
+                    FROM information_schema.columns
+                    WHERE table_name='{table}' AND table_schema='main'
+                """
+                result = self.conn.execute(query).fetchdf()
+
+                for _, row in result.iterrows():
+                    col_name = row['column_name']
+                    data_type = row['data_type']
+
+                    # Store as table.column: data_type for qualified lookups
+                    column_data_types[f"{table}.{col_name}"] = data_type
+                    # Also store just column: data_type for unqualified lookups
+                    column_data_types[col_name] = data_type
+        except Exception:
+            # Ignore errors in type detection - this is just for enhancement
+            pass