sqlshell 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlshell might be problematic.

@@ -0,0 +1,691 @@
+import os
+import sqlite3
+import pandas as pd
+import duckdb
+
+class DatabaseManager:
+    """
+    Manages database connections and operations for SQLShell.
+    Handles both SQLite and DuckDB connections.
+    """
+
+    def __init__(self):
+        """Initialize the database manager with no active connection."""
+        self.conn = None
+        self.connection_type = None
+        self.loaded_tables = {}  # Maps table_name to file_path or 'database'/'query_result'
+        self.table_columns = {}  # Maps table_name to list of column names
+        self.database_path = None  # Track the path to the current database file
+
+    def is_connected(self):
+        """Check if there is an active database connection."""
+        return self.conn is not None
+
+    def get_connection_info(self):
+        """Get information about the current connection."""
+        if not self.is_connected():
+            return "No database connected"
+
+        if self.connection_type == "sqlite":
+            return "Connected to: SQLite database"
+        elif self.connection_type == "duckdb":
+            return "Connected to: DuckDB database"
+        return "Connected to: Unknown database type"
+
+    def close_connection(self):
+        """Close the current database connection if one exists."""
+        if self.conn:
+            try:
+                # close() behaves the same for SQLite and DuckDB connections
+                self.conn.close()
+            except Exception:
+                pass  # Ignore errors when closing
+            finally:
+                self.conn = None
+                self.connection_type = None
+                self.database_path = None  # Clear the database path
+
+    def open_database(self, filename):
+        """
+        Open a database connection to the specified file.
+        Detects whether it's a SQLite or DuckDB database.
+
+        Args:
+            filename: Path to the database file
+
+        Returns:
+            True if successful, False otherwise
+
+        Raises:
+            Exception: If there's an error opening the database
+        """
+        # Close any existing connection
+        self.close_connection()
+
+        try:
+            if self.is_sqlite_db(filename):
+                self.conn = sqlite3.connect(filename)
+                self.connection_type = "sqlite"
+            else:
+                self.conn = duckdb.connect(filename)
+                self.connection_type = "duckdb"
+
+            # Store the database path
+            self.database_path = os.path.abspath(filename)
+
+            # Load tables from the database
+            self.load_database_tables()
+            return True
+        except (sqlite3.Error, duckdb.Error) as e:
+            self.conn = None
+            self.connection_type = None
+            self.database_path = None
+            raise Exception(f"Failed to open database: {str(e)}")
+
+    def create_memory_connection(self):
+        """Create an in-memory DuckDB connection."""
+        self.close_connection()
+        self.conn = duckdb.connect(':memory:')
+        self.connection_type = 'duckdb'
+        self.database_path = None  # No file path for in-memory database
+        return "Connected to: in-memory DuckDB"
+
+    def is_sqlite_db(self, filename):
+        """
+        Check if the file is a SQLite database by examining its header.
+
+        Args:
+            filename: Path to the database file
+
+        Returns:
+            Boolean indicating if the file is a SQLite database
+        """
+        try:
+            with open(filename, 'rb') as f:
+                header = f.read(16)
+                return header[:16] == b'SQLite format 3\x00'
+        except Exception:
+            return False
+
+    def load_database_tables(self):
+        """
+        Load all tables from the current database connection.
+
+        Returns:
+            A list of table names loaded
+        """
+        try:
+            if not self.is_connected():
+                return []
+
+            table_names = []
+
+            if self.connection_type == 'sqlite':
+                query = "SELECT name FROM sqlite_master WHERE type='table'"
+                cursor = self.conn.cursor()
+                tables = cursor.execute(query).fetchall()
+
+                for (table_name,) in tables:
+                    self.loaded_tables[table_name] = 'database'
+                    table_names.append(table_name)
+
+                    # Get column names for each table
+                    try:
+                        column_query = f"PRAGMA table_info({table_name})"
+                        columns = cursor.execute(column_query).fetchall()
+                        self.table_columns[table_name] = [col[1] for col in columns]  # Column name is at index 1
+                    except Exception:
+                        self.table_columns[table_name] = []
+
+            else:  # duckdb
+                query = "SELECT table_name FROM information_schema.tables WHERE table_schema='main'"
+                result = self.conn.execute(query).fetchdf()
+
+                for table_name in result['table_name']:
+                    self.loaded_tables[table_name] = 'database'
+                    table_names.append(table_name)
+
+                    # Get column names for each table
+                    try:
+                        column_query = f"SELECT column_name FROM information_schema.columns WHERE table_name='{table_name}' AND table_schema='main'"
+                        columns = self.conn.execute(column_query).fetchdf()
+                        self.table_columns[table_name] = columns['column_name'].tolist()
+                    except Exception:
+                        self.table_columns[table_name] = []
+
+            return table_names
+
+        except Exception as e:
+            raise Exception(f'Error loading tables: {str(e)}')
+
+    def execute_query(self, query):
+        """
+        Execute a SQL query against the current database connection.
+
+        Args:
+            query: SQL query string to execute
+
+        Returns:
+            Pandas DataFrame with the query results
+
+        Raises:
+            Exception: If there's an error executing the query
+        """
+        if not query.strip():
+            raise ValueError("Empty query")
+
+        if not self.is_connected():
+            raise ValueError("No database connection")
+
+        try:
+            if self.connection_type == "duckdb":
+                result = self.conn.execute(query).fetchdf()
+            else:  # sqlite
+                result = pd.read_sql_query(query, self.conn)
+
+            return result
+        except (duckdb.Error, sqlite3.Error) as e:
+            error_msg = str(e).lower()
+            if "syntax error" in error_msg:
+                raise SyntaxError(f"SQL syntax error: {str(e)}")
+            elif "no such table" in error_msg:
+                # Extract the table name from the error message when possible
+                import re
+                table_match = re.search(r"'([^']+)'", str(e))
+                table_name = table_match.group(1) if table_match else "unknown"
+
+                # Check if this table is in our loaded_tables dict but came from a database
+                if table_name in self.loaded_tables and self.loaded_tables[table_name] == 'database':
+                    raise ValueError(f"Table '{table_name}' was part of a database but is not accessible. "
+                                     f"Please reconnect to the original database using the 'Open Database' button.")
+                else:
+                    raise ValueError(f"Table not found: {str(e)}")
+            elif "no such column" in error_msg:
+                raise ValueError(f"Column not found: {str(e)}")
+            else:
+                raise Exception(f"Database error: {str(e)}")
+
+    def load_file(self, file_path):
+        """
+        Load data from a file into the database.
+
+        Args:
+            file_path: Path to the data file (Excel, CSV, Parquet)
+
+        Returns:
+            Tuple of (table_name, DataFrame) for the loaded data
+
+        Raises:
+            ValueError: If the file format is unsupported or there's an error
+        """
+        try:
+            # Read the file into a DataFrame, using optimized loading strategies
+            if file_path.endswith(('.xlsx', '.xls')):
+                # Try to use a streaming approach for Excel files
+                try:
+                    # For Excel files, we first check if it's a large file
+                    # If it's large, we may want to show only a subset
+                    excel_file = pd.ExcelFile(file_path)
+                    sheet_name = excel_file.sheet_names[0]  # Default to first sheet
+
+                    # Read the first few rows to preview column names
+                    df_preview = pd.read_excel(excel_file, sheet_name=sheet_name, nrows=5)
+
+                    # If the file is very large, use chunksize
+                    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+
+                    if file_size > 50:  # If file is larger than 50MB
+                        # Use a limited subset for large files to avoid memory issues
+                        df = pd.read_excel(excel_file, sheet_name=sheet_name, nrows=100000)  # Cap at 100k rows
+                    else:
+                        # For smaller files, read everything
+                        df = pd.read_excel(excel_file, sheet_name=sheet_name)
+                except Exception:
+                    # Fallback to standard reading method
+                    df = pd.read_excel(file_path)
+            elif file_path.endswith('.csv'):
+                # For CSV files, we can use chunking for large files
+                try:
+                    # Check if it's a large file
+                    file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
+
+                    if file_size > 50:  # If file is larger than 50MB
+                        # Read the first chunk to get column types
+                        df_preview = pd.read_csv(file_path, nrows=1000)
+
+                        # Use optimized dtypes for better memory usage
+                        dtypes = {col: df_preview[col].dtype for col in df_preview.columns}
+
+                        # Read again with chunk processing, combining up to 100k rows
+                        chunks = []
+                        for chunk in pd.read_csv(file_path, dtype=dtypes, chunksize=10000):
+                            chunks.append(chunk)
+                            if len(chunks) * 10000 >= 100000:  # Cap at 100k rows
+                                break
+
+                        df = pd.concat(chunks, ignore_index=True)
+                    else:
+                        # For smaller files, read everything at once
+                        df = pd.read_csv(file_path)
+                except Exception:
+                    # Fallback to standard reading method
+                    df = pd.read_csv(file_path)
+            elif file_path.endswith('.parquet'):
+                df = pd.read_parquet(file_path)
+            else:
+                raise ValueError("Unsupported file format")
+
+            # Generate table name from file name
+            base_name = os.path.splitext(os.path.basename(file_path))[0]
+            table_name = self.sanitize_table_name(base_name)
+
+            # Ensure unique table name
+            original_name = table_name
+            counter = 1
+            while table_name in self.loaded_tables:
+                table_name = f"{original_name}_{counter}"
+                counter += 1
+
+            # Register the table in the database
+            if not self.is_connected():
+                self.create_memory_connection()
+
+            # Handle table creation based on database type
+            if self.connection_type == 'sqlite':
+                # For SQLite, create a table from the DataFrame
+                # For large dataframes, use a chunked approach to avoid memory issues
+                if len(df) > 10000:
+                    # Create the table with the first chunk
+                    df.iloc[:1000].to_sql(table_name, self.conn, index=False, if_exists='replace')
+
+                    # Append the rest in chunks
+                    chunk_size = 5000
+                    for i in range(1000, len(df), chunk_size):
+                        end = min(i + chunk_size, len(df))
+                        df.iloc[i:end].to_sql(table_name, self.conn, index=False, if_exists='append')
+                else:
+                    # For smaller dataframes, do it in one go
+                    df.to_sql(table_name, self.conn, index=False, if_exists='replace')
+            else:
+                # For DuckDB, register the DataFrame as a view
+                self.conn.register(table_name, df)
+
+            # Store information about the table
+            self.loaded_tables[table_name] = file_path
+            self.table_columns[table_name] = df.columns.tolist()
+
+            return table_name, df
+
+        except MemoryError:
+            raise ValueError("Not enough memory to load this file. Try using a smaller file or increasing available memory.")
+        except Exception as e:
+            raise ValueError(f"Error loading file: {str(e)}")
+
+    def remove_table(self, table_name):
+        """
+        Remove a table from the database.
+
+        Args:
+            table_name: Name of the table to remove
+
+        Returns:
+            Boolean indicating success
+        """
+        if table_name not in self.loaded_tables:
+            return False
+
+        try:
+            # Remove from database
+            if self.connection_type == 'sqlite':
+                self.conn.execute(f'DROP TABLE IF EXISTS "{table_name}"')
+            else:  # duckdb
+                self.conn.execute(f'DROP VIEW IF EXISTS {table_name}')
+
+            # Remove from tracking
+            del self.loaded_tables[table_name]
+            if table_name in self.table_columns:
+                del self.table_columns[table_name]
+
+            return True
+        except Exception:
+            return False
+
+    def get_table_preview(self, table_name, limit=5):
+        """
+        Get a preview of the data in a table.
+
+        Args:
+            table_name: Name of the table to preview
+            limit: Number of rows to preview
+
+        Returns:
+            Pandas DataFrame with the preview data
+        """
+        if table_name not in self.loaded_tables:
+            raise ValueError(f"Table '{table_name}' not found")
+
+        try:
+            if self.connection_type == 'sqlite':
+                return pd.read_sql_query(f'SELECT * FROM "{table_name}" LIMIT {limit}', self.conn)
+            else:
+                return self.conn.execute(f'SELECT * FROM {table_name} LIMIT {limit}').fetchdf()
+        except Exception as e:
+            raise Exception(f"Error previewing table: {str(e)}")
+
+    def rename_table(self, old_name, new_name):
+        """
+        Rename a table in the database.
+
+        Args:
+            old_name: Current name of the table
+            new_name: New name for the table
+
+        Returns:
+            Boolean indicating success
+        """
+        if old_name not in self.loaded_tables:
+            return False
+
+        try:
+            # Sanitize the new name
+            new_name = self.sanitize_table_name(new_name)
+
+            # Check if new name already exists
+            if new_name in self.loaded_tables and new_name != old_name:
+                raise ValueError(f"Table '{new_name}' already exists")
+
+            # Rename in database
+            if self.connection_type == 'sqlite':
+                self.conn.execute(f'ALTER TABLE "{old_name}" RENAME TO "{new_name}"')
+            else:  # duckdb
+                # For DuckDB, we need to:
+                # 1. Get the data from the old view/table
+                df = self.conn.execute(f'SELECT * FROM {old_name}').fetchdf()
+                # 2. Drop the old view
+                self.conn.execute(f'DROP VIEW IF EXISTS {old_name}')
+                # 3. Register the data under the new name
+                self.conn.register(new_name, df)
+
+            # Update tracking
+            self.loaded_tables[new_name] = self.loaded_tables.pop(old_name)
+            self.table_columns[new_name] = self.table_columns.pop(old_name)
+
+            return True
+
+        except Exception as e:
+            raise Exception(f"Failed to rename table: {str(e)}")
+
+    def sanitize_table_name(self, name):
+        """
+        Sanitize a table name to be valid in SQL.
+
+        Args:
+            name: The proposed table name
+
+        Returns:
+            A sanitized table name
+        """
+        import re
+        name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
+        # Ensure it starts with a letter
+        if not name or not name[0].isalpha():
+            name = 'table_' + name
+        return name.lower()
+
+    def register_dataframe(self, df, table_name, source='query_result'):
+        """
+        Register a DataFrame as a table in the database.
+
+        Args:
+            df: Pandas DataFrame to register
+            table_name: Name for the table
+            source: Source of the data (for tracking)
+
+        Returns:
+            The table name used (may be different if there was a conflict)
+        """
+        # Sanitize and ensure unique name
+        table_name = self.sanitize_table_name(table_name)
+        original_name = table_name
+        counter = 1
+        while table_name in self.loaded_tables:
+            table_name = f"{original_name}_{counter}"
+            counter += 1
+
+        # Register in database
+        if self.connection_type == 'sqlite':
+            df.to_sql(table_name, self.conn, index=False, if_exists='replace')
+        else:  # duckdb
+            self.conn.register(table_name, df)
+
+        # Track the table
+        self.loaded_tables[table_name] = source
+        self.table_columns[table_name] = df.columns.tolist()
+
+        return table_name
+
+    def get_all_table_columns(self):
+        """
+        Get all table and column names for autocompletion.
+
+        Returns:
+            List of completion words (table names and column names)
+        """
+        # Start with table names
+        completion_words = set(self.loaded_tables.keys())
+
+        # Track column data types for smarter autocompletion
+        column_data_types = {}  # {table.column: data_type}
+
+        # Detect potential table relationships for JOIN suggestions
+        potential_relationships = []  # [(table1, column1, table2, column2)]
+
+        # Add column names with and without table prefixes, up to reasonable limits
+        MAX_COLUMNS_PER_TABLE = 100  # Limit columns to prevent memory issues
+        MAX_TABLES_WITH_COLUMNS = 20  # Limit the number of tables to process
+
+        # Sort tables by name to ensure consistent behavior
+        table_items = sorted(list(self.table_columns.items()))
+
+        # Process only a limited number of tables
+        for table, columns in table_items[:MAX_TABLES_WITH_COLUMNS]:
+            # Add each column name by itself
+            for col in columns[:MAX_COLUMNS_PER_TABLE]:
+                completion_words.add(col)
+
+            # Add qualified column names (table.column)
+            for col in columns[:MAX_COLUMNS_PER_TABLE]:
+                completion_words.add(f"{table}.{col}")
+
+            # Try to infer table relationships based on column naming
+            self._detect_relationships(table, columns, potential_relationships)
+
+            # Try to infer column data types when possible
+            if self.is_connected():
+                try:
+                    self._detect_column_types(table, column_data_types)
+                except Exception:
+                    pass
+
+        # Add common SQL functions and aggregations with context-aware completions
+        sql_functions = [
+            # Aggregation functions with completed parentheses
+            "COUNT(*)", "COUNT(DISTINCT ", "SUM(", "AVG(", "MIN(", "MAX(",
+
+            # String functions
+            "CONCAT(", "SUBSTR(", "LOWER(", "UPPER(", "TRIM(", "REPLACE(", "LENGTH(",
+            "REGEXP_REPLACE(", "REGEXP_EXTRACT(", "REGEXP_MATCH(",
+
+            # Date/time functions
+            "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "NOW()",
+            "EXTRACT(", "DATE_TRUNC(", "DATE_PART(", "DATEADD(", "DATEDIFF(",
+
+            # Type conversion
+            "CAST( AS ", "CONVERT(", "TRY_CAST( AS ", "FORMAT(",
+
+            # Conditional functions
+            "COALESCE(", "NULLIF(", "GREATEST(", "LEAST(", "IFF(", "IFNULL(",
+
+            # Window functions
+            "ROW_NUMBER() OVER (", "RANK() OVER (", "DENSE_RANK() OVER (",
+            "LEAD( OVER (", "LAG( OVER (", "FIRST_VALUE( OVER (", "LAST_VALUE( OVER ("
+        ]
+
+        # Add common SQL patterns with context awareness
+        sql_patterns = [
+            # Basic query patterns
+            "SELECT * FROM ", "SELECT COUNT(*) FROM ",
+            "SELECT DISTINCT ", "GROUP BY ", "ORDER BY ", "HAVING ",
+            "LIMIT ", "OFFSET ", "WHERE ",
+
+            # JOIN patterns - complete with ON and common join points
+            "INNER JOIN ", "LEFT JOIN ", "RIGHT JOIN ", "FULL OUTER JOIN ",
+            "LEFT OUTER JOIN ", "RIGHT OUTER JOIN ", "CROSS JOIN ",
+
+            # Advanced patterns
+            "WITH _ AS (", "CASE WHEN _ THEN _ ELSE _ END",
+            "OVER (PARTITION BY _ ORDER BY _)",
+            "EXISTS (SELECT 1 FROM _ WHERE _)",
+            "NOT EXISTS (SELECT 1 FROM _ WHERE _)",
+
+            # Common operator patterns
+            "BETWEEN _ AND _", "IN (", "NOT IN (", "IS NULL", "IS NOT NULL",
+            "LIKE '%_%'", "NOT LIKE ", "ILIKE ",
+
+            # Data manipulation patterns
+            "INSERT INTO _ VALUES (", "INSERT INTO _ (_) VALUES (_)",
+            "UPDATE _ SET _ = _ WHERE _", "DELETE FROM _ WHERE _"
+        ]
+
+        # Add table relationships as suggested JOIN patterns
+        for table1, col1, table2, col2 in potential_relationships:
+            join_pattern = f"JOIN {table2} ON {table1}.{col1} = {table2}.{col2}"
+            completion_words.add(join_pattern)
+
+            # Also add the reverse relationship
+            join_pattern_rev = f"JOIN {table1} ON {table2}.{col2} = {table1}.{col1}"
+            completion_words.add(join_pattern_rev)
+
+        # Add all SQL extras to the completion words
+        completion_words.update(sql_functions)
+        completion_words.update(sql_patterns)
+
+        # Add common data-specific comparison patterns based on column types
+        for col_name, data_type in column_data_types.items():
+            if 'INT' in data_type.upper() or 'NUM' in data_type.upper() or 'FLOAT' in data_type.upper():
+                # Numeric columns
+                completion_words.add(f"{col_name} > ")
+                completion_words.add(f"{col_name} < ")
+                completion_words.add(f"{col_name} >= ")
+                completion_words.add(f"{col_name} <= ")
+                completion_words.add(f"{col_name} BETWEEN ")
+            elif 'DATE' in data_type.upper() or 'TIME' in data_type.upper():
+                # Date/time columns
+                completion_words.add(f"{col_name} > CURRENT_DATE")
+                completion_words.add(f"{col_name} < CURRENT_DATE")
+                completion_words.add(f"{col_name} BETWEEN CURRENT_DATE - INTERVAL ")
+                completion_words.add(f"EXTRACT(YEAR FROM {col_name})")
+                completion_words.add(f"DATE_TRUNC('month', {col_name})")
+            elif 'CHAR' in data_type.upper() or 'TEXT' in data_type.upper() or 'VARCHAR' in data_type.upper():
+                # String columns
+                completion_words.add(f"{col_name} LIKE '%")
+                completion_words.add(f"{col_name} ILIKE '%")
+                completion_words.add(f"LOWER({col_name}) = ")
+                completion_words.add(f"UPPER({col_name}) = ")
+
+        # Convert set back to list and sort for better usability
+        completion_list = list(completion_words)
+        completion_list.sort(key=lambda x: (not x.isupper(), x))  # Prioritize SQL keywords
+
+        return completion_list
+
+    def _detect_relationships(self, table, columns, potential_relationships):
+        """
+        Detect potential relationships between tables based on column naming patterns.
+
+        Args:
+            table: Current table name
+            columns: List of column names in this table
+            potential_relationships: List to populate with detected relationships
+        """
+        # Look for columns that might be foreign keys (common patterns)
+        for col in columns:
+            # Common ID patterns: table_id, tableId, TableID, etc.
+            if col.lower().endswith('_id') or col.lower().endswith('id'):
+                # Extract potential table name from column name
+                if col.lower().endswith('_id'):
+                    potential_table = col[:-3]  # Remove '_id'
+                else:
+                    # Try to extract tablename from camelCase or PascalCase
+                    potential_table = col[:-2]  # Remove 'Id'
+
+                # Normalize to lowercase for comparison
+                potential_table = potential_table.lower()
+
+                # Check if this potential table exists in our loaded tables
+                for existing_table in self.loaded_tables.keys():
+                    # Normalize for comparison
+                    existing_lower = existing_table.lower()
+
+                    # If we find a matching table, it's likely a relationship
+                    if existing_lower == potential_table or existing_lower.endswith(f"_{potential_table}"):
+                        # Add this relationship
+                        # We assume the target column in the referenced table is 'id'
+                        potential_relationships.append((table, col, existing_table, 'id'))
+                        break
+
+            # Also detect columns with same name across tables (potential join points)
+            for other_table, other_columns in self.table_columns.items():
+                if other_table != table and col in other_columns:
+                    # Same column name in different tables - potential join point
+                    potential_relationships.append((table, col, other_table, col))
+
+    def _detect_column_types(self, table, column_data_types):
+        """
+        Detect column data types for a table to enable smarter autocompletion.
+
+        Args:
+            table: Table name to analyze
+            column_data_types: Dictionary to populate with column data types
+        """
+        if not self.is_connected():
+            return
+
+        try:
+            if self.connection_type == 'sqlite':
+                # Get column info from SQLite
+                cursor = self.conn.cursor()
+                cursor.execute(f"PRAGMA table_info({table})")
+                columns_info = cursor.fetchall()
+
+                for column_info in columns_info:
+                    col_name = column_info[1]  # Column name is at index 1
+                    data_type = column_info[2]  # Data type is at index 2
+
+                    # Store as table.column: data_type for qualified lookups
+                    column_data_types[f"{table}.{col_name}"] = data_type
+                    # Also store just column: data_type for unqualified lookups
+                    column_data_types[col_name] = data_type
+
+            elif self.connection_type == 'duckdb':
+                # Get column info from DuckDB
+                query = f"""
+                    SELECT column_name, data_type
+                    FROM information_schema.columns
+                    WHERE table_name='{table}' AND table_schema='main'
+                """
+                result = self.conn.execute(query).fetchdf()
+
+                for _, row in result.iterrows():
+                    col_name = row['column_name']
+                    data_type = row['data_type']
+
+                    # Store as table.column: data_type for qualified lookups
+                    column_data_types[f"{table}.{col_name}"] = data_type
+                    # Also store just column: data_type for unqualified lookups
+                    column_data_types[col_name] = data_type
+        except Exception:
+            # Ignore errors in type detection - this is just for enhancement
+            pass
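
For reference, a minimal usage sketch of the DatabaseManager class added in this release. The import path is an assumption for illustration only (the diff does not show which module inside the sqlshell package contains this file); the method calls themselves match the API shown above.

    # Hypothetical usage sketch; the import path below is assumed, not confirmed by this diff.
    from sqlshell import DatabaseManager

    manager = DatabaseManager()
    manager.create_memory_connection()  # starts an in-memory DuckDB session

    # load_file returns the registered table name and the loaded DataFrame
    table_name, df = manager.load_file("sales.csv")

    # execute_query returns a pandas DataFrame
    result = manager.execute_query(f"SELECT COUNT(*) AS n FROM {table_name}")
    print(result)

    manager.close_connection()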