sqlshell 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlshell might be problematic. Click here for more details.

@@ -0,0 +1,547 @@
1
+ import sys
2
+ import itertools
3
+ import pandas as pd
4
+ from typing import List, Dict, Tuple, Set, Callable
5
+ from PyQt6.QtWidgets import (
6
+ QApplication, QWidget, QVBoxLayout, QLabel, QTableWidget, QTableWidgetItem, QHeaderView, QTabWidget, QMainWindow,
7
+ QPushButton, QHBoxLayout, QMessageBox
8
+ )
9
+ from PyQt6.QtCore import Qt
10
+
11
+
12
def find_foreign_keys(dfs: List[pd.DataFrame], df_names: List[str], min_match_ratio: float = 0.95):
    """
    Discover potential foreign key relationships between DataFrames.

    A column is a primary-key candidate when every row holds a distinct,
    non-null value. Each column of every *other* DataFrame whose dtype is
    exactly equal to the candidate's dtype is then tested as a referencing
    column. The match ratio is the share of the referencing column's distinct
    non-null values that also occur in the candidate key.

    Columns whose values are unhashable (for example lists or dicts) cannot be
    compared as sets; they are skipped instead of aborting the analysis.

    Parameters:
    - dfs: List of pandas DataFrames to analyze
    - df_names: Names of the DataFrames (used for reporting), aligned with dfs
    - min_match_ratio: Minimum ratio of matching values to consider a foreign key

    Returns:
    - List of tuples (referenced_table, referenced_column, referencing_table,
      referencing_column, match_ratio), sorted by match ratio (descending)
    """
    # Distinct non-null values per (table index, column), built at most once.
    # A cached None marks a column whose values are unhashable.
    # Trade-off: the sets stay alive for the duration of the call.
    value_cache = {}

    def distinct_values(table_idx, col):
        cache_key = (table_idx, col)
        if cache_key not in value_cache:
            try:
                value_cache[cache_key] = set(dfs[table_idx][col].dropna())
            except TypeError:
                value_cache[cache_key] = None
        return value_cache[cache_key]

    def is_pk_candidate(df, col):
        try:
            return bool(df[col].nunique() == len(df) and not df[col].isna().any())
        except TypeError:
            # Unhashable values cannot be checked for uniqueness
            return False

    # Candidates are stored by table position rather than by name, so two
    # tables that happen to share a name never mix their column lists.
    pk_candidates = [
        [col for col in df.columns if is_pk_candidate(df, col)]
        for df in dfs
    ]

    foreign_keys = []

    for i, df1 in enumerate(dfs):
        name1 = df_names[i]

        for pk_col in pk_candidates[i]:
            pk_values = distinct_values(i, pk_col)
            if pk_values is None:
                continue
            pk_dtype = df1[pk_col].dtype

            # Check every other DataFrame for matching columns
            for j, df2 in enumerate(dfs):
                name2 = df_names[j]

                # Skip self-references
                if i == j:
                    continue

                for fk_col in df2.columns:
                    # Only columns with exactly the same dtype are compared
                    if pk_dtype != df2[fk_col].dtype:
                        continue

                    fk_values = distinct_values(j, fk_col)

                    # Skip empty (or unhashable) columns
                    if not fk_values:
                        continue

                    match_ratio = len(fk_values & pk_values) / len(fk_values)

                    if match_ratio >= min_match_ratio:
                        foreign_keys.append((name1, pk_col, name2, fk_col, match_ratio))

    # Sort by match ratio (descending); the sort is stable, so ties keep
    # their discovery order
    foreign_keys.sort(key=lambda fk: fk[4], reverse=True)
    return foreign_keys
81
+
82
+
83
def find_inclusion_dependencies(dfs: List[pd.DataFrame], df_names: List[str], min_match_ratio: float = 0.8):
    """
    Find inclusion dependencies (more general than foreign keys) between DataFrames.
    An inclusion dependency exists when values in one column are a subset of values in another column.

    Every ordered pair of distinct DataFrames is examined. For each pair of
    columns with exactly the same dtype, the match ratio is the share of the
    referencing column's distinct non-null values that also occur in the
    referenced column.

    Columns whose values are unhashable (for example lists or dicts) cannot be
    compared as sets; they are skipped instead of aborting the analysis.

    Parameters:
    - dfs: List of pandas DataFrames to analyze
    - df_names: Names of the DataFrames, aligned with dfs
    - min_match_ratio: Minimum ratio of matching values

    Returns:
    - List of tuples (referenced_table, referenced_column, referencing_table,
      referencing_column, match_ratio), sorted by match ratio (descending)
    """
    # Distinct non-null values per (table index, column), built at most once
    # instead of once per column pair. A cached None marks an unhashable column.
    # Trade-off: the sets stay alive for the duration of the call.
    value_cache = {}

    def distinct_values(table_idx, col):
        cache_key = (table_idx, col)
        if cache_key not in value_cache:
            try:
                value_cache[cache_key] = set(dfs[table_idx][col].dropna())
            except TypeError:
                value_cache[cache_key] = None
        return value_cache[cache_key]

    dependencies = []

    for i, df1 in enumerate(dfs):
        name1 = df_names[i]

        for j, df2 in enumerate(dfs):
            name2 = df_names[j]

            # A table is never compared with itself
            if i == j:
                continue

            for col1 in df1.columns:
                values1 = distinct_values(i, col1)

                # Skip empty (or unhashable) referenced columns
                if not values1:
                    continue
                dtype1 = df1[col1].dtype

                for col2 in df2.columns:
                    # Only columns with exactly the same dtype are compared
                    if dtype1 != df2[col2].dtype:
                        continue

                    values2 = distinct_values(j, col2)

                    # Skip empty (or unhashable) referencing columns
                    if not values2:
                        continue

                    # How much of values2 is contained in values1
                    match_ratio = len(values2 & values1) / len(values2)

                    if match_ratio >= min_match_ratio:
                        dependencies.append((name1, col1, name2, col2, match_ratio))

    # Sort by match ratio (descending); ties keep their discovery order
    dependencies.sort(key=lambda dep: dep[4], reverse=True)
    return dependencies
141
+
142
+
143
def profile_referential_integrity(dfs: List[pd.DataFrame], df_names: List[str], foreign_keys):
    """
    Profile the referential integrity of discovered foreign keys.

    Parameters:
    - dfs: List of pandas DataFrames
    - df_names: Names of the DataFrames, aligned with dfs
    - foreign_keys: List of foreign key relationships as 5-tuples
      (referenced_table, referenced_column, referencing_table,
      referencing_column, match_ratio); the ratio is ignored here

    Returns:
    - Dictionary keyed by (referenced_table, referenced_column,
      referencing_table, referencing_column). Each value is a dict with:
        'violation_count': number of DISTINCT foreign key values missing
                           from the referenced column
        'violating_rows':  number of non-null foreign key ROWS holding such
                           a value
        'total_fk_values': number of non-null foreign key rows
        'violation_ratio': violating_rows / total_fk_values (0 when the
                           column has no non-null rows)
        'violations':      up to 10 example violating values
    """
    integrity_results = {}

    # Create lookup for DataFrames by name
    df_dict = dict(zip(df_names, dfs))

    for pk_table, pk_col, fk_table, fk_col, _ in foreign_keys:
        pk_values = set(df_dict[pk_table][pk_col])

        # Null foreign keys reference nothing, so they are not violations
        fk_series = df_dict[fk_table][fk_col].dropna()

        # Distinct values that violate referential integrity
        violations = set(fk_series) - pk_values

        total_fk_values = len(fk_series)

        # Count rows, not distinct values, so that numerator and denominator
        # of the ratio share the same unit. Dividing the distinct count by
        # the row count under-reports whenever a bad value repeats.
        if violations:
            violating_rows = int(fk_series.isin(list(violations)).sum())
        else:
            violating_rows = 0
        violation_ratio = violating_rows / total_fk_values if total_fk_values > 0 else 0

        integrity_results[(pk_table, pk_col, fk_table, fk_col)] = {
            'violation_count': len(violations),
            'violating_rows': violating_rows,
            'violation_ratio': violation_ratio,
            'total_fk_values': total_fk_values,
            # Only keep the first 10 violations for display; islice avoids
            # copying the whole set just to slice it
            'violations': list(itertools.islice(violations, 10)),
        }

    return integrity_results
188
+
189
+
190
def profile_foreign_keys(dfs: List[pd.DataFrame], df_names: List[str] = None, min_match_ratio: float = 0.95):
    """
    Analyze a list of pandas DataFrames to discover foreign key relationships.

    Parameters:
    - dfs: List of pandas DataFrames to analyze
    - df_names: Optional list of names for the DataFrames. If None, names
      "Table_1", "Table_2", ... are generated.
    - min_match_ratio: Minimum ratio of matching values to consider a foreign key.
      Inclusion dependencies use 80% of this value as their threshold.

    Returns:
    - Tuple of (foreign_keys, inclusion_dependencies, integrity_results)

    Raises:
    - ValueError: if the number of names differs from the number of DataFrames
    """
    # Generate default names if not provided
    if df_names is None:
        df_names = [f"Table_{i+1}" for i in range(len(dfs))]

    # Validate with an explicit exception: an assert would be stripped when
    # Python runs with -O, letting mismatched inputs through
    if len(dfs) != len(df_names):
        raise ValueError("Number of DataFrames must match number of names")

    # Find foreign keys
    foreign_keys = find_foreign_keys(dfs, df_names, min_match_ratio)

    # Find more general inclusion dependencies, using a looser threshold
    inclusion_dependencies = find_inclusion_dependencies(dfs, df_names, min_match_ratio * 0.8)

    # Profile referential integrity of the discovered foreign keys
    integrity_results = profile_referential_integrity(dfs, df_names, foreign_keys)

    return foreign_keys, inclusion_dependencies, integrity_results
219
+
220
+
221
def _fk_text_item(value):
    """Build a table cell that shows *value* as text."""
    # Labels are converted explicitly: DataFrame column labels need not be
    # strings (e.g. integer column names), and only a string is taken by
    # QTableWidgetItem as the cell text.
    return QTableWidgetItem(str(value))


def _fk_match_ratio_item(ratio, strong, fair):
    """Build a percentage cell: green at/above *strong*, blue at/above *fair*, else yellow."""
    item = QTableWidgetItem(f"{ratio:.2%}")
    if ratio >= strong:
        item.setForeground(Qt.GlobalColor.darkGreen)
    elif ratio >= fair:
        item.setForeground(Qt.GlobalColor.darkBlue)
    else:
        item.setForeground(Qt.GlobalColor.darkYellow)
    return item


def _fk_result_table(row_count, headers):
    """Create a table whose columns stretch, except a 140px interactive last (action) column."""
    table = QTableWidget(row_count, len(headers))
    table.setHorizontalHeaderLabels(headers)
    table.horizontalHeader().setSectionResizeMode(QHeaderView.ResizeMode.Stretch)

    action_column = len(headers) - 1
    table.horizontalHeader().setSectionResizeMode(action_column, QHeaderView.ResizeMode.Interactive)
    table.setColumnWidth(action_column, 140)
    return table


def _fk_join_link_cell(join_query, on_click):
    """Build a cell widget holding a "Generate JOIN" hyperlink that calls on_click(join_query)."""
    cell = QWidget()
    cell_layout = QHBoxLayout(cell)
    cell_layout.setContentsMargins(0, 0, 0, 0)
    cell_layout.setSpacing(0)

    link = QLabel("<a href='#' style='color: #3498DB; font-weight: bold;'>Generate JOIN</a>")
    link.setTextFormat(Qt.TextFormat.RichText)
    link.setTextInteractionFlags(Qt.TextInteractionFlag.TextBrowserInteraction)
    link.setCursor(Qt.CursorShape.PointingHandCursor)
    link.setAlignment(Qt.AlignmentFlag.AlignCenter)

    # The query is bound as a default argument so each link keeps its own text
    link.linkActivated.connect(lambda _href, q=join_query: on_click(q))

    cell_layout.addWidget(link)
    return cell


def _fk_add_relationship_tab(tabs, tab_title, heading, relationships, strong, fair, on_join):
    """Add a tab listing (referenced, referencing, match ratio) rows, optionally with JOIN links."""
    tab = QWidget()
    tab_layout = QVBoxLayout()

    heading_label = QLabel(heading)
    heading_label.setStyleSheet("font-weight: bold;")
    tab_layout.addWidget(heading_label)

    table = _fk_result_table(len(relationships), [
        "Referenced Table", "Referenced Column", "Referencing Table", "Referencing Column", "Match Ratio", "Action"
    ])

    for row, (ref_table, ref_col, src_table, src_col, ratio) in enumerate(relationships):
        table.setItem(row, 0, _fk_text_item(ref_table))
        table.setItem(row, 1, _fk_text_item(ref_col))
        table.setItem(row, 2, _fk_text_item(src_table))
        table.setItem(row, 3, _fk_text_item(src_col))
        table.setItem(row, 4, _fk_match_ratio_item(ratio, strong, fair))

        if on_join is not None:
            # NOTE(review): names are interpolated unquoted, so a table or
            # column name containing spaces yields invalid SQL. Quoting rules
            # depend on the target SQL dialect - confirm before changing.
            join_query = f"SELECT * FROM {src_table} JOIN {ref_table} ON {src_table}.{src_col} = {ref_table}.{ref_col}"
            table.setCellWidget(row, 5, _fk_join_link_cell(join_query, on_join))

    tab_layout.addWidget(table)
    tab.setLayout(tab_layout)
    tabs.addTab(tab, tab_title)


def visualize_foreign_keys(dfs: List[pd.DataFrame], df_names: List[str] = None, min_match_ratio: float = 0.95,
                           on_generate_join: Callable = None, parent=None):
    """
    Create a visual representation of foreign key relationships between DataFrames.

    The window has three tabs: discovered foreign keys, inclusion dependencies,
    and referential integrity statistics for the discovered foreign keys. When
    a callback is supplied, every row gets a "Generate JOIN" link.

    Parameters:
    - dfs: List of pandas DataFrames to analyze
    - df_names: Optional list of names for the DataFrames. If None, names will be generated.
    - min_match_ratio: Minimum ratio of matching values to consider a foreign key
    - on_generate_join: Callback function that will be called when the Generate JOIN link is clicked.
                        It receives a JOIN query string as its argument. If None, no links are shown.
    - parent: Parent widget for the QMainWindow. Typically the main application window.

    Returns:
    - QMainWindow: The visualization window (already shown). The caller should
      keep a reference to it, since it may have no parent.
    """
    # Generate default names if not provided
    if df_names is None:
        df_names = [f"Table_{i+1}" for i in range(len(dfs))]

    # Get profile results
    foreign_keys, inclusion_dependencies, integrity_results = profile_foreign_keys(
        dfs, df_names, min_match_ratio
    )

    # Create main window
    window = QMainWindow(parent)
    window.setWindowTitle("Foreign Key Analysis")
    window.resize(900, 700)

    # Create central widget and layout
    central_widget = QWidget()
    window.setCentralWidget(central_widget)
    layout = QVBoxLayout(central_widget)

    # Add header
    header = QLabel(f"Analyzed {len(dfs)} tables with potential foreign key relationships")
    header.setAlignment(Qt.AlignmentFlag.AlignCenter)
    header.setStyleSheet("font-size: 14pt; font-weight: bold; margin: 10px;")
    layout.addWidget(header)

    # Add description
    description = QLabel(
        "This analysis helps identify potential foreign key relationships between tables. "
        "Foreign keys are columns in one table that reference the primary key of another table. "
        "The match ratio indicates how many values in the foreign key column exist in the referenced column."
    )
    description.setAlignment(Qt.AlignmentFlag.AlignCenter)
    description.setWordWrap(True)
    description.setStyleSheet("margin-bottom: 10px;")
    layout.addWidget(description)

    # Create tabs
    tabs = QTabWidget()

    # Forward the query to the caller's callback, then confirm to the user
    def handle_join_query(query):
        if on_generate_join:
            on_generate_join(query)
        QMessageBox.information(window, "JOIN Query Generated",
                                f"The following query has been added to the editor:\n\n{query}")

    # Links are only offered when the caller supplied a callback
    join_handler = handle_join_query if on_generate_join is not None else None

    # Tab for Foreign Keys
    _fk_add_relationship_tab(
        tabs, "Foreign Keys", "Potential Foreign Key Relationships",
        foreign_keys, 0.99, 0.9, join_handler
    )

    # Tab for Inclusion Dependencies
    _fk_add_relationship_tab(
        tabs, "Inclusion Dependencies",
        "Inclusion Dependencies (Values in one column are a subset of another)",
        inclusion_dependencies, 0.95, 0.8, join_handler
    )

    # Tab for Referential Integrity
    ri_tab = QWidget()
    ri_layout = QVBoxLayout()

    ri_header = QLabel("Referential Integrity Analysis")
    ri_header.setStyleSheet("font-weight: bold;")
    ri_layout.addWidget(ri_header)

    ri_description = QLabel(
        "This table shows referential integrity violations for discovered foreign keys. "
        "A violation occurs when a value in the foreign key column doesn't exist in the referenced column."
    )
    ri_description.setWordWrap(True)
    ri_layout.addWidget(ri_description)

    ri_table = _fk_result_table(len(integrity_results), [
        "Relationship", "Violations", "Total FK Values", "Violation %", "Example Violations", "Action"
    ])

    # Loop variables use distinct names so they never shadow a table widget
    for row, ((pk_table, pk_col, ref_table, ref_col), stats) in enumerate(integrity_results.items()):
        relationship = f"{ref_table}.{ref_col} → {pk_table}.{pk_col}"

        ri_table.setItem(row, 0, QTableWidgetItem(relationship))
        ri_table.setItem(row, 1, QTableWidgetItem(str(stats['violation_count'])))
        ri_table.setItem(row, 2, QTableWidgetItem(str(stats['total_fk_values'])))

        # Format violation ratio with color coding
        ratio_item = QTableWidgetItem(f"{stats['violation_ratio']:.2%}")
        if stats['violation_ratio'] == 0:
            ratio_item.setForeground(Qt.GlobalColor.darkGreen)
        elif stats['violation_ratio'] < 0.01:
            ratio_item.setForeground(Qt.GlobalColor.darkBlue)
        else:
            ratio_item.setForeground(Qt.GlobalColor.darkRed)
        ri_table.setItem(row, 3, ratio_item)

        # Show example violations; the stored list may be a truncated sample
        examples = ', '.join(str(v) for v in stats['violations'])
        if stats['violation_count'] > len(stats['violations']):
            examples += f" (and {stats['violation_count'] - len(stats['violations'])} more)"
        ri_table.setItem(row, 4, QTableWidgetItem(examples))

        if join_handler is not None:
            # LEFT JOIN keeps referencing rows that have no match, which is
            # what makes the violations visible in the result
            join_query = f"SELECT * FROM {ref_table} LEFT JOIN {pk_table} ON {ref_table}.{ref_col} = {pk_table}.{pk_col}"
            ri_table.setCellWidget(row, 5, _fk_join_link_cell(join_query, join_handler))

    ri_layout.addWidget(ri_table)
    ri_tab.setLayout(ri_layout)
    tabs.addTab(ri_tab, "Referential Integrity")

    layout.addWidget(tabs)

    # Show the window
    window.show()
    return window
484
+
485
+
486
def test_profile_foreign_keys():
    """
    Demo: build four related sample tables and open the foreign key viewer.

    Starts a Qt application and exits the interpreter when the window closes.
    """
    import random

    # Customers table
    customer_ids = list(range(1, 21))
    customers_df = pd.DataFrame({
        "customer_id": customer_ids,
        "customer_name": [f"Customer {cid}" for cid in customer_ids],
        "city": [f"City {cid % 5}" for cid in customer_ids],
    })

    # Products table
    product_ids = list(range(101, 111))
    products_df = pd.DataFrame({
        "product_id": product_ids,
        "product_name": [f"Product {pid}" for pid in product_ids],
        "category": [f"Category {pid % 3}" for pid in product_ids],
    })

    # Fixed seed so the generated sample data is reproducible
    random.seed(42)

    # Orders table (references customers)
    order_ids = list(range(1001, 1101))
    order_customers = [random.randint(1, 20) for _ in range(100)]
    first_day = pd.Timestamp("2021-01-01")
    orders_df = pd.DataFrame({
        "order_id": order_ids,
        "customer_id": order_customers,
        "order_date": [first_day + pd.Timedelta(days=offset) for offset in range(100)],
    })

    # Order details table (references orders and products)
    detail_orders = [random.choice(order_ids) for _ in range(200)]
    detail_products = [random.choice(product_ids) for _ in range(200)]
    detail_quantities = [random.randint(1, 10) for _ in range(200)]
    order_details_df = pd.DataFrame({
        "order_detail_id": list(range(10001, 10201)),
        "order_id": detail_orders,
        "product_id": detail_products,
        "quantity": detail_quantities,
    })

    # Introduce referential integrity violations: the last five orders point
    # at customer IDs that do not exist
    orders_df.loc[95:99, "customer_id"] = [25, 26, 27, 28, 29]

    # Stand-in for the real editor integration: just print the query
    def print_join_query(query):
        print(f"Generated JOIN query: {query}")

    tables = [customers_df, products_df, orders_df, order_details_df]
    table_names = ["Customers", "Products", "Orders", "OrderDetails"]

    app = QApplication(sys.argv)
    # Keep a reference so the parentless window is not garbage collected
    window = visualize_foreign_keys(tables, table_names, min_match_ratio=0.9, on_generate_join=print_join_query)
    sys.exit(app.exec())
544
+
545
# Only run the demo when this file is executed directly as a script;
# importing the module must not start the Qt application.
if __name__ == "__main__":
    test_profile_foreign_keys()