sqlshell-0.4.4-py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (54)
  1. sqlshell/__init__.py +84 -0
  2. sqlshell/__main__.py +4926 -0
  3. sqlshell/ai_autocomplete.py +392 -0
  4. sqlshell/ai_settings_dialog.py +337 -0
  5. sqlshell/context_suggester.py +768 -0
  6. sqlshell/create_test_data.py +152 -0
  7. sqlshell/data/create_test_data.py +137 -0
  8. sqlshell/db/__init__.py +6 -0
  9. sqlshell/db/database_manager.py +1318 -0
  10. sqlshell/db/export_manager.py +188 -0
  11. sqlshell/editor.py +1166 -0
  12. sqlshell/editor_integration.py +127 -0
  13. sqlshell/execution_handler.py +421 -0
  14. sqlshell/menus.py +262 -0
  15. sqlshell/notification_manager.py +370 -0
  16. sqlshell/query_tab.py +904 -0
  17. sqlshell/resources/__init__.py +1 -0
  18. sqlshell/resources/icon.png +0 -0
  19. sqlshell/resources/logo_large.png +0 -0
  20. sqlshell/resources/logo_medium.png +0 -0
  21. sqlshell/resources/logo_small.png +0 -0
  22. sqlshell/resources/splash_screen.gif +0 -0
  23. sqlshell/space_invaders.py +501 -0
  24. sqlshell/splash_screen.py +405 -0
  25. sqlshell/sqlshell/__init__.py +5 -0
  26. sqlshell/sqlshell/create_test_data.py +118 -0
  27. sqlshell/sqlshell/create_test_databases.py +96 -0
  28. sqlshell/sqlshell_demo.png +0 -0
  29. sqlshell/styles.py +257 -0
  30. sqlshell/suggester_integration.py +330 -0
  31. sqlshell/syntax_highlighter.py +124 -0
  32. sqlshell/table_list.py +996 -0
  33. sqlshell/ui/__init__.py +6 -0
  34. sqlshell/ui/bar_chart_delegate.py +49 -0
  35. sqlshell/ui/filter_header.py +469 -0
  36. sqlshell/utils/__init__.py +16 -0
  37. sqlshell/utils/profile_cn2.py +1661 -0
  38. sqlshell/utils/profile_column.py +2635 -0
  39. sqlshell/utils/profile_distributions.py +616 -0
  40. sqlshell/utils/profile_entropy.py +347 -0
  41. sqlshell/utils/profile_foreign_keys.py +779 -0
  42. sqlshell/utils/profile_keys.py +2834 -0
  43. sqlshell/utils/profile_ohe.py +934 -0
  44. sqlshell/utils/profile_ohe_advanced.py +754 -0
  45. sqlshell/utils/profile_ohe_comparison.py +237 -0
  46. sqlshell/utils/profile_prediction.py +926 -0
  47. sqlshell/utils/profile_similarity.py +876 -0
  48. sqlshell/utils/search_in_df.py +90 -0
  49. sqlshell/widgets.py +400 -0
  50. sqlshell-0.4.4.dist-info/METADATA +441 -0
  51. sqlshell-0.4.4.dist-info/RECORD +54 -0
  52. sqlshell-0.4.4.dist-info/WHEEL +5 -0
  53. sqlshell-0.4.4.dist-info/entry_points.txt +2 -0
  54. sqlshell-0.4.4.dist-info/top_level.txt +1 -0
sqlshell/utils/profile_ohe_comparison.py
@@ -0,0 +1,237 @@
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import warnings
+ from collections import Counter
+ warnings.filterwarnings('ignore')
+
+ # Import OHE functions
+ try:
+     from .profile_ohe import get_ohe  # Basic OHE
+     from .profile_ohe_advanced import get_advanced_ohe, analyze_concept_correlations  # Advanced OHE
+ except ImportError:
+     # Try without relative imports
+     import sys
+     import os
+     sys.path.insert(0, os.path.dirname(__file__))
+     from profile_ohe import get_ohe
+     from profile_ohe_advanced import get_advanced_ohe, analyze_concept_correlations
+
+ # Optional: Word embeddings support
+ try:
+     from sentence_transformers import SentenceTransformer
+     from sklearn.metrics.pairwise import cosine_similarity
+     from sklearn.cluster import AgglomerativeClustering
+     EMBEDDINGS_AVAILABLE = True
+ except ImportError:
+     EMBEDDINGS_AVAILABLE = False
+     print("Note: sentence-transformers not available. Install with: pip install sentence-transformers")
+
+ class EmbeddingAnalyzer:
+     """
+     Advanced semantic analysis using transformer-based embeddings.
+     """
+
+     def __init__(self, model_name='all-MiniLM-L6-v2'):
+         """Initialize embedding analyzer with pre-trained model."""
+         if not EMBEDDINGS_AVAILABLE:
+             self.model = None
+             return
+
+         try:
+             self.model = SentenceTransformer(model_name)
+         except Exception as e:
+             print(f"Failed to load embedding model: {e}")
+             self.model = None
+
+     def extract_semantic_clusters_embeddings(self, texts, n_clusters=8):
+         """
+         Extract semantic clusters using sentence embeddings.
+         More sophisticated than TF-IDF clustering.
+         """
+         if self.model is None or not texts:
+             return []
+
+         try:
+             # Generate embeddings for all texts
+             embeddings = self.model.encode(texts)
+
+             # Perform hierarchical clustering
+             clustering = AgglomerativeClustering(
+                 n_clusters=min(n_clusters, len(texts)),
+                 linkage='ward'
+             )
+             cluster_labels = clustering.fit_predict(embeddings)
+
+             # Group texts by cluster
+             clusters = {}
+             for i, label in enumerate(cluster_labels):
+                 if label not in clusters:
+                     clusters[label] = []
+                 clusters[label].append(texts[i])
+
+             # Extract representative terms from each cluster
+             cluster_concepts = []
+             for cluster_id, cluster_texts in clusters.items():
+                 if len(cluster_texts) >= 2:  # Only clusters with multiple texts
+                     # Get most common words across cluster texts
+                     all_words = []
+                     for text in cluster_texts:
+                         words = text.lower().split()
+                         all_words.extend([w for w in words if len(w) > 3])
+
+                     common_words = Counter(all_words).most_common(5)
+
+                     cluster_concepts.append({
+                         'name': f"embedding_cluster_{cluster_id}",
+                         'words': [word for word, _ in common_words],
+                         'texts': cluster_texts,
+                         'size': len(cluster_texts)
+                     })
+
+             return cluster_concepts
+
+         except Exception as e:
+             print(f"Embedding clustering failed: {e}")
+             return []
+
+
+ def demo_advanced_algorithms():
+     """Demonstrate the power of advanced algorithms with AI-related text."""
+     print("\n" + "="*80)
+     print("DEMONSTRATION: Advanced OHE Algorithms vs Basic Approach")
+     print("="*80)
+
+     # Create an AI/ML-focused dataset (the motivating use case)
+     ai_data = {
+         'description': [
+             "Machine learning engineer developing neural networks for computer vision applications",
+             "AI researcher working on natural language processing and large language models",
+             "Data scientist implementing deep learning algorithms for predictive analytics",
+             "Software engineer building recommendation systems using collaborative filtering",
+             "ML ops engineer deploying artificial intelligence models to cloud infrastructure",
+             "Computer vision specialist creating object detection systems for autonomous vehicles",
+             "NLP engineer developing chatbots and conversational AI systems",
+             "Deep learning researcher working on transformer architectures and attention mechanisms",
+             "AI product manager overseeing machine learning product development lifecycle",
+             "Data engineer building pipelines for real-time AI model inference and training"
+         ]
+     }
+
+     df = pd.DataFrame(ai_data)
+
+     print("\nOriginal Data:")
+     for i, desc in enumerate(ai_data['description'][:3]):
+         print(f"{i+1}. {desc}")
+     print("... (and 7 more)")
+
+     # Test Basic OHE
+     print("\n" + "-"*50)
+     print("BASIC OHE RESULTS:")
+     print("-"*50)
+     basic_result = get_ohe(df.copy(), 'description', binary_format="numeric")
+     basic_features = [col for col in basic_result.columns if col.startswith('has_')]
+     print(f"Features created: {len(basic_features)}")
+     for feature in basic_features:
+         coverage = (basic_result[feature] == 1).sum() / len(basic_result) * 100
+         print(f" • {feature}: {coverage:.1f}% coverage")
+
+     # Test Advanced OHE
+     print("\n" + "-"*50)
+     print("ADVANCED OHE RESULTS:")
+     print("-"*50)
+     advanced_result = get_advanced_ohe(
+         df.copy(),
+         'description',
+         binary_format="numeric",
+         analysis_type="comprehensive",
+         max_features=15
+     )
+     advanced_features = [col for col in advanced_result.columns if col.startswith('has_')]
+     print(f"Features created: {len(advanced_features)}")
+
+     # Group features by type
+     feature_types = {}
+     for feature in advanced_features:
+         if 'topic_lda' in feature:
+             feature_types.setdefault('LDA Topics', []).append(feature)
+         elif 'topic_nmf' in feature:
+             feature_types.setdefault('NMF Topics', []).append(feature)
+         elif 'semantic_cluster' in feature:
+             feature_types.setdefault('Semantic Clusters', []).append(feature)
+         elif 'domain_' in feature:
+             feature_types.setdefault('Domain Concepts', []).append(feature)
+         elif 'ngram_' in feature:
+             feature_types.setdefault('Key N-grams', []).append(feature)
+         else:
+             feature_types.setdefault('Other', []).append(feature)
+
+     for ftype, features in feature_types.items():
+         print(f"\n{ftype}:")
+         for feature in features[:3]:  # Show first 3 of each type
+             coverage = (advanced_result[feature] == 1).sum() / len(advanced_result) * 100
+             print(f" • {feature}: {coverage:.1f}% coverage")
+         if len(features) > 3:
+             print(f" ... and {len(features) - 3} more")
+
+     # Analyze correlations
+     print("\n" + "-"*50)
+     print("CORRELATION ANALYSIS:")
+     print("-"*50)
+     correlation_analysis = analyze_concept_correlations(advanced_result, advanced_features)
+
+     if correlation_analysis and correlation_analysis.get('strong_correlations'):
+         print("Strong correlations found (shows semantic relationships):")
+         for corr in correlation_analysis['strong_correlations'][:5]:
+             print(f" • {corr['feature1']} ↔ {corr['feature2']}: {corr['correlation']:.3f}")
+
+         print("\nThis shows the advanced algorithm captured semantic relationships!")
+         print("For example, 'AI' and 'machine learning' concepts are properly linked.")
+     else:
+         print("No strong correlations found - features are orthogonal")
+
+     # Show a specific example of AI correlation capture
+     print("\n" + "-"*50)
+     print("AI CORRELATION ANALYSIS (Your Original Problem):")
+     print("-"*50)
+
+     # Check which features capture AI-related concepts
+     ai_related_features = []
+     for feature in advanced_features:
+         feature_name = feature.lower()
+         if any(term in feature_name for term in ['ai', 'artificial', 'intelligence', 'machine', 'learning', 'neural', 'deep']):
+             ai_related_features.append(feature)
+
+     if ai_related_features:
+         print(f"Found {len(ai_related_features)} AI-related features:")
+         for feature in ai_related_features:
+             coverage = (advanced_result[feature] == 1).sum() / len(advanced_result) * 100
+             print(f" • {feature}: {coverage:.1f}% coverage")
+
+         # Show which descriptions match these features
+         print("\nDescriptions matching AI-related features:")
+         for i, desc in enumerate(ai_data['description']):
+             matches = []
+             for feature in ai_related_features:
+                 if advanced_result.iloc[i][feature] == 1:
+                     matches.append(feature.replace('has_', ''))
+             if matches:
+                 print(f" {i+1}. '{desc[:50]}...' → {', '.join(matches[:2])}")
+     else:
+         print("No explicit AI-related features found in feature names")
+         print("However, topic modeling may have captured these concepts in broader themes")
+
+     print("\n" + "="*80)
+     print("CONCLUSION: Advanced algorithms provide much richer semantic understanding!")
+     print("• Basic OHE: Only captures individual word frequency")
+     print("• Advanced OHE: Captures topics, themes, semantic clusters, and domain concepts")
+     print("• This solves your AI correlation problem by grouping related concepts!")
+     print("="*80)
+
+     return basic_result, advanced_result
+
+
+ if __name__ == "__main__":
+     # Run the demonstration
+     demo_advanced_algorithms()
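
Usage note: the module's __main__ block only runs demo_advanced_algorithms(); the EmbeddingAnalyzer class added above is never exercised in this file. A minimal sketch of calling it directly, assuming the package is installed and sentence-transformers is available; the sample texts and cluster count here are illustrative, not from the package:

from sqlshell.utils.profile_ohe_comparison import EmbeddingAnalyzer

# Loads the default 'all-MiniLM-L6-v2' model; if sentence-transformers is
# missing or the model fails to load, self.model stays None and clustering
# returns an empty list.
analyzer = EmbeddingAnalyzer()

texts = [  # hypothetical sample texts
    "Machine learning engineer developing neural networks",
    "AI researcher working on large language models",
    "Accountant preparing quarterly financial statements",
]

# Each returned dict has 'name', 'words', 'texts', and 'size' keys; only
# clusters containing at least two texts are reported.
for cluster in analyzer.extract_semantic_clusters_embeddings(texts, n_clusters=2):
    print(cluster['name'], cluster['words'])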