sqlspec 0.11.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of sqlspec might be problematic.

Files changed (155)
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -644
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -462
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +217 -451
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +418 -498
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +592 -634
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +393 -436
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +549 -942
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -550
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +741 -0
  31. sqlspec/adapters/psycopg/driver.py +732 -733
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +243 -426
  35. sqlspec/base.py +220 -825
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/METADATA +100 -26
  137. sqlspec-0.12.0.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -330
  150. sqlspec/mixins.py +0 -306
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.0.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/statement/pipelines/validators/_performance.py
@@ -0,0 +1,703 @@
+"""Performance validator for SQL query optimization."""
+
+import logging
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Optional
+
+from sqlglot import expressions as exp
+from sqlglot.optimizer import (
+    eliminate_joins,
+    eliminate_subqueries,
+    merge_subqueries,
+    normalize_identifiers,
+    optimize_joins,
+    pushdown_predicates,
+    pushdown_projections,
+    simplify,
+)
+
+from sqlspec.exceptions import RiskLevel
+from sqlspec.statement.pipelines.validators.base import BaseValidator
+
+if TYPE_CHECKING:
+    from sqlspec.statement.pipelines.context import SQLProcessingContext
+
+__all__ = (
+    "JoinCondition",
+    "OptimizationOpportunity",
+    "PerformanceAnalysis",
+    "PerformanceConfig",
+    "PerformanceIssue",
+    "PerformanceValidator",
+)
+
+logger = logging.getLogger(__name__)
+
+# Constants
+DEEP_NESTING_THRESHOLD = 2
+
+
+@dataclass
+class PerformanceConfig:
+    """Configuration for performance validation."""
+
+    max_joins: int = 5
+    max_subqueries: int = 3
+    max_union_branches: int = 5
+    warn_on_cartesian: bool = True
+    warn_on_missing_index: bool = True
+    complexity_threshold: int = 50
+    analyze_execution_plan: bool = False
+
+    # SQLGlot optimization analysis
+    enable_optimization_analysis: bool = True
+    suggest_optimizations: bool = True
+    optimization_threshold: float = 0.2  # 20% potential improvement to flag
+    max_optimization_attempts: int = 3
+
+
+@dataclass
+class PerformanceIssue:
+    """Represents a performance issue found during validation."""
+
+    issue_type: str  # "cartesian", "excessive_joins", "missing_index", etc.
+    severity: str  # "warning", "error", "critical"
+    description: str
+    impact: str  # Expected performance impact
+    recommendation: str
+    location: "Optional[str]" = None  # SQL fragment
+
+
+@dataclass
+class JoinCondition:
+    """Information about a join condition."""
+
+    left_table: str
+    right_table: str
+    condition: "Optional[exp.Expression]"
+    join_type: str
+
+
+@dataclass
+class OptimizationOpportunity:
+    """Represents a potential optimization for the query."""
+
+    optimization_type: str  # "join_elimination", "predicate_pushdown", etc.
+    description: str
+    potential_improvement: float  # Estimated improvement factor (0.0 to 1.0)
+    complexity_reduction: int  # Estimated complexity score reduction
+    recommendation: str
+    optimized_sql: "Optional[str]" = None
+
+
+@dataclass
+class PerformanceAnalysis:
+    """Tracks performance metrics during AST traversal."""
+
+    # Join analysis
+    join_count: int = 0
+    join_types: "dict[str, int]" = field(default_factory=dict)
+    join_conditions: "list[JoinCondition]" = field(default_factory=list)
+    tables: "set[str]" = field(default_factory=set)
+
+    # Subquery analysis
+    subquery_count: int = 0
+    max_subquery_depth: int = 0
+    current_subquery_depth: int = 0
+    correlated_subqueries: int = 0
+
+    # Complexity metrics
+    where_conditions: int = 0
+    group_by_columns: int = 0
+    order_by_columns: int = 0
+    distinct_operations: int = 0
+    union_branches: int = 0
+
+    # Anti-patterns
+    select_star_count: int = 0
+    implicit_conversions: int = 0
+    non_sargable_predicates: int = 0
+
+    # SQLGlot optimization analysis
+    optimization_opportunities: "list[OptimizationOpportunity]" = field(default_factory=list)
+    original_complexity: int = 0
+    optimized_complexity: int = 0
+    potential_improvement: float = 0.0
+
+
+class PerformanceValidator(BaseValidator):
+    """Comprehensive query performance validator.
+
+    Validates query performance by detecting:
+    - Cartesian products
+    - Excessive joins
+    - Deep subquery nesting
+    - Performance anti-patterns
+    - High query complexity
+    """
+
+    def __init__(self, config: "Optional[PerformanceConfig]" = None) -> None:
+        """Initialize the performance validator.
+
+        Args:
+            config: Configuration for performance validation
+        """
+        super().__init__()
+        self.config = config or PerformanceConfig()
+
+    def validate(self, expression: "exp.Expression", context: "SQLProcessingContext") -> None:
+        """Validate SQL statement for performance issues.
+
+        Args:
+            expression: The SQL expression to validate
+            context: The SQL processing context
+        """
+
+        # Performance analysis state
+        analysis = PerformanceAnalysis()
+
+        # Single traversal for all checks
+        self._analyze_expression(expression, analysis)
+
+        # Calculate baseline complexity
+        analysis.original_complexity = self._calculate_complexity(analysis)
+
+        # Perform SQLGlot optimization analysis if enabled
+        if self.config.enable_optimization_analysis:
+            self._analyze_optimization_opportunities(expression, analysis, context)
+
+        # Check for cartesian products
+        if self.config.warn_on_cartesian:
+            cartesian_issues = self._check_cartesian_products(analysis)
+            for issue in cartesian_issues:
+                self.add_error(
+                    context,
+                    message=issue.description,
+                    code=issue.issue_type,
+                    risk_level=self._severity_to_risk_level(issue.severity),
+                    expression=expression,
+                )
+
+        # Check join complexity
+        if analysis.join_count > self.config.max_joins:
+            self.add_error(
+                context,
+                message=f"Query has {analysis.join_count} joins (max: {self.config.max_joins})",
+                code="excessive-joins",
+                risk_level=RiskLevel.MEDIUM,
+                expression=expression,
+            )
+
+        # Check subquery depth
+        if analysis.max_subquery_depth > self.config.max_subqueries:
+            self.add_error(
+                context,
+                message=f"Query has {analysis.max_subquery_depth} levels of subqueries",
+                code="deep-nesting",
+                risk_level=RiskLevel.MEDIUM,
+                expression=expression,
+            )
+
+        # Check for performance anti-patterns
+        pattern_issues = self._check_antipatterns(analysis)
+        for issue in pattern_issues:
+            self.add_error(
+                context,
+                message=issue.description,
+                code=issue.issue_type,
+                risk_level=self._severity_to_risk_level(issue.severity),
+                expression=expression,
+            )
+
+        # Calculate overall complexity score
+        complexity_score = self._calculate_complexity(analysis)
+
+        # Build metadata
+        context.metadata[self.__class__.__name__] = {
+            "complexity_score": complexity_score,
+            "join_analysis": {
+                "total_joins": analysis.join_count,
+                "join_types": dict(analysis.join_types),
+                "tables_involved": list(analysis.tables),
+            },
+            "subquery_analysis": {
+                "max_depth": analysis.max_subquery_depth,
+                "total_subqueries": analysis.subquery_count,
+                "correlated_subqueries": analysis.correlated_subqueries,
+            },
+            "optimization_analysis": {
+                "opportunities": [self._optimization_to_dict(opt) for opt in analysis.optimization_opportunities],
+                "original_complexity": analysis.original_complexity,
+                "optimized_complexity": analysis.optimized_complexity,
+                "potential_improvement": analysis.potential_improvement,
+                "optimization_enabled": self.config.enable_optimization_analysis,
+            },
+        }
+
+    @staticmethod
+    def _severity_to_risk_level(severity: str) -> RiskLevel:
+        """Convert severity string to RiskLevel."""
+        mapping = {
+            "critical": RiskLevel.CRITICAL,
+            "error": RiskLevel.HIGH,
+            "warning": RiskLevel.MEDIUM,
+            "info": RiskLevel.LOW,
+        }
+        return mapping.get(severity.lower(), RiskLevel.MEDIUM)
+
+    def _analyze_expression(self, expr: "exp.Expression", analysis: PerformanceAnalysis, depth: int = 0) -> None:
+        """Single-pass traversal to collect all performance metrics.
+
+        Args:
+            expr: Expression to analyze
+            analysis: Analysis state to update
+            depth: Current recursion depth
+        """
+        # Track subquery depth
+        if isinstance(expr, exp.Subquery):
+            analysis.subquery_count += 1
+            analysis.current_subquery_depth = max(analysis.current_subquery_depth, depth + 1)
+            analysis.max_subquery_depth = max(analysis.max_subquery_depth, analysis.current_subquery_depth)
+
+            # Check if correlated
+            if self._is_correlated_subquery(expr):
+                analysis.correlated_subqueries += 1
+
+        # Analyze joins
+        elif isinstance(expr, exp.Join):
+            analysis.join_count += 1
+            join_type = expr.args.get("kind", "INNER").upper()
+            analysis.join_types[join_type] = analysis.join_types.get(join_type, 0) + 1
+
+            # Extract join condition
+            condition = expr.args.get("on")
+            left_table = self._get_table_name(expr.parent) if expr.parent else "unknown"
+            right_table = self._get_table_name(expr.this)
+
+            analysis.join_conditions.append(
+                JoinCondition(left_table=left_table, right_table=right_table, condition=condition, join_type=join_type)
+            )
+
+            analysis.tables.add(left_table)
+            analysis.tables.add(right_table)
+
+        # Track other complexity factors
+        elif isinstance(expr, exp.Where):
+            analysis.where_conditions += len(list(expr.find_all(exp.Predicate)))
+
+        elif isinstance(expr, exp.Group):
+            analysis.group_by_columns += len(expr.expressions) if hasattr(expr, "expressions") else 0
+
+        elif isinstance(expr, exp.Order):
+            analysis.order_by_columns += len(expr.expressions) if hasattr(expr, "expressions") else 0
+
+        elif isinstance(expr, exp.Distinct):
+            analysis.distinct_operations += 1
+
+        elif isinstance(expr, exp.Union):
+            analysis.union_branches += 1
+
+        elif isinstance(expr, exp.Star):
+            analysis.select_star_count += 1
+
+        # Recursive traversal
+        for child in expr.args.values():
+            if isinstance(child, exp.Expression):
+                self._analyze_expression(child, analysis, depth)
+            elif isinstance(child, list):
+                for item in child:
+                    if isinstance(item, exp.Expression):
+                        self._analyze_expression(item, analysis, depth)
+
+    def _check_cartesian_products(self, analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
+        """Detect potential cartesian products from join analysis.
+
+        Args:
+            analysis: Performance analysis state
+
+        Returns:
+            List of cartesian product issues
+        """
+        issues = []
+
+        # Group joins by table pairs
+        join_graph: dict[str, set[str]] = defaultdict(set)
+        for condition in analysis.join_conditions:
+            if condition.condition is None:  # CROSS JOIN
+                issues.append(
+                    PerformanceIssue(
+                        issue_type="cartesian_product",
+                        severity="critical",
+                        description=f"Explicit CROSS JOIN between {condition.left_table} and {condition.right_table}",
+                        impact="Result set grows exponentially (MxN rows)",
+                        recommendation="Add join condition or use WHERE clause",
+                    )
+                )
+            else:
+                # Build join graph
+                join_graph[condition.left_table].add(condition.right_table)
+                join_graph[condition.right_table].add(condition.left_table)
+
+        # Check for disconnected tables (implicit cartesian)
+        if len(analysis.tables) > 1:
+            connected = self._find_connected_components(join_graph, analysis.tables)
+            if len(connected) > 1:
+                disconnected_tables = [list(component) for component in connected if len(component) > 0]
+                issues.append(
+                    PerformanceIssue(
+                        issue_type="implicit_cartesian",
+                        severity="critical",
+                        description=f"Tables form disconnected groups: {disconnected_tables}",
+                        impact="Implicit cartesian product between table groups",
+                        recommendation="Add join conditions between table groups",
+                    )
+                )
+
+        return issues
+
+    @staticmethod
+    def _check_antipatterns(analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
+        """Check for common performance anti-patterns.
+
+        Args:
+            analysis: Performance analysis state
+
+        Returns:
+            List of anti-pattern issues
+        """
+        issues = []
+
+        # SELECT * in production queries
+        if analysis.select_star_count > 0:
+            issues.append(
+                PerformanceIssue(
+                    issue_type="select_star",
+                    severity="info",  # Changed to info level
+                    description=f"Query uses SELECT * ({analysis.select_star_count} occurrences)",
+                    impact="Fetches unnecessary columns, breaks with schema changes",
+                    recommendation="Explicitly list required columns",
+                )
+            )
+
+        # Non-sargable predicates
+        if analysis.non_sargable_predicates > 0:
+            issues.append(
+                PerformanceIssue(
+                    issue_type="non_sargable",
+                    severity="warning",
+                    description=f"Query has {analysis.non_sargable_predicates} non-sargable predicates",
+                    impact="Cannot use indexes effectively",
+                    recommendation="Rewrite predicates to be sargable (avoid functions on columns)",
+                )
+            )
+
+        # Correlated subqueries
+        if analysis.correlated_subqueries > 0:
+            issues.append(
+                PerformanceIssue(
+                    issue_type="correlated_subquery",
+                    severity="warning",
+                    description=f"Query has {analysis.correlated_subqueries} correlated subqueries",
+                    impact="Subquery executes once per outer row (N+1 problem)",
+                    recommendation="Rewrite using JOIN or window functions",
+                )
+            )
+
+        # Deep nesting
+        if analysis.max_subquery_depth > DEEP_NESTING_THRESHOLD:
+            issues.append(
+                PerformanceIssue(
+                    issue_type="deep_nesting",
+                    severity="warning",
+                    description=f"Query has {analysis.max_subquery_depth} levels of nesting",
+                    impact="Difficult for optimizer, hard to maintain",
+                    recommendation="Use CTEs to flatten query structure",
+                )
+            )
+
+        return issues
+
+    @staticmethod
+    def _calculate_complexity(analysis: PerformanceAnalysis) -> int:
+        """Calculate overall query complexity score.
+
+        Args:
+            analysis: Performance analysis state
+
+        Returns:
+            Complexity score
+        """
+        score = 0
+
+        # Join complexity (exponential factor)
+        score += analysis.join_count**2 * 5
+
+        # Subquery complexity
+        score += analysis.subquery_count * 10
+        score += analysis.correlated_subqueries * 20
+        score += analysis.max_subquery_depth * 15
+
+        # Predicate complexity
+        score += analysis.where_conditions * 2
+
+        # Grouping/sorting complexity
+        score += analysis.group_by_columns * 3
+        score += analysis.order_by_columns * 2
+        score += analysis.distinct_operations * 5
+
+        # Anti-pattern penalties
+        score += analysis.select_star_count * 5
+        score += analysis.non_sargable_predicates * 10
+
+        # Union complexity
+        score += analysis.union_branches * 8
+
+        return score
+
+    def _determine_risk_level(self, issues: "list[PerformanceIssue]", complexity_score: int) -> RiskLevel:
+        """Determine overall risk level from issues and complexity.
+
+        Args:
+            issues: List of performance issues
+            complexity_score: Calculated complexity score
+
+        Returns:
+            Overall risk level
+        """
+        if any(issue.severity == "critical" for issue in issues):
+            return RiskLevel.CRITICAL
+
+        if complexity_score > self.config.complexity_threshold * 2:
+            return RiskLevel.HIGH
+
+        if any(issue.severity == "error" for issue in issues):
+            return RiskLevel.HIGH
+
+        if complexity_score > self.config.complexity_threshold:
+            return RiskLevel.MEDIUM
+
+        if any(issue.severity == "warning" for issue in issues):
+            return RiskLevel.LOW
+
+        return RiskLevel.SKIP
+
+    @staticmethod
+    def _is_correlated_subquery(subquery: "exp.Subquery") -> bool:
+        """Check if subquery is correlated (references outer query).
+
+        Args:
+            subquery: Subquery expression
+
+        Returns:
+            True if correlated
+        """
+        # Simplified check - look for column references without table qualifiers
+        # In a real implementation, would need to track scope
+        return any(not col.table for col in subquery.find_all(exp.Column))
+
+    @staticmethod
+    def _get_table_name(expr: "Optional[exp.Expression]") -> str:
+        """Extract table name from expression.
+
+        Args:
+            expr: Expression to extract from
+
+        Returns:
+            Table name or "unknown"
+        """
+        if expr is None:
+            return "unknown"
+
+        if isinstance(expr, exp.Table):
+            return expr.name
+
+        # Try to find table in expression
+        tables = list(expr.find_all(exp.Table))
+        if tables:
+            return tables[0].name
+
+        return "unknown"
+
+    @staticmethod
+    def _find_connected_components(graph: "dict[str, set[str]]", nodes: "set[str]") -> "list[set[str]]":
+        """Find connected components in join graph.
+
+        Args:
+            graph: Adjacency list representation
+            nodes: All nodes to consider
+
+        Returns:
+            List of connected components
+        """
+        visited = set()
+        components = []
+
+        def dfs(node: str, component: "set[str]") -> None:
+            """Depth-first search to find component."""
+            visited.add(node)
+            component.add(node)
+            for neighbor in graph.get(node, set()):
+                if neighbor not in visited and neighbor in nodes:
+                    dfs(neighbor, component)
+
+        for node in nodes:
+            if node not in visited:
+                component: set[str] = set()
+                dfs(node, component)
+                components.append(component)
+
+        return components
+
+    def _analyze_optimization_opportunities(
+        self, expression: "exp.Expression", analysis: PerformanceAnalysis, context: "SQLProcessingContext"
+    ) -> None:
+        """Analyze query using SQLGlot optimizers to find improvement opportunities.
+
+        Args:
+            expression: The SQL expression to analyze
+            analysis: Analysis state to update
+            context: Processing context for dialect information
+        """
+        if not expression:
+            return
+
+        original_sql = expression.sql(dialect=context.dialect)
+        opportunities = []
+
+        try:
+            # Try different SQLGlot optimization strategies
+            optimizations = [
+                ("join_elimination", eliminate_joins.eliminate_joins, "Eliminate unnecessary joins"),
+                ("subquery_elimination", eliminate_subqueries.eliminate_subqueries, "Eliminate or merge subqueries"),
+                ("subquery_merging", merge_subqueries.merge_subqueries, "Merge subqueries into main query"),
+                (
+                    "predicate_pushdown",
+                    pushdown_predicates.pushdown_predicates,
+                    "Push predicates closer to data sources",
+                ),
+                (
+                    "projection_pushdown",
+                    pushdown_projections.pushdown_projections,
+                    "Push projections down to reduce data movement",
+                ),
+                ("join_optimization", optimize_joins.optimize_joins, "Optimize join order and conditions"),
+                ("simplification", simplify.simplify, "Simplify expressions and conditions"),
+                (
+                    "identifier_normalization",
+                    normalize_identifiers.normalize_identifiers,
+                    "Normalize identifier casing",
+                ),
+            ]
+
+            best_optimized = expression.copy()
+            cumulative_improvement = 0.0
+
+            for opt_type, optimizer, description in optimizations:
+                try:
+                    # Apply the optimization
+                    optimized = optimizer(expression.copy(), dialect=context.dialect)  # type: ignore[operator]
+
+                    if optimized is None:
+                        continue
+
+                    optimized_sql = optimized.sql(dialect=context.dialect)
+
+                    # Skip if no changes made
+                    if optimized_sql == original_sql:
+                        continue
+
+                    # Calculate complexity before and after
+                    original_temp_analysis = PerformanceAnalysis()
+                    optimized_temp_analysis = PerformanceAnalysis()
+
+                    self._analyze_expression(expression, original_temp_analysis)
+                    self._analyze_expression(optimized, optimized_temp_analysis)
+
+                    original_complexity = self._calculate_complexity(original_temp_analysis)
+                    optimized_complexity = self._calculate_complexity(optimized_temp_analysis)
+
+                    # Calculate improvement factor
+                    if original_complexity > 0:
+                        improvement = (original_complexity - optimized_complexity) / original_complexity
+                    else:
+                        improvement = 0.0
+
+                    # Only add if improvement meets threshold
+                    if improvement >= self.config.optimization_threshold:
+                        opportunities.append(
+                            OptimizationOpportunity(
+                                optimization_type=opt_type,
+                                description=f"{description} (complexity reduction: {original_complexity - optimized_complexity})",
+                                potential_improvement=improvement,
+                                complexity_reduction=original_complexity - optimized_complexity,
+                                recommendation=f"Apply {opt_type}: {description.lower()}",
+                                optimized_sql=optimized_sql,
+                            )
+                        )
+
+                        # Update the best optimization if this is better
+                        if improvement > cumulative_improvement:
+                            best_optimized = optimized
+                            cumulative_improvement = improvement
+
+                except Exception as e:
+                    # Optimization failed, log and continue with next one
+                    logger.debug("SQLGlot optimization failed: %s", e)
+                    continue
+
+            # Calculate final optimized complexity
+            if opportunities:
+                optimized_analysis = PerformanceAnalysis()
+                self._analyze_expression(best_optimized, optimized_analysis)
+                analysis.optimized_complexity = self._calculate_complexity(optimized_analysis)
+                analysis.potential_improvement = cumulative_improvement
+            else:
+                analysis.optimized_complexity = analysis.original_complexity
+                analysis.potential_improvement = 0.0
+
+            analysis.optimization_opportunities = opportunities
+
+        except Exception:
+            # If optimization analysis fails completely, just skip it
+            analysis.optimization_opportunities = []
+            analysis.optimized_complexity = analysis.original_complexity
+            analysis.potential_improvement = 0.0
+
+    @staticmethod
+    def _optimization_to_dict(optimization: OptimizationOpportunity) -> "dict[str, Any]":
+        """Convert OptimizationOpportunity to dictionary.
+
+        Args:
+            optimization: The optimization opportunity
+
+        Returns:
+            Dictionary representation
+        """
+        return {
+            "optimization_type": optimization.optimization_type,
+            "description": optimization.description,
+            "potential_improvement": optimization.potential_improvement,
+            "complexity_reduction": optimization.complexity_reduction,
+            "recommendation": optimization.recommendation,
+            "optimized_sql": optimization.optimized_sql,
+        }
+
+    @staticmethod
+    def _issue_to_dict(issue: PerformanceIssue) -> "dict[str, Any]":
+        """Convert PerformanceIssue to dictionary.
+
+        Args:
+            issue: The performance issue
+
+        Returns:
+            Dictionary representation
+        """
+        return {
+            "issue_type": issue.issue_type,
+            "severity": issue.severity,
+            "description": issue.description,
+            "impact": issue.impact,
+            "recommendation": issue.recommendation,
+            "location": issue.location,
+        }
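
Note: the complexity model and the join-graph helper in this file can be exercised in isolation, since both are plain staticmethods over the PerformanceAnalysis dataclass. The following is a minimal sketch, not part of the package diff; it assumes sqlspec 0.12.0 is installed and imports the private `_performance` module path shown above, which is an internal detail that may change between releases.

from sqlspec.statement.pipelines.validators._performance import (
    PerformanceAnalysis,
    PerformanceValidator,
)

# A hypothetical query shape: two joins, one correlated subquery at depth 1,
# three WHERE predicates, and one SELECT *.
analysis = PerformanceAnalysis(
    join_count=2,
    subquery_count=1,
    correlated_subqueries=1,
    max_subquery_depth=1,
    where_conditions=3,
    select_star_count=1,
)

# Per _calculate_complexity: 2**2*5 + 1*10 + 1*20 + 1*15 + 3*2 + 1*5 = 76
print(PerformanceValidator._calculate_complexity(analysis))  # 76

# The cartesian check's graph helper: tables "a" and "b" are joined while
# "c" is not, so two components come back, which _check_cartesian_products
# would report as an implicit cartesian product.
graph = {"a": {"b"}, "b": {"a"}}
print(PerformanceValidator._find_connected_components(graph, {"a", "b", "c"}))
# -> [{"a", "b"}, {"c"}] (component order depends on set iteration)

Running the full validator additionally requires an SQLProcessingContext from the statement pipeline (see sqlspec/statement/pipelines/context.py in the file list above).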