sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sqlspec might be problematic.

Files changed (159)
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +1 -1
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +480 -121
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -260
  10. sqlspec/adapters/adbc/driver.py +462 -367
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +18 -65
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +830 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +666 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +164 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/cli.py +1 -1
  90. sqlspec/extensions/litestar/config.py +0 -1
  91. sqlspec/extensions/litestar/handlers.py +15 -26
  92. sqlspec/extensions/litestar/plugin.py +18 -16
  93. sqlspec/extensions/litestar/providers.py +17 -52
  94. sqlspec/loader.py +424 -105
  95. sqlspec/migrations/__init__.py +12 -0
  96. sqlspec/migrations/base.py +92 -68
  97. sqlspec/migrations/commands.py +24 -106
  98. sqlspec/migrations/loaders.py +402 -0
  99. sqlspec/migrations/runner.py +49 -51
  100. sqlspec/migrations/tracker.py +31 -44
  101. sqlspec/migrations/utils.py +64 -24
  102. sqlspec/protocols.py +7 -183
  103. sqlspec/storage/__init__.py +1 -1
  104. sqlspec/storage/backends/base.py +37 -40
  105. sqlspec/storage/backends/fsspec.py +136 -112
  106. sqlspec/storage/backends/obstore.py +138 -160
  107. sqlspec/storage/capabilities.py +5 -4
  108. sqlspec/storage/registry.py +57 -106
  109. sqlspec/typing.py +136 -115
  110. sqlspec/utils/__init__.py +2 -3
  111. sqlspec/utils/correlation.py +0 -3
  112. sqlspec/utils/deprecation.py +6 -6
  113. sqlspec/utils/fixtures.py +6 -6
  114. sqlspec/utils/logging.py +0 -2
  115. sqlspec/utils/module_loader.py +7 -12
  116. sqlspec/utils/singleton.py +0 -1
  117. sqlspec/utils/sync_tools.py +17 -38
  118. sqlspec/utils/text.py +12 -51
  119. sqlspec/utils/type_guards.py +443 -232
  120. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
  121. sqlspec-0.16.0.dist-info/RECORD +134 -0
  122. sqlspec/adapters/adbc/transformers.py +0 -108
  123. sqlspec/driver/connection.py +0 -207
  124. sqlspec/driver/mixins/_cache.py +0 -114
  125. sqlspec/driver/mixins/_csv_writer.py +0 -91
  126. sqlspec/driver/mixins/_pipeline.py +0 -508
  127. sqlspec/driver/mixins/_query_tools.py +0 -796
  128. sqlspec/driver/mixins/_result_utils.py +0 -138
  129. sqlspec/driver/mixins/_storage.py +0 -912
  130. sqlspec/driver/mixins/_type_coercion.py +0 -128
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/statement/__init__.py +0 -21
  133. sqlspec/statement/builder/_merge.py +0 -95
  134. sqlspec/statement/cache.py +0 -50
  135. sqlspec/statement/filters.py +0 -625
  136. sqlspec/statement/parameters.py +0 -956
  137. sqlspec/statement/pipelines/__init__.py +0 -210
  138. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  139. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  140. sqlspec/statement/pipelines/context.py +0 -109
  141. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  142. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  143. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  144. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  145. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  146. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  147. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  148. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  149. sqlspec/statement/pipelines/validators/_security.py +0 -967
  150. sqlspec/statement/result.py +0 -435
  151. sqlspec/statement/sql.py +0 -1774
  152. sqlspec/utils/cached_property.py +0 -25
  153. sqlspec/utils/statement_hashing.py +0 -203
  154. sqlspec-0.14.1.dist-info/RECORD +0 -145
  155. /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
  158. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
  159. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
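
The headline change in this release is structural: the sqlspec/statement package is gone. Its query-builder modules moved up to sqlspec/builder (items 48-68 and 155), the statement, parameter, filter, and result machinery was rewritten under the new sqlspec/core package (items 71-79), and the statement pipeline layer of analyzers, transformers, and validators (items 137-149) was removed outright. For code that imported the moved modules by path, the renames imply updates roughly like the sketch below; it is inferred purely from the file moves listed above, and whether the old names are also re-exported from the package root is not visible in this diff:

    # Before (0.14.1)
    from sqlspec.statement import builder, filters, splitter
    # After (0.16.0): builder is now top-level; the others live in sqlspec.core
    from sqlspec import builder
    from sqlspec.core import filters, splitter
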
sqlspec/statement/pipelines/validators/_performance.py
@@ -1,714 +0,0 @@
- """Performance validator for SQL query optimization."""
-
- import logging
- from collections import defaultdict
- from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Any, Optional
-
- from sqlglot import expressions as exp
- from sqlglot.optimizer import (
-     eliminate_joins,
-     eliminate_subqueries,
-     merge_subqueries,
-     normalize_identifiers,
-     optimize_joins,
-     pushdown_predicates,
-     pushdown_projections,
-     simplify,
- )
-
- from sqlspec.exceptions import RiskLevel
- from sqlspec.protocols import ProcessorProtocol
- from sqlspec.statement.pipelines.context import ValidationError
- from sqlspec.utils.type_guards import has_expressions
-
- if TYPE_CHECKING:
-     from sqlspec.statement.pipelines.context import SQLProcessingContext
-
- __all__ = (
-     "JoinCondition",
-     "OptimizationOpportunity",
-     "PerformanceAnalysis",
-     "PerformanceConfig",
-     "PerformanceIssue",
-     "PerformanceValidator",
- )
-
- logger = logging.getLogger(__name__)
-
- # Constants
- DEEP_NESTING_THRESHOLD = 2
-
-
- @dataclass
- class PerformanceConfig:
-     """Configuration for performance validation."""
-
-     max_joins: int = 5
-     max_subqueries: int = 3
-     max_union_branches: int = 5
-     warn_on_cartesian: bool = True
-     warn_on_missing_index: bool = True
-     complexity_threshold: int = 50
-     analyze_execution_plan: bool = False
-
-     # SQLGlot optimization analysis
-     enable_optimization_analysis: bool = True
-     suggest_optimizations: bool = True
-     optimization_threshold: float = 0.2  # 20% potential improvement to flag
-     max_optimization_attempts: int = 3
-
-
- @dataclass
- class PerformanceIssue:
-     """Represents a performance issue found during validation."""
-
-     issue_type: str  # "cartesian", "excessive_joins", "missing_index", etc.
-     severity: str  # "warning", "error", "critical"
-     description: str
-     impact: str  # Expected performance impact
-     recommendation: str
-     location: "Optional[str]" = None  # SQL fragment
-
-
- @dataclass
- class JoinCondition:
-     """Information about a join condition."""
-
-     left_table: str
-     right_table: str
-     condition: "Optional[exp.Expression]"
-     join_type: str
-
-
- @dataclass
- class OptimizationOpportunity:
-     """Represents a potential optimization for the query."""
-
-     optimization_type: str  # "join_elimination", "predicate_pushdown", etc.
-     description: str
-     potential_improvement: float  # Estimated improvement factor (0.0 to 1.0)
-     complexity_reduction: int  # Estimated complexity score reduction
-     recommendation: str
-     optimized_sql: "Optional[str]" = None
-
-
- @dataclass
- class PerformanceAnalysis:
-     """Tracks performance metrics during AST traversal."""
-
-     # Join analysis
-     join_count: int = 0
-     join_types: "dict[str, int]" = field(default_factory=dict)
-     join_conditions: "list[JoinCondition]" = field(default_factory=list)
-     tables: "set[str]" = field(default_factory=set)
-
-     # Subquery analysis
-     subquery_count: int = 0
-     max_subquery_depth: int = 0
-     current_subquery_depth: int = 0
-     correlated_subqueries: int = 0
-
-     # Complexity metrics
-     where_conditions: int = 0
-     group_by_columns: int = 0
-     order_by_columns: int = 0
-     distinct_operations: int = 0
-     union_branches: int = 0
-
-     # Anti-patterns
-     select_star_count: int = 0
-     implicit_conversions: int = 0
-     non_sargable_predicates: int = 0
-
-     # SQLGlot optimization analysis
-     optimization_opportunities: "list[OptimizationOpportunity]" = field(default_factory=list)
-     original_complexity: int = 0
-     optimized_complexity: int = 0
-     potential_improvement: float = 0.0
-
-
- class PerformanceValidator(ProcessorProtocol):
-     """Comprehensive query performance validator.
-
-     Validates query performance by detecting:
-     - Cartesian products
-     - Excessive joins
-     - Deep subquery nesting
-     - Performance anti-patterns
-     - High query complexity
-     """
-
-     def __init__(self, config: "Optional[PerformanceConfig]" = None) -> None:
-         """Initialize the performance validator.
-
-         Args:
-             config: Configuration for performance validation
-         """
-         self.config = config or PerformanceConfig()
-
-     def process(
-         self, expression: "Optional[exp.Expression]", context: "SQLProcessingContext"
-     ) -> "Optional[exp.Expression]":
-         """Process the expression for validation (implements ProcessorProtocol)."""
-         if expression is None:
-             return None
-         self.validate(expression, context)
-         return expression
-
-     def add_error(
-         self,
-         context: "SQLProcessingContext",
-         message: str,
-         code: str,
-         risk_level: RiskLevel,
-         expression: "Optional[exp.Expression]" = None,
-     ) -> None:
-         """Add a validation error to the context."""
-         error = ValidationError(
-             message=message, code=code, risk_level=risk_level, processor=self.__class__.__name__, expression=expression
-         )
-         context.validation_errors.append(error)
-
-     def validate(self, expression: "exp.Expression", context: "SQLProcessingContext") -> None:
-         """Validate SQL statement for performance issues.
-
-         Args:
-             expression: The SQL expression to validate
-             context: The SQL processing context
-         """
-
-         # Performance analysis state
-         analysis = PerformanceAnalysis()
-
-         # Single traversal for all checks
-         self._analyze_expression(expression, analysis)
-
-         # Calculate baseline complexity
-         analysis.original_complexity = self._calculate_complexity(analysis)
-
-         # Perform SQLGlot optimization analysis if enabled
-         if self.config.enable_optimization_analysis:
-             self._analyze_optimization_opportunities(expression, analysis, context)
-
-         if self.config.warn_on_cartesian:
-             cartesian_issues = self._check_cartesian_products(analysis)
-             for issue in cartesian_issues:
-                 self.add_error(
-                     context,
-                     message=issue.description,
-                     code=issue.issue_type,
-                     risk_level=self._severity_to_risk_level(issue.severity),
-                     expression=expression,
-                 )
-
-         if analysis.join_count > self.config.max_joins:
-             self.add_error(
-                 context,
-                 message=f"Query has {analysis.join_count} joins (max: {self.config.max_joins})",
-                 code="excessive-joins",
-                 risk_level=RiskLevel.MEDIUM,
-                 expression=expression,
-             )
-
-         if analysis.max_subquery_depth > self.config.max_subqueries:
-             self.add_error(
-                 context,
-                 message=f"Query has {analysis.max_subquery_depth} levels of subqueries",
-                 code="deep-nesting",
-                 risk_level=RiskLevel.MEDIUM,
-                 expression=expression,
-             )
-
-         # Check for performance anti-patterns
-         pattern_issues = self._check_antipatterns(analysis)
-         for issue in pattern_issues:
-             self.add_error(
-                 context,
-                 message=issue.description,
-                 code=issue.issue_type,
-                 risk_level=self._severity_to_risk_level(issue.severity),
-                 expression=expression,
-             )
-
-         # Calculate overall complexity score
-         complexity_score = self._calculate_complexity(analysis)
-
-         context.metadata[self.__class__.__name__] = {
-             "complexity_score": complexity_score,
-             "join_analysis": {
-                 "total_joins": analysis.join_count,
-                 "join_types": dict(analysis.join_types),
-                 "tables_involved": list(analysis.tables),
-             },
-             "subquery_analysis": {
-                 "max_depth": analysis.max_subquery_depth,
-                 "total_subqueries": analysis.subquery_count,
-                 "correlated_subqueries": analysis.correlated_subqueries,
-             },
-             "optimization_analysis": {
-                 "opportunities": [self._optimization_to_dict(opt) for opt in analysis.optimization_opportunities],
-                 "original_complexity": analysis.original_complexity,
-                 "optimized_complexity": analysis.optimized_complexity,
-                 "potential_improvement": analysis.potential_improvement,
-                 "optimization_enabled": self.config.enable_optimization_analysis,
-             },
-         }
-
-     @staticmethod
-     def _severity_to_risk_level(severity: str) -> RiskLevel:
-         """Convert severity string to RiskLevel."""
-         mapping = {
-             "critical": RiskLevel.CRITICAL,
-             "error": RiskLevel.HIGH,
-             "warning": RiskLevel.MEDIUM,
-             "info": RiskLevel.LOW,
-         }
-         return mapping.get(severity.lower(), RiskLevel.MEDIUM)
-
-     def _analyze_expression(self, expr: "exp.Expression", analysis: PerformanceAnalysis, depth: int = 0) -> None:
-         """Single-pass traversal to collect all performance metrics.
-
-         Args:
-             expr: Expression to analyze
-             analysis: Analysis state to update
-             depth: Current recursion depth
-         """
-         # Track subquery depth
-         if isinstance(expr, exp.Subquery):
-             analysis.subquery_count += 1
-             analysis.current_subquery_depth = max(analysis.current_subquery_depth, depth + 1)
-             analysis.max_subquery_depth = max(analysis.max_subquery_depth, analysis.current_subquery_depth)
-
-             if self._is_correlated_subquery(expr):
-                 analysis.correlated_subqueries += 1
-
-         # Analyze joins
-         elif isinstance(expr, exp.Join):
-             analysis.join_count += 1
-             join_type = expr.args.get("kind", "INNER").upper()
-             analysis.join_types[join_type] = analysis.join_types.get(join_type, 0) + 1
-
-             condition = expr.args.get("on")
-             left_table = self._get_table_name(expr.parent) if expr.parent else "unknown"
-             right_table = self._get_table_name(expr.this)
-
-             analysis.join_conditions.append(
-                 JoinCondition(left_table=left_table, right_table=right_table, condition=condition, join_type=join_type)
-             )
-
-             analysis.tables.add(left_table)
-             analysis.tables.add(right_table)
-
-         # Track other complexity factors
-         elif isinstance(expr, exp.Where):
-             analysis.where_conditions += len(list(expr.find_all(exp.Predicate)))
-
-         elif isinstance(expr, exp.Group):
-             analysis.group_by_columns += len(expr.expressions) if has_expressions(expr) else 0
-
-         elif isinstance(expr, exp.Order):
-             analysis.order_by_columns += len(expr.expressions) if has_expressions(expr) else 0
-
-         elif isinstance(expr, exp.Distinct):
-             analysis.distinct_operations += 1
-
-         elif isinstance(expr, exp.Union):
-             analysis.union_branches += 1
-
-         elif isinstance(expr, exp.Star):
-             analysis.select_star_count += 1
-
-         # Recursive traversal
-         expr_args = getattr(expr, "args", None)
-         if expr_args is not None and isinstance(expr_args, dict):
-             for child in expr_args.values():
-                 if isinstance(child, exp.Expression):
-                     self._analyze_expression(child, analysis, depth)
-                 elif isinstance(child, list):
-                     for item in child:
-                         if isinstance(item, exp.Expression):
-                             self._analyze_expression(item, analysis, depth)
-
-     def _check_cartesian_products(self, analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
-         """Detect potential cartesian products from join analysis.
-
-         Args:
-             analysis: Performance analysis state
-
-         Returns:
-             List of cartesian product issues
-         """
-         issues = []
-
-         # Group joins by table pairs
-         join_graph: dict[str, set[str]] = defaultdict(set)
-         for condition in analysis.join_conditions:
-             if condition.condition is None:  # CROSS JOIN
-                 issues.append(
-                     PerformanceIssue(
-                         issue_type="cartesian_product",
-                         severity="critical",
-                         description=f"Explicit CROSS JOIN between {condition.left_table} and {condition.right_table}",
-                         impact="Result set grows exponentially (MxN rows)",
-                         recommendation="Add join condition or use WHERE clause",
-                     )
-                 )
-             else:
-                 join_graph[condition.left_table].add(condition.right_table)
-                 join_graph[condition.right_table].add(condition.left_table)
-
-         if len(analysis.tables) > 1:
-             connected = self._find_connected_components(join_graph, analysis.tables)
-             if len(connected) > 1:
-                 disconnected_tables = [list(component) for component in connected if len(component) > 0]
-                 issues.append(
-                     PerformanceIssue(
-                         issue_type="implicit_cartesian",
-                         severity="critical",
-                         description=f"Tables form disconnected groups: {disconnected_tables}",
-                         impact="Implicit cartesian product between table groups",
-                         recommendation="Add join conditions between table groups",
-                     )
-                 )
-
-         return issues
-
-     @staticmethod
-     def _check_antipatterns(analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
-         """Check for common performance anti-patterns.
-
-         Args:
-             analysis: Performance analysis state
-
-         Returns:
-             List of anti-pattern issues
-         """
-         issues = []
-
-         # SELECT * in production queries
-         if analysis.select_star_count > 0:
-             issues.append(
-                 PerformanceIssue(
-                     issue_type="select_star",
-                     severity="info",  # Changed to info level
-                     description=f"Query uses SELECT * ({analysis.select_star_count} occurrences)",
-                     impact="Fetches unnecessary columns, breaks with schema changes",
-                     recommendation="Explicitly list required columns",
-                 )
-             )
-
-         # Non-sargable predicates
-         if analysis.non_sargable_predicates > 0:
-             issues.append(
-                 PerformanceIssue(
-                     issue_type="non_sargable",
-                     severity="warning",
-                     description=f"Query has {analysis.non_sargable_predicates} non-sargable predicates",
-                     impact="Cannot use indexes effectively",
-                     recommendation="Rewrite predicates to be sargable (avoid functions on columns)",
-                 )
-             )
-
-         # Correlated subqueries
-         if analysis.correlated_subqueries > 0:
-             issues.append(
-                 PerformanceIssue(
-                     issue_type="correlated_subquery",
-                     severity="warning",
-                     description=f"Query has {analysis.correlated_subqueries} correlated subqueries",
-                     impact="Subquery executes once per outer row (N+1 problem)",
-                     recommendation="Rewrite using JOIN or window functions",
-                 )
-             )
-
-         # Deep nesting
-         if analysis.max_subquery_depth > DEEP_NESTING_THRESHOLD:
-             issues.append(
-                 PerformanceIssue(
-                     issue_type="deep_nesting",
-                     severity="warning",
-                     description=f"Query has {analysis.max_subquery_depth} levels of nesting",
-                     impact="Difficult for optimizer, hard to maintain",
-                     recommendation="Use CTEs to flatten query structure",
-                 )
-             )
-
-         return issues
-
-     @staticmethod
-     def _calculate_complexity(analysis: PerformanceAnalysis) -> int:
-         """Calculate overall query complexity score.
-
-         Args:
-             analysis: Performance analysis state
-
-         Returns:
-             Complexity score
-         """
-         score = 0
-
-         # Join complexity (exponential factor)
-         score += analysis.join_count**2 * 5
-
-         # Subquery complexity
-         score += analysis.subquery_count * 10
-         score += analysis.correlated_subqueries * 20
-         score += analysis.max_subquery_depth * 15
-
-         # Predicate complexity
-         score += analysis.where_conditions * 2
-
-         # Grouping/sorting complexity
-         score += analysis.group_by_columns * 3
-         score += analysis.order_by_columns * 2
-         score += analysis.distinct_operations * 5
-
-         # Anti-pattern penalties
-         score += analysis.select_star_count * 5
-         score += analysis.non_sargable_predicates * 10
-
-         # Union complexity
-         score += analysis.union_branches * 8
-
-         return score
-
-     def _determine_risk_level(self, issues: "list[PerformanceIssue]", complexity_score: int) -> RiskLevel:
-         """Determine overall risk level from issues and complexity.
-
-         Args:
-             issues: List of performance issues
-             complexity_score: Calculated complexity score
-
-         Returns:
-             Overall risk level
-         """
-         if any(issue.severity == "critical" for issue in issues):
-             return RiskLevel.CRITICAL
-
-         if complexity_score > self.config.complexity_threshold * 2:
-             return RiskLevel.HIGH
-
-         if any(issue.severity == "error" for issue in issues):
-             return RiskLevel.HIGH
-
-         if complexity_score > self.config.complexity_threshold:
-             return RiskLevel.MEDIUM
-
-         if any(issue.severity == "warning" for issue in issues):
-             return RiskLevel.LOW
-
-         return RiskLevel.SKIP
-
-     @staticmethod
-     def _is_correlated_subquery(subquery: "exp.Subquery") -> bool:
-         """Check if subquery is correlated (references outer query).
-
-         Args:
-             subquery: Subquery expression
-
-         Returns:
-             True if correlated
-         """
-         # Simplified check - look for column references without table qualifiers
-         # In a real implementation, would need to track scope
-         return any(not col.table for col in subquery.find_all(exp.Column))
-
-     @staticmethod
-     def _get_table_name(expr: "Optional[exp.Expression]") -> str:
-         """Extract table name from expression.
-
-         Args:
-             expr: Expression to extract from
-
-         Returns:
-             Table name or "unknown"
-         """
-         if expr is None:
-             return "unknown"
-
-         if isinstance(expr, exp.Table):
-             return expr.name
-
-         # Try to find table in expression
-         tables = list(expr.find_all(exp.Table))
-         if tables:
-             return tables[0].name
-
-         return "unknown"
-
-     @staticmethod
-     def _find_connected_components(graph: "dict[str, set[str]]", nodes: "set[str]") -> "list[set[str]]":
-         """Find connected components in join graph.
-
-         Args:
-             graph: Adjacency list representation
-             nodes: All nodes to consider
-
-         Returns:
-             List of connected components
-         """
-         visited = set()
-         components = []
-
-         def dfs(node: str, component: "set[str]") -> None:
-             """Depth-first search to find component."""
-             visited.add(node)
-             component.add(node)
-             for neighbor in graph.get(node, set()):
-                 if neighbor not in visited and neighbor in nodes:
-                     dfs(neighbor, component)
-
-         for node in nodes:
-             if node not in visited:
-                 component: set[str] = set()
-                 dfs(node, component)
-                 components.append(component)
-
-         return components
-
-     def _analyze_optimization_opportunities(
-         self, expression: "exp.Expression", analysis: PerformanceAnalysis, context: "SQLProcessingContext"
-     ) -> None:
-         """Analyze query using SQLGlot optimizers to find improvement opportunities.
-
-         Args:
-             expression: The SQL expression to analyze
-             analysis: Analysis state to update
-             context: Processing context for dialect information
-         """
-         if not expression:
-             return
-
-         original_sql = expression.sql(dialect=context.dialect)
-         opportunities = []
-
-         try:
-             # Try different SQLGlot optimization strategies
-             optimizations = [
-                 ("join_elimination", eliminate_joins.eliminate_joins, "Eliminate unnecessary joins"),
-                 ("subquery_elimination", eliminate_subqueries.eliminate_subqueries, "Eliminate or merge subqueries"),
-                 ("subquery_merging", merge_subqueries.merge_subqueries, "Merge subqueries into main query"),
-                 (
-                     "predicate_pushdown",
-                     pushdown_predicates.pushdown_predicates,
-                     "Push predicates closer to data sources",
-                 ),
-                 (
-                     "projection_pushdown",
-                     pushdown_projections.pushdown_projections,
-                     "Push projections down to reduce data movement",
-                 ),
-                 ("join_optimization", optimize_joins.optimize_joins, "Optimize join order and conditions"),
-                 ("simplification", simplify.simplify, "Simplify expressions and conditions"),
-                 ("identifier_conversion", normalize_identifiers.normalize_identifiers, "Normalize identifier casing"),
-             ]
-
-             best_optimized = expression.copy()
-             cumulative_improvement = 0.0
-
-             for opt_type, optimizer, description in optimizations:
-                 try:
-                     optimized = optimizer(expression.copy(), dialect=context.dialect)  # type: ignore[operator]
-
-                     if optimized is None:
-                         continue
-
-                     optimized_sql = optimized.sql(dialect=context.dialect)
-
-                     # Skip if no changes made
-                     if optimized_sql == original_sql:
-                         continue
-
-                     # Calculate complexity before and after
-                     original_temp_analysis = PerformanceAnalysis()
-                     optimized_temp_analysis = PerformanceAnalysis()
-
-                     self._analyze_expression(expression, original_temp_analysis)
-                     self._analyze_expression(optimized, optimized_temp_analysis)
-
-                     original_complexity = self._calculate_complexity(original_temp_analysis)
-                     optimized_complexity = self._calculate_complexity(optimized_temp_analysis)
-
-                     # Calculate improvement factor
-                     if original_complexity > 0:
-                         improvement = (original_complexity - optimized_complexity) / original_complexity
-                     else:
-                         improvement = 0.0
-
-                     if improvement >= self.config.optimization_threshold:
-                         opportunities.append(
-                             OptimizationOpportunity(
-                                 optimization_type=opt_type,
-                                 description=f"{description} (complexity reduction: {original_complexity - optimized_complexity})",
-                                 potential_improvement=improvement,
-                                 complexity_reduction=original_complexity - optimized_complexity,
-                                 recommendation=f"Apply {opt_type}: {description.lower()}",
-                                 optimized_sql=optimized_sql,
-                             )
-                         )
-
-                     if improvement > cumulative_improvement:
-                         best_optimized = optimized
-                         cumulative_improvement = improvement
-
-                 except Exception as e:
-                     # Optimization failed, log and continue with next one
-                     logger.debug("SQLGlot optimization failed: %s", e)
-                     continue
-
-             # Calculate final optimized complexity
-             if opportunities:
-                 optimized_analysis = PerformanceAnalysis()
-                 self._analyze_expression(best_optimized, optimized_analysis)
-                 analysis.optimized_complexity = self._calculate_complexity(optimized_analysis)
-                 analysis.potential_improvement = cumulative_improvement
-             else:
-                 analysis.optimized_complexity = analysis.original_complexity
-                 analysis.potential_improvement = 0.0
-
-             analysis.optimization_opportunities = opportunities
-
-         except Exception:
-             # If optimization analysis fails completely, just skip it
-             analysis.optimization_opportunities = []
-             analysis.optimized_complexity = analysis.original_complexity
-             analysis.potential_improvement = 0.0
-
-     @staticmethod
-     def _optimization_to_dict(optimization: OptimizationOpportunity) -> "dict[str, Any]":
-         """Convert OptimizationOpportunity to dictionary.
-
-         Args:
-             optimization: The optimization opportunity
-
-         Returns:
-             Dictionary representation
-         """
-         return {
-             "optimization_type": optimization.optimization_type,
-             "description": optimization.description,
-             "potential_improvement": optimization.potential_improvement,
-             "complexity_reduction": optimization.complexity_reduction,
-             "recommendation": optimization.recommendation,
-             "optimized_sql": optimization.optimized_sql,
-         }
-
-     @staticmethod
-     def _issue_to_dict(issue: PerformanceIssue) -> "dict[str, Any]":
-         """Convert PerformanceIssue to dictionary.
-
-         Args:
-             issue: The performance issue
-
-         Returns:
-             Dictionary representation
-         """
-         return {
-             "issue_type": issue.issue_type,
-             "severity": issue.severity,
-             "description": issue.description,
-             "impact": issue.impact,
-             "recommendation": issue.recommendation,
-             "location": issue.location,
-         }
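
The deleted _performance.py above illustrates what the removed validator layer did: a single-pass AST traversal feeding a weighted complexity score. For reference, the sketch below restates that heuristic as a standalone function. The weights are copied verbatim from _calculate_complexity in the diff, while the function name and flattened parameters are illustrative only and were never part of any sqlspec API, old or new.

    def complexity_score(joins=0, subqueries=0, correlated=0, depth=0, where=0,
                         group_by=0, order_by=0, distinct=0, select_star=0,
                         non_sargable=0, unions=0) -> int:
        # Joins are weighted quadratically; every other factor is linear.
        return (joins ** 2 * 5
                + subqueries * 10 + correlated * 20 + depth * 15
                + where * 2
                + group_by * 3 + order_by * 2 + distinct * 5
                + select_star * 5 + non_sargable * 10
                + unions * 8)

    # Worked example: 3 joins, 1 correlated subquery nested 1 level deep, and
    # 4 WHERE predicates score 45 + 10 + 20 + 15 + 8 = 98. That clears the
    # default complexity_threshold of 50 and would have mapped to the MEDIUM
    # band in _determine_risk_level (HIGH starts above 100).
    assert complexity_score(joins=3, subqueries=1, correlated=1, depth=1, where=4) == 98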