sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries, and is provided for informational purposes only.
Note: this release of sqlspec has been flagged as potentially problematic.
- sqlspec/__init__.py +50 -25
- sqlspec/__main__.py +1 -1
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +480 -121
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +115 -260
- sqlspec/adapters/adbc/driver.py +462 -367
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +199 -129
- sqlspec/adapters/aiosqlite/driver.py +230 -269
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -168
- sqlspec/adapters/asyncmy/driver.py +260 -225
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +82 -181
- sqlspec/adapters/asyncpg/driver.py +285 -383
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -258
- sqlspec/adapters/bigquery/driver.py +474 -646
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +415 -351
- sqlspec/adapters/duckdb/driver.py +343 -413
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -379
- sqlspec/adapters/oracledb/driver.py +507 -560
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -254
- sqlspec/adapters/psqlpy/driver.py +505 -234
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -403
- sqlspec/adapters/psycopg/driver.py +706 -872
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +202 -118
- sqlspec/adapters/sqlite/driver.py +264 -303
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder → builder}/_base.py +120 -55
- sqlspec/{statement/builder → builder}/_column.py +17 -6
- sqlspec/{statement/builder → builder}/_ddl.py +46 -79
- sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
- sqlspec/{statement/builder → builder}/_delete.py +6 -25
- sqlspec/{statement/builder → builder}/_insert.py +18 -65
- sqlspec/builder/_merge.py +56 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
- sqlspec/{statement/builder → builder}/_select.py +11 -56
- sqlspec/{statement/builder → builder}/_update.py +12 -18
- sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
- sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
- sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
- sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
- sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
- sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
- sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
- sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
- sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
- sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
- sqlspec/cli.py +4 -5
- sqlspec/config.py +180 -133
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +873 -0
- sqlspec/core/compiler.py +396 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1209 -0
- sqlspec/core/result.py +664 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +666 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +387 -176
- sqlspec/driver/_common.py +527 -289
- sqlspec/driver/_sync.py +390 -172
- sqlspec/driver/mixins/__init__.py +2 -19
- sqlspec/driver/mixins/_result_tools.py +164 -0
- sqlspec/driver/mixins/_sql_translator.py +6 -3
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/cli.py +1 -1
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +18 -16
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +424 -105
- sqlspec/migrations/__init__.py +12 -0
- sqlspec/migrations/base.py +92 -68
- sqlspec/migrations/commands.py +24 -106
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +49 -51
- sqlspec/migrations/tracker.py +31 -44
- sqlspec/migrations/utils.py +64 -24
- sqlspec/protocols.py +7 -183
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -3
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +17 -38
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +443 -232
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
- sqlspec-0.16.0.dist-info/RECORD +134 -0
- sqlspec/adapters/adbc/transformers.py +0 -108
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_cache.py +0 -114
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -508
- sqlspec/driver/mixins/_query_tools.py +0 -796
- sqlspec/driver/mixins/_result_utils.py +0 -138
- sqlspec/driver/mixins/_storage.py +0 -912
- sqlspec/driver/mixins/_type_coercion.py +0 -128
- sqlspec/driver/parameters.py +0 -138
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/_merge.py +0 -95
- sqlspec/statement/cache.py +0 -50
- sqlspec/statement/filters.py +0 -625
- sqlspec/statement/parameters.py +0 -956
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -109
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -714
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1774
- sqlspec/utils/cached_property.py +0 -25
- sqlspec/utils/statement_hashing.py +0 -203
- sqlspec-0.14.1.dist-info/RECORD +0 -145
- /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
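The listing above shows the shape of the 0.16.0 reorganization: the `sqlspec/statement` package is removed, the query builder moves up from `sqlspec/statement/builder` to `sqlspec/builder`, and statement internals (parameters, filters, results, splitter, caching) are consolidated into a new `sqlspec/core` package, while the old driver mixin stack and the validation/transformation pipelines are deleted outright. A minimal sketch of what the renames imply for import paths, assuming sqlspec 0.16.0 is installed; the module paths come straight from the file list, but the mapping is illustrative, not an official migration guide:

```python
from importlib.util import find_spec

# Old (0.14.1) module path -> new (0.16.0) module path, per the renames above.
MOVED_MODULES = {
    "sqlspec.statement.builder": "sqlspec.builder",
    "sqlspec.statement.filters": "sqlspec.core.filters",
    "sqlspec.statement.parameters": "sqlspec.core.parameters",
    "sqlspec.statement.result": "sqlspec.core.result",
    "sqlspec.statement.splitter": "sqlspec.core.splitter",
}

for old, new in MOVED_MODULES.items():
    # find_spec() checks that the new module path resolves in the installed wheel.
    print(f"{old} -> {new} (importable: {find_spec(new) is not None})")
```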
sqlspec/statement/pipelines/validators/_performance.py (deleted, -714 lines)
@@ -1,714 +0,0 @@
-"""Performance validator for SQL query optimization."""
-
-import logging
-from collections import defaultdict
-from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Optional
-
-from sqlglot import expressions as exp
-from sqlglot.optimizer import (
-    eliminate_joins,
-    eliminate_subqueries,
-    merge_subqueries,
-    normalize_identifiers,
-    optimize_joins,
-    pushdown_predicates,
-    pushdown_projections,
-    simplify,
-)
-
-from sqlspec.exceptions import RiskLevel
-from sqlspec.protocols import ProcessorProtocol
-from sqlspec.statement.pipelines.context import ValidationError
-from sqlspec.utils.type_guards import has_expressions
-
-if TYPE_CHECKING:
-    from sqlspec.statement.pipelines.context import SQLProcessingContext
-
-__all__ = (
-    "JoinCondition",
-    "OptimizationOpportunity",
-    "PerformanceAnalysis",
-    "PerformanceConfig",
-    "PerformanceIssue",
-    "PerformanceValidator",
-)
-
-logger = logging.getLogger(__name__)
-
-# Constants
-DEEP_NESTING_THRESHOLD = 2
-
-
-@dataclass
-class PerformanceConfig:
-    """Configuration for performance validation."""
-
-    max_joins: int = 5
-    max_subqueries: int = 3
-    max_union_branches: int = 5
-    warn_on_cartesian: bool = True
-    warn_on_missing_index: bool = True
-    complexity_threshold: int = 50
-    analyze_execution_plan: bool = False
-
-    # SQLGlot optimization analysis
-    enable_optimization_analysis: bool = True
-    suggest_optimizations: bool = True
-    optimization_threshold: float = 0.2  # 20% potential improvement to flag
-    max_optimization_attempts: int = 3
-
-
-@dataclass
-class PerformanceIssue:
-    """Represents a performance issue found during validation."""
-
-    issue_type: str  # "cartesian", "excessive_joins", "missing_index", etc.
-    severity: str  # "warning", "error", "critical"
-    description: str
-    impact: str  # Expected performance impact
-    recommendation: str
-    location: "Optional[str]" = None  # SQL fragment
-
-
-@dataclass
-class JoinCondition:
-    """Information about a join condition."""
-
-    left_table: str
-    right_table: str
-    condition: "Optional[exp.Expression]"
-    join_type: str
-
-
-@dataclass
-class OptimizationOpportunity:
-    """Represents a potential optimization for the query."""
-
-    optimization_type: str  # "join_elimination", "predicate_pushdown", etc.
-    description: str
-    potential_improvement: float  # Estimated improvement factor (0.0 to 1.0)
-    complexity_reduction: int  # Estimated complexity score reduction
-    recommendation: str
-    optimized_sql: "Optional[str]" = None
-
-
-@dataclass
-class PerformanceAnalysis:
-    """Tracks performance metrics during AST traversal."""
-
-    # Join analysis
-    join_count: int = 0
-    join_types: "dict[str, int]" = field(default_factory=dict)
-    join_conditions: "list[JoinCondition]" = field(default_factory=list)
-    tables: "set[str]" = field(default_factory=set)
-
-    # Subquery analysis
-    subquery_count: int = 0
-    max_subquery_depth: int = 0
-    current_subquery_depth: int = 0
-    correlated_subqueries: int = 0
-
-    # Complexity metrics
-    where_conditions: int = 0
-    group_by_columns: int = 0
-    order_by_columns: int = 0
-    distinct_operations: int = 0
-    union_branches: int = 0
-
-    # Anti-patterns
-    select_star_count: int = 0
-    implicit_conversions: int = 0
-    non_sargable_predicates: int = 0
-
-    # SQLGlot optimization analysis
-    optimization_opportunities: "list[OptimizationOpportunity]" = field(default_factory=list)
-    original_complexity: int = 0
-    optimized_complexity: int = 0
-    potential_improvement: float = 0.0
-
-
-class PerformanceValidator(ProcessorProtocol):
-    """Comprehensive query performance validator.
-
-    Validates query performance by detecting:
-    - Cartesian products
-    - Excessive joins
-    - Deep subquery nesting
-    - Performance anti-patterns
-    - High query complexity
-    """
-
-    def __init__(self, config: "Optional[PerformanceConfig]" = None) -> None:
-        """Initialize the performance validator.
-
-        Args:
-            config: Configuration for performance validation
-        """
-        self.config = config or PerformanceConfig()
-
-    def process(
-        self, expression: "Optional[exp.Expression]", context: "SQLProcessingContext"
-    ) -> "Optional[exp.Expression]":
-        """Process the expression for validation (implements ProcessorProtocol)."""
-        if expression is None:
-            return None
-        self.validate(expression, context)
-        return expression
-
-    def add_error(
-        self,
-        context: "SQLProcessingContext",
-        message: str,
-        code: str,
-        risk_level: RiskLevel,
-        expression: "Optional[exp.Expression]" = None,
-    ) -> None:
-        """Add a validation error to the context."""
-        error = ValidationError(
-            message=message, code=code, risk_level=risk_level, processor=self.__class__.__name__, expression=expression
-        )
-        context.validation_errors.append(error)
-
-    def validate(self, expression: "exp.Expression", context: "SQLProcessingContext") -> None:
-        """Validate SQL statement for performance issues.
-
-        Args:
-            expression: The SQL expression to validate
-            context: The SQL processing context
-        """
-
-        # Performance analysis state
-        analysis = PerformanceAnalysis()
-
-        # Single traversal for all checks
-        self._analyze_expression(expression, analysis)
-
-        # Calculate baseline complexity
-        analysis.original_complexity = self._calculate_complexity(analysis)
-
-        # Perform SQLGlot optimization analysis if enabled
-        if self.config.enable_optimization_analysis:
-            self._analyze_optimization_opportunities(expression, analysis, context)
-
-        if self.config.warn_on_cartesian:
-            cartesian_issues = self._check_cartesian_products(analysis)
-            for issue in cartesian_issues:
-                self.add_error(
-                    context,
-                    message=issue.description,
-                    code=issue.issue_type,
-                    risk_level=self._severity_to_risk_level(issue.severity),
-                    expression=expression,
-                )
-
-        if analysis.join_count > self.config.max_joins:
-            self.add_error(
-                context,
-                message=f"Query has {analysis.join_count} joins (max: {self.config.max_joins})",
-                code="excessive-joins",
-                risk_level=RiskLevel.MEDIUM,
-                expression=expression,
-            )
-
-        if analysis.max_subquery_depth > self.config.max_subqueries:
-            self.add_error(
-                context,
-                message=f"Query has {analysis.max_subquery_depth} levels of subqueries",
-                code="deep-nesting",
-                risk_level=RiskLevel.MEDIUM,
-                expression=expression,
-            )
-
-        # Check for performance anti-patterns
-        pattern_issues = self._check_antipatterns(analysis)
-        for issue in pattern_issues:
-            self.add_error(
-                context,
-                message=issue.description,
-                code=issue.issue_type,
-                risk_level=self._severity_to_risk_level(issue.severity),
-                expression=expression,
-            )
-
-        # Calculate overall complexity score
-        complexity_score = self._calculate_complexity(analysis)
-
-        context.metadata[self.__class__.__name__] = {
-            "complexity_score": complexity_score,
-            "join_analysis": {
-                "total_joins": analysis.join_count,
-                "join_types": dict(analysis.join_types),
-                "tables_involved": list(analysis.tables),
-            },
-            "subquery_analysis": {
-                "max_depth": analysis.max_subquery_depth,
-                "total_subqueries": analysis.subquery_count,
-                "correlated_subqueries": analysis.correlated_subqueries,
-            },
-            "optimization_analysis": {
-                "opportunities": [self._optimization_to_dict(opt) for opt in analysis.optimization_opportunities],
-                "original_complexity": analysis.original_complexity,
-                "optimized_complexity": analysis.optimized_complexity,
-                "potential_improvement": analysis.potential_improvement,
-                "optimization_enabled": self.config.enable_optimization_analysis,
-            },
-        }
-
-    @staticmethod
-    def _severity_to_risk_level(severity: str) -> RiskLevel:
-        """Convert severity string to RiskLevel."""
-        mapping = {
-            "critical": RiskLevel.CRITICAL,
-            "error": RiskLevel.HIGH,
-            "warning": RiskLevel.MEDIUM,
-            "info": RiskLevel.LOW,
-        }
-        return mapping.get(severity.lower(), RiskLevel.MEDIUM)
-
-    def _analyze_expression(self, expr: "exp.Expression", analysis: PerformanceAnalysis, depth: int = 0) -> None:
-        """Single-pass traversal to collect all performance metrics.
-
-        Args:
-            expr: Expression to analyze
-            analysis: Analysis state to update
-            depth: Current recursion depth
-        """
-        # Track subquery depth
-        if isinstance(expr, exp.Subquery):
-            analysis.subquery_count += 1
-            analysis.current_subquery_depth = max(analysis.current_subquery_depth, depth + 1)
-            analysis.max_subquery_depth = max(analysis.max_subquery_depth, analysis.current_subquery_depth)
-
-            if self._is_correlated_subquery(expr):
-                analysis.correlated_subqueries += 1
-
-        # Analyze joins
-        elif isinstance(expr, exp.Join):
-            analysis.join_count += 1
-            join_type = expr.args.get("kind", "INNER").upper()
-            analysis.join_types[join_type] = analysis.join_types.get(join_type, 0) + 1
-
-            condition = expr.args.get("on")
-            left_table = self._get_table_name(expr.parent) if expr.parent else "unknown"
-            right_table = self._get_table_name(expr.this)
-
-            analysis.join_conditions.append(
-                JoinCondition(left_table=left_table, right_table=right_table, condition=condition, join_type=join_type)
-            )
-
-            analysis.tables.add(left_table)
-            analysis.tables.add(right_table)
-
-        # Track other complexity factors
-        elif isinstance(expr, exp.Where):
-            analysis.where_conditions += len(list(expr.find_all(exp.Predicate)))
-
-        elif isinstance(expr, exp.Group):
-            analysis.group_by_columns += len(expr.expressions) if has_expressions(expr) else 0
-
-        elif isinstance(expr, exp.Order):
-            analysis.order_by_columns += len(expr.expressions) if has_expressions(expr) else 0
-
-        elif isinstance(expr, exp.Distinct):
-            analysis.distinct_operations += 1
-
-        elif isinstance(expr, exp.Union):
-            analysis.union_branches += 1
-
-        elif isinstance(expr, exp.Star):
-            analysis.select_star_count += 1
-
-        # Recursive traversal
-        expr_args = getattr(expr, "args", None)
-        if expr_args is not None and isinstance(expr_args, dict):
-            for child in expr_args.values():
-                if isinstance(child, exp.Expression):
-                    self._analyze_expression(child, analysis, depth)
-                elif isinstance(child, list):
-                    for item in child:
-                        if isinstance(item, exp.Expression):
-                            self._analyze_expression(item, analysis, depth)
-
-    def _check_cartesian_products(self, analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
-        """Detect potential cartesian products from join analysis.
-
-        Args:
-            analysis: Performance analysis state
-
-        Returns:
-            List of cartesian product issues
-        """
-        issues = []
-
-        # Group joins by table pairs
-        join_graph: dict[str, set[str]] = defaultdict(set)
-        for condition in analysis.join_conditions:
-            if condition.condition is None:  # CROSS JOIN
-                issues.append(
-                    PerformanceIssue(
-                        issue_type="cartesian_product",
-                        severity="critical",
-                        description=f"Explicit CROSS JOIN between {condition.left_table} and {condition.right_table}",
-                        impact="Result set grows exponentially (MxN rows)",
-                        recommendation="Add join condition or use WHERE clause",
-                    )
-                )
-            else:
-                join_graph[condition.left_table].add(condition.right_table)
-                join_graph[condition.right_table].add(condition.left_table)
-
-        if len(analysis.tables) > 1:
-            connected = self._find_connected_components(join_graph, analysis.tables)
-            if len(connected) > 1:
-                disconnected_tables = [list(component) for component in connected if len(component) > 0]
-                issues.append(
-                    PerformanceIssue(
-                        issue_type="implicit_cartesian",
-                        severity="critical",
-                        description=f"Tables form disconnected groups: {disconnected_tables}",
-                        impact="Implicit cartesian product between table groups",
-                        recommendation="Add join conditions between table groups",
-                    )
-                )
-
-        return issues
-
-    @staticmethod
-    def _check_antipatterns(analysis: PerformanceAnalysis) -> "list[PerformanceIssue]":
-        """Check for common performance anti-patterns.
-
-        Args:
-            analysis: Performance analysis state
-
-        Returns:
-            List of anti-pattern issues
-        """
-        issues = []
-
-        # SELECT * in production queries
-        if analysis.select_star_count > 0:
-            issues.append(
-                PerformanceIssue(
-                    issue_type="select_star",
-                    severity="info",  # Changed to info level
-                    description=f"Query uses SELECT * ({analysis.select_star_count} occurrences)",
-                    impact="Fetches unnecessary columns, breaks with schema changes",
-                    recommendation="Explicitly list required columns",
-                )
-            )
-
-        # Non-sargable predicates
-        if analysis.non_sargable_predicates > 0:
-            issues.append(
-                PerformanceIssue(
-                    issue_type="non_sargable",
-                    severity="warning",
-                    description=f"Query has {analysis.non_sargable_predicates} non-sargable predicates",
-                    impact="Cannot use indexes effectively",
-                    recommendation="Rewrite predicates to be sargable (avoid functions on columns)",
-                )
-            )
-
-        # Correlated subqueries
-        if analysis.correlated_subqueries > 0:
-            issues.append(
-                PerformanceIssue(
-                    issue_type="correlated_subquery",
-                    severity="warning",
-                    description=f"Query has {analysis.correlated_subqueries} correlated subqueries",
-                    impact="Subquery executes once per outer row (N+1 problem)",
-                    recommendation="Rewrite using JOIN or window functions",
-                )
-            )
-
-        # Deep nesting
-        if analysis.max_subquery_depth > DEEP_NESTING_THRESHOLD:
-            issues.append(
-                PerformanceIssue(
-                    issue_type="deep_nesting",
-                    severity="warning",
-                    description=f"Query has {analysis.max_subquery_depth} levels of nesting",
-                    impact="Difficult for optimizer, hard to maintain",
-                    recommendation="Use CTEs to flatten query structure",
-                )
-            )
-
-        return issues
-
-    @staticmethod
-    def _calculate_complexity(analysis: PerformanceAnalysis) -> int:
-        """Calculate overall query complexity score.
-
-        Args:
-            analysis: Performance analysis state
-
-        Returns:
-            Complexity score
-        """
-        score = 0
-
-        # Join complexity (exponential factor)
-        score += analysis.join_count**2 * 5
-
-        # Subquery complexity
-        score += analysis.subquery_count * 10
-        score += analysis.correlated_subqueries * 20
-        score += analysis.max_subquery_depth * 15
-
-        # Predicate complexity
-        score += analysis.where_conditions * 2
-
-        # Grouping/sorting complexity
-        score += analysis.group_by_columns * 3
-        score += analysis.order_by_columns * 2
-        score += analysis.distinct_operations * 5
-
-        # Anti-pattern penalties
-        score += analysis.select_star_count * 5
-        score += analysis.non_sargable_predicates * 10
-
-        # Union complexity
-        score += analysis.union_branches * 8
-
-        return score
-
-    def _determine_risk_level(self, issues: "list[PerformanceIssue]", complexity_score: int) -> RiskLevel:
-        """Determine overall risk level from issues and complexity.
-
-        Args:
-            issues: List of performance issues
-            complexity_score: Calculated complexity score
-
-        Returns:
-            Overall risk level
-        """
-        if any(issue.severity == "critical" for issue in issues):
-            return RiskLevel.CRITICAL
-
-        if complexity_score > self.config.complexity_threshold * 2:
-            return RiskLevel.HIGH
-
-        if any(issue.severity == "error" for issue in issues):
-            return RiskLevel.HIGH
-
-        if complexity_score > self.config.complexity_threshold:
-            return RiskLevel.MEDIUM
-
-        if any(issue.severity == "warning" for issue in issues):
-            return RiskLevel.LOW
-
-        return RiskLevel.SKIP
-
-    @staticmethod
-    def _is_correlated_subquery(subquery: "exp.Subquery") -> bool:
-        """Check if subquery is correlated (references outer query).
-
-        Args:
-            subquery: Subquery expression
-
-        Returns:
-            True if correlated
-        """
-        # Simplified check - look for column references without table qualifiers
-        # In a real implementation, would need to track scope
-        return any(not col.table for col in subquery.find_all(exp.Column))
-
-    @staticmethod
-    def _get_table_name(expr: "Optional[exp.Expression]") -> str:
-        """Extract table name from expression.
-
-        Args:
-            expr: Expression to extract from
-
-        Returns:
-            Table name or "unknown"
-        """
-        if expr is None:
-            return "unknown"
-
-        if isinstance(expr, exp.Table):
-            return expr.name
-
-        # Try to find table in expression
-        tables = list(expr.find_all(exp.Table))
-        if tables:
-            return tables[0].name
-
-        return "unknown"
-
-    @staticmethod
-    def _find_connected_components(graph: "dict[str, set[str]]", nodes: "set[str]") -> "list[set[str]]":
-        """Find connected components in join graph.
-
-        Args:
-            graph: Adjacency list representation
-            nodes: All nodes to consider
-
-        Returns:
-            List of connected components
-        """
-        visited = set()
-        components = []
-
-        def dfs(node: str, component: "set[str]") -> None:
-            """Depth-first search to find component."""
-            visited.add(node)
-            component.add(node)
-            for neighbor in graph.get(node, set()):
-                if neighbor not in visited and neighbor in nodes:
-                    dfs(neighbor, component)
-
-        for node in nodes:
-            if node not in visited:
-                component: set[str] = set()
-                dfs(node, component)
-                components.append(component)
-
-        return components
-
-    def _analyze_optimization_opportunities(
-        self, expression: "exp.Expression", analysis: PerformanceAnalysis, context: "SQLProcessingContext"
-    ) -> None:
-        """Analyze query using SQLGlot optimizers to find improvement opportunities.
-
-        Args:
-            expression: The SQL expression to analyze
-            analysis: Analysis state to update
-            context: Processing context for dialect information
-        """
-        if not expression:
-            return
-
-        original_sql = expression.sql(dialect=context.dialect)
-        opportunities = []
-
-        try:
-            # Try different SQLGlot optimization strategies
-            optimizations = [
-                ("join_elimination", eliminate_joins.eliminate_joins, "Eliminate unnecessary joins"),
-                ("subquery_elimination", eliminate_subqueries.eliminate_subqueries, "Eliminate or merge subqueries"),
-                ("subquery_merging", merge_subqueries.merge_subqueries, "Merge subqueries into main query"),
-                (
-                    "predicate_pushdown",
-                    pushdown_predicates.pushdown_predicates,
-                    "Push predicates closer to data sources",
-                ),
-                (
-                    "projection_pushdown",
-                    pushdown_projections.pushdown_projections,
-                    "Push projections down to reduce data movement",
-                ),
-                ("join_optimization", optimize_joins.optimize_joins, "Optimize join order and conditions"),
-                ("simplification", simplify.simplify, "Simplify expressions and conditions"),
-                ("identifier_conversion", normalize_identifiers.normalize_identifiers, "Normalize identifier casing"),
-            ]
-
-            best_optimized = expression.copy()
-            cumulative_improvement = 0.0
-
-            for opt_type, optimizer, description in optimizations:
-                try:
-                    optimized = optimizer(expression.copy(), dialect=context.dialect)  # type: ignore[operator]
-
-                    if optimized is None:
-                        continue
-
-                    optimized_sql = optimized.sql(dialect=context.dialect)
-
-                    # Skip if no changes made
-                    if optimized_sql == original_sql:
-                        continue
-
-                    # Calculate complexity before and after
-                    original_temp_analysis = PerformanceAnalysis()
-                    optimized_temp_analysis = PerformanceAnalysis()
-
-                    self._analyze_expression(expression, original_temp_analysis)
-                    self._analyze_expression(optimized, optimized_temp_analysis)
-
-                    original_complexity = self._calculate_complexity(original_temp_analysis)
-                    optimized_complexity = self._calculate_complexity(optimized_temp_analysis)
-
-                    # Calculate improvement factor
-                    if original_complexity > 0:
-                        improvement = (original_complexity - optimized_complexity) / original_complexity
-                    else:
-                        improvement = 0.0
-
-                    if improvement >= self.config.optimization_threshold:
-                        opportunities.append(
-                            OptimizationOpportunity(
-                                optimization_type=opt_type,
-                                description=f"{description} (complexity reduction: {original_complexity - optimized_complexity})",
-                                potential_improvement=improvement,
-                                complexity_reduction=original_complexity - optimized_complexity,
-                                recommendation=f"Apply {opt_type}: {description.lower()}",
-                                optimized_sql=optimized_sql,
-                            )
-                        )
-
-                        if improvement > cumulative_improvement:
-                            best_optimized = optimized
-                            cumulative_improvement = improvement
-
-                except Exception as e:
-                    # Optimization failed, log and continue with next one
-                    logger.debug("SQLGlot optimization failed: %s", e)
-                    continue
-
-            # Calculate final optimized complexity
-            if opportunities:
-                optimized_analysis = PerformanceAnalysis()
-                self._analyze_expression(best_optimized, optimized_analysis)
-                analysis.optimized_complexity = self._calculate_complexity(optimized_analysis)
-                analysis.potential_improvement = cumulative_improvement
-            else:
-                analysis.optimized_complexity = analysis.original_complexity
-                analysis.potential_improvement = 0.0
-
-            analysis.optimization_opportunities = opportunities
-
-        except Exception:
-            # If optimization analysis fails completely, just skip it
-            analysis.optimization_opportunities = []
-            analysis.optimized_complexity = analysis.original_complexity
-            analysis.potential_improvement = 0.0
-
-    @staticmethod
-    def _optimization_to_dict(optimization: OptimizationOpportunity) -> "dict[str, Any]":
-        """Convert OptimizationOpportunity to dictionary.
-
-        Args:
-            optimization: The optimization opportunity
-
-        Returns:
-            Dictionary representation
-        """
-        return {
-            "optimization_type": optimization.optimization_type,
-            "description": optimization.description,
-            "potential_improvement": optimization.potential_improvement,
-            "complexity_reduction": optimization.complexity_reduction,
-            "recommendation": optimization.recommendation,
-            "optimized_sql": optimization.optimized_sql,
-        }
-
-    @staticmethod
-    def _issue_to_dict(issue: PerformanceIssue) -> "dict[str, Any]":
-        """Convert PerformanceIssue to dictionary.
-
-        Args:
-            issue: The performance issue
-
-        Returns:
-            Dictionary representation
-        """
-        return {
-            "issue_type": issue.issue_type,
-            "severity": issue.severity,
-            "description": issue.description,
-            "impact": issue.impact,
-            "recommendation": issue.recommendation,
-            "location": issue.location,
-        }