sqlspec 0.14.1__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +50 -25
- sqlspec/__main__.py +1 -1
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +256 -120
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +115 -260
- sqlspec/adapters/adbc/driver.py +462 -367
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +199 -129
- sqlspec/adapters/aiosqlite/driver.py +230 -269
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -168
- sqlspec/adapters/asyncmy/driver.py +260 -225
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +82 -181
- sqlspec/adapters/asyncpg/driver.py +285 -383
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -258
- sqlspec/adapters/bigquery/driver.py +474 -646
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +415 -351
- sqlspec/adapters/duckdb/driver.py +343 -413
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -379
- sqlspec/adapters/oracledb/driver.py +507 -560
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -254
- sqlspec/adapters/psqlpy/driver.py +505 -234
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -403
- sqlspec/adapters/psycopg/driver.py +706 -872
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +202 -118
- sqlspec/adapters/sqlite/driver.py +264 -303
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder → builder}/_base.py +120 -55
- sqlspec/{statement/builder → builder}/_column.py +17 -6
- sqlspec/{statement/builder → builder}/_ddl.py +46 -79
- sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
- sqlspec/{statement/builder → builder}/_delete.py +6 -25
- sqlspec/{statement/builder → builder}/_insert.py +6 -64
- sqlspec/builder/_merge.py +56 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +3 -10
- sqlspec/{statement/builder → builder}/_select.py +11 -56
- sqlspec/{statement/builder → builder}/_update.py +12 -18
- sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
- sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
- sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +22 -16
- sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
- sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +3 -5
- sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
- sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
- sqlspec/{statement/builder → builder}/mixins/_select_operations.py +21 -36
- sqlspec/{statement/builder → builder}/mixins/_update_operations.py +3 -14
- sqlspec/{statement/builder → builder}/mixins/_where_clause.py +52 -79
- sqlspec/cli.py +4 -5
- sqlspec/config.py +180 -133
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +873 -0
- sqlspec/core/compiler.py +396 -0
- sqlspec/core/filters.py +828 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1209 -0
- sqlspec/core/result.py +664 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +651 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +387 -176
- sqlspec/driver/_common.py +527 -289
- sqlspec/driver/_sync.py +390 -172
- sqlspec/driver/mixins/__init__.py +2 -19
- sqlspec/driver/mixins/_result_tools.py +168 -0
- sqlspec/driver/mixins/_sql_translator.py +6 -3
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +16 -14
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +424 -105
- sqlspec/migrations/__init__.py +12 -0
- sqlspec/migrations/base.py +92 -68
- sqlspec/migrations/commands.py +24 -106
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +49 -51
- sqlspec/migrations/tracker.py +31 -44
- sqlspec/migrations/utils.py +64 -24
- sqlspec/protocols.py +7 -183
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -3
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +16 -37
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +443 -232
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/METADATA +7 -2
- sqlspec-0.15.0.dist-info/RECORD +134 -0
- sqlspec/adapters/adbc/transformers.py +0 -108
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_cache.py +0 -114
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -508
- sqlspec/driver/mixins/_query_tools.py +0 -796
- sqlspec/driver/mixins/_result_utils.py +0 -138
- sqlspec/driver/mixins/_storage.py +0 -912
- sqlspec/driver/mixins/_type_coercion.py +0 -128
- sqlspec/driver/parameters.py +0 -138
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/_merge.py +0 -95
- sqlspec/statement/cache.py +0 -50
- sqlspec/statement/filters.py +0 -625
- sqlspec/statement/parameters.py +0 -956
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -109
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -714
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1774
- sqlspec/utils/cached_property.py +0 -25
- sqlspec/utils/statement_hashing.py +0 -203
- sqlspec-0.14.1.dist-info/RECORD +0 -145
- /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,646 +0,0 @@
|
|
|
1
|
-
"""SQL statement analyzer for extracting metadata and complexity metrics."""
|
|
2
|
-
|
|
3
|
-
import time
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
6
|
-
|
|
7
|
-
from sqlglot import exp, parse_one
|
|
8
|
-
from sqlglot.errors import ParseError as SQLGlotParseError
|
|
9
|
-
|
|
10
|
-
from sqlspec.protocols import ProcessorProtocol
|
|
11
|
-
from sqlspec.statement.pipelines.context import AnalysisFinding
|
|
12
|
-
from sqlspec.utils.correlation import CorrelationContext
|
|
13
|
-
from sqlspec.utils.logging import get_logger
|
|
14
|
-
from sqlspec.utils.type_guards import has_expressions
|
|
15
|
-
|
|
16
|
-
if TYPE_CHECKING:
|
|
17
|
-
from sqlglot.dialects.dialect import DialectType
|
|
18
|
-
|
|
19
|
-
from sqlspec.statement.pipelines.context import SQLProcessingContext
|
|
20
|
-
from sqlspec.statement.sql import SQLConfig
|
|
21
|
-
|
|
22
|
-
__all__ = ("StatementAnalysis", "StatementAnalyzer")
|
|
23
|
-
|
|
24
|
-
# Constants for statement analysis
|
|
25
|
-
#: Subquery count above which a "high number of subqueries" warning is raised.
HIGH_SUBQUERY_COUNT_THRESHOLD = 10

#: Correlated-subquery count above which a warning is raised.
HIGH_CORRELATED_SUBQUERY_THRESHOLD = 3

#: Number of expensive function calls above which a warning is raised.
EXPENSIVE_FUNCTION_THRESHOLD = 5

#: Number of nested function calls above which a warning is raised.
NESTED_FUNCTION_THRESHOLD = 3
|
|
36
|
-
|
|
37
|
-
logger = get_logger("pipelines.analyzers")
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclass
class StatementAnalysis:
    """Metadata and complexity metrics gathered for one parsed SQL statement.

    Only ``statement_type`` and ``expression`` are required; every other
    field starts from an empty or zero default.
    """

    # --- Basic statement facts -------------------------------------------
    # Type of SQL statement (Insert, Select, Update, Delete, etc.).
    statement_type: str
    # Parsed SQLGlot expression.
    expression: exp.Expression
    # Primary table name, if one was detected.
    table_name: "Optional[str]" = None
    # Column names, if any were detected.
    columns: "list[str]" = field(default_factory=list)
    # Whether the statement has a RETURNING clause.
    has_returning: bool = False
    # Whether this is an INSERT ... SELECT pattern.
    is_from_select: bool = False
    # Parameters extracted from the SQL.
    parameters: "dict[str, Any]" = field(default_factory=dict)
    # All table names referenced in the query.
    tables: "list[str]" = field(default_factory=list)
    # Complexity score derived from the query structure.
    complexity_score: int = 0
    # Whether the query uses subqueries.
    uses_subqueries: bool = False
    # Number of joins in the query.
    join_count: int = 0
    # Aggregate functions used by the query.
    aggregate_functions: "list[str]" = field(default_factory=list)

    # --- Enhanced complexity metrics -------------------------------------
    # Join types mapped to how often each occurs.
    join_types: "dict[str, int]" = field(default_factory=dict)
    # Maximum subquery nesting depth.
    max_subquery_depth: int = 0
    # Number of correlated subqueries.
    correlated_subquery_count: int = 0
    # Total number of function calls.
    function_count: int = 0
    # Number of WHERE conditions.
    where_condition_count: int = 0
    # Number of potential Cartesian products detected.
    potential_cartesian_products: int = 0
    # Warnings about query complexity.
    complexity_warnings: "list[str]" = field(default_factory=list)
    # Issues with query complexity.
    complexity_issues: "list[str]" = field(default_factory=list)

    # --- Additional attributes for aggregator compatibility --------------
    # Total number of subqueries.
    subquery_count: int = 0
    # SQL operations performed (SELECT, JOIN, etc.).
    operations: "list[str]" = field(default_factory=list)
    # Whether the query uses aggregation functions.
    has_aggregation: bool = False
    # Whether the query uses window functions.
    has_window_functions: bool = False
    # Number of CTEs (Common Table Expressions).
    cte_count: int = 0
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class StatementAnalyzer(ProcessorProtocol):
|
|
101
|
-
"""SQL statement analyzer that extracts metadata and insights from SQL statements.
|
|
102
|
-
|
|
103
|
-
This processor analyzes SQL expressions to extract useful metadata without
|
|
104
|
-
modifying the SQL itself. It can be used in pipelines to gather insights
|
|
105
|
-
about query complexity, table usage, etc.
|
|
106
|
-
"""
|
|
107
|
-
|
|
108
|
-
def __init__(
|
|
109
|
-
self,
|
|
110
|
-
cache_size: int = 1000,
|
|
111
|
-
max_join_count: int = 10,
|
|
112
|
-
max_subquery_depth: int = 3,
|
|
113
|
-
max_function_calls: int = 20,
|
|
114
|
-
max_where_conditions: int = 15,
|
|
115
|
-
) -> None:
|
|
116
|
-
"""Initialize the analyzer.
|
|
117
|
-
|
|
118
|
-
Args:
|
|
119
|
-
cache_size: Maximum number of parsed expressions to cache.
|
|
120
|
-
max_join_count: Maximum allowed joins before flagging.
|
|
121
|
-
max_subquery_depth: Maximum allowed subquery nesting depth.
|
|
122
|
-
max_function_calls: Maximum allowed function calls.
|
|
123
|
-
max_where_conditions: Maximum allowed WHERE conditions.
|
|
124
|
-
"""
|
|
125
|
-
self.cache_size = cache_size
|
|
126
|
-
self.max_join_count = max_join_count
|
|
127
|
-
self.max_subquery_depth = max_subquery_depth
|
|
128
|
-
self.max_function_calls = max_function_calls
|
|
129
|
-
self.max_where_conditions = max_where_conditions
|
|
130
|
-
self._parse_cache: dict[tuple[str, Optional[str]], exp.Expression] = {}
|
|
131
|
-
self._analysis_cache: dict[str, StatementAnalysis] = {}
|
|
132
|
-
|
|
133
|
-
def process(
|
|
134
|
-
self, expression: "Optional[exp.Expression]", context: "SQLProcessingContext"
|
|
135
|
-
) -> "Optional[exp.Expression]":
|
|
136
|
-
"""Process the SQL expression to extract analysis metadata and store it in the context."""
|
|
137
|
-
if expression is None:
|
|
138
|
-
return None
|
|
139
|
-
|
|
140
|
-
CorrelationContext.get()
|
|
141
|
-
start_time = time.perf_counter()
|
|
142
|
-
|
|
143
|
-
if not context.config.enable_analysis:
|
|
144
|
-
return expression
|
|
145
|
-
|
|
146
|
-
analysis_result_obj = self.analyze_expression(expression, context.dialect, context.config)
|
|
147
|
-
|
|
148
|
-
duration = time.perf_counter() - start_time
|
|
149
|
-
|
|
150
|
-
if analysis_result_obj.complexity_warnings:
|
|
151
|
-
for warning in analysis_result_obj.complexity_warnings:
|
|
152
|
-
finding = AnalysisFinding(key="complexity_warning", value=warning, processor=self.__class__.__name__)
|
|
153
|
-
context.analysis_findings.append(finding)
|
|
154
|
-
|
|
155
|
-
if analysis_result_obj.complexity_issues:
|
|
156
|
-
for issue in analysis_result_obj.complexity_issues:
|
|
157
|
-
finding = AnalysisFinding(key="complexity_issue", value=issue, processor=self.__class__.__name__)
|
|
158
|
-
context.analysis_findings.append(finding)
|
|
159
|
-
|
|
160
|
-
# Store metadata in context
|
|
161
|
-
context.metadata[self.__class__.__name__] = {
|
|
162
|
-
"duration_ms": duration * 1000,
|
|
163
|
-
"statement_type": analysis_result_obj.statement_type,
|
|
164
|
-
"table_count": len(analysis_result_obj.tables),
|
|
165
|
-
"has_subqueries": analysis_result_obj.uses_subqueries,
|
|
166
|
-
"join_count": analysis_result_obj.join_count,
|
|
167
|
-
"complexity_score": analysis_result_obj.complexity_score,
|
|
168
|
-
}
|
|
169
|
-
return expression
|
|
170
|
-
|
|
171
|
-
def analyze_statement(self, sql_string: str, dialect: "DialectType" = None) -> StatementAnalysis:
    """Parse a SQL string and analyze the resulting expression.

    An analysis already cached under the stripped SQL text is returned
    immediately. Parsed expressions are memoized per
    ``(stripped SQL, dialect)`` while the parse cache holds fewer than
    ``cache_size`` entries.

    Args:
        sql_string: The SQL string to analyze.
        dialect: SQL dialect used for parsing.

    Returns:
        The analysis of the statement. When parsing fails, or the parsed
        expression is not one of the recognized statement types, a warning
        is logged and a placeholder analysis with
        ``statement_type="Unknown"`` is returned.
    """
    # Strip once; the same text keys both caches.
    normalized_sql = sql_string.strip()

    cached_analysis = self._analysis_cache.get(normalized_sql)
    if cached_analysis is not None:
        return cached_analysis

    parse_cache_key = (normalized_sql, str(dialect) if dialect else None)
    expr = self._parse_cache.get(parse_cache_key)

    if expr is None:
        try:
            expr = exp.maybe_parse(sql_string, dialect=dialect)
            if expr is None:
                expr = parse_one(sql_string, dialect=dialect)

            # Simple expressions like Alias or Identifier are not valid SQL statements
            valid_statement_types = (
                exp.Select,
                exp.Insert,
                exp.Update,
                exp.Delete,
                exp.Create,
                exp.Drop,
                exp.Alter,
                exp.Merge,
                exp.Command,
                exp.Set,
                exp.Show,
                exp.Describe,
                exp.Use,
                exp.Union,
                exp.Intersect,
                exp.Except,
            )
            if not isinstance(expr, valid_statement_types):
                logger.warning("Parsed expression is not a valid SQL statement: %s", type(expr).__name__)
                return StatementAnalysis(statement_type="Unknown", expression=exp.Anonymous(this="UNKNOWN"))

            if len(self._parse_cache) < self.cache_size:
                self._parse_cache[parse_cache_key] = expr
        except Exception as e:
            # Deliberately best-effort: any failure (sqlglot's ParseError
            # included, since it derives from Exception) degrades to "Unknown".
            logger.warning("Failed to parse SQL statement: %s", e)
            return StatementAnalysis(statement_type="Unknown", expression=exp.Anonymous(this="UNKNOWN"))

    # Forward the dialect instead of silently dropping it.
    return self.analyze_expression(expr, dialect)
|
|
227
|
-
|
|
228
|
-
def analyze_expression(
    self, expression: exp.Expression, dialect: "DialectType" = None, config: "Optional[SQLConfig]" = None
) -> StatementAnalysis:
    """Build (or fetch from cache) the analysis for a SQLGlot expression.

    The cache key is the SQL rendered by ``expression.sql()``; ``dialect``
    and ``config`` are accepted but do not take part in the key.

    Args:
        expression: The parsed expression to analyze.
        dialect: SQL dialect (not read by this method).
        config: SQL configuration (not read by this method).

    Returns:
        The populated analysis. It is stored in the analysis cache while
        the cache holds fewer than ``cache_size`` entries.
    """
    rendered_sql = expression.sql()  # Simplified cache key
    if rendered_sql in self._analysis_cache:
        return self._analysis_cache[rendered_sql]

    result = StatementAnalysis(
        statement_type=type(expression).__name__,
        expression=expression,
        table_name=self._extract_primary_table_name(expression),
        columns=self._extract_columns(expression),
        has_returning=bool(expression.find(exp.Returning)),
        is_from_select=self._is_insert_from_select(expression),
        parameters=self._extract_parameters(expression),
        tables=self._extract_all_tables(expression),
        uses_subqueries=self._has_subqueries(expression),
        join_count=self._count_joins(expression),
        aggregate_functions=self._extract_aggregate_functions(expression),
    )

    # Subquery and CTE counts are filled in before the complexity passes run.
    wrapped_subqueries = sum(1 for _ in expression.find_all(exp.Subquery))
    # IN / EXISTS bodies that are bare SELECTs are not Subquery nodes, so add them.
    bare_in_queries = sum(
        1
        for in_node in expression.find_all(exp.In)
        if in_node.args.get("query") and isinstance(in_node.args.get("query"), exp.Select)
    )
    bare_exists_queries = sum(
        1
        for exists_node in expression.find_all(exp.Exists)
        if exists_node.this and isinstance(exists_node.this, exp.Select)
    )
    result.subquery_count = wrapped_subqueries + bare_in_queries + bare_exists_queries
    result.cte_count = sum(1 for _ in expression.find_all(exp.CTE))

    self._analyze_complexity(expression, result)
    result.complexity_score = self._calculate_comprehensive_complexity_score(result)
    result.operations = self._extract_operations(expression)
    result.has_aggregation = bool(result.aggregate_functions)
    result.has_window_functions = self._has_window_functions(expression)

    if len(self._analysis_cache) < self.cache_size:
        self._analysis_cache[rendered_sql] = result
    return result
|
|
273
|
-
|
|
274
|
-
def _analyze_complexity(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
    """Run every complexity pass, in order, against ``analysis``.

    Each pass mutates ``analysis`` in place: joins, then subqueries, then
    WHERE clauses, then functions.
    """
    passes = (
        self._analyze_joins,
        self._analyze_subqueries,
        self._analyze_where_clauses,
        self._analyze_functions,
    )
    for run_pass in passes:
        run_pass(expression, analysis)
|
|
280
|
-
|
|
281
|
-
def _analyze_joins(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
    """Count joins by kind and flag likely Cartesian products.

    Updates ``analysis.join_count``, ``analysis.join_types`` and
    ``analysis.potential_cartesian_products``, and appends any messages to
    ``analysis.complexity_warnings`` / ``analysis.complexity_issues``.

    Args:
        expression: The expression whose joins are inspected.
        analysis: The analysis object to update in place.
    """
    join_nodes = list(expression.find_all(exp.Join))
    analysis.join_count = len(join_nodes)

    warnings: list[str] = []
    issues: list[str] = []

    # Simplified check: a FROM clause listing several tables without explicit
    # JOINs is treated as a potential Cartesian product.
    cartesian_products = sum(
        1
        for select in expression.find_all(exp.Select)
        if (from_clause := select.args.get("from"))
        and has_expressions(from_clause)
        and len(from_clause.expressions) > 1
    )
    if cartesian_products > 0:
        issues.append(
            f"Potential Cartesian product detected ({cartesian_products} instances from multiple FROM tables without JOIN)"
        )

    for join_node in join_nodes:
        join_type = join_node.kind.upper() if join_node.kind else "INNER"
        analysis.join_types[join_type] = analysis.join_types.get(join_type, 0) + 1

        # A NATURAL join has an implicit join condition. sqlglot's parser
        # carries NATURAL in the join's ``method`` argument rather than in
        # ``kind``, so looking at ``kind`` alone never exempts it. The
        # ``natural`` flag is checked as well in case an older sqlglot layout
        # is in use - NOTE(review): confirm against the pinned sqlglot version.
        join_method = str(join_node.args.get("method") or "").upper()
        is_natural = join_type == "NATURAL" or join_method == "NATURAL" or bool(join_node.args.get("natural"))

        if join_type == "CROSS":
            issues.append("Explicit CROSS JOIN found, potential Cartesian product.")
            cartesian_products += 1
        elif not join_node.args.get("on") and not join_node.args.get("using") and not is_natural:
            issues.append(f"JOIN ({join_node.sql()}) without ON/USING clause, potential Cartesian product.")
            cartesian_products += 1

    if analysis.join_count > self.max_join_count:
        issues.append(f"Excessive number of joins ({analysis.join_count}), may cause performance issues")
    elif analysis.join_count > self.max_join_count // 2:
        # More than half of the hard limit is worth a warning.
        warnings.append(f"High number of joins ({analysis.join_count}), monitor performance")

    analysis.potential_cartesian_products = cartesian_products
    analysis.complexity_warnings.extend(warnings)
    analysis.complexity_issues.extend(issues)
|
|
321
|
-
|
|
322
|
-
def _analyze_subqueries(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
    """Measure subquery count, nesting depth and correlation.

    Updates ``analysis.subquery_count``, ``analysis.max_subquery_depth``
    and ``analysis.correlated_subquery_count``, and appends any messages
    to ``analysis.complexity_warnings`` / ``analysis.complexity_issues``.

    Args:
        expression: The expression whose subqueries are inspected.
        analysis: The analysis object to update in place.
    """
    subqueries: list[exp.Expression] = list(expression.find_all(exp.Subquery))

    # EXISTS bodies that are bare SELECTs are not wrapped in Subquery nodes.
    for exists_clause in expression.find_all(exp.Exists):
        exists_body = exists_clause.this
        if exists_body and isinstance(exists_body, exp.Select):
            subqueries.append(exists_body)

    # Same for IN (...) bodies, so this count agrees with the one computed
    # in analyze_expression. A body already wrapped in a Subquery node fails
    # the isinstance check and is therefore not counted twice.
    for in_clause in expression.find_all(exp.In):
        in_query = in_clause.args.get("query")
        if in_query and isinstance(in_query, exp.Select):
            subqueries.append(in_query)

    analysis.subquery_count = len(subqueries)

    def count_select_ancestors(node: exp.Expression) -> int:
        """Return how many SELECT nodes enclose ``node``."""
        enclosing = 0
        ancestor = node.parent
        while ancestor:
            if isinstance(ancestor, exp.Select):
                enclosing += 1
            ancestor = ancestor.parent
        return enclosing

    # Nesting depth = the largest number of SELECTs wrapped around any SELECT.
    max_depth = max(
        (count_select_ancestors(select) for select in expression.find_all(exp.Select)),
        default=0,
    )

    # A subquery is treated as correlated when it qualifies a column with a
    # table (alias or name) that exists in the statement but is not declared
    # inside that subquery. Tables the subquery declares itself are excluded;
    # otherwise any qualified reference to its own table would count.
    all_tables = {tbl.alias or tbl.name for tbl in expression.find_all(exp.Table)}
    correlated_count = 0
    for subquery in subqueries:
        own_tables = {tbl.alias or tbl.name for tbl in subquery.find_all(exp.Table)}
        outer_only_tables = all_tables - own_tables
        if any(col.table and col.table in outer_only_tables for col in subquery.find_all(exp.Column)):
            correlated_count += 1

    warnings: list[str] = []
    issues: list[str] = []

    if max_depth > self.max_subquery_depth:
        issues.append(f"Excessive subquery nesting depth ({max_depth})")
    elif max_depth > self.max_subquery_depth // 2:
        warnings.append(f"High subquery nesting depth ({max_depth})")

    if analysis.subquery_count > HIGH_SUBQUERY_COUNT_THRESHOLD:
        warnings.append(f"High number of subqueries ({analysis.subquery_count})")

    if correlated_count > HIGH_CORRELATED_SUBQUERY_THRESHOLD:
        warnings.append(f"Multiple correlated subqueries detected ({correlated_count})")

    analysis.max_subquery_depth = max_depth
    analysis.correlated_subquery_count = correlated_count
    analysis.complexity_warnings.extend(warnings)
    analysis.complexity_issues.extend(issues)
|
|
392
|
-
|
|
393
|
-
def _analyze_where_clauses(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
    """Score WHERE clause complexity by counting AND/OR nodes.

    For every WHERE node in the tree, the AND and OR nodes found beneath
    it are added up. The total is stored in
    ``analysis.where_condition_count`` and compared against
    ``max_where_conditions`` to produce a warning or an issue.

    Args:
        expression: The expression whose WHERE clauses are inspected.
        analysis: The analysis object to update in place.
    """
    condition_total = 0
    for where_node in expression.find_all(exp.Where):
        for connector_type in (exp.And, exp.Or):
            condition_total += sum(1 for _ in where_node.find_all(connector_type))

    found_warnings: list[str] = []
    found_issues: list[str] = []

    if condition_total > self.max_where_conditions:
        found_issues.append(f"Excessive WHERE conditions ({condition_total})")
    elif condition_total > self.max_where_conditions // 2:
        # Past half of the limit: warn only.
        found_warnings.append(f"Complex WHERE clause ({condition_total} conditions)")

    analysis.where_condition_count = condition_total
    analysis.complexity_warnings.extend(found_warnings)
    analysis.complexity_issues.extend(found_issues)
|
|
413
|
-
|
|
414
|
-
def _analyze_functions(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
    """Tally function calls and flag heavy or nested usage.

    Stores the total in ``analysis.function_count`` and appends any
    messages to ``analysis.complexity_warnings`` /
    ``analysis.complexity_issues``.

    Args:
        expression: The expression whose function nodes are inspected.
        analysis: The analysis object to update in place.
    """
    calls_by_name: dict[str, int] = {}
    nested_calls = 0
    total_calls = 0

    for call in expression.find_all(exp.Func):
        name_key = call.name.lower() if call.name else "unknown"
        calls_by_name[name_key] = calls_by_name.get(name_key, 0) + 1

        # A call counts as nested when one of its direct arg values is itself a function.
        for argument in call.args.values():
            if isinstance(argument, exp.Func):
                nested_calls += 1
                break

        total_calls += 1

    expensive_names = ("regexp", "regex", "like", "concat_ws", "group_concat")
    expensive_calls = 0
    for expensive_name in expensive_names:
        expensive_calls += calls_by_name.get(expensive_name, 0)

    found_warnings: list[str] = []
    found_issues: list[str] = []

    if total_calls > self.max_function_calls:
        found_issues.append(f"Excessive function calls ({total_calls})")
    elif total_calls > self.max_function_calls // 2:
        found_warnings.append(f"High number of function calls ({total_calls})")

    if expensive_calls > EXPENSIVE_FUNCTION_THRESHOLD:
        found_warnings.append(f"Multiple expensive functions used ({expensive_calls})")

    if nested_calls > NESTED_FUNCTION_THRESHOLD:
        found_warnings.append(f"Multiple nested function calls ({nested_calls})")

    analysis.function_count = total_calls
    analysis.complexity_warnings.extend(found_warnings)
    analysis.complexity_issues.extend(found_issues)
|
|
446
|
-
|
|
447
|
-
@staticmethod
|
|
448
|
-
def _calculate_comprehensive_complexity_score(analysis: StatementAnalysis) -> int:
|
|
449
|
-
"""Calculate an overall complexity score based on various metrics."""
|
|
450
|
-
score = 0
|
|
451
|
-
|
|
452
|
-
# Join complexity
|
|
453
|
-
score += analysis.join_count * 3
|
|
454
|
-
score += analysis.potential_cartesian_products * 20
|
|
455
|
-
|
|
456
|
-
# Subquery complexity
|
|
457
|
-
score += analysis.subquery_count * 5 # Use actual subquery count
|
|
458
|
-
score += analysis.max_subquery_depth * 10
|
|
459
|
-
score += analysis.correlated_subquery_count * 8
|
|
460
|
-
|
|
461
|
-
# CTE complexity (CTEs are complex, especially recursive ones)
|
|
462
|
-
score += analysis.cte_count * 7
|
|
463
|
-
|
|
464
|
-
# WHERE clause complexity
|
|
465
|
-
score += analysis.where_condition_count * 2
|
|
466
|
-
|
|
467
|
-
# Function complexity
|
|
468
|
-
score += analysis.function_count * 1
|
|
469
|
-
|
|
470
|
-
return score
|
|
471
|
-
|
|
472
|
-
@staticmethod
def _extract_primary_table_name(expr: exp.Expression) -> "Optional[str]":
    """Return the name of the main table an expression targets.

    Handles INSERT, UPDATE, DELETE and SELECT; any other expression type
    yields ``None``.

    Args:
        expr: The parsed expression.

    Returns:
        The table name, or ``None`` when no target could be identified.
    """
    if isinstance(expr, exp.Insert):
        target = expr.this
        # ``INSERT INTO t (a, b) ...`` wraps the target table in a Schema
        # node that also carries the column list; unwrap it so the table is
        # still found. This is the same shape _extract_columns reads from.
        if isinstance(target, exp.Schema):
            target = target.this
        if isinstance(target, exp.Table):
            return target.name
        if isinstance(target, (exp.Identifier, exp.Var)):
            return str(target.name)
        return None

    if isinstance(expr, (exp.Update, exp.Delete)):
        target = expr.this
        if not target:
            return None
        if isinstance(target, (exp.Table, exp.Identifier, exp.Var)):
            return str(target.name)
        # Fall back to the node's string form.
        return str(target)

    if isinstance(expr, exp.Select) and (from_clause := expr.find(exp.From)) and from_clause.this:
        source = from_clause.this
        if isinstance(source, (exp.Table, exp.Identifier, exp.Var)):
            return str(source.name)
        return str(source)

    return None
|
|
492
|
-
|
|
493
|
-
@staticmethod
def _extract_columns(expr: exp.Expression) -> "list[str]":
    """Collect column names from an INSERT target or a SELECT projection.

    Args:
        expr: The parsed expression.

    Returns:
        For INSERT, the names listed on the target when it carries
        expressions. For SELECT, the column name or alias of each
        projection that is a column, alias, identifier or var. An empty
        list for every other expression type.
    """
    nameable_types = (exp.Column, exp.Identifier, exp.Var)

    if isinstance(expr, exp.Insert):
        target = expr.this
        if target and has_expressions(target):
            return [str(item.name) for item in target.expressions if isinstance(item, nameable_types)]
        return []

    if isinstance(expr, exp.Select):
        names: list[str] = []
        for projection in expr.expressions:
            if isinstance(projection, nameable_types):
                names.append(str(projection.name))
            elif isinstance(projection, exp.Alias) and projection.alias:
                names.append(str(projection.alias))
        return names

    return []
|
|
514
|
-
|
|
515
|
-
@staticmethod
def _extract_all_tables(expr: exp.Expression) -> "list[str]":
    """List every distinct table name referenced anywhere in the expression.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        Table names in order of first appearance, without duplicates.
    """
    # dict.fromkeys keeps first-seen order while discarding repeats.
    return list(dict.fromkeys(str(node.name) for node in expr.find_all(exp.Table)))
|
|
525
|
-
|
|
526
|
-
@staticmethod
def _is_insert_from_select(expr: exp.Expression) -> bool:
    """Tell whether the expression is an INSERT whose rows come from a SELECT.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        True only when ``expr`` is an INSERT and its source expression is a
        SELECT.
    """
    if isinstance(expr, exp.Insert):
        source = expr.expression
        return bool(source) and isinstance(source, exp.Select)
    return False
|
|
532
|
-
|
|
533
|
-
@staticmethod
def _extract_parameters(_expr: exp.Expression) -> "dict[str, Any]":
    """Return the parameters found in the expression.

    Placeholder implementation: the argument is accepted but not inspected,
    and the result is always an empty dict.

    Args:
        _expr: Parsed SQL expression (currently ignored).

    Returns:
        A new empty dictionary.
    """
    return {}
|
|
540
|
-
|
|
541
|
-
@staticmethod
def _has_subqueries(expr: exp.Expression) -> bool:
    """Report whether the expression contains a subquery.

    Besides explicit ``Subquery`` nodes, a SELECT attached directly to an
    ``IN`` (as its ``query`` argument) or to an ``EXISTS`` (as its ``this``)
    also counts. As a last resort, more than one SELECT outside of any CTE
    is treated as evidence of a subquery. SELECTs nested under a CTE are not
    counted by that last check.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        True when any of the checks above matches.
    """
    if expr.find(exp.Subquery):
        return True

    # IN (...) may hold a bare SELECT in its "query" argument.
    if any(
        (candidate := node.args.get("query")) and isinstance(candidate, exp.Select)
        for node in expr.find_all(exp.In)
    ):
        return True

    # EXISTS (...) may hold a bare SELECT as its operand.
    if any(node.this and isinstance(node.this, exp.Select) for node in expr.find_all(exp.Exists)):
        return True

    # Count SELECT nodes that have no CTE among their ancestors.
    outside_cte = 0
    for select in expr.find_all(exp.Select):
        ancestor = select.parent
        while ancestor and not isinstance(ancestor, exp.CTE):
            ancestor = ancestor.parent
        if not ancestor:
            outside_cte += 1

    return outside_cte > 1
|
|
579
|
-
|
|
580
|
-
@staticmethod
def _count_joins(expr: exp.Expression) -> int:
    """Count the JOIN nodes found anywhere in the expression.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        Number of ``Join`` nodes yielded by a full traversal.
    """
    return sum(1 for _ in expr.find_all(exp.Join))
|
|
584
|
-
|
|
585
|
-
@staticmethod
def _extract_aggregate_functions(expr: exp.Expression) -> "list[str]":
    """Name the common aggregate functions used in the expression.

    Only COUNT, SUM, AVG, MIN and MAX are looked for.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        Lower-cased node class names (``"count"``, ``"sum"``, ...) for each
        aggregate kind present, in the fixed order listed above.
    """
    candidates = (exp.Count, exp.Sum, exp.Avg, exp.Min, exp.Max)
    return [kind.__name__.lower() for kind in candidates if expr.find(kind)]
|
|
600
|
-
|
|
601
|
-
def clear_cache(self) -> None:
    """Empty the parse cache and then the analysis cache."""
    # Each attribute is looked up and cleared in turn, parse cache first.
    for cache_attr in ("_parse_cache", "_analysis_cache"):
        getattr(self, cache_attr).clear()
|
|
605
|
-
|
|
606
|
-
@staticmethod
def _extract_operations(expr: exp.Expression) -> "list[str]":
    """List the SQL operations the expression performs.

    The statement kind of the root node (if recognised) comes first,
    followed by a label for each clause or set operation found anywhere in
    the tree.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        Upper-case operation labels such as ``"SELECT"``, ``"JOIN"`` or
        ``"GROUP BY"``.
    """
    statement_labels = (
        (exp.Select, "SELECT"),
        (exp.Insert, "INSERT"),
        (exp.Update, "UPDATE"),
        (exp.Delete, "DELETE"),
        (exp.Create, "CREATE"),
        (exp.Drop, "DROP"),
        (exp.Alter, "ALTER"),
    )
    clause_labels = (
        (exp.Join, "JOIN"),
        (exp.Group, "GROUP BY"),
        (exp.Order, "ORDER BY"),
        (exp.Having, "HAVING"),
        (exp.Union, "UNION"),
        (exp.Intersect, "INTERSECT"),
        (exp.Except, "EXCEPT"),
    )

    found = []

    # Only the first matching statement kind is recorded for the root node.
    for node_type, label in statement_labels:
        if isinstance(expr, node_type):
            found.append(label)
            break

    # Clauses are searched for throughout the whole tree.
    found.extend(label for node_type, label in clause_labels if expr.find(node_type))
    return found
|
|
642
|
-
|
|
643
|
-
@staticmethod
def _has_window_functions(expr: exp.Expression) -> bool:
    """Report whether a window function appears anywhere in the expression.

    Args:
        expr: Parsed SQL expression to inspect.

    Returns:
        True when a ``Window`` node is found, otherwise False.
    """
    window_node = expr.find(exp.Window)
    return True if window_node else False
|