sqlspec-0.13.1-py3-none-any.whl → sqlspec-0.16.2-py3-none-any.whl

This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



Files changed (185)
  1. sqlspec/__init__.py +71 -8
  2. sqlspec/__main__.py +12 -0
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +930 -136
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +116 -285
  10. sqlspec/adapters/adbc/driver.py +462 -340
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +202 -150
  14. sqlspec/adapters/aiosqlite/driver.py +226 -247
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -199
  18. sqlspec/adapters/asyncmy/driver.py +257 -215
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +81 -214
  22. sqlspec/adapters/asyncpg/driver.py +284 -359
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -299
  26. sqlspec/adapters/bigquery/driver.py +474 -634
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +414 -397
  30. sqlspec/adapters/duckdb/driver.py +342 -393
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -458
  34. sqlspec/adapters/oracledb/driver.py +505 -531
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -307
  38. sqlspec/adapters/psqlpy/driver.py +504 -213
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -472
  42. sqlspec/adapters/psycopg/driver.py +704 -825
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +208 -142
  46. sqlspec/adapters/sqlite/driver.py +263 -278
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder/base.py → builder/_base.py} +184 -86
  50. sqlspec/{statement/builder/column.py → builder/_column.py} +97 -60
  51. sqlspec/{statement/builder/ddl.py → builder/_ddl.py} +61 -131
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +4 -10
  53. sqlspec/{statement/builder/delete.py → builder/_delete.py} +10 -30
  54. sqlspec/builder/_insert.py +421 -0
  55. sqlspec/builder/_merge.py +71 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +49 -26
  57. sqlspec/builder/_select.py +170 -0
  58. sqlspec/{statement/builder/update.py → builder/_update.py} +16 -20
  59. sqlspec/builder/mixins/__init__.py +55 -0
  60. sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
  61. sqlspec/{statement/builder/mixins/_delete_from.py → builder/mixins/_delete_operations.py} +8 -1
  62. sqlspec/builder/mixins/_insert_operations.py +244 -0
  63. sqlspec/{statement/builder/mixins/_join.py → builder/mixins/_join_operations.py} +45 -13
  64. sqlspec/{statement/builder/mixins/_merge_clauses.py → builder/mixins/_merge_operations.py} +188 -30
  65. sqlspec/builder/mixins/_order_limit_operations.py +135 -0
  66. sqlspec/builder/mixins/_pivot_operations.py +153 -0
  67. sqlspec/builder/mixins/_select_operations.py +604 -0
  68. sqlspec/builder/mixins/_update_operations.py +202 -0
  69. sqlspec/builder/mixins/_where_clause.py +644 -0
  70. sqlspec/cli.py +247 -0
  71. sqlspec/config.py +183 -138
  72. sqlspec/core/__init__.py +63 -0
  73. sqlspec/core/cache.py +871 -0
  74. sqlspec/core/compiler.py +417 -0
  75. sqlspec/core/filters.py +830 -0
  76. sqlspec/core/hashing.py +310 -0
  77. sqlspec/core/parameters.py +1237 -0
  78. sqlspec/core/result.py +677 -0
  79. sqlspec/{statement → core}/splitter.py +321 -191
  80. sqlspec/core/statement.py +676 -0
  81. sqlspec/driver/__init__.py +7 -10
  82. sqlspec/driver/_async.py +422 -163
  83. sqlspec/driver/_common.py +545 -287
  84. sqlspec/driver/_sync.py +426 -160
  85. sqlspec/driver/mixins/__init__.py +2 -13
  86. sqlspec/driver/mixins/_result_tools.py +193 -0
  87. sqlspec/driver/mixins/_sql_translator.py +65 -14
  88. sqlspec/exceptions.py +5 -252
  89. sqlspec/extensions/aiosql/adapter.py +93 -96
  90. sqlspec/extensions/litestar/__init__.py +2 -1
  91. sqlspec/extensions/litestar/cli.py +48 -0
  92. sqlspec/extensions/litestar/config.py +0 -1
  93. sqlspec/extensions/litestar/handlers.py +15 -26
  94. sqlspec/extensions/litestar/plugin.py +21 -16
  95. sqlspec/extensions/litestar/providers.py +17 -52
  96. sqlspec/loader.py +423 -104
  97. sqlspec/migrations/__init__.py +35 -0
  98. sqlspec/migrations/base.py +414 -0
  99. sqlspec/migrations/commands.py +443 -0
  100. sqlspec/migrations/loaders.py +402 -0
  101. sqlspec/migrations/runner.py +213 -0
  102. sqlspec/migrations/tracker.py +140 -0
  103. sqlspec/migrations/utils.py +129 -0
  104. sqlspec/protocols.py +51 -186
  105. sqlspec/storage/__init__.py +1 -1
  106. sqlspec/storage/backends/base.py +37 -40
  107. sqlspec/storage/backends/fsspec.py +136 -112
  108. sqlspec/storage/backends/obstore.py +138 -160
  109. sqlspec/storage/capabilities.py +5 -4
  110. sqlspec/storage/registry.py +57 -106
  111. sqlspec/typing.py +136 -115
  112. sqlspec/utils/__init__.py +2 -2
  113. sqlspec/utils/correlation.py +0 -3
  114. sqlspec/utils/deprecation.py +6 -6
  115. sqlspec/utils/fixtures.py +6 -6
  116. sqlspec/utils/logging.py +0 -2
  117. sqlspec/utils/module_loader.py +7 -12
  118. sqlspec/utils/singleton.py +0 -1
  119. sqlspec/utils/sync_tools.py +17 -38
  120. sqlspec/utils/text.py +12 -51
  121. sqlspec/utils/type_guards.py +482 -235
  122. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/METADATA +7 -2
  123. sqlspec-0.16.2.dist-info/RECORD +134 -0
  124. sqlspec-0.16.2.dist-info/entry_points.txt +2 -0
  125. sqlspec/driver/connection.py +0 -207
  126. sqlspec/driver/mixins/_csv_writer.py +0 -91
  127. sqlspec/driver/mixins/_pipeline.py +0 -512
  128. sqlspec/driver/mixins/_result_utils.py +0 -140
  129. sqlspec/driver/mixins/_storage.py +0 -926
  130. sqlspec/driver/mixins/_type_coercion.py +0 -130
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/service/__init__.py +0 -4
  133. sqlspec/service/_util.py +0 -147
  134. sqlspec/service/base.py +0 -1131
  135. sqlspec/service/pagination.py +0 -26
  136. sqlspec/statement/__init__.py +0 -21
  137. sqlspec/statement/builder/insert.py +0 -288
  138. sqlspec/statement/builder/merge.py +0 -95
  139. sqlspec/statement/builder/mixins/__init__.py +0 -65
  140. sqlspec/statement/builder/mixins/_aggregate_functions.py +0 -250
  141. sqlspec/statement/builder/mixins/_case_builder.py +0 -91
  142. sqlspec/statement/builder/mixins/_common_table_expr.py +0 -90
  143. sqlspec/statement/builder/mixins/_from.py +0 -63
  144. sqlspec/statement/builder/mixins/_group_by.py +0 -118
  145. sqlspec/statement/builder/mixins/_having.py +0 -35
  146. sqlspec/statement/builder/mixins/_insert_from_select.py +0 -47
  147. sqlspec/statement/builder/mixins/_insert_into.py +0 -36
  148. sqlspec/statement/builder/mixins/_insert_values.py +0 -67
  149. sqlspec/statement/builder/mixins/_limit_offset.py +0 -53
  150. sqlspec/statement/builder/mixins/_order_by.py +0 -46
  151. sqlspec/statement/builder/mixins/_pivot.py +0 -79
  152. sqlspec/statement/builder/mixins/_returning.py +0 -37
  153. sqlspec/statement/builder/mixins/_select_columns.py +0 -61
  154. sqlspec/statement/builder/mixins/_set_ops.py +0 -122
  155. sqlspec/statement/builder/mixins/_unpivot.py +0 -77
  156. sqlspec/statement/builder/mixins/_update_from.py +0 -55
  157. sqlspec/statement/builder/mixins/_update_set.py +0 -94
  158. sqlspec/statement/builder/mixins/_update_table.py +0 -29
  159. sqlspec/statement/builder/mixins/_where.py +0 -401
  160. sqlspec/statement/builder/mixins/_window_functions.py +0 -86
  161. sqlspec/statement/builder/select.py +0 -221
  162. sqlspec/statement/filters.py +0 -596
  163. sqlspec/statement/parameter_manager.py +0 -220
  164. sqlspec/statement/parameters.py +0 -867
  165. sqlspec/statement/pipelines/__init__.py +0 -210
  166. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  167. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  168. sqlspec/statement/pipelines/context.py +0 -115
  169. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  170. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  171. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  172. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  173. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  174. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  175. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  176. sqlspec/statement/pipelines/validators/_performance.py +0 -718
  177. sqlspec/statement/pipelines/validators/_security.py +0 -967
  178. sqlspec/statement/result.py +0 -435
  179. sqlspec/statement/sql.py +0 -1704
  180. sqlspec/statement/sql_compiler.py +0 -140
  181. sqlspec/utils/cached_property.py +0 -25
  182. sqlspec-0.13.1.dist-info/RECORD +0 -150
  183. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/WHEEL +0 -0
  184. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/LICENSE +0 -0
  185. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/NOTICE +0 -0
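The bulk of this release is a restructuring of the old sqlspec.statement package into sqlspec.core and sqlspec.builder, the removal of the service and statement.pipelines packages, and the addition of migrations support and a CLI. As a rough orientation aid, the mapping below is inferred purely from the rename entries in the file list above; the actual public re-exports in 0.16.2 (for example, what sqlspec/__init__.py or sqlspec/builder/__init__.py expose) are not shown in this listing and may differ.

# Module path moves inferred from the rename entries above. Orientation only --
# not an official migration table; public names are normally re-exported from
# package __init__ files, which this diff does not show.
MOVED_MODULES = {
    "sqlspec.statement.splitter": "sqlspec.core.splitter",
    "sqlspec.statement.builder": "sqlspec.builder",
    "sqlspec.statement.builder.base": "sqlspec.builder._base",
    "sqlspec.statement.builder.column": "sqlspec.builder._column",
    "sqlspec.statement.builder.ddl": "sqlspec.builder._ddl",
    "sqlspec.statement.builder.delete": "sqlspec.builder._delete",
    "sqlspec.statement.builder.update": "sqlspec.builder._update",
}

# Removed outright (no renamed counterpart appears in this diff):
REMOVED_PACKAGES = ["sqlspec.service", "sqlspec.statement.pipelines"]

# New in 0.16.2: sqlspec.core.*, sqlspec.migrations.*, sqlspec.cli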
sqlspec/statement/pipelines/analyzers/_analyzer.py (file deleted)
@@ -1,646 +0,0 @@
- """SQL statement analyzer for extracting metadata and complexity metrics."""
-
- import time
- from dataclasses import dataclass, field
- from typing import TYPE_CHECKING, Any, Optional
-
- from sqlglot import exp, parse_one
- from sqlglot.errors import ParseError as SQLGlotParseError
-
- from sqlspec.protocols import ProcessorProtocol
- from sqlspec.statement.pipelines.context import AnalysisFinding
- from sqlspec.utils.correlation import CorrelationContext
- from sqlspec.utils.logging import get_logger
- from sqlspec.utils.type_guards import has_expressions
-
- if TYPE_CHECKING:
-     from sqlglot.dialects.dialect import DialectType
-
-     from sqlspec.statement.pipelines.context import SQLProcessingContext
-     from sqlspec.statement.sql import SQLConfig
-
- __all__ = ("StatementAnalysis", "StatementAnalyzer")
-
- # Constants for statement analysis
- HIGH_SUBQUERY_COUNT_THRESHOLD = 10
- """Threshold for flagging high number of subqueries."""
-
- HIGH_CORRELATED_SUBQUERY_THRESHOLD = 3
- """Threshold for flagging multiple correlated subqueries."""
-
- EXPENSIVE_FUNCTION_THRESHOLD = 5
- """Threshold for flagging multiple expensive functions."""
-
- NESTED_FUNCTION_THRESHOLD = 3
- """Threshold for flagging multiple nested function calls."""
-
- logger = get_logger("pipelines.analyzers")
-
-
- @dataclass
- class StatementAnalysis:
-     """Analysis result for parsed SQL statements."""
-
-     statement_type: str
-     """Type of SQL statement (Insert, Select, Update, Delete, etc.)"""
-     expression: exp.Expression
-     """Parsed SQLGlot expression"""
-     table_name: "Optional[str]" = None
-     """Primary table name if detected"""
-     columns: "list[str]" = field(default_factory=list)
-     """Column names if detected"""
-     has_returning: bool = False
-     """Whether statement has RETURNING clause"""
-     is_from_select: bool = False
-     """Whether this is an INSERT FROM SELECT pattern"""
-     parameters: "dict[str, Any]" = field(default_factory=dict)
-     """Extracted parameters from the SQL"""
-     tables: "list[str]" = field(default_factory=list)
-     """All table names referenced in the query"""
-     complexity_score: int = 0
-     """Complexity score based on query structure"""
-     uses_subqueries: bool = False
-     """Whether the query uses subqueries"""
-     join_count: int = 0
-     """Number of joins in the query"""
-     aggregate_functions: "list[str]" = field(default_factory=list)
-     """List of aggregate functions used"""
-
-     # Enhanced complexity metrics
-     join_types: "dict[str, int]" = field(default_factory=dict)
-     """Types and counts of joins"""
-     max_subquery_depth: int = 0
-     """Maximum subquery nesting depth"""
-     correlated_subquery_count: int = 0
-     """Number of correlated subqueries"""
-     function_count: int = 0
-     """Total number of function calls"""
-     where_condition_count: int = 0
-     """Number of WHERE conditions"""
-     potential_cartesian_products: int = 0
-     """Number of potential Cartesian products detected"""
-     complexity_warnings: "list[str]" = field(default_factory=list)
-     """Warnings about query complexity"""
-     complexity_issues: "list[str]" = field(default_factory=list)
-     """Issues with query complexity"""
-
-     # Additional attributes for aggregator compatibility
-     subquery_count: int = 0
-     """Total number of subqueries"""
-     operations: "list[str]" = field(default_factory=list)
-     """SQL operations performed (SELECT, JOIN, etc.)"""
-     has_aggregation: bool = False
-     """Whether query uses aggregation functions"""
-     has_window_functions: bool = False
-     """Whether query uses window functions"""
-     cte_count: int = 0
-     """Number of CTEs (Common Table Expressions)"""
-
-
- class StatementAnalyzer(ProcessorProtocol):
-     """SQL statement analyzer that extracts metadata and insights from SQL statements.
-
-     This processor analyzes SQL expressions to extract useful metadata without
-     modifying the SQL itself. It can be used in pipelines to gather insights
-     about query complexity, table usage, etc.
-     """
-
-     def __init__(
-         self,
-         cache_size: int = 1000,
-         max_join_count: int = 10,
-         max_subquery_depth: int = 3,
-         max_function_calls: int = 20,
-         max_where_conditions: int = 15,
-     ) -> None:
-         """Initialize the analyzer.
-
-         Args:
-             cache_size: Maximum number of parsed expressions to cache.
-             max_join_count: Maximum allowed joins before flagging.
-             max_subquery_depth: Maximum allowed subquery nesting depth.
-             max_function_calls: Maximum allowed function calls.
-             max_where_conditions: Maximum allowed WHERE conditions.
-         """
-         self.cache_size = cache_size
-         self.max_join_count = max_join_count
-         self.max_subquery_depth = max_subquery_depth
-         self.max_function_calls = max_function_calls
-         self.max_where_conditions = max_where_conditions
-         self._parse_cache: dict[tuple[str, Optional[str]], exp.Expression] = {}
-         self._analysis_cache: dict[str, StatementAnalysis] = {}
-
-     def process(
-         self, expression: "Optional[exp.Expression]", context: "SQLProcessingContext"
-     ) -> "Optional[exp.Expression]":
-         """Process the SQL expression to extract analysis metadata and store it in the context."""
-         if expression is None:
-             return None
-
-         CorrelationContext.get()
-         start_time = time.perf_counter()
-
-         if not context.config.enable_analysis:
-             return expression
-
-         analysis_result_obj = self.analyze_expression(expression, context.dialect, context.config)
-
-         duration = time.perf_counter() - start_time
-
-         if analysis_result_obj.complexity_warnings:
-             for warning in analysis_result_obj.complexity_warnings:
-                 finding = AnalysisFinding(key="complexity_warning", value=warning, processor=self.__class__.__name__)
-                 context.analysis_findings.append(finding)
-
-         if analysis_result_obj.complexity_issues:
-             for issue in analysis_result_obj.complexity_issues:
-                 finding = AnalysisFinding(key="complexity_issue", value=issue, processor=self.__class__.__name__)
-                 context.analysis_findings.append(finding)
-
-         # Store metadata in context
-         context.metadata[self.__class__.__name__] = {
-             "duration_ms": duration * 1000,
-             "statement_type": analysis_result_obj.statement_type,
-             "table_count": len(analysis_result_obj.tables),
-             "has_subqueries": analysis_result_obj.uses_subqueries,
-             "join_count": analysis_result_obj.join_count,
-             "complexity_score": analysis_result_obj.complexity_score,
-         }
-         return expression
-
-     def analyze_statement(self, sql_string: str, dialect: "DialectType" = None) -> StatementAnalysis:
-         """Analyze SQL string and extract components efficiently.
-
-         Args:
-             sql_string: The SQL string to analyze
-             dialect: SQL dialect for parsing
-
-         Returns:
-             StatementAnalysis with extracted components
-         """
-         # Check cache first
-         cache_key = sql_string.strip()
-         if cache_key in self._analysis_cache:
-             return self._analysis_cache[cache_key]
-
-         # Use cache key for expression parsing performance
-         parse_cache_key = (sql_string.strip(), str(dialect) if dialect else None)
-
-         if parse_cache_key in self._parse_cache:
-             expr = self._parse_cache[parse_cache_key]
-         else:
-             try:
-                 expr = exp.maybe_parse(sql_string, dialect=dialect)
-                 if expr is None:
-                     expr = parse_one(sql_string, dialect=dialect)
-
-                 # Simple expressions like Alias or Identifier are not valid SQL statements
-                 valid_statement_types = (
-                     exp.Select,
-                     exp.Insert,
-                     exp.Update,
-                     exp.Delete,
-                     exp.Create,
-                     exp.Drop,
-                     exp.Alter,
-                     exp.Merge,
-                     exp.Command,
-                     exp.Set,
-                     exp.Show,
-                     exp.Describe,
-                     exp.Use,
-                     exp.Union,
-                     exp.Intersect,
-                     exp.Except,
-                 )
-                 if not isinstance(expr, valid_statement_types):
-                     logger.warning("Parsed expression is not a valid SQL statement: %s", type(expr).__name__)
-                     return StatementAnalysis(statement_type="Unknown", expression=exp.Anonymous(this="UNKNOWN"))
-
-                 if len(self._parse_cache) < self.cache_size:
-                     self._parse_cache[parse_cache_key] = expr
-             except (SQLGlotParseError, Exception) as e:
-                 logger.warning("Failed to parse SQL statement: %s", e)
-                 return StatementAnalysis(statement_type="Unknown", expression=exp.Anonymous(this="UNKNOWN"))
-
-         return self.analyze_expression(expr)
-
-     def analyze_expression(
-         self, expression: exp.Expression, dialect: "DialectType" = None, config: "Optional[SQLConfig]" = None
-     ) -> StatementAnalysis:
-         """Analyze a SQLGlot expression directly, potentially using validation results for context."""
-         # This caching needs to be context-aware if analysis depends on prior steps (e.g. validation_result)
-         # For simplicity, let's assume for now direct expression analysis is cacheable if validation_result is not used deeply.
-         cache_key = expression.sql()  # Simplified cache key
-         if cache_key in self._analysis_cache:
-             return self._analysis_cache[cache_key]
-
-         analysis = StatementAnalysis(
-             statement_type=type(expression).__name__,
-             expression=expression,
-             table_name=self._extract_primary_table_name(expression),
-             columns=self._extract_columns(expression),
-             has_returning=bool(expression.find(exp.Returning)),
-             is_from_select=self._is_insert_from_select(expression),
-             parameters=self._extract_parameters(expression),
-             tables=self._extract_all_tables(expression),
-             uses_subqueries=self._has_subqueries(expression),
-             join_count=self._count_joins(expression),
-             aggregate_functions=self._extract_aggregate_functions(expression),
-         )
-         # Calculate subquery_count and cte_count before complexity analysis
-         analysis.subquery_count = len(list(expression.find_all(exp.Subquery)))
-         # Also need to account for IN/EXISTS subqueries that aren't wrapped in Subquery nodes
-         for in_clause in expression.find_all(exp.In):
-             if in_clause.args.get("query") and isinstance(in_clause.args.get("query"), exp.Select):
-                 analysis.subquery_count += 1
-         for exists_clause in expression.find_all(exp.Exists):
-             if exists_clause.this and isinstance(exists_clause.this, exp.Select):
-                 analysis.subquery_count += 1
-
-         # Calculate CTE count before complexity score
-         analysis.cte_count = len(list(expression.find_all(exp.CTE)))
-
-         self._analyze_complexity(expression, analysis)
-         analysis.complexity_score = self._calculate_comprehensive_complexity_score(analysis)
-         analysis.operations = self._extract_operations(expression)
-         analysis.has_aggregation = len(analysis.aggregate_functions) > 0
-         analysis.has_window_functions = self._has_window_functions(expression)
-
-         if len(self._analysis_cache) < self.cache_size:
-             self._analysis_cache[cache_key] = analysis
-         return analysis
-
-     def _analyze_complexity(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
-         """Perform comprehensive complexity analysis."""
-         self._analyze_joins(expression, analysis)
-         self._analyze_subqueries(expression, analysis)
-         self._analyze_where_clauses(expression, analysis)
-         self._analyze_functions(expression, analysis)
-
-     def _analyze_joins(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
-         """Analyze JOIN operations for potential issues."""
-         join_nodes = list(expression.find_all(exp.Join))
-         analysis.join_count = len(join_nodes)
-
-         warnings = []
-         issues = []
-         cartesian_products = 0
-
-         for select in expression.find_all(exp.Select):
-             from_clause = select.args.get("from")
-             if from_clause and has_expressions(from_clause) and len(from_clause.expressions) > 1:
-                 # This logic checks for multiple tables in FROM without explicit JOINs
-                 # It's a simplified check for potential cartesian products
-                 cartesian_products += 1
-
-         if cartesian_products > 0:
-             issues.append(
-                 f"Potential Cartesian product detected ({cartesian_products} instances from multiple FROM tables without JOIN)"
-             )
-
-         for join_node in join_nodes:
-             join_type = join_node.kind.upper() if join_node.kind else "INNER"
-             analysis.join_types[join_type] = analysis.join_types.get(join_type, 0) + 1
-
-             if join_type == "CROSS":
-                 issues.append("Explicit CROSS JOIN found, potential Cartesian product.")
-                 cartesian_products += 1
-             elif not join_node.args.get("on") and not join_node.args.get("using") and join_type != "NATURAL":
-                 issues.append(f"JOIN ({join_node.sql()}) without ON/USING clause, potential Cartesian product.")
-                 cartesian_products += 1
-
-         if analysis.join_count > self.max_join_count:
-             issues.append(f"Excessive number of joins ({analysis.join_count}), may cause performance issues")
-         elif analysis.join_count > self.max_join_count // 2:
-             warnings.append(f"High number of joins ({analysis.join_count}), monitor performance")
-
-         analysis.potential_cartesian_products = cartesian_products
-         analysis.complexity_warnings.extend(warnings)
-         analysis.complexity_issues.extend(issues)
-
-     def _analyze_subqueries(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
-         """Analyze subquery complexity and nesting depth."""
-         subqueries: list[exp.Expression] = list(expression.find_all(exp.Subquery))
-         # Workaround for EXISTS clauses: sqlglot doesn't wrap EXISTS subqueries in Subquery nodes
-         subqueries.extend(
-             [
-                 exists_clause.this
-                 for exists_clause in expression.find_all(exp.Exists)
-                 if exists_clause.this and isinstance(exists_clause.this, exp.Select)
-             ]
-         )
-
-         analysis.subquery_count = len(subqueries)
-         max_depth = 0
-         correlated_count = 0
-
-         # Calculate maximum nesting depth - simpler approach
-         def calculate_depth(expr: exp.Expression) -> int:
-             """Calculate the maximum depth of nested SELECT statements."""
-             max_depth = 0
-
-             select_statements = list(expr.find_all(exp.Select))
-
-             for select in select_statements:
-                 # Count how many parent SELECTs this one has
-                 depth = 0
-                 current = select.parent
-                 while current:
-                     if isinstance(current, exp.Select):
-                         depth += 1
-                     elif isinstance(current, (exp.Subquery, exp.In, exp.Exists)):
-                         # These nodes can contain SELECTs, check their parent
-                         parent = current.parent
-                         while parent and not isinstance(parent, exp.Select):
-                             parent = parent.parent
-                         if parent:
-                             current = parent
-                             continue
-                     current = current.parent if current else None
-
-                 max_depth = max(max_depth, depth)
-
-             return max_depth
-
-         max_depth = calculate_depth(expression)
-         outer_tables = {tbl.alias or tbl.name for tbl in expression.find_all(exp.Table)}
-         for subquery in subqueries:
-             for col in subquery.find_all(exp.Column):
-                 if col.table and col.table in outer_tables:
-                     correlated_count += 1
-                     break
-
-         warnings = []
-         issues = []
-
-         if max_depth > self.max_subquery_depth:
-             issues.append(f"Excessive subquery nesting depth ({max_depth})")
-         elif max_depth > self.max_subquery_depth // 2:
-             warnings.append(f"High subquery nesting depth ({max_depth})")
-
-         if analysis.subquery_count > HIGH_SUBQUERY_COUNT_THRESHOLD:
-             warnings.append(f"High number of subqueries ({analysis.subquery_count})")
-
-         if correlated_count > HIGH_CORRELATED_SUBQUERY_THRESHOLD:
-             warnings.append(f"Multiple correlated subqueries detected ({correlated_count})")
-
-         analysis.max_subquery_depth = max_depth
-         analysis.correlated_subquery_count = correlated_count
-         analysis.complexity_warnings.extend(warnings)
-         analysis.complexity_issues.extend(issues)
-
-     def _analyze_where_clauses(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
-         """Analyze WHERE clause complexity."""
-         where_clauses = list(expression.find_all(exp.Where))
-         total_conditions = 0
-
-         for where_clause in where_clauses:
-             total_conditions += len(list(where_clause.find_all(exp.And)))
-             total_conditions += len(list(where_clause.find_all(exp.Or)))
-
-         warnings = []
-         issues = []
-
-         if total_conditions > self.max_where_conditions:
-             issues.append(f"Excessive WHERE conditions ({total_conditions})")
-         elif total_conditions > self.max_where_conditions // 2:
-             warnings.append(f"Complex WHERE clause ({total_conditions} conditions)")
-
-         analysis.where_condition_count = total_conditions
-         analysis.complexity_warnings.extend(warnings)
-         analysis.complexity_issues.extend(issues)
-
-     def _analyze_functions(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
-         """Analyze function usage and complexity."""
-         function_types: dict[str, int] = {}
-         nested_functions = 0
-         function_count = 0
-         for func in expression.find_all(exp.Func):
-             func_name = func.name.lower() if func.name else "unknown"
-             function_types[func_name] = function_types.get(func_name, 0) + 1
-             if any(isinstance(arg, exp.Func) for arg in func.args.values()):
-                 nested_functions += 1
-             function_count += 1
-
-         expensive_functions = {"regexp", "regex", "like", "concat_ws", "group_concat"}
-         expensive_count = sum(function_types.get(func, 0) for func in expensive_functions)
-
-         warnings = []
-         issues = []
-
-         if function_count > self.max_function_calls:
-             issues.append(f"Excessive function calls ({function_count})")
-         elif function_count > self.max_function_calls // 2:
-             warnings.append(f"High number of function calls ({function_count})")
-
-         if expensive_count > EXPENSIVE_FUNCTION_THRESHOLD:
-             warnings.append(f"Multiple expensive functions used ({expensive_count})")
-
-         if nested_functions > NESTED_FUNCTION_THRESHOLD:
-             warnings.append(f"Multiple nested function calls ({nested_functions})")
-
-         analysis.function_count = function_count
-         analysis.complexity_warnings.extend(warnings)
-         analysis.complexity_issues.extend(issues)
-
-     @staticmethod
-     def _calculate_comprehensive_complexity_score(analysis: StatementAnalysis) -> int:
-         """Calculate an overall complexity score based on various metrics."""
-         score = 0
-
-         # Join complexity
-         score += analysis.join_count * 3
-         score += analysis.potential_cartesian_products * 20
-
-         # Subquery complexity
-         score += analysis.subquery_count * 5  # Use actual subquery count
-         score += analysis.max_subquery_depth * 10
-         score += analysis.correlated_subquery_count * 8
-
-         # CTE complexity (CTEs are complex, especially recursive ones)
-         score += analysis.cte_count * 7
-
-         # WHERE clause complexity
-         score += analysis.where_condition_count * 2
-
-         # Function complexity
-         score += analysis.function_count * 1
-
-         return score
-
-     @staticmethod
-     def _extract_primary_table_name(expr: exp.Expression) -> "Optional[str]":
-         """Extract the primary table name from an expression."""
-         if isinstance(expr, exp.Insert):
-             if expr.this:
-                 table = expr.this
-                 if isinstance(table, exp.Table):
-                     return table.name
-                 if isinstance(table, (exp.Identifier, exp.Var)):
-                     return str(table.name)
-         elif isinstance(expr, (exp.Update, exp.Delete)):
-             if expr.this:
-                 if isinstance(expr.this, (exp.Table, exp.Identifier, exp.Var)):
-                     return str(expr.this.name)
-                 return str(expr.this)
-         elif isinstance(expr, exp.Select) and (from_clause := expr.find(exp.From)) and from_clause.this:
-             if isinstance(from_clause.this, (exp.Table, exp.Identifier, exp.Var)):
-                 return str(from_clause.this.name)
-             return str(from_clause.this)
-         return None
-
-     @staticmethod
-     def _extract_columns(expr: exp.Expression) -> "list[str]":
-         """Extract column names from an expression."""
-         columns: list[str] = []
-         if isinstance(expr, exp.Insert):
-             if expr.this and has_expressions(expr.this):
-                 columns.extend(
-                     str(col_expr.name)
-                     for col_expr in expr.this.expressions
-                     if isinstance(col_expr, (exp.Column, exp.Identifier, exp.Var))
-                 )
-         elif isinstance(expr, exp.Select):
-             for projection in expr.expressions:
-                 if isinstance(projection, exp.Column):
-                     columns.append(str(projection.name))
-                 elif isinstance(projection, exp.Alias) and projection.alias:
-                     columns.append(str(projection.alias))
-                 elif isinstance(projection, (exp.Identifier, exp.Var)):
-                     columns.append(str(projection.name))
-
-         return columns
-
-     @staticmethod
-     def _extract_all_tables(expr: exp.Expression) -> "list[str]":
-         """Extract all table names referenced in the expression."""
-         tables: list[str] = []
-         for table in expr.find_all(exp.Table):
-             if isinstance(table, exp.Table):
-                 table_name = str(table.name)
-                 if table_name not in tables:
-                     tables.append(table_name)
-         return tables
-
-     @staticmethod
-     def _is_insert_from_select(expr: exp.Expression) -> bool:
-         """Check if this is an INSERT FROM SELECT pattern."""
-         if not isinstance(expr, exp.Insert):
-             return False
-         return bool(expr.expression and isinstance(expr.expression, exp.Select))
-
-     @staticmethod
-     def _extract_parameters(_expr: exp.Expression) -> "dict[str, Any]":
-         """Extract parameters from the expression."""
-         # This could be enhanced to extract actual parameter placeholders
-         # For now, _expr is unused but will be used in future enhancements
-         _ = _expr
-         return {}
-
-     @staticmethod
-     def _has_subqueries(expr: exp.Expression) -> bool:
-         """Check if the expression contains subqueries.
-
-         Note: Due to sqlglot parser inconsistency, subqueries in IN clauses
-         are not wrapped in Subquery nodes, so we need additional detection.
-         CTEs are not considered subqueries.
-         """
-         # Standard subquery detection
-         if expr.find(exp.Subquery):
-             return True
-
-         # sqlglot compatibility: IN clauses with SELECT need explicit handling
-         for in_clause in expr.find_all(exp.In):
-             query_node = in_clause.args.get("query")
-             if query_node and isinstance(query_node, exp.Select):
-                 return True
-
-         # sqlglot compatibility: EXISTS clauses with SELECT need explicit handling
-         for exists_clause in expr.find_all(exp.Exists):
-             if exists_clause.this and isinstance(exists_clause.this, exp.Select):
-                 return True
-
-         # Check for multiple SELECT statements (indicates subqueries)
-         # but exclude those within CTEs
-         select_statements = []
-         for select in expr.find_all(exp.Select):
-             parent = select.parent
-             is_in_cte = False
-             while parent:
-                 if isinstance(parent, exp.CTE):
-                     is_in_cte = True
-                     break
-                 parent = parent.parent
-             if not is_in_cte:
-                 select_statements.append(select)
-
-         return len(select_statements) > 1
-
-     @staticmethod
-     def _count_joins(expr: exp.Expression) -> int:
-         """Count the number of joins in the expression."""
-         return len(list(expr.find_all(exp.Join)))
-
-     @staticmethod
-     def _extract_aggregate_functions(expr: exp.Expression) -> "list[str]":
-         """Extract aggregate function names from the expression."""
-         aggregates: list[str] = []
-
-         # Common aggregate function types in SQLGlot (using only those that exist)
-         aggregate_types = [exp.Count, exp.Sum, exp.Avg, exp.Min, exp.Max]
-
-         for agg_type in aggregate_types:
-             if expr.find(agg_type):  # Check if this aggregate type exists in the expression
-                 func_name = agg_type.__name__.lower()
-                 if func_name not in aggregates:
-                     aggregates.append(func_name)
-
-         return aggregates
-
-     def clear_cache(self) -> None:
-         """Clear both parse and analysis caches."""
-         self._parse_cache.clear()
-         self._analysis_cache.clear()
-
-     @staticmethod
-     def _extract_operations(expr: exp.Expression) -> "list[str]":
-         """Extract SQL operations performed."""
-         operations = []
-
-         # Main operation
-         if isinstance(expr, exp.Select):
-             operations.append("SELECT")
-         elif isinstance(expr, exp.Insert):
-             operations.append("INSERT")
-         elif isinstance(expr, exp.Update):
-             operations.append("UPDATE")
-         elif isinstance(expr, exp.Delete):
-             operations.append("DELETE")
-         elif isinstance(expr, exp.Create):
-             operations.append("CREATE")
-         elif isinstance(expr, exp.Drop):
-             operations.append("DROP")
-         elif isinstance(expr, exp.Alter):
-             operations.append("ALTER")
-         if expr.find(exp.Join):
-             operations.append("JOIN")
-         if expr.find(exp.Group):
-             operations.append("GROUP BY")
-         if expr.find(exp.Order):
-             operations.append("ORDER BY")
-         if expr.find(exp.Having):
-             operations.append("HAVING")
-         if expr.find(exp.Union):
-             operations.append("UNION")
-         if expr.find(exp.Intersect):
-             operations.append("INTERSECT")
-         if expr.find(exp.Except):
-             operations.append("EXCEPT")
-
-         return operations
-
-     @staticmethod
-     def _has_window_functions(expr: exp.Expression) -> bool:
-         """Check if expression uses window functions."""
-         return bool(expr.find(exp.Window))
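For context on what was dropped: the StatementAnalyzer above could be driven either inside a processing pipeline (via process) or directly against a SQL string. Below is a minimal sketch of the direct path against the 0.13.x module shown in this diff; it assumes sqlspec 0.13.1 with its sqlglot dependency installed, and none of it applies to 0.16.2, where the pipelines package no longer exists.

# Sketch against the removed 0.13.x module shown above; not valid for 0.16.2.
from sqlspec.statement.pipelines.analyzers._analyzer import StatementAnalyzer

analyzer = StatementAnalyzer(max_join_count=10, max_subquery_depth=3)
analysis = analyzer.analyze_statement(
    "SELECT c.name, COUNT(o.id) AS order_count "
    "FROM customers c JOIN orders o ON o.customer_id = c.id "
    "GROUP BY c.name"
)

print(analysis.statement_type)       # "Select"
print(analysis.tables)               # e.g. ["customers", "orders"]
print(analysis.join_count)           # 1
print(analysis.aggregate_functions)  # ["count"]
print(analysis.complexity_score)     # weighted sum: joins x3, subqueries x5, CTEs x7, ...
print(analysis.complexity_warnings)  # [] for a query this small

In 0.16.2 the statement pipeline (analyzers, transformers, validators) is removed outright; judging only from the file list above, related concerns move into sqlspec/core/ (compiler, parameters, cache), but this listing does not show the replacement API.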