sqlspec 0.11.0__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (155)
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -644
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -462
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +217 -451
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +418 -498
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +592 -634
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +393 -436
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +549 -942
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -550
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +741 -0
  31. sqlspec/adapters/psycopg/driver.py +732 -733
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +243 -426
  35. sqlspec/base.py +220 -825
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/METADATA +100 -26
  137. sqlspec-0.12.0.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -330
  150. sqlspec/mixins.py +0 -306
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.0.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,990 @@
1
+ """Security validator for SQL statements."""
2
+
3
+ import contextlib
4
+ import logging
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum, auto
8
+ from typing import TYPE_CHECKING, Any, Optional
9
+
10
+ from sqlglot import exp
11
+ from sqlglot.expressions import EQ, Binary, Func, Literal, Or, Subquery, Union
12
+
13
+ from sqlspec.exceptions import RiskLevel
14
+ from sqlspec.statement.pipelines.base import ProcessorProtocol
15
+ from sqlspec.statement.pipelines.result_types import ValidationError
16
+
17
+ if TYPE_CHECKING:
18
+ from sqlspec.statement.pipelines.context import SQLProcessingContext
19
+
20
+ __all__ = ("SecurityIssue", "SecurityIssueType", "SecurityValidator", "SecurityValidatorConfig")
21
+
22
# Named constants that replace magic numbers in this module.
MAX_FUNCTION_ARGS = 10
MAX_NESTING_LEVELS = 5
MIN_UNION_COUNT_FOR_INJECTION = 2
SUSPICIOUS_FUNC_THRESHOLD = 2

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
31
+
32
+
33
class SecurityIssueType(Enum):
    """Categories of findings that the security validator can report."""

    # Injection indicators (UNION abuse, comment/encoding evasion, system schemas, custom patterns).
    INJECTION = auto()
    # Conditions that always evaluate to true.
    TAUTOLOGY = auto()
    # Blocked or suspicious functions, commands and keywords.
    SUSPICIOUS_KEYWORD = auto()
    # Several individual findings that together form a known attack pattern.
    COMBINED_ATTACK = auto()
    # Findings produced by AST-based anomaly detection.
    AST_ANOMALY = auto()
    # Findings produced by structural analysis of the statement.
    STRUCTURAL_ATTACK = auto()
42
+
43
+
44
@dataclass
class SecurityIssue:
    """A single security finding produced while validating a SQL statement."""

    # Category of the finding.
    issue_type: "SecurityIssueType"
    # Severity assigned to the finding.
    risk_level: "RiskLevel"
    # Human-readable explanation; used as the validation error message.
    description: str
    # SQL text (usually a truncated snippet) where the finding was made.
    location: Optional[str] = None
    # Name of the pattern or rule that triggered the finding.
    pattern_matched: Optional[str] = None
    # Suggested remediation.
    recommendation: Optional[str] = None
    # Free-form extra details; every instance gets its own dict.
    metadata: "dict[str, Any]" = field(default_factory=dict)
    # AST node type, for findings that come from AST-based detection.
    ast_node_type: Optional[str] = None
    # Confidence in the finding, from 0.0 to 1.0.
    confidence: float = 1.0
57
+
58
+
59
@dataclass
class SecurityValidatorConfig:
    """Settings that control which security checks run and how findings are rated."""

    # --- Which checks are enabled -------------------------------------------
    check_injection: bool = True
    check_tautology: bool = True
    check_keywords: bool = True
    check_combined_patterns: bool = True
    check_ast_anomalies: bool = True  # AST-based anomaly detection
    check_structural_attacks: bool = True  # structural attack detection

    # --- Risk level assigned to each family of findings ---------------------
    default_risk_level: "RiskLevel" = RiskLevel.HIGH
    injection_risk_level: "RiskLevel" = RiskLevel.HIGH
    tautology_risk_level: "RiskLevel" = RiskLevel.MEDIUM
    keyword_risk_level: "RiskLevel" = RiskLevel.MEDIUM
    ast_anomaly_risk_level: "RiskLevel" = RiskLevel.MEDIUM

    # --- Numeric thresholds ---------------------------------------------------
    max_union_count: int = 3  # more UNIONs than this is reported
    max_null_padding: int = 5  # more NULL columns than this in a UNION SELECT is reported
    max_system_tables: int = 2
    max_nesting_depth: int = 5  # maximum nesting depth
    max_literal_length: int = 1000  # maximum literal length
    min_confidence_threshold: float = 0.7  # findings below this confidence are not reported

    # --- Allow/block lists ----------------------------------------------------
    allowed_functions: "list[str]" = field(default_factory=list)
    blocked_functions: "list[str]" = field(default_factory=list)
    allowed_system_schemas: "list[str]" = field(default_factory=list)

    # --- User-supplied regex patterns (legacy support) -----------------------
    custom_injection_patterns: "list[str]" = field(default_factory=list)
    custom_suspicious_patterns: "list[str]" = field(default_factory=list)
94
+
95
+
96
# Common regex patterns used across security checks.
#
# Patterns that target a keyword or a literal are anchored with word
# boundaries so that they do not fire on text that merely contains the token:
# without them "encoded_chars" matches the type VARCHAR(255) and
# "always_true" matches ``col1 = 1`` or an identifier such as ``is_true``.
PATTERNS = {
    # Injection patterns
    "union_null": re.compile(r"UNION\s+(?:ALL\s+)?SELECT\s+(?:NULL(?:\s*,\s*NULL)*)", re.IGNORECASE),
    "comment_evasion": re.compile(r"/\*.*?\*/|--.*?$|#.*?$", re.MULTILINE),
    # CHAR(n) / CHR(n) / NCHAR(n) calls, but not VARCHAR(n) / NVARCHAR(n) type names.
    "encoded_chars": re.compile(r"\b(?:N?CHAR|CHR)\s*\([0-9]+\)", re.IGNORECASE),
    "hex_encoding": re.compile(r"0x[0-9a-fA-F]+"),
    "concat_evasion": re.compile(r"(?:CONCAT|CONCAT_WS|\|\|)\s*\([^)]+\)", re.IGNORECASE),
    # Tautology patterns
    # A standalone 1=1, '1'='1' or TRUE, optionally followed by OR/AND.
    "always_true": re.compile(r"(?:\b1\s*=\s*1\b|'1'\s*=\s*'1'|\btrue\b)\s*(?:OR|AND)?", re.IGNORECASE),
    "or_patterns": re.compile(r"\bOR\s+1\s*=\s*1\b", re.IGNORECASE),
    # Suspicious function patterns
    "file_operations": re.compile(r"\b(?:LOAD_FILE|INTO\s+(?:OUTFILE|DUMPFILE))\b", re.IGNORECASE),
    "exec_functions": re.compile(r"\b(?:EXEC|EXECUTE|xp_cmdshell|sp_executesql)\b", re.IGNORECASE),
    "admin_functions": re.compile(r"\b(?:CREATE\s+USER|DROP\s+USER|GRANT|REVOKE)\b", re.IGNORECASE),
}
112
+
113
# Schema names (and, for Oracle, object-name prefixes) of system catalogs that
# attacks commonly target, keyed by database family.
SYSTEM_SCHEMAS = dict(
    mysql=["information_schema", "mysql", "performance_schema", "sys"],
    postgresql=["information_schema", "pg_catalog", "pg_temp"],
    mssql=["information_schema", "sys", "master", "msdb"],
    oracle=["sys", "system", "dba_", "all_", "user_"],
)
120
+
121
# Lower-case names of functions commonly used in SQL injection attacks,
# grouped by purpose. The order of the entries is unchanged.
SUSPICIOUS_FUNCTIONS = [
    # String manipulation
    "concat", "concat_ws", "substring", "substr", "char", "chr", "ascii", "hex", "unhex",
    # File operations
    "load_file", "outfile", "dumpfile",
    # System information
    "database", "version", "user", "current_user", "system_user", "session_user",
    # Time-based
    "sleep", "benchmark", "pg_sleep", "waitfor",
    # Execution
    "exec", "execute", "xp_cmdshell", "sp_executesql",
    # XML/JSON (for data extraction)
    "extractvalue", "updatexml", "xmltype", "json_extract",
]
160
+
161
+
162
+ class SecurityValidator(ProcessorProtocol):
163
+ """Unified security validator that performs comprehensive security checks in a single pass."""
164
+
165
def __init__(self, config: Optional["SecurityValidatorConfig"] = None, **kwargs: Any) -> None:
    """Set up the validator.

    Args:
        config: Validator settings; a default ``SecurityValidatorConfig`` is
            created when none is given.
        **kwargs: Accepted and ignored.
    """
    # Start with an empty pattern table, then fill it from the configuration.
    self._compiled_patterns: dict[str, re.Pattern[str]] = {}
    self.config = config if config else SecurityValidatorConfig()
    self._compile_custom_patterns()
170
+
171
+ def _compile_custom_patterns(self) -> None:
172
+ """Compile custom regex patterns from configuration."""
173
+ for i, pattern in enumerate(self.config.custom_injection_patterns):
174
+ with contextlib.suppress(re.error):
175
+ self._compiled_patterns[f"custom_injection_{i}"] = re.compile(pattern, re.IGNORECASE)
176
+
177
+ for i, pattern in enumerate(self.config.custom_suspicious_patterns):
178
+ with contextlib.suppress(re.error):
179
+ self._compiled_patterns[f"custom_suspicious_{i}"] = re.compile(pattern, re.IGNORECASE)
180
+
181
def process(self, expression: Optional[exp.Expression], context: "SQLProcessingContext") -> None:
    """Run every enabled security check and record the findings on ``context``.

    The tree in ``context.current_expression`` is walked once and each node is
    handed to the enabled per-node checks. The combined-pattern check and the
    custom injection patterns (matched against ``context.initial_sql_string``)
    run afterwards. Findings whose confidence is below
    ``config.min_confidence_threshold`` are dropped; each remaining finding is
    appended to ``context.validation_errors`` and a summary is stored in
    ``context.metadata["security_validator"]``.

    Validation errors that are already on the context (for example from other
    processors) are left untouched, and the metadata summary always has the
    same set of keys regardless of whether any finding was filtered out.

    Args:
        expression: Expression attached to every ``ValidationError`` created
            here. The tree that is analysed is ``context.current_expression``.
        context: Processing context that supplies the expression and the
            original SQL text and receives the errors and metadata.
    """
    root = context.current_expression
    if not root:
        return

    security_issues: list[SecurityIssue] = []
    visited_nodes: set[int] = set()

    # NOTE(review): this is a running count of SELECT/subquery nodes seen so
    # far in the walk, not the depth of the current node - confirm that this
    # is what _check_ast_anomalies expects.
    nesting_depth = 0
    for node in root.walk():
        node_id = id(node)
        if node_id in visited_nodes:
            continue
        visited_nodes.add(node_id)

        if isinstance(node, (Subquery, exp.Select)):
            nesting_depth += 1

        if self.config.check_injection:
            security_issues.extend(self._check_injection_patterns(node, context))

        if self.config.check_tautology:
            security_issues.extend(self._check_tautology_patterns(node, context))

        if self.config.check_keywords:
            security_issues.extend(self._check_suspicious_keywords(node, context))

        if self.config.check_ast_anomalies:
            security_issues.extend(self._check_ast_anomalies(node, context, nesting_depth))

        if self.config.check_structural_attacks:
            security_issues.extend(self._check_structural_attacks(node, context))

    # Combined patterns are derived from the individual findings collected above.
    if self.config.check_combined_patterns and security_issues:
        security_issues.extend(self._check_combined_patterns(root, security_issues))

    # Custom patterns are also matched against the raw SQL text, which covers
    # parts that did not make it into the parsed tree.
    if self.config.check_injection and context.initial_sql_string:
        for name, pattern in self._compiled_patterns.items():
            if name.startswith("custom_injection_") and pattern.search(context.initial_sql_string):
                security_issues.append(
                    SecurityIssue(
                        issue_type=SecurityIssueType.INJECTION,
                        risk_level=self.config.injection_risk_level,
                        description=f"Custom injection pattern matched: {name}",
                        location=context.initial_sql_string[:100],
                        pattern_matched=name,
                    )
                )

    # Filter first, then report: only findings that pass the confidence
    # threshold become validation errors, and errors already present on the
    # context are never cleared.
    threshold = self.config.min_confidence_threshold
    reported_issues = [issue for issue in security_issues if issue.confidence >= threshold]

    for issue in reported_issues:
        context.validation_errors.append(
            ValidationError(
                message=issue.description,
                code="security-issue",
                risk_level=issue.risk_level,
                processor="SecurityValidator",
                expression=expression,
            )
        )

    # Summary for callers. Disabled checks appear as None in checks_performed.
    context.metadata["security_validator"] = {
        "security_issues": reported_issues,
        "total_issues": len(reported_issues),
        "total_issues_found": len(security_issues),
        "issues_after_confidence_filter": len(reported_issues),
        "confidence_threshold": threshold,
        "checks_performed": [
            "injection" if self.config.check_injection else None,
            "tautology" if self.config.check_tautology else None,
            "keywords" if self.config.check_keywords else None,
            "combined" if self.config.check_combined_patterns else None,
            "ast_anomalies" if self.config.check_ast_anomalies else None,
            "structural" if self.config.check_structural_attacks else None,
        ],
        "issue_breakdown": {
            issue_type.name: sum(1 for issue in reported_issues if issue.issue_type == issue_type)
            for issue_type in SecurityIssueType
        },
    }
314
+
315
def _check_injection_patterns(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect injection findings for a single AST node.

    Findings are produced in this order: UNION-based checks (for UNION
    nodes), comment evasion, character/hex encoding, system schema access
    (for table nodes) and custom injection patterns. The regex checks run
    against the SQL rendered from ``node``.

    Args:
        node: The AST node to inspect.
        context: Processing context, passed on to the UNION check.

    Returns:
        The findings for this node; empty when nothing matched.
    """
    found: list[SecurityIssue] = []
    risk = self.config.injection_risk_level

    if isinstance(node, exp.Union):
        found += self._check_union_injection(node, context)

    rendered = node.sql()
    snippet = rendered[:100]

    if PATTERNS["comment_evasion"].search(rendered):
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=risk,
                description="Comment-based SQL injection attempt detected",
                location=snippet,
                pattern_matched="comment_evasion",
                recommendation="Remove or sanitize SQL comments",
            )
        )

    # CHAR()/CHR() calls and 0x... literals are reported as one kind of finding.
    has_encoding = PATTERNS["encoded_chars"].search(rendered) or PATTERNS["hex_encoding"].search(rendered)
    if has_encoding:
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=risk,
                description="Encoded character evasion detected",
                location=snippet,
                pattern_matched="encoding_evasion",
                recommendation="Validate and decode input properly",
            )
        )

    if isinstance(node, exp.Table):
        schema_issue = self._check_system_schema_access(node)
        if schema_issue:
            found.append(schema_issue)

    found.extend(
        SecurityIssue(
            issue_type=SecurityIssueType.INJECTION,
            risk_level=risk,
            description=f"Custom injection pattern matched: {name}",
            location=snippet,
            pattern_matched=name,
        )
        for name, pattern in self._compiled_patterns.items()
        if name.startswith("custom_injection_") and pattern.search(rendered)
    )

    return found
371
+
372
def _check_union_injection(self, union_node: "exp.Union", context: "SQLProcessingContext") -> "list[SecurityIssue]":
    """Look for UNION-based injection indicators around one UNION node.

    Two things are checked: whether the whole statement contains more UNION
    nodes than ``config.max_union_count``, and whether the right-hand SELECT
    of ``union_node`` selects more NULLs than ``config.max_null_padding``.

    Args:
        union_node: The UNION node being inspected.
        context: Processing context whose ``current_expression`` is searched
            for UNION nodes.

    Returns:
        The findings; empty when the context has no current expression or
        nothing was detected.
    """
    root = context.current_expression
    if not root:
        return []

    findings: list[SecurityIssue] = []
    risk = self.config.injection_risk_level

    total_unions = sum(1 for _ in root.find_all(exp.Union))
    if total_unions > self.config.max_union_count:
        findings.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=risk,
                description=f"Excessive UNION operations detected ({total_unions})",
                location=union_node.sql()[:100],
                pattern_matched="excessive_unions",
                recommendation="Limit the number of UNION operations",
                metadata={"union_count": total_unions},
            )
        )

    # NULL padding: attackers add NULL columns until the column counts line up.
    right_side = getattr(union_node, "right", None)
    if isinstance(right_side, exp.Select) and right_side.expressions:
        nulls = [column for column in right_side.expressions if isinstance(column, exp.Null)]
        padding = len(nulls)
        if padding > self.config.max_null_padding:
            findings.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.INJECTION,
                    risk_level=risk,
                    description=f"UNION with excessive NULL padding ({padding} NULLs)",
                    location=union_node.sql()[:100],
                    pattern_matched="union_null_padding",
                    recommendation="Validate UNION queries for proper column matching",
                    metadata={"null_count": padding},
                )
            )

    return findings
413
+
414
def _check_system_schema_access(self, table_node: "exp.Table") -> Optional["SecurityIssue"]:
    """Report a table reference that points at a known system schema.

    A reference is reported when its schema is listed in ``SYSTEM_SCHEMAS``
    or when the table name starts with one of the listed names (this covers
    prefix entries such as ``dba_`` / ``all_`` / ``user_``). Table names are
    matched by prefix rather than by substring, so ordinary names that only
    contain a listed token (``small_items`` contains ``all_``, ``analysis``
    contains ``sys``) are not reported.

    Schemas listed in ``config.allowed_system_schemas`` are exempt; the
    comparison ignores case because the schema name is lower-cased here.

    Args:
        table_node: The table reference to inspect.

    Returns:
        A ``SecurityIssue`` for the first matching database family, or
        ``None`` when the reference is allowed or matches nothing.
    """
    table_name = table_node.name.lower() if table_node.name else ""
    schema_name = table_node.db.lower() if table_node.db else ""

    allowed_schemas = {name.lower() for name in self.config.allowed_system_schemas}
    if schema_name in allowed_schemas:
        return None

    for db_type, schemas in SYSTEM_SCHEMAS.items():
        if schema_name in schemas or table_name.startswith(tuple(schemas)):
            return SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=self.config.injection_risk_level,
                description=f"Access to system schema detected: {schema_name or table_name}",
                location=table_node.sql(),
                pattern_matched="system_schema_access",
                recommendation="Restrict access to system schemas",
                metadata={"database_type": db_type, "schema": schema_name, "table": table_name},
            )

    return None
438
+
439
def _check_tautology_patterns(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect always-true-condition findings for a single AST node.

    Three cases are reported:

    * a ``TRUE`` literal used directly as a condition, i.e. (ignoring
      parentheses) the operand of WHERE, HAVING, a JOIN, AND/OR or NOT. A
      ``TRUE`` used as a value (``is_active = TRUE``, ``VALUES (TRUE)``,
      ``SELECT TRUE``) is not a condition and is not reported;
    * a comparison that ``_is_tautology`` recognises as always true;
    * an OR expression whose SQL matches one of the always-true regexes.

    Args:
        node: The AST node to inspect.
        context: Processing context (not used by this check).

    Returns:
        The findings for this node; empty when nothing matched.
    """
    issues: list[SecurityIssue] = []

    if isinstance(node, exp.Boolean) and node.this is True:
        # Skip wrapping parentheses to find what the literal is an operand of.
        enclosing = node.parent
        while isinstance(enclosing, exp.Paren):
            enclosing = enclosing.parent
        if isinstance(enclosing, (exp.Where, exp.Having, exp.Join, exp.Connector, exp.Not)):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.TAUTOLOGY,
                    risk_level=self.config.tautology_risk_level,
                    description="Tautology: always-true literal condition detected",
                    location=node.sql(),
                    pattern_matched="always-true",
                    recommendation="Remove always-true conditions from WHERE clause",
                )
            )

    if isinstance(node, (exp.EQ, exp.NEQ, exp.GT, exp.LT, exp.GTE, exp.LTE)) and self._is_tautology(node):
        issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.TAUTOLOGY,
                risk_level=self.config.tautology_risk_level,
                description="Tautology: always-true condition detected",
                location=node.sql(),
                pattern_matched="tautology_condition",
                recommendation="Review WHERE conditions for always-true statements",
            )
        )

    if isinstance(node, exp.Or):
        or_sql = node.sql()
        if PATTERNS["or_patterns"].search(or_sql) or PATTERNS["always_true"].search(or_sql):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.TAUTOLOGY,
                    risk_level=self.config.tautology_risk_level,
                    description="OR with always-true condition detected",
                    location=or_sql[:100],
                    pattern_matched="or_tautology",
                    recommendation="Validate OR conditions in WHERE clauses",
                )
            )

    return issues
487
+
488
def _is_tautology(self, comparison: "exp.Expression") -> bool:
    """Return True when a comparison is always true.

    Two situations are recognised:

    * both operands are structurally identical and the operator is
      ``=``, ``>=`` or ``<=`` (``x = x``);
    * both operands are literals and the comparison holds. Numeric literals
      are compared by value, so ``1 = 1.0``, ``2 > 1`` and ``1 <= 2`` are
      detected and ``1 != 1.0`` is not. Any other pair of literals is
      compared by its text and only for ``=`` and ``!=``.

    Args:
        comparison: The expression to evaluate; anything that is not a
            binary expression yields False.

    Returns:
        True if the comparison is always true, otherwise False.
    """
    if not isinstance(comparison, exp.Binary):
        return False

    # In sqlglot, binary expressions use 'this' and 'expression' for operands
    left = comparison.this
    right = comparison.expression

    if self._expressions_identical(left, right):
        return isinstance(comparison, (exp.EQ, exp.GTE, exp.LTE))

    if not (isinstance(left, exp.Literal) and isinstance(right, exp.Literal)):
        return False

    def to_number(text: Any) -> Any:
        """Parse literal text as int when possible (exact), else as float."""
        try:
            return int(text)
        except (TypeError, ValueError):
            return float(text)

    if not left.is_string and not right.is_string:
        try:
            left_num, right_num = to_number(left.this), to_number(right.this)
        except (TypeError, ValueError):
            # Not parseable as numbers: fall back to the text comparison below.
            pass
        else:
            if isinstance(comparison, exp.EQ):
                return left_num == right_num
            if isinstance(comparison, exp.NEQ):
                return left_num != right_num
            if isinstance(comparison, exp.GT):
                return left_num > right_num
            if isinstance(comparison, exp.GTE):
                return left_num >= right_num
            if isinstance(comparison, exp.LT):
                return left_num < right_num
            if isinstance(comparison, exp.LTE):
                return left_num <= right_num
            return False

    # Text comparison; ordering of strings depends on collation, so only
    # equality and inequality are evaluated.
    if isinstance(comparison, exp.EQ):
        return bool(left.this == right.this)
    if isinstance(comparison, exp.NEQ):
        return bool(left.this != right.this)

    return False
520
+
521
@staticmethod
def _expressions_identical(expr1: "exp.Expression", expr2: "exp.Expression") -> bool:
    """Tell whether two expressions have the same structure.

    Expressions of different types never match. Columns match on name and
    table, literals on their value, and everything else on the SQL it
    renders to.
    """
    if type(expr1) is not type(expr2):
        return False

    # Both operands share one type from here on, so testing expr1 is enough.
    if isinstance(expr1, exp.Column):
        return (expr1.name, expr1.table) == (expr2.name, expr2.table)

    if isinstance(expr1, exp.Literal):
        same_value = expr1.this == expr2.this
        return bool(same_value)

    rendered_first = expr1.sql()
    rendered_second = expr2.sql()
    return rendered_first == rendered_second
535
+
536
def _check_suspicious_keywords(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect findings about suspicious functions, commands and keywords.

    For function nodes the function name is checked against
    ``config.blocked_functions`` and then against ``SUSPICIOUS_FUNCTIONS``
    (minus ``config.allowed_functions``); all comparisons ignore case. The
    name is taken from the node class for functions sqlglot knows
    (``sql_name()``) and from the node itself for anonymous functions -
    ``node.name`` on a typed function node is the text of its first argument,
    not the function name.

    Command nodes are checked for execution/privilege keywords, and the SQL
    rendered from any node is matched against the file-operation, execution,
    administrative and custom suspicious regexes.

    Args:
        node: The AST node to inspect.
        context: Processing context (not used by this check).

    Returns:
        The findings for this node; empty when nothing matched.
    """
    issues: list[SecurityIssue] = []

    # Check function calls
    if isinstance(node, exp.Func):
        if isinstance(node, exp.Anonymous):
            raw_name = node.name
        else:
            try:
                raw_name = node.sql_name()
            except NotImplementedError:
                # No registered SQL name for this node class.
                raw_name = node.name
        func_name = raw_name.lower() if raw_name else ""

        blocked = {name.lower() for name in self.config.blocked_functions}
        allowed = {name.lower() for name in self.config.allowed_functions}

        # Check if function is explicitly blocked
        if func_name in blocked:
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description=f"Blocked function used: {func_name}",
                    location=node.sql()[:100],
                    pattern_matched="blocked_function",
                    recommendation=f"Function {func_name} is not allowed",
                )
            )
        # Check if function is suspicious but not explicitly allowed
        elif func_name in SUSPICIOUS_FUNCTIONS and func_name not in allowed:
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=self.config.keyword_risk_level,
                    description=f"Suspicious function detected: {func_name}",
                    location=node.sql()[:100],
                    pattern_matched="suspicious_function",
                    recommendation=f"Review usage of {func_name} function",
                    metadata={"function": func_name},
                )
            )

    # Special handling for Command nodes (e.g., EXECUTE statements)
    if isinstance(node, exp.Command):
        # Commands are often used for dynamic SQL execution
        command_text = str(node)
        lowered_command = command_text.lower()
        if any(keyword in lowered_command for keyword in ("execute", "exec", "sp_executesql", "grant", "revoke")):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description=f"Dynamic SQL execution command detected: {command_text.split()[0].lower()}",
                    location=command_text[:100],
                    pattern_matched="exec_command",
                    recommendation="Avoid dynamic SQL execution",
                )
            )

    # Check for specific patterns in SQL text
    if hasattr(node, "sql"):
        sql_text = node.sql()

        # File operations
        if PATTERNS["file_operations"].search(sql_text):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description="File operation detected in SQL",
                    location=sql_text[:100],
                    pattern_matched="file_operation",
                    recommendation="File operations should be handled at application level",
                )
            )

        # Execution functions
        if PATTERNS["exec_functions"].search(sql_text):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description="Dynamic SQL execution function detected",
                    location=sql_text[:100],
                    pattern_matched="exec_function",
                    recommendation="Avoid dynamic SQL execution",
                )
            )

        # Administrative commands
        if PATTERNS["admin_functions"].search(sql_text):
            issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description="Administrative command detected",
                    location=sql_text[:100],
                    pattern_matched="admin_function",
                    recommendation="Administrative commands should be restricted",
                )
            )

        # Check custom suspicious patterns
        for name, pattern in self._compiled_patterns.items():
            if name.startswith("custom_suspicious_") and pattern.search(sql_text):
                issues.append(
                    SecurityIssue(
                        issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                        risk_level=self.config.keyword_risk_level,
                        description=f"Custom suspicious pattern matched: {name}",
                        location=sql_text[:100],
                        pattern_matched=name,
                    )
                )

    return issues
647
+
648
@staticmethod
def _check_combined_patterns(
    expression: "exp.Expression",  # noqa: ARG004
    existing_issues: "list[SecurityIssue]",
) -> "list[SecurityIssue]":
    """Derive combined-attack findings from issues that were already detected.

    Only ``existing_issues`` is inspected; the expression itself is not read.

    Args:
        expression: Expression under validation (unused by this check).
        existing_issues: Issues produced by the earlier checks.

    Returns:
        Newly created ``COMBINED_ATTACK`` issues. ``existing_issues`` is not modified.
    """
    combined_issues: list[SecurityIssue] = []

    # Normalise ``pattern_matched`` once so every check can treat it as a string.
    patterns = [issue.pattern_matched or "" for issue in existing_issues]
    lowered_patterns = [pattern.lower() for pattern in patterns]
    issue_types = {issue.issue_type for issue in existing_issues}

    # Tautology + injection issues, with any pattern mentioning "union" = classic SQLi.
    if (
        SecurityIssueType.TAUTOLOGY in issue_types
        and SecurityIssueType.INJECTION in issue_types
        and any("union" in pattern for pattern in lowered_patterns)
    ):
        combined_issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.COMBINED_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="Classic SQL injection pattern detected (Tautology + UNION)",
                pattern_matched="classic_sqli",
                recommendation="This appears to be a deliberate SQL injection attempt",
                metadata={"attack_components": ["tautology", "union"], "confidence": "high"},
            )
        )

    # Multiple suspicious functions + system schema = data extraction attempt.
    # These two substring checks are case-sensitive, unlike the ones above and below.
    suspicious_func_count = sum(
        1
        for issue, pattern in zip(existing_issues, patterns)
        if issue.issue_type == SecurityIssueType.SUSPICIOUS_KEYWORD and "function" in pattern
    )
    system_schema_access = any("system_schema" in pattern for pattern in patterns)

    if suspicious_func_count >= SUSPICIOUS_FUNC_THRESHOLD and system_schema_access:
        combined_issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.COMBINED_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="Data extraction attempt detected (Multiple functions + System schema)",
                pattern_matched="data_extraction",
                recommendation="Block queries attempting to extract system information",
                metadata={"suspicious_functions": suspicious_func_count, "targets_system_schema": True},
            )
        )

    # Encoding- or comment-related findings = evasion attempt.
    # NOTE: this fires on either signal alone; no injection issue is required.
    has_encoding = any("encoding" in pattern for pattern in lowered_patterns)
    has_comment = any("comment" in pattern for pattern in lowered_patterns)

    if has_encoding or has_comment:
        # Report only the techniques that were actually seen (no ``None`` placeholders).
        evasion_techniques = [
            technique
            for technique, detected in (("encoding", has_encoding), ("comments", has_comment))
            if detected
        ]
        combined_issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.COMBINED_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="Evasion technique detected in SQL injection attempt",
                pattern_matched="evasion_attempt",
                recommendation="Input appears to be crafted to bypass security filters",
                metadata={"evasion_techniques": evasion_techniques},
            )
        )

    return combined_issues
718
+
719
def _check_ast_anomalies(
    self, node: "exp.Expression", context: "SQLProcessingContext", nesting_depth: int
) -> "list[SecurityIssue]":
    """Inspect a single AST node for structural anomalies.

    Four checks run in order: nesting depth against the configured maximum,
    string-literal length against the configured maximum, function-call
    anomalies (for ``Func`` nodes) and binary-operation anomalies (for
    ``Binary`` nodes).

    Args:
        node: The AST node to inspect.
        context: Processing context (not read by this method).
        nesting_depth: Depth of ``node`` as computed by the caller.

    Returns:
        The anomalies found for this node, possibly empty.
    """
    found: list[SecurityIssue] = []

    # Excessive nesting can indicate an injected sub-structure.
    depth_limit = self.config.max_nesting_depth
    if nesting_depth > depth_limit:
        snippet = node.sql()[:100] if hasattr(node, "sql") else str(node)[:100]
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.AST_ANOMALY,
                risk_level=self.config.ast_anomaly_risk_level,
                description=f"Excessive query nesting detected (depth: {nesting_depth})",
                location=snippet,
                pattern_matched="excessive_nesting",
                recommendation="Review query structure for potential injection",
                ast_node_type=type(node).__name__,
                confidence=0.8,
                metadata={"nesting_depth": nesting_depth, "max_allowed": depth_limit},
            )
        )

    # Very long literals may carry an injection payload.
    if isinstance(node, Literal) and isinstance(node.this, str):
        literal_text = str(node.this)
        literal_length = len(literal_text)
        length_limit = self.config.max_literal_length
        if literal_length > length_limit:
            found.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.AST_ANOMALY,
                    risk_level=self.config.ast_anomaly_risk_level,
                    description=f"Suspiciously long literal detected ({literal_length} chars)",
                    location=literal_text[:100],
                    pattern_matched="long_literal",
                    recommendation="Validate input length and content",
                    ast_node_type="Literal",
                    confidence=0.6,
                    metadata={"literal_length": literal_length, "max_allowed": length_limit},
                )
            )

    # Delegate node-type specific analysis to the dedicated helpers.
    if isinstance(node, Func):
        found.extend(self._analyze_function_anomalies(node))

    if isinstance(node, Binary):
        found.extend(self._analyze_binary_anomalies(node))

    return found
773
+
774
def _check_structural_attacks(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Run the structure-specific analyzers that apply to ``node``.

    ``Union``, ``Subquery`` and ``Or`` nodes are each routed to their own
    analyzer; any other node type yields no issues.

    Args:
        node: The AST node to inspect.
        context: Processing context (not read by this method).

    Returns:
        Issues reported by the matching analyzers, possibly empty.
    """
    # (node type, analyzer) pairs, evaluated in this order.
    analyzers = (
        (Union, self._analyze_union_structure),
        (Subquery, self._analyze_subquery_structure),
        (Or, self._analyze_or_structure),
    )

    findings: list[SecurityIssue] = []
    for node_type, analyze in analyzers:
        if isinstance(node, node_type):
            findings.extend(analyze(node))
    return findings
796
+
797
@staticmethod
def _analyze_function_anomalies(func_node: Func) -> "list[SecurityIssue]":
    """Look for anomalous patterns in a function-call node.

    Two patterns are reported: a function from ``SUSPICIOUS_FUNCTIONS``
    nested directly as the ``this`` argument of another function, and a
    ``concat``/``concat_ws`` call with more than ``MAX_FUNCTION_ARGS``
    arguments.

    Args:
        func_node: The function-call node to analyze.

    Returns:
        The issues found; empty when the node has no name or nothing matches.
    """
    findings: list[SecurityIssue] = []

    raw_name = func_node.name
    if not raw_name:
        return findings
    func_name = raw_name.lower()

    # A suspicious function wrapped in another call can be an evasion attempt.
    inner = getattr(func_node, "this", None)
    if isinstance(inner, Func):
        inner_name = (inner.name or "").lower()
        if inner_name and inner_name in SUSPICIOUS_FUNCTIONS:
            findings.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.AST_ANOMALY,
                    risk_level=RiskLevel.MEDIUM,
                    description=f"Nested suspicious function call: {inner_name} inside {func_name}",
                    location=func_node.sql()[:100],
                    pattern_matched="nested_suspicious_function",
                    recommendation="Review nested function calls for evasion attempts",
                    ast_node_type="Func",
                    confidence=0.7,
                    metadata={"outer_function": func_name, "inner_function": inner_name},
                )
            )

    # Concatenation calls with an unusually large argument list.
    arguments = getattr(func_node, "expressions", None)
    if arguments:
        arg_count = len(arguments)
        if func_name in {"concat", "concat_ws"} and arg_count > MAX_FUNCTION_ARGS:
            findings.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.AST_ANOMALY,
                    risk_level=RiskLevel.MEDIUM,
                    description=f"Excessive arguments to {func_name} function ({arg_count} args)",
                    location=func_node.sql()[:100],
                    pattern_matched="excessive_function_args",
                    recommendation="Review function arguments for potential injection",
                    ast_node_type="Func",
                    confidence=0.6,
                    metadata={"function": func_name, "arg_count": arg_count},
                )
            )

    return findings
844
+
845
def _analyze_binary_anomalies(self, binary_node: Binary) -> "list[SecurityIssue]":
    """Report binary expressions that are nested deeper than ``MAX_NESTING_LEVELS``.

    Args:
        binary_node: The binary-operation node to analyze.

    Returns:
        A single low-risk issue when the nesting depth exceeds the threshold,
        otherwise an empty list.
    """
    depth = self._calculate_binary_depth(binary_node)

    # Within the (arbitrary) threshold: nothing to report.
    if depth <= MAX_NESTING_LEVELS:
        return []

    return [
        SecurityIssue(
            issue_type=SecurityIssueType.AST_ANOMALY,
            risk_level=RiskLevel.LOW,
            description=f"Deeply nested binary operations detected (depth: {depth})",
            location=binary_node.sql()[:100],
            pattern_matched="deep_binary_nesting",
            recommendation="Review complex condition structures",
            ast_node_type="Binary",
            confidence=0.5,
            metadata={"nesting_depth": depth},
        )
    ]
867
+
868
def _analyze_union_structure(self, union_node: Union) -> "list[SecurityIssue]":
    """Report a UNION whose two sides select a different number of columns.

    Column counts come from ``_count_select_columns``; a count of 0 on either
    side suppresses the check.

    Args:
        union_node: The UNION node to analyze.

    Returns:
        A single high-risk issue on a column-count mismatch, otherwise an
        empty list.
    """
    if not (hasattr(union_node, "left") and hasattr(union_node, "right")):
        return []

    left_cols = self._count_select_columns(union_node.left)
    right_cols = self._count_select_columns(union_node.right)

    # Matching counts, or a side with no countable columns: nothing to report.
    if left_cols == right_cols or left_cols <= 0 or right_cols <= 0:
        return []

    return [
        SecurityIssue(
            issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
            risk_level=RiskLevel.HIGH,
            description=f"UNION with mismatched column counts ({left_cols} vs {right_cols})",
            location=union_node.sql()[:100],
            pattern_matched="union_column_mismatch",
            recommendation="UNION queries should have matching column counts",
            ast_node_type="Union",
            confidence=0.9,
            metadata={"left_columns": left_cols, "right_columns": right_cols},
        )
    ]
893
+
894
@staticmethod
def _analyze_subquery_structure(subquery_node: Subquery) -> "list[SecurityIssue]":
    """Report subqueries whose SELECT list consists solely of literals.

    The issue is raised only when every projection is a ``Literal`` and the
    number of projections exceeds ``MIN_UNION_COUNT_FOR_INJECTION``.

    Args:
        subquery_node: The subquery node to analyze.

    Returns:
        A single medium-risk issue when the pattern matches, otherwise an
        empty list.
    """
    inner_select = getattr(subquery_node, "this", None)
    if not isinstance(inner_select, exp.Select):
        return []

    projections = getattr(inner_select, "expressions", None)
    if not projections:
        return []

    total_expressions = len(projections)
    literal_count = len([projection for projection in projections if isinstance(projection, Literal)])

    # Needs to be all-literal and above the minimum size to be reported.
    if literal_count != total_expressions or total_expressions <= MIN_UNION_COUNT_FOR_INJECTION:
        return []

    return [
        SecurityIssue(
            issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
            risk_level=RiskLevel.MEDIUM,
            description=f"Subquery selecting only literals ({literal_count} literals)",
            location=subquery_node.sql()[:100],
            pattern_matched="literal_only_subquery",
            recommendation="Review subqueries that only select literal values",
            ast_node_type="Subquery",
            confidence=0.7,
            metadata={"literal_count": literal_count, "total_expressions": total_expressions},
        )
    ]
924
+
925
def _analyze_or_structure(self, or_node: Or) -> "list[SecurityIssue]":
    """Report OR conditions in which at least one side is always true.

    Each operand is classified exactly once with
    ``_is_always_true_condition``; the results drive both the decision and
    the reported metadata.

    Args:
        or_node: The OR node to analyze.

    Returns:
        A single high-risk issue when either operand is always true,
        otherwise an empty list.
    """
    issues: list[SecurityIssue] = []

    if not (hasattr(or_node, "left") and hasattr(or_node, "right")):
        return issues

    # Evaluate each side once instead of re-running the check for the metadata.
    left_always_true = self._is_always_true_condition(or_node.left)
    right_always_true = self._is_always_true_condition(or_node.right)

    if left_always_true or right_always_true:
        issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="OR condition with always-true clause detected",
                location=or_node.sql()[:100],
                pattern_matched="or_tautology_ast",
                recommendation="Remove always-true conditions from OR clauses",
                ast_node_type="Or",
                confidence=0.95,
                metadata={
                    "left_always_true": left_always_true,
                    "right_always_true": right_always_true,
                },
            )
        )

    return issues
953
+
954
def _calculate_binary_depth(self, node: Binary, depth: int = 0) -> int:
    """Return the deepest level reached by nested binary operations under ``node``.

    ``node`` itself sits at level ``depth``; every ``Binary`` found in a
    ``left`` or ``right`` operand is one level deeper than its parent.

    Args:
        node: Root binary operation.
        depth: Level assigned to ``node``.

    Returns:
        The maximum level over ``node`` and all nested binary operands.
    """
    deepest = depth
    pending = [(node, depth)]

    # Walk the binary operands with an explicit stack instead of recursion.
    while pending:
        current, level = pending.pop()
        deepest = max(deepest, level)
        for side in ("left", "right"):
            operand = getattr(current, side, None)
            if isinstance(operand, Binary):
                pending.append((operand, level + 1))

    return deepest
965
+
966
@staticmethod
def _count_select_columns(node: "exp.Expression") -> int:
    """Count the projections of a SELECT statement.

    Args:
        node: Expression to inspect.

    Returns:
        The number of projected expressions, or 0 when the count cannot be
        determined: ``node`` is not a ``SELECT``, it has no projections, or
        it projects a star (``*`` / ``t.*``), which expands to a number of
        columns that is unknown without schema information.
    """
    if not isinstance(node, exp.Select):
        return 0

    projections = getattr(node, "expressions", None) or []

    # A star stands for an unknown number of columns; counting it as one would
    # produce false column-mismatch findings for e.g. ``SELECT * ... UNION SELECT a, b``.
    if any(getattr(projection, "is_star", False) for projection in projections):
        return 0

    return len(projections)
972
+
973
@staticmethod
def _is_always_true_condition(node: "exp.Expression") -> bool:
    """Check whether a condition is trivially always true.

    Recognised forms (after removing enclosing parentheses):

    * the boolean keyword ``TRUE``;
    * a literal whose text is ``TRUE`` (any case) or ``1``;
    * an equality between two literals with identical text, e.g. ``1 = 1``.

    Args:
        node: The condition to inspect.

    Returns:
        True when the condition matches one of the forms above.
    """
    # ``(1 = 1)`` is wrapped in Paren nodes; look through them.
    while isinstance(node, exp.Paren):
        node = node.this

    # The TRUE keyword is parsed as a Boolean node, not as a Literal.
    if isinstance(node, exp.Boolean):
        return bool(node.this)

    # Literal spelled as true / 1.
    if isinstance(node, Literal) and str(node.this).upper() in {"TRUE", "1"}:
        return True

    # 1=1 or similar literal-to-literal tautologies.
    return bool(
        isinstance(node, EQ)
        and hasattr(node, "left")
        and hasattr(node, "right")
        and (
            isinstance(node.left, Literal)
            and isinstance(node.right, Literal)
            and str(node.left.this) == str(node.right.this)
        )
    )