sqlspec 0.14.1__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +50 -25
- sqlspec/__main__.py +1 -1
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +256 -120
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +115 -260
- sqlspec/adapters/adbc/driver.py +462 -367
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +199 -129
- sqlspec/adapters/aiosqlite/driver.py +230 -269
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -168
- sqlspec/adapters/asyncmy/driver.py +260 -225
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +82 -181
- sqlspec/adapters/asyncpg/driver.py +285 -383
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -258
- sqlspec/adapters/bigquery/driver.py +474 -646
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +415 -351
- sqlspec/adapters/duckdb/driver.py +343 -413
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -379
- sqlspec/adapters/oracledb/driver.py +507 -560
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -254
- sqlspec/adapters/psqlpy/driver.py +505 -234
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -403
- sqlspec/adapters/psycopg/driver.py +706 -872
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +202 -118
- sqlspec/adapters/sqlite/driver.py +264 -303
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder → builder}/_base.py +120 -55
- sqlspec/{statement/builder → builder}/_column.py +17 -6
- sqlspec/{statement/builder → builder}/_ddl.py +46 -79
- sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
- sqlspec/{statement/builder → builder}/_delete.py +6 -25
- sqlspec/{statement/builder → builder}/_insert.py +6 -64
- sqlspec/builder/_merge.py +56 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +3 -10
- sqlspec/{statement/builder → builder}/_select.py +11 -56
- sqlspec/{statement/builder → builder}/_update.py +12 -18
- sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
- sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
- sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +22 -16
- sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
- sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +3 -5
- sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
- sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
- sqlspec/{statement/builder → builder}/mixins/_select_operations.py +21 -36
- sqlspec/{statement/builder → builder}/mixins/_update_operations.py +3 -14
- sqlspec/{statement/builder → builder}/mixins/_where_clause.py +52 -79
- sqlspec/cli.py +4 -5
- sqlspec/config.py +180 -133
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +873 -0
- sqlspec/core/compiler.py +396 -0
- sqlspec/core/filters.py +828 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1209 -0
- sqlspec/core/result.py +664 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +651 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +387 -176
- sqlspec/driver/_common.py +527 -289
- sqlspec/driver/_sync.py +390 -172
- sqlspec/driver/mixins/__init__.py +2 -19
- sqlspec/driver/mixins/_result_tools.py +168 -0
- sqlspec/driver/mixins/_sql_translator.py +6 -3
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +16 -14
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +424 -105
- sqlspec/migrations/__init__.py +12 -0
- sqlspec/migrations/base.py +92 -68
- sqlspec/migrations/commands.py +24 -106
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +49 -51
- sqlspec/migrations/tracker.py +31 -44
- sqlspec/migrations/utils.py +64 -24
- sqlspec/protocols.py +7 -183
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -3
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +16 -37
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +443 -232
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/METADATA +7 -2
- sqlspec-0.15.0.dist-info/RECORD +134 -0
- sqlspec/adapters/adbc/transformers.py +0 -108
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_cache.py +0 -114
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -508
- sqlspec/driver/mixins/_query_tools.py +0 -796
- sqlspec/driver/mixins/_result_utils.py +0 -138
- sqlspec/driver/mixins/_storage.py +0 -912
- sqlspec/driver/mixins/_type_coercion.py +0 -128
- sqlspec/driver/parameters.py +0 -138
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/_merge.py +0 -95
- sqlspec/statement/cache.py +0 -50
- sqlspec/statement/filters.py +0 -625
- sqlspec/statement/parameters.py +0 -956
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -109
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -714
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1774
- sqlspec/utils/cached_property.py +0 -25
- sqlspec/utils/statement_hashing.py +0 -203
- sqlspec-0.14.1.dist-info/RECORD +0 -145
- /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.15.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,967 +0,0 @@
|
|
|
1
|
-
"""Security validator for SQL statements."""
|
|
2
|
-
|
|
3
|
-
import contextlib
|
|
4
|
-
import logging
|
|
5
|
-
import re
|
|
6
|
-
from dataclasses import dataclass, field
|
|
7
|
-
from enum import Enum, auto
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Optional
|
|
9
|
-
|
|
10
|
-
from sqlglot import exp
|
|
11
|
-
from sqlglot.expressions import EQ, Binary, Func, Literal, Or, Subquery, Union
|
|
12
|
-
|
|
13
|
-
from sqlspec.exceptions import RiskLevel
|
|
14
|
-
from sqlspec.protocols import ProcessorProtocol
|
|
15
|
-
from sqlspec.statement.pipelines.context import ValidationError
|
|
16
|
-
from sqlspec.utils.type_guards import has_expressions, has_sql_method
|
|
17
|
-
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from sqlspec.statement.pipelines.context import SQLProcessingContext
|
|
20
|
-
|
|
21
|
-
__all__ = ("SecurityIssue", "SecurityIssueType", "SecurityValidator", "SecurityValidatorConfig")
|
|
22
|
-
|
|
23
|
-
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Named numeric limits, kept here so checks do not embed magic values.
MAX_FUNCTION_ARGS = 10
MAX_NESTING_LEVELS = 5
MIN_UNION_COUNT_FOR_INJECTION = 2
SUSPICIOUS_FUNC_THRESHOLD = 2
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class SecurityIssueType(Enum):
    """Categories a detected security issue can be filed under."""

    # Pattern- and schema-based injection findings.
    INJECTION = auto()
    # Conditions that always evaluate to true.
    TAUTOLOGY = auto()
    # Blocked or suspicious functions and commands.
    SUSPICIOUS_KEYWORD = auto()
    # Findings derived from several individual issues together.
    COMBINED_ATTACK = auto()
    # Findings from AST-based detection.
    AST_ANOMALY = auto()
    # Findings from structural analysis.
    STRUCTURAL_ATTACK = auto()
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
@dataclass
class SecurityIssue:
    """A single security finding produced while validating a SQL statement."""

    # Category of the finding.
    issue_type: "SecurityIssueType"
    # Severity assigned to the finding.
    risk_level: "RiskLevel"
    # Human-readable explanation of what was detected.
    description: str
    # SQL text (possibly truncated by the caller) where the issue was seen.
    location: Optional[str] = None
    # Name of the pattern or rule that fired.
    pattern_matched: Optional[str] = None
    # Suggested remediation.
    recommendation: Optional[str] = None
    # Free-form extra details; each instance gets its own dict.
    metadata: "dict[str, Any]" = field(default_factory=dict)
    # AST node type, for findings from AST-based detection.
    ast_node_type: Optional[str] = None
    # Confidence in the finding, from 0.0 to 1.0.
    confidence: float = 1.0
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@dataclass
class SecurityValidatorConfig:
    """Settings that control which security checks run and how they report."""

    # --- Feature toggles: one switch per family of checks -------------------
    check_injection: bool = True
    check_tautology: bool = True
    check_keywords: bool = True
    check_combined_patterns: bool = True
    check_ast_anomalies: bool = True  # AST-based anomaly detection
    check_structural_attacks: bool = True  # Structural attack detection

    # --- Risk level attached to issues from each family ---------------------
    default_risk_level: "RiskLevel" = RiskLevel.HIGH
    injection_risk_level: "RiskLevel" = RiskLevel.HIGH
    tautology_risk_level: "RiskLevel" = RiskLevel.MEDIUM
    keyword_risk_level: "RiskLevel" = RiskLevel.MEDIUM
    ast_anomaly_risk_level: "RiskLevel" = RiskLevel.MEDIUM

    # --- Thresholds ---------------------------------------------------------
    max_union_count: int = 3
    max_null_padding: int = 5
    max_system_tables: int = 2
    max_nesting_depth: int = 5  # Maximum nesting depth
    max_literal_length: int = 1000  # Maximum literal length
    min_confidence_threshold: float = 0.7  # Minimum confidence for reporting

    # --- Allow / block lists ------------------------------------------------
    allowed_functions: "list[str]" = field(default_factory=list)
    blocked_functions: "list[str]" = field(default_factory=list)
    allowed_system_schemas: "list[str]" = field(default_factory=list)

    # --- Custom regex patterns (legacy support) -----------------------------
    custom_injection_patterns: "list[str]" = field(default_factory=list)
    custom_suspicious_patterns: "list[str]" = field(default_factory=list)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# Common regex patterns used across security checks.
#
# Keyword-style patterns are anchored with ``\b`` so they do not fire inside
# longer identifiers (``VARCHAR(255)``, ``price1 = 10``, ``is_true``).
PATTERNS = {
    # Injection patterns
    "union_null": re.compile(r"UNION\s+(?:ALL\s+)?SELECT\s+(?:NULL(?:\s*,\s*NULL)*)", re.IGNORECASE),
    # ``[\s\S]`` lets a block comment span several lines; ``.`` alone stops at a newline.
    "comment_evasion": re.compile(r"/\*[\s\S]*?\*/|--.*?$|#.*?$", re.MULTILINE),
    "encoded_chars": re.compile(r"\b(?:CHAR|CHR)\s*\([0-9]+\)", re.IGNORECASE),
    "hex_encoding": re.compile(r"\b0x[0-9a-fA-F]+"),
    "concat_evasion": re.compile(r"(?:CONCAT|CONCAT_WS|\|\|)\s*\([^)]+\)", re.IGNORECASE),
    # Tautology patterns
    "always_true": re.compile(r"(?:\b1\s*=\s*1\b|'1'\s*=\s*'1'|\btrue\b)\s*(?:OR|AND)?", re.IGNORECASE),
    "or_patterns": re.compile(r"\bOR\s+1\s*=\s*1\b", re.IGNORECASE),
    # Suspicious function patterns
    "file_operations": re.compile(r"\b(?:LOAD_FILE|INTO\s+(?:OUTFILE|DUMPFILE))\b", re.IGNORECASE),
    "exec_functions": re.compile(r"\b(?:EXEC|EXECUTE|xp_cmdshell|sp_executesql)\b", re.IGNORECASE),
    "admin_functions": re.compile(r"\b(?:CREATE\s+USER|DROP\s+USER|GRANT|REVOKE)\b", re.IGNORECASE),
}
|
|
113
|
-
|
|
114
|
-
# Schema names (and, for Oracle, name prefixes) that attacks often target,
# grouped by database type.
SYSTEM_SCHEMAS = dict(
    mysql=["information_schema", "mysql", "performance_schema", "sys"],
    postgresql=["information_schema", "pg_catalog", "pg_temp"],
    mssql=["information_schema", "sys", "master", "msdb"],
    oracle=["sys", "system", "dba_", "all_", "user_"],
)
|
|
121
|
-
|
|
122
|
-
# Lower-case names of functions commonly used in SQL injection attacks,
# listed category by category.
SUSPICIOUS_FUNCTIONS = [
    # String manipulation
    *("concat", "concat_ws", "substring", "substr", "char", "chr", "ascii", "hex", "unhex"),
    # File operations
    *("load_file", "outfile", "dumpfile"),
    # System information
    *("database", "version", "user", "current_user", "system_user", "session_user"),
    # Time-based
    *("sleep", "benchmark", "pg_sleep", "waitfor"),
    # Execution
    *("exec", "execute", "xp_cmdshell", "sp_executesql"),
    # XML/JSON (for data extraction)
    *("extractvalue", "updatexml", "xmltype", "json_extract"),
]
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
class SecurityValidator(ProcessorProtocol):
|
|
164
|
-
"""Unified security validator that performs comprehensive security checks in a single pass."""
|
|
165
|
-
|
|
166
|
-
def __init__(self, config: Optional["SecurityValidatorConfig"] = None, **kwargs: Any) -> None:
    """Set up the validator.

    Args:
        config: Validator settings; a default ``SecurityValidatorConfig`` is
            created when none is supplied.
        **kwargs: Accepted but not read by this method.
    """
    # Compiled custom patterns, keyed by generated name; filled just below.
    self._compiled_patterns: dict[str, re.Pattern[str]] = {}
    self.config = config or SecurityValidatorConfig()
    self._compile_custom_patterns()
|
|
171
|
-
|
|
172
|
-
def _compile_custom_patterns(self) -> None:
    """Compile the custom regex patterns listed in the configuration.

    Each pattern is compiled case-insensitively and stored in
    ``self._compiled_patterns`` under ``custom_injection_<index>`` or
    ``custom_suspicious_<index>``, where ``<index>`` is the pattern's
    position in its configuration list.

    A pattern that is not a valid regular expression is skipped, as before,
    but the skip is now logged so a mistyped security rule does not
    disappear unnoticed.
    """
    pattern_groups = (
        ("custom_injection", self.config.custom_injection_patterns),
        ("custom_suspicious", self.config.custom_suspicious_patterns),
    )
    for prefix, patterns in pattern_groups:
        for index, pattern in enumerate(patterns):
            try:
                compiled = re.compile(pattern, re.IGNORECASE)
            except re.error as exc:
                logger.warning("Ignoring invalid %s pattern %r: %s", prefix, pattern, exc)
                continue
            self._compiled_patterns[f"{prefix}_{index}"] = compiled
|
|
181
|
-
|
|
182
|
-
def add_error(
    self,
    context: "SQLProcessingContext",
    message: str,
    code: str,
    risk_level: RiskLevel,
    expression: "Optional[exp.Expression]" = None,
) -> None:
    """Record a validation error on the processing context.

    Args:
        context: Context whose ``validation_errors`` list receives the error.
        message: Error message.
        code: Error code.
        risk_level: Risk level assigned to the error.
        expression: Expression the error relates to, if any.
    """
    # The error is attributed to the concrete class of this processor.
    context.validation_errors.append(
        ValidationError(
            message=message,
            code=code,
            risk_level=risk_level,
            processor=type(self).__name__,
            expression=expression,
        )
    )
|
|
195
|
-
|
|
196
|
-
def process(
    self, expression: Optional[exp.Expression], context: "SQLProcessingContext"
) -> Optional[exp.Expression]:
    """Run every enabled security check against ``context.current_expression``.

    The expression tree is walked once and each node is handed to the
    enabled per-node checks. Combined-pattern detection and the custom
    injection patterns (matched against ``context.initial_sql_string``) run
    after the walk. Issues whose confidence is below
    ``config.min_confidence_threshold`` are dropped; one ``ValidationError``
    is appended to ``context.validation_errors`` for each remaining issue
    and a summary is stored under ``context.metadata["security_validator"]``.

    Errors already present on the context are left untouched.

    Args:
        expression: Expression handed in by the caller; it is returned
            unchanged and attached to every ``ValidationError``.
        context: Processing context that supplies the expression to inspect
            and receives the errors and metadata.

    Returns:
        ``expression`` as passed in, or ``None`` when the context has no
        current expression.
    """
    if not context.current_expression:
        return None

    security_issues: list[SecurityIssue] = []
    visited_nodes: set[int] = set()

    # Single AST traversal for all per-node security checks.
    # NOTE(review): this counter only ever increases, so it counts the
    # SELECT/subquery nodes seen so far rather than the depth of the current
    # node - confirm that is what _check_ast_anomalies expects.
    nesting_depth = 0
    for node in context.current_expression.walk():
        node_id = id(node)
        if node_id in visited_nodes:
            continue
        visited_nodes.add(node_id)

        if isinstance(node, (Subquery, exp.Select)):
            nesting_depth += 1

        if self.config.check_injection:
            security_issues.extend(self._check_injection_patterns(node, context))

        if self.config.check_tautology:
            security_issues.extend(self._check_tautology_patterns(node, context))

        if self.config.check_keywords:
            security_issues.extend(self._check_suspicious_keywords(node, context))

        if self.config.check_ast_anomalies:
            security_issues.extend(self._check_ast_anomalies(node, context, nesting_depth))

        if self.config.check_structural_attacks:
            security_issues.extend(self._check_structural_attacks(node, context))

    if self.config.check_combined_patterns and security_issues:
        security_issues.extend(self._check_combined_patterns(context.current_expression, security_issues))

    # Also check the initial SQL string for custom patterns (handles unparsed parts).
    if self.config.check_injection and context.initial_sql_string:
        for name, pattern in self._compiled_patterns.items():
            if name.startswith("custom_injection_") and pattern.search(context.initial_sql_string):
                security_issues.append(
                    SecurityIssue(
                        issue_type=SecurityIssueType.INJECTION,
                        risk_level=self.config.injection_risk_level,
                        description=f"Custom injection pattern matched: {name}",
                        location=context.initial_sql_string[:100],
                        pattern_matched=name,
                    )
                )

    # Apply the confidence filter before reporting, so low-confidence issues
    # are never turned into errors and nothing has to be undone afterwards.
    threshold = self.config.min_confidence_threshold
    reported_issues = [issue for issue in security_issues if issue.confidence >= threshold]

    for issue in reported_issues:
        context.validation_errors.append(
            ValidationError(
                message=issue.description,
                code="security-issue",
                risk_level=issue.risk_level,
                processor="SecurityValidator",
                expression=expression,
            )
        )

    # Store metadata in context for access by the caller. The same keys are
    # written whether or not the confidence filter removed anything; a
    # disabled check keeps its slot in ``checks_performed`` as ``None``.
    context.metadata["security_validator"] = {
        "security_issues": reported_issues,
        "total_issues": len(reported_issues),
        "total_issues_found": len(security_issues),
        "issues_after_confidence_filter": len(reported_issues),
        "confidence_threshold": threshold,
        "checks_performed": [
            "injection" if self.config.check_injection else None,
            "tautology" if self.config.check_tautology else None,
            "keywords" if self.config.check_keywords else None,
            "combined" if self.config.check_combined_patterns else None,
            "ast_anomalies" if self.config.check_ast_anomalies else None,
            "structural" if self.config.check_structural_attacks else None,
        ],
        "issue_breakdown": {
            issue_type.name: sum(1 for issue in reported_issues if issue.issue_type == issue_type)
            for issue_type in SecurityIssueType
        },
    }

    return expression
|
|
324
|
-
|
|
325
|
-
def _check_injection_patterns(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect injection-related issues for a single AST node.

    Runs, in order: the UNION checks (for UNION nodes), the comment and
    encoding regexes against the node's generated SQL, the system-schema
    check (for table nodes) and the configured custom injection patterns.

    Args:
        node: AST node to inspect.
        context: Processing context, forwarded to the UNION check.

    Returns:
        The issues found for this node; empty when nothing matched.
    """
    found: list[SecurityIssue] = []
    risk = self.config.injection_risk_level

    if isinstance(node, exp.Union):
        found.extend(self._check_union_injection(node, context))

    rendered = node.sql()
    # Reported locations are capped at the first 100 characters.
    snippet = rendered[:100]

    if PATTERNS["comment_evasion"].search(rendered):
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=risk,
                description="Comment-based SQL injection attempt detected",
                location=snippet,
                pattern_matched="comment_evasion",
                recommendation="Remove or sanitize SQL comments",
            )
        )

    uses_encoding = PATTERNS["encoded_chars"].search(rendered) or PATTERNS["hex_encoding"].search(rendered)
    if uses_encoding:
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=risk,
                description="Encoded character evasion detected",
                location=snippet,
                pattern_matched="encoding_evasion",
                recommendation="Validate and decode input properly",
            )
        )

    if isinstance(node, exp.Table):
        schema_issue = self._check_system_schema_access(node)
        if schema_issue:
            found.append(schema_issue)

    found.extend(
        SecurityIssue(
            issue_type=SecurityIssueType.INJECTION,
            risk_level=risk,
            description=f"Custom injection pattern matched: {name}",
            location=snippet,
            pattern_matched=name,
        )
        for name, pattern in self._compiled_patterns.items()
        if name.startswith("custom_injection_") and pattern.search(rendered)
    )

    return found
|
|
378
|
-
|
|
379
|
-
def _check_union_injection(self, union_node: "exp.Union", context: "SQLProcessingContext") -> "list[SecurityIssue]":
    """Look for UNION-based injection signs around ``union_node``.

    Two conditions are reported: more UNION nodes in the whole statement
    than ``config.max_union_count``, and a right-hand SELECT whose projection
    holds more NULL expressions than ``config.max_null_padding``.

    Args:
        union_node: The UNION node being inspected.
        context: Processing context; its current expression is searched for
            UNION nodes.

    Returns:
        The issues found; empty when the context has no current expression.
    """
    statement = context.current_expression
    if not statement:
        return []

    findings: list[SecurityIssue] = []

    # Count the UNIONs in the whole statement, not only below this node.
    total_unions = sum(1 for _ in statement.find_all(exp.Union))
    if total_unions > self.config.max_union_count:
        findings.append(
            SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=self.config.injection_risk_level,
                description=f"Excessive UNION operations detected ({total_unions})",
                location=union_node.sql()[:100],
                pattern_matched="excessive_unions",
                recommendation="Limit the number of UNION operations",
                metadata={"union_count": total_unions},
            )
        )

    right_side = union_node.right if isinstance(union_node, exp.Union) else None
    if isinstance(right_side, exp.Select) and right_side.expressions:
        padded = sum(1 for projection in right_side.expressions if isinstance(projection, exp.Null))
        if padded > self.config.max_null_padding:
            findings.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.INJECTION,
                    risk_level=self.config.injection_risk_level,
                    description=f"UNION with excessive NULL padding ({padded} NULLs)",
                    location=union_node.sql()[:100],
                    pattern_matched="union_null_padding",
                    recommendation="Validate UNION queries for proper column matching",
                    metadata={"null_count": padded},
                )
            )

    return findings
|
|
419
|
-
|
|
420
|
-
def _check_system_schema_access(self, table_node: "exp.Table") -> Optional["SecurityIssue"]:
    """Report a table reference that points at a known system schema.

    The table's schema (``db``) and name are lower-cased and compared with
    ``SYSTEM_SCHEMAS``. Schemas listed in ``config.allowed_system_schemas``
    are exempt; that comparison is case-insensitive, so an entry such as
    ``"INFORMATION_SCHEMA"`` is honored as well.

    Args:
        table_node: Table reference to inspect.

    Returns:
        A ``SecurityIssue`` for the first matching database type, or ``None``
        when the reference is allowed or matches nothing.
    """
    table_name = table_node.name.lower() if table_node.name else ""
    schema_name = table_node.db.lower() if table_node.db else ""

    # schema_name is lower-cased, so the allow-list must be normalized too.
    allowed_schemas = {allowed.lower() for allowed in self.config.allowed_system_schemas}
    if schema_name in allowed_schemas:
        return None

    # Check against known system schemas.
    # NOTE(review): the table-name test is a substring match, so names such
    # as "call_logs" (contains "all_") are reported too - confirm whether a
    # prefix or exact match was intended.
    for db_type, schemas in SYSTEM_SCHEMAS.items():
        if schema_name in schemas or any(schema in table_name for schema in schemas):
            return SecurityIssue(
                issue_type=SecurityIssueType.INJECTION,
                risk_level=self.config.injection_risk_level,
                description=f"Access to system schema detected: {schema_name or table_name}",
                location=table_node.sql(),
                pattern_matched="system_schema_access",
                recommendation="Restrict access to system schemas",
                metadata={"database_type": db_type, "schema": schema_name, "table": table_name},
            )

    return None
|
|
443
|
-
|
|
444
|
-
def _check_tautology_patterns(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect always-true-condition issues for a single AST node.

    Three shapes are reported: a literal boolean ``TRUE``, a comparison that
    ``_is_tautology`` judges always true, and an ``OR`` whose generated SQL
    matches the ``or_patterns`` or ``always_true`` regex.

    Args:
        node: AST node to inspect.
        context: Processing context; not read by this method.

    Returns:
        The issues found for this node; empty when nothing matched.
    """
    found: list[SecurityIssue] = []
    risk = self.config.tautology_risk_level
    comparison_types = (exp.EQ, exp.NEQ, exp.GT, exp.LT, exp.GTE, exp.LTE)

    def record(description: str, location: str, pattern: str, recommendation: str) -> None:
        """Append one tautology issue built from the given details."""
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.TAUTOLOGY,
                risk_level=risk,
                description=description,
                location=location,
                pattern_matched=pattern,
                recommendation=recommendation,
            )
        )

    if isinstance(node, exp.Boolean) and node.this is True:
        record(
            "Tautology: always-true literal condition detected",
            node.sql(),
            "always-true",
            "Remove always-true conditions from WHERE clause",
        )

    if isinstance(node, comparison_types) and self._is_tautology(node):
        record(
            "Tautology: always-true condition detected",
            node.sql(),
            "tautology_condition",
            "Review WHERE conditions for always-true statements",
        )

    if isinstance(node, exp.Or):
        rendered = node.sql()
        if PATTERNS["or_patterns"].search(rendered) or PATTERNS["always_true"].search(rendered):
            record(
                "OR with always-true condition detected",
                rendered[:100],
                "or_tautology",
                "Validate OR conditions in WHERE clauses",
            )

    return found
|
|
489
|
-
|
|
490
|
-
def _is_tautology(self, comparison: "exp.Expression") -> bool:
    """Decide whether a comparison always evaluates to true.

    Rules, applied in order:

    * Anything that is not a binary expression is not a tautology.
    * When both operands are identical (see ``_expressions_identical``),
      ``=``, ``>=`` and ``<=`` are tautologies while ``<>``, ``>`` and ``<``
      are not.
    * When both operands are literals, ``=`` is a tautology if their values
      are equal and ``<>`` is a tautology if their values differ.

    Args:
        comparison: Expression to evaluate.

    Returns:
        ``True`` when the comparison is recognized as always true.
    """
    if not isinstance(comparison, exp.Binary):
        return False

    # In sqlglot, binary expressions use 'this' and 'expression' for operands.
    left = comparison.this
    right = comparison.expression

    if self._expressions_identical(left, right):
        if isinstance(comparison, (exp.EQ, exp.GTE, exp.LTE)):
            return True
        if isinstance(comparison, (exp.NEQ, exp.GT, exp.LT)):
            return False

    # Reading ``this`` and comparing the values are plain attribute and
    # equality operations, so they are no longer wrapped in a catch-all
    # ``except Exception`` that could only hide real programming errors.
    if isinstance(left, exp.Literal) and isinstance(right, exp.Literal):
        if isinstance(comparison, exp.EQ):
            return bool(left.this == right.this)
        if isinstance(comparison, exp.NEQ):
            return bool(left.this != right.this)

    return False
|
|
519
|
-
|
|
520
|
-
@staticmethod
def _expressions_identical(expr1: "exp.Expression", expr2: "exp.Expression") -> bool:
    """Tell whether two expressions are structurally the same.

    Expressions of different types never match. Columns match on name and
    table, literals match on their value, and every other type is compared
    through its generated SQL.

    Args:
        expr1: First expression.
        expr2: Second expression.

    Returns:
        ``True`` when the two expressions are considered identical.
    """
    if type(expr1) is not type(expr2):
        return False

    # Both operands share one exact type from here on, so testing the first
    # is enough to pick the comparison rule.
    if isinstance(expr1, exp.Column):
        same_name = expr1.name == expr2.name
        return same_name and expr1.table == expr2.table

    if isinstance(expr1, exp.Literal):
        return bool(expr1.this == expr2.this)

    # For other expressions, compare their SQL representations.
    first_sql = expr1.sql()
    return first_sql == expr2.sql()
|
|
534
|
-
|
|
535
|
-
def _check_suspicious_keywords(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Collect suspicious-keyword issues raised by a single AST node.

    Three independent checks run in order: function calls against the
    configured blocked/allowed lists and ``SUSPICIOUS_FUNCTIONS``, ``Command``
    nodes against a short list of execution/privilege keywords, and the
    node's rendered SQL against the built-in and custom regex patterns.

    Args:
        node: AST node to inspect.
        context: Processing context; not read by this check.

    Returns:
        The issues found for ``node``; empty when nothing matched.
    """
    found: list[SecurityIssue] = []

    if isinstance(node, exp.Func):
        func_name = (node.name or "").lower()
        blocked = func_name in self.config.blocked_functions

        if blocked:
            found.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description=f"Blocked function used: {func_name}",
                    location=node.sql()[:100],
                    pattern_matched="blocked_function",
                    recommendation=f"Function {func_name} is not allowed",
                )
            )
        elif func_name in SUSPICIOUS_FUNCTIONS and func_name not in self.config.allowed_functions:
            found.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=self.config.keyword_risk_level,
                    description=f"Suspicious function detected: {func_name}",
                    location=node.sql()[:100],
                    pattern_matched="suspicious_function",
                    recommendation=f"Review usage of {func_name} function",
                    metadata={"function": func_name},
                )
            )

    if isinstance(node, exp.Command):
        # Commands are often used for dynamic SQL execution.
        command_text = str(node)
        lowered_command = command_text.lower()
        exec_keywords = ["execute", "exec", "sp_executesql", "grant", "revoke"]
        if any(keyword in lowered_command for keyword in exec_keywords):
            found.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description=f"Dynamic SQL execution command detected: {command_text.split()[0].lower()}",
                    location=command_text[:100],
                    pattern_matched="exec_command",
                    recommendation="Avoid dynamic SQL execution",
                )
            )

    if not has_sql_method(node):
        return found

    sql_text = node.sql()

    # Built-in regex checks, evaluated in this order:
    # (PATTERNS key, description, pattern_matched, recommendation)
    builtin_checks = (
        (
            "file_operations",
            "File operation detected in SQL",
            "file_operation",
            "File operations should be handled at application level",
        ),
        (
            "exec_functions",
            "Dynamic SQL execution function detected",
            "exec_function",
            "Avoid dynamic SQL execution",
        ),
        (
            "admin_functions",
            "Administrative command detected",
            "admin_function",
            "Administrative commands should be restricted",
        ),
    )
    for pattern_key, description, matched_label, recommendation in builtin_checks:
        if PATTERNS[pattern_key].search(sql_text):
            found.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                    risk_level=RiskLevel.HIGH,
                    description=description,
                    location=sql_text[:100],
                    pattern_matched=matched_label,
                    recommendation=recommendation,
                )
            )

    # User-supplied patterns are the compiled entries prefixed "custom_suspicious_".
    for name, pattern in self._compiled_patterns.items():
        if not name.startswith("custom_suspicious_"):
            continue
        if not pattern.search(sql_text):
            continue
        found.append(
            SecurityIssue(
                issue_type=SecurityIssueType.SUSPICIOUS_KEYWORD,
                risk_level=self.config.keyword_risk_level,
                description=f"Custom suspicious pattern matched: {name}",
                location=sql_text[:100],
                pattern_matched=name,
            )
        )

    return found
|
|
641
|
-
|
|
642
|
-
@staticmethod
def _check_combined_patterns(
    expression: "exp.Expression",  # noqa: ARG004
    existing_issues: "list[SecurityIssue]",
) -> "list[SecurityIssue]":
    """Derive combined-attack issues from issues that were already found.

    Only ``existing_issues`` is inspected; ``expression`` is accepted for
    interface compatibility and is not read.

    Args:
        expression: The analysed expression (unused).
        existing_issues: Issues produced by the individual checks.

    Returns:
        Newly created ``COMBINED_ATTACK`` issues; empty when no combination
        matched. ``existing_issues`` is not modified.
    """
    combined_issues: list[SecurityIssue] = []

    # Group issues by type
    issue_types = {issue.issue_type for issue in existing_issues}

    # Tautology + UNION = Classic SQLi
    if SecurityIssueType.TAUTOLOGY in issue_types and SecurityIssueType.INJECTION in issue_types:
        has_union = any(
            "union" in issue.pattern_matched.lower() for issue in existing_issues if issue.pattern_matched
        )
        if has_union:
            combined_issues.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.COMBINED_ATTACK,
                    risk_level=RiskLevel.HIGH,
                    description="Classic SQL injection pattern detected (Tautology + UNION)",
                    pattern_matched="classic_sqli",
                    recommendation="This appears to be a deliberate SQL injection attempt",
                    metadata={"attack_components": ["tautology", "union"], "confidence": "high"},
                )
            )

    # Multiple suspicious functions + system schema = Data extraction attempt
    suspicious_func_count = sum(
        1
        for issue in existing_issues
        if issue.issue_type == SecurityIssueType.SUSPICIOUS_KEYWORD and "function" in (issue.pattern_matched or "")
    )
    system_schema_access = any("system_schema" in (issue.pattern_matched or "") for issue in existing_issues)

    if suspicious_func_count >= SUSPICIOUS_FUNC_THRESHOLD and system_schema_access:
        combined_issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.COMBINED_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="Data extraction attempt detected (Multiple functions + System schema)",
                pattern_matched="data_extraction",
                recommendation="Block queries attempting to extract system information",
                metadata={"suspicious_functions": suspicious_func_count, "targets_system_schema": True},
            )
        )

    # Encoding or comment patterns = Evasion attempt
    # NOTE(review): the description speaks of an "injection attempt", but the
    # condition does not require an INJECTION issue to be present - confirm intent.
    has_encoding = any("encoding" in (issue.pattern_matched or "").lower() for issue in existing_issues)
    has_comment = any("comment" in (issue.pattern_matched or "").lower() for issue in existing_issues)

    if has_encoding or has_comment:
        # List only the techniques actually observed; no None placeholders.
        evasion_techniques = [
            label for label, present in (("encoding", has_encoding), ("comments", has_comment)) if present
        ]
        combined_issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.COMBINED_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="Evasion technique detected in SQL injection attempt",
                pattern_matched="evasion_attempt",
                recommendation="Input appears to be crafted to bypass security filters",
                metadata={"evasion_techniques": evasion_techniques},
            )
        )

    return combined_issues
|
|
712
|
-
|
|
713
|
-
def _check_ast_anomalies(
    self, node: "exp.Expression", context: "SQLProcessingContext", nesting_depth: int
) -> "list[SecurityIssue]":
    """Flag structural oddities of a single AST node.

    Checks, in order: nesting depth above ``config.max_nesting_depth``,
    string literals longer than ``config.max_literal_length``, function
    anomalies for ``Func`` nodes and binary-operation anomalies for
    ``Binary`` nodes.

    Args:
        node: AST node to inspect.
        context: Processing context; not read by this check.
        nesting_depth: Nesting depth of ``node`` as supplied by the caller.

    Returns:
        The anomalies found for ``node``; empty when none.
    """
    anomalies: list[SecurityIssue] = []
    depth_limit = self.config.max_nesting_depth

    if nesting_depth > depth_limit:
        rendered = node.sql() if has_sql_method(node) else str(node)
        anomalies.append(
            SecurityIssue(
                issue_type=SecurityIssueType.AST_ANOMALY,
                risk_level=self.config.ast_anomaly_risk_level,
                description=f"Excessive query nesting detected (depth: {nesting_depth})",
                location=rendered[:100],
                pattern_matched="excessive_nesting",
                recommendation="Review query structure for potential injection",
                ast_node_type=type(node).__name__,
                confidence=0.8,
                metadata={"nesting_depth": nesting_depth, "max_allowed": self.config.max_nesting_depth},
            )
        )

    if isinstance(node, Literal) and isinstance(node.this, str):
        literal_text = str(node.this)
        literal_length = len(literal_text)
        if literal_length > self.config.max_literal_length:
            anomalies.append(
                SecurityIssue(
                    issue_type=SecurityIssueType.AST_ANOMALY,
                    risk_level=self.config.ast_anomaly_risk_level,
                    description=f"Suspiciously long literal detected ({literal_length} chars)",
                    location=literal_text[:100],
                    pattern_matched="long_literal",
                    recommendation="Validate input length and content",
                    ast_node_type="Literal",
                    confidence=0.6,
                    metadata={"literal_length": literal_length, "max_allowed": self.config.max_literal_length},
                )
            )

    if isinstance(node, Func):
        anomalies.extend(self._analyze_function_anomalies(node))

    if isinstance(node, Binary):
        anomalies.extend(self._analyze_binary_anomalies(node))

    return anomalies
|
|
763
|
-
|
|
764
|
-
def _check_structural_attacks(
    self, node: "exp.Expression", context: "SQLProcessingContext"
) -> "list[SecurityIssue]":
    """Run the structural analysers that apply to ``node``.

    ``Union``, ``Subquery`` and ``Or`` nodes are each handed to their
    dedicated analyser, in that order; any other node yields no issues.

    Args:
        node: AST node to inspect.
        context: Processing context; not read by this check.

    Returns:
        The concatenated results of every analyser that applied.
    """
    analysers = (
        (Union, self._analyze_union_structure),
        (Subquery, self._analyze_subquery_structure),
        (Or, self._analyze_or_structure),
    )

    collected: list[SecurityIssue] = []
    for node_type, analyse in analysers:
        if isinstance(node, node_type):
            collected.extend(analyse(node))
    return collected
|
|
783
|
-
|
|
784
|
-
@staticmethod
def _analyze_function_anomalies(func_node: Func) -> "list[SecurityIssue]":
    """Inspect one function call for anomalous usage.

    Two patterns are reported: a function from ``SUSPICIOUS_FUNCTIONS``
    passed directly as the primary argument of another function, and
    ``concat`` / ``concat_ws`` calls with more than ``MAX_FUNCTION_ARGS``
    arguments. Unnamed functions are skipped.

    Returns:
        The anomalies found; empty when none.
    """
    anomalies: list[SecurityIssue] = []

    if not func_node.name:
        return anomalies

    func_name = func_node.name.lower()

    inner = func_node.this
    if inner and isinstance(inner, Func) and inner.name and inner.name.lower() in SUSPICIOUS_FUNCTIONS:
        inner_name = inner.name.lower()
        rendered = func_node.sql() if has_sql_method(func_node) else str(func_node)
        anomalies.append(
            SecurityIssue(
                issue_type=SecurityIssueType.AST_ANOMALY,
                risk_level=RiskLevel.MEDIUM,
                description=f"Nested suspicious function call: {inner_name} inside {func_name}",
                location=rendered[:100],
                pattern_matched="nested_suspicious_function",
                recommendation="Review nested function calls for evasion attempts",
                ast_node_type="Func",
                confidence=0.7,
                metadata={"outer_function": func_name, "inner_function": inner_name},
            )
        )

    if not (has_expressions(func_node) and func_node.expressions):
        return anomalies

    arg_count = len(func_node.expressions)
    if func_name in {"concat", "concat_ws"} and arg_count > MAX_FUNCTION_ARGS:
        rendered = func_node.sql() if has_sql_method(func_node) else str(func_node)
        anomalies.append(
            SecurityIssue(
                issue_type=SecurityIssueType.AST_ANOMALY,
                risk_level=RiskLevel.MEDIUM,
                description=f"Excessive arguments to {func_name} function ({arg_count} args)",
                location=rendered[:100],
                pattern_matched="excessive_function_args",
                recommendation="Review function arguments for potential injection",
                ast_node_type="Func",
                confidence=0.6,
                metadata={"function": func_name, "arg_count": arg_count},
            )
        )

    return anomalies
|
|
829
|
-
|
|
830
|
-
def _analyze_binary_anomalies(self, binary_node: Binary) -> "list[SecurityIssue]":
    """Report a binary operation whose operand nesting is unusually deep.

    The depth comes from ``_calculate_binary_depth``; a low-risk issue is
    raised when it exceeds ``MAX_NESTING_LEVELS`` (an arbitrary threshold).

    Returns:
        A single-element list when the threshold is exceeded, else an empty list.
    """
    depth = self._calculate_binary_depth(binary_node)
    if depth <= MAX_NESTING_LEVELS:
        return []

    deep_nesting = SecurityIssue(
        issue_type=SecurityIssueType.AST_ANOMALY,
        risk_level=RiskLevel.LOW,
        description=f"Deeply nested binary operations detected (depth: {depth})",
        location=binary_node.sql()[:100],
        pattern_matched="deep_binary_nesting",
        recommendation="Review complex condition structures",
        ast_node_type="Binary",
        confidence=0.5,
        metadata={"nesting_depth": depth},
    )
    return [deep_nesting]
|
|
852
|
-
|
|
853
|
-
def _analyze_union_structure(self, union_node: Union) -> "list[SecurityIssue]":
    """Report a UNION whose two sides select a different number of columns.

    Column counts come from ``_count_select_columns``; a side that yields 0
    (for example, not a plain SELECT) suppresses the report.

    Returns:
        A single-element list on a mismatch, else an empty list.
    """
    if not isinstance(union_node, exp.Union):
        return []

    left_cols = self._count_select_columns(union_node.left)
    right_cols = self._count_select_columns(union_node.right)

    counts_known = left_cols > 0 and right_cols > 0
    if left_cols == right_cols or not counts_known:
        return []

    mismatch = SecurityIssue(
        issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
        risk_level=RiskLevel.HIGH,
        description=f"UNION with mismatched column counts ({left_cols} vs {right_cols})",
        location=union_node.sql()[:100],
        pattern_matched="union_column_mismatch",
        recommendation="UNION queries should have matching column counts",
        ast_node_type="Union",
        confidence=0.9,
        metadata={"left_columns": left_cols, "right_columns": right_cols},
    )
    return [mismatch]
|
|
877
|
-
|
|
878
|
-
@staticmethod
def _analyze_subquery_structure(subquery_node: Subquery) -> "list[SecurityIssue]":
    """Report a subquery whose SELECT list consists solely of literals.

    The issue is raised only when the subquery wraps a SELECT, every selected
    expression is a ``Literal``, and the number of expressions exceeds
    ``MIN_UNION_COUNT_FOR_INJECTION``.

    Returns:
        A single-element list when the pattern matches, else an empty list.
    """
    inner_select = subquery_node.this
    if not (inner_select and isinstance(inner_select, exp.Select)):
        return []

    if not (has_expressions(inner_select) and inner_select.expressions):
        return []

    projections = inner_select.expressions
    total_expressions = len(projections)
    literal_count = len([item for item in projections if isinstance(item, Literal)])

    only_literals = literal_count == total_expressions
    if not (only_literals and total_expressions > MIN_UNION_COUNT_FOR_INJECTION):
        return []

    literal_only = SecurityIssue(
        issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
        risk_level=RiskLevel.MEDIUM,
        description=f"Subquery selecting only literals ({literal_count} literals)",
        location=subquery_node.sql()[:100],
        pattern_matched="literal_only_subquery",
        recommendation="Review subqueries that only select literal values",
        ast_node_type="Subquery",
        confidence=0.7,
        metadata={"literal_count": literal_count, "total_expressions": total_expressions},
    )
    return [literal_only]
|
|
906
|
-
|
|
907
|
-
def _analyze_or_structure(self, or_node: Or) -> "list[SecurityIssue]":
    """Report an OR whose left or right operand is always true.

    Each operand is classified once with ``_is_always_true_condition`` and
    the results are reused for both the decision and the issue metadata.

    Args:
        or_node: The OR node to inspect.

    Returns:
        A single-element list when either side is always true, else an empty list.
    """
    issues: list[SecurityIssue] = []

    if not isinstance(or_node, exp.Binary):
        return issues

    # Evaluate each side exactly once instead of re-running the check for the metadata.
    left_always_true = self._is_always_true_condition(or_node.left)
    right_always_true = self._is_always_true_condition(or_node.right)

    if left_always_true or right_always_true:
        issues.append(
            SecurityIssue(
                issue_type=SecurityIssueType.STRUCTURAL_ATTACK,
                risk_level=RiskLevel.HIGH,
                description="OR condition with always-true clause detected",
                location=or_node.sql()[:100],
                pattern_matched="or_tautology_ast",
                recommendation="Remove always-true conditions from OR clauses",
                ast_node_type="Or",
                confidence=0.95,
                metadata={
                    "left_always_true": left_always_true,
                    "right_always_true": right_always_true,
                },
            )
        )

    return issues
|
|
932
|
-
|
|
933
|
-
def _calculate_binary_depth(self, node: Binary, depth: int = 0) -> int:
    """Return how deeply binary operations are nested beneath ``node``.

    Args:
        node: Root of the (sub)tree to measure.
        depth: Depth already accumulated for ``node``; each binary operand
            that is itself a binary operation adds one level.

    Returns:
        The largest depth reached on either operand side, or ``depth`` when
        neither operand is a binary operation.
    """
    if not isinstance(node, exp.Binary):
        return depth

    deepest = depth
    for operand in (node.left, node.right):
        if isinstance(operand, Binary):
            branch_depth = self._calculate_binary_depth(operand, depth + 1)
            deepest = max(deepest, branch_depth)
    return deepest
|
|
944
|
-
|
|
945
|
-
@staticmethod
def _count_select_columns(node: "exp.Expression") -> int:
    """Return the number of expressions in a SELECT list.

    Yields 0 for anything that is not a SELECT exposing expressions, and for
    a SELECT whose expression list is empty.
    """
    if not (isinstance(node, exp.Select) and has_expressions(node)):
        return 0

    projections = node.expressions
    if not projections:
        return 0
    return len(projections)
|
|
951
|
-
|
|
952
|
-
@staticmethod
def _is_always_true_condition(node: "exp.Expression") -> bool:
    """Check whether a condition is always true, using AST inspection only.

    Recognised forms:

    * a literal or boolean node whose value renders as ``TRUE`` or ``1``.
      sqlglot represents the ``TRUE`` keyword as ``exp.Boolean`` rather than
      ``Literal``, so both node types are accepted;
    * an equality between two literals with the same textual value, such as
      ``1 = 1``.

    Args:
        node: The condition node to classify.

    Returns:
        True when ``node`` matches one of the forms above, otherwise False.
    """
    if isinstance(node, (Literal, exp.Boolean)) and str(node.this).upper() in {"TRUE", "1"}:
        return True

    # Check for 1=1 or similar literal-vs-literal tautologies.
    if not (isinstance(node, EQ) and isinstance(node, exp.Binary)):
        return False

    left, right = node.left, node.right
    return bool(
        isinstance(left, Literal)
        and isinstance(right, Literal)
        and str(left.this) == str(right.this)
    )
|