sqlchecker 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlchecker/__init__.py +58 -0
- sqlchecker/detectors/__init__.py +103 -0
- sqlchecker/detectors/base.py +44 -0
- sqlchecker/detectors/complications.py +375 -0
- sqlchecker/detectors/logical.py +732 -0
- sqlchecker/detectors/semantic.py +289 -0
- sqlchecker/detectors/syntax.py +1140 -0
- sqlchecker-0.3.1.dist-info/METADATA +153 -0
- sqlchecker-0.3.1.dist-info/RECORD +11 -0
- sqlchecker-0.3.1.dist-info/WHEEL +4 -0
- sqlchecker-0.3.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,732 @@
|
|
|
1
|
+
'''Detector for logical errors in SQL queries.'''
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Callable
|
|
5
|
+
from sqlerrors import SqlErrors
|
|
6
|
+
from sqlglot import exp
|
|
7
|
+
|
|
8
|
+
from .base import BaseDetector, DetectedError
|
|
9
|
+
from sqlscope.query import Query, SetOperation
|
|
10
|
+
|
|
11
|
+
class LogicalErrorDetector(BaseDetector):
|
|
12
|
+
'''Detector for logical errors in SQL queries.'''
|
|
13
|
+
def __init__(self,
             *,
             query: Query,
             update_query: Callable[[str, str | None], None],
             solutions: list[Query] | None = None,
             ):
    '''
    Create a logical-error detector.

    Args:
        query: The query to analyse.
        update_query: Callback forwarded unchanged to the base detector.
        solutions: Reference solutions the query is compared against.
            Omitting it (or passing None) is equivalent to passing an empty list.
    '''
    # A fresh list is created per call: a literal `[]` default would be a single
    # object shared by every detector instance constructed without solutions.
    super().__init__(
        query=query,
        solutions=[] if solutions is None else solutions,
        update_query=update_query,
    )
|
|
24
|
+
|
|
25
|
+
def run(self) -> list[DetectedError]:
|
|
26
|
+
|
|
27
|
+
# All logical errors require at least one solution to compare against
|
|
28
|
+
# If no solutions are provided, we cannot perform logical error detection
|
|
29
|
+
if not self.solutions:
|
|
30
|
+
return []
|
|
31
|
+
|
|
32
|
+
results: list[DetectedError] = super().run()
|
|
33
|
+
|
|
34
|
+
checks = [
|
|
35
|
+
self.detect_39_and_instead_of_or, # TODO: implement
|
|
36
|
+
self.detect_52_or_instead_of_and, # TODO: refactor/implement
|
|
37
|
+
self.detect_53_extraneous_not_operator, # TODO: implement
|
|
38
|
+
self.detect_54_missing_not_operator, # TODO: implement
|
|
39
|
+
self.detect_55_substituted_existance_negation_with_less_more_than, # TODO: implement
|
|
40
|
+
self.detect_57_incorrect_comparison_operator_or_value, # TODO: refactor/implement
|
|
41
|
+
self.detect_58_59_62_table_reference_errors, # ok
|
|
42
|
+
self.detect_60_join_condition_on_incorrect_column, # ok
|
|
43
|
+
self.detect_61_join_condition_with_incorrect_comparison_operator, # ok
|
|
44
|
+
self.detect_48_missing_join_condition, # ok
|
|
45
|
+
self.detect_104_condition_on_outer_join, # ok
|
|
46
|
+
self.detect_63_improper_nesting_of_expressions, # TODO: implement
|
|
47
|
+
self.detect_64_improper_nesting_of_subqueries, # TODO: implement
|
|
48
|
+
self.detect_65_extraneous_quotes, # TODO: implement
|
|
49
|
+
self.detect_66_missing_expression, # TODO: implement
|
|
50
|
+
self.detect_68_extraneous_expression, # TODO: implement
|
|
51
|
+
self.detect_67_expression_on_incorrect_column, # TODO: implement
|
|
52
|
+
self.detect_69_expression_on_incorrect_clause, # TODO: implement
|
|
53
|
+
self.detect_43_wildcards_without_like, # ok
|
|
54
|
+
self.detect_110_111_wrong_invalid_wildcard, # ok
|
|
55
|
+
self.detect_70_extraneous_column_in_select, # ok
|
|
56
|
+
self.detect_71_missing_column_from_select, # ok
|
|
57
|
+
self.detect_72_missing_distinct_from_select, # ok
|
|
58
|
+
self.detect_73_missing_as_from_select, # ok
|
|
59
|
+
self.detect_74_missing_column_from_order_by, # TODO: refactor/implement
|
|
60
|
+
self.detect_75_incorrect_column_in_order_by, # TODO: refactor/implement
|
|
61
|
+
self.detect_77_incorrect_ordering_of_rows, # TODO: implement
|
|
62
|
+
self.detect_112_118_missing_extraneous_where_clause, # ok
|
|
63
|
+
self.detect_113_119_missing_extraneous_group_by_clause, # ok
|
|
64
|
+
self.detect_114_120_missing_extraneous_having_clause, # ok
|
|
65
|
+
self.detect_115_121_missing_extraneous_order_by_clause, # ok
|
|
66
|
+
self.detect_116_121_123_missing_extraneous_incorrect_limit_clause, # ok
|
|
67
|
+
self.detect_117_122_missing_extraneous_incorrect_offset_clause, # ok
|
|
68
|
+
self.detect_80_incorrect_function, # TODO: implement
|
|
69
|
+
self.detect_78_distinct_as_function_parameter_when_not_applicable, # TODO: implement
|
|
70
|
+
self.detect_79_missing_distinct_from_function_parameter, # TODO: implement
|
|
71
|
+
self.detect_81_incorrect_column_as_function_parameter, # TODO: implement
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
for chk in checks:
|
|
75
|
+
results.extend(chk())
|
|
76
|
+
|
|
77
|
+
return results
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def detect_39_and_instead_of_or(self) -> list[DetectedError]:
|
|
81
|
+
return []
|
|
82
|
+
|
|
83
|
+
def detect_52_or_instead_of_and(self) -> list[DetectedError]:
|
|
84
|
+
'''
|
|
85
|
+
Detects if OR is used instead of AND in the WHERE or HAVING clauses
|
|
86
|
+
by comparing the query's AST against the correct solution's AST.
|
|
87
|
+
'''
|
|
88
|
+
return []
|
|
89
|
+
|
|
90
|
+
results = []
|
|
91
|
+
clauses_to_check = ['where', 'having']
|
|
92
|
+
|
|
93
|
+
for clause_name in clauses_to_check:
|
|
94
|
+
# Safely access the clause (e.g., 'where') from both the proposed (q) and correct (s) solution ASTs.
|
|
95
|
+
q_clause = self.q_ast.get('args', {}).get(clause_name)
|
|
96
|
+
s_clause = self.s_ast.get('args', {}).get(clause_name)
|
|
97
|
+
|
|
98
|
+
# If the clause doesn't exist in both queries, skip to the next one.
|
|
99
|
+
if not q_clause or not s_clause:
|
|
100
|
+
continue
|
|
101
|
+
|
|
102
|
+
# Extract the top-level operator ('And', 'Or', etc.) from the clause.
|
|
103
|
+
q_operator = q_clause.get('args', {}).get('this', {}).get('class')
|
|
104
|
+
s_operator = s_clause.get('args', {}).get('this', {}).get('class')
|
|
105
|
+
|
|
106
|
+
# Check if the proposed query incorrectly uses 'Or' when the correct solution uses 'And'.
|
|
107
|
+
if q_operator == 'Or' and s_operator == 'And':
|
|
108
|
+
results.append((
|
|
109
|
+
SqlErrors.LOG_52_OR_INSTEAD_OF_AND,
|
|
110
|
+
f"OR used instead of AND in the {clause_name.upper()} clause"
|
|
111
|
+
))
|
|
112
|
+
|
|
113
|
+
return results
|
|
114
|
+
|
|
115
|
+
def detect_53_extraneous_not_operator(self) -> list[DetectedError]:
|
|
116
|
+
return []
|
|
117
|
+
|
|
118
|
+
def detect_54_missing_not_operator(self) -> list[DetectedError]:
|
|
119
|
+
return []
|
|
120
|
+
|
|
121
|
+
def detect_55_substituted_existance_negation_with_less_more_than(self) -> list[DetectedError]:
|
|
122
|
+
return []
|
|
123
|
+
|
|
124
|
+
def detect_57_incorrect_comparison_operator_or_value(self) -> list[DetectedError]:
|
|
125
|
+
'''
|
|
126
|
+
Flags errors in comparison operators or values in WHERE and HAVING clauses.
|
|
127
|
+
|
|
128
|
+
This function identifies two types of errors:
|
|
129
|
+
1. An incorrect comparison operator is used (e.g., '<' instead of '>').
|
|
130
|
+
2. An incorrect literal value is used in a comparison (e.g., 'Morandi' instead of 'Morando').
|
|
131
|
+
'''
|
|
132
|
+
return []
|
|
133
|
+
|
|
134
|
+
results = []
|
|
135
|
+
|
|
136
|
+
# 1. Extract all comparison tuples from the proposed and correct queries.
|
|
137
|
+
q_comparisons = []
|
|
138
|
+
s_comparisons = []
|
|
139
|
+
|
|
140
|
+
# Extract from WHERE clause
|
|
141
|
+
for ast, comp_list in [(self.q_ast, q_comparisons), (self.s_ast, s_comparisons)]:
|
|
142
|
+
clause_node = ast.get('args', {}).get('where', {}).get('args', {}).get('this')
|
|
143
|
+
if clause_node:
|
|
144
|
+
comp_list.extend(self._get_comparisons(clause_node))
|
|
145
|
+
|
|
146
|
+
# Extract from HAVING clause
|
|
147
|
+
clause_node = ast.get('args', {}).get('having', {}).get('args', {}).get('this')
|
|
148
|
+
if clause_node:
|
|
149
|
+
comp_list.extend(self._get_comparisons(clause_node))
|
|
150
|
+
|
|
151
|
+
# 2. Create a map of the correct comparisons for efficient lookup.
|
|
152
|
+
# The key is the column name, and the value is a (operator, value) tuple.
|
|
153
|
+
s_comp_map = {comp[0]: (comp[1], comp[2]) for comp in s_comparisons}
|
|
154
|
+
|
|
155
|
+
# 3. Iterate through the proposed query's comparisons and check for mismatches.
|
|
156
|
+
for q_col, q_op, q_val in q_comparisons:
|
|
157
|
+
# Case-insensitive column lookup
|
|
158
|
+
q_col_lower = q_col.lower()
|
|
159
|
+
s_comp_map_lower = {k.lower(): v for k, v in s_comp_map.items()}
|
|
160
|
+
|
|
161
|
+
if q_col_lower in s_comp_map_lower:
|
|
162
|
+
s_op, s_val = s_comp_map_lower[q_col_lower]
|
|
163
|
+
|
|
164
|
+
# Check for an incorrect comparison operator
|
|
165
|
+
if q_op != s_op:
|
|
166
|
+
results.append((
|
|
167
|
+
SqlErrors.LOG_57_INCORRECT_COMPARISON_OPERATOR_OR_VALUE,
|
|
168
|
+
f"Incorrect operator on column '{q_col}'. Found {q_op} but expected {s_op}."
|
|
169
|
+
))
|
|
170
|
+
|
|
171
|
+
# Check for an incorrect comparison value (exact comparison for all value types)
|
|
172
|
+
if q_val != s_val:
|
|
173
|
+
results.append((
|
|
174
|
+
SqlErrors.LOG_57_INCORRECT_COMPARISON_OPERATOR_OR_VALUE,
|
|
175
|
+
f"Incorrect value in comparison for column '{q_col}'. Found '{q_val}' but expected '{s_val}'."
|
|
176
|
+
))
|
|
177
|
+
return results
|
|
178
|
+
|
|
179
|
+
def detect_58_59_62_table_reference_errors(self) -> list[DetectedError]:
|
|
180
|
+
'''
|
|
181
|
+
Detects join-related errors by comparing the tables used in the proposed query
|
|
182
|
+
against those in the correct solutions.
|
|
183
|
+
|
|
184
|
+
This function identifies three types of join errors:
|
|
185
|
+
1. Missing Join: A required table is not included in the proposed query.
|
|
186
|
+
2. Extraneous Join: An unnecessary table is included in the proposed query.
|
|
187
|
+
3. Incorrect Join: A table is included, but it is not the correct one needed for the join.
|
|
188
|
+
'''
|
|
189
|
+
|
|
190
|
+
@dataclass(frozen=True)
|
|
191
|
+
class TableCol:
|
|
192
|
+
table: str
|
|
193
|
+
column: str
|
|
194
|
+
|
|
195
|
+
results: list[DetectedError] = []
|
|
196
|
+
|
|
197
|
+
expected_tables: list[set[TableCol]] = []
|
|
198
|
+
actual_tables: set[TableCol] = set()
|
|
199
|
+
|
|
200
|
+
# Compute expected tables from solutions
|
|
201
|
+
# NOTE: We expect each solution to use the same set of tables, but we compute
|
|
202
|
+
# them separately to handle any discrepancies.
|
|
203
|
+
for solution in self.solutions:
|
|
204
|
+
solution_tables: set[TableCol] = set()
|
|
205
|
+
|
|
206
|
+
for select in solution.selects:
|
|
207
|
+
for table in select.referenced_tables:
|
|
208
|
+
if table.cte_idx is not None:
|
|
209
|
+
continue
|
|
210
|
+
solution_tables.add(TableCol(table.schema_name, table.real_name))
|
|
211
|
+
|
|
212
|
+
expected_tables.append(solution_tables)
|
|
213
|
+
|
|
214
|
+
# Compute actual tables from the proposed query
|
|
215
|
+
for select in self.query.selects:
|
|
216
|
+
for table in select.referenced_tables:
|
|
217
|
+
if table.cte_idx is not None:
|
|
218
|
+
continue
|
|
219
|
+
actual_tables.add(TableCol(table.schema_name, table.real_name))
|
|
220
|
+
|
|
221
|
+
# Check for missing joins (expected tables not in actual)
|
|
222
|
+
common_expected_tables = expected_tables[0].intersection(*expected_tables[1:])
|
|
223
|
+
all_expected_tables = expected_tables[0].union(*expected_tables[1:])
|
|
224
|
+
|
|
225
|
+
if len(actual_tables) < len(common_expected_tables):
|
|
226
|
+
for missing_table in common_expected_tables - actual_tables:
|
|
227
|
+
results.append(DetectedError(SqlErrors.MISSING_TABLE_REFERENCE, (missing_table.table, missing_table.column)))
|
|
228
|
+
elif len(actual_tables) > len(all_expected_tables):
|
|
229
|
+
for extra_table in actual_tables - all_expected_tables:
|
|
230
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_TABLE_REFERENCE, (extra_table.table, extra_table.column)))
|
|
231
|
+
else:
|
|
232
|
+
for wrong_table in actual_tables - all_expected_tables:
|
|
233
|
+
results.append(DetectedError(SqlErrors.INCORRECT_TABLE_REFERENCE, (wrong_table.table, wrong_table.column)))
|
|
234
|
+
|
|
235
|
+
return results
|
|
236
|
+
|
|
237
|
+
def detect_60_join_condition_on_incorrect_column(self) -> list[DetectedError]:
|
|
238
|
+
return []
|
|
239
|
+
|
|
240
|
+
def detect_61_join_condition_with_incorrect_comparison_operator(self) -> list[DetectedError]:
|
|
241
|
+
return []
|
|
242
|
+
|
|
243
|
+
def detect_48_missing_join_condition(self) -> list[DetectedError]:
|
|
244
|
+
return []
|
|
245
|
+
|
|
246
|
+
def detect_104_condition_on_outer_join(self) -> list[DetectedError]:
|
|
247
|
+
return []
|
|
248
|
+
|
|
249
|
+
def detect_63_improper_nesting_of_expressions(self) -> list[DetectedError]:
|
|
250
|
+
return []
|
|
251
|
+
|
|
252
|
+
def detect_64_improper_nesting_of_subqueries(self) -> list[DetectedError]:
|
|
253
|
+
return []
|
|
254
|
+
|
|
255
|
+
def detect_65_extraneous_quotes(self) -> list[DetectedError]:
|
|
256
|
+
return []
|
|
257
|
+
|
|
258
|
+
def detect_66_missing_expression(self) -> list[DetectedError]:
|
|
259
|
+
return []
|
|
260
|
+
|
|
261
|
+
def detect_68_extraneous_expression(self) -> list[DetectedError]:
|
|
262
|
+
return []
|
|
263
|
+
|
|
264
|
+
def detect_67_expression_on_incorrect_column(self) -> list[DetectedError]:
|
|
265
|
+
return []
|
|
266
|
+
|
|
267
|
+
def detect_69_expression_on_incorrect_clause(self) -> list[DetectedError]:
|
|
268
|
+
return []
|
|
269
|
+
|
|
270
|
+
def detect_43_wildcards_without_like(self) -> list[DetectedError]:
|
|
271
|
+
'''
|
|
272
|
+
Detect = '%...%' instead of LIKE
|
|
273
|
+
|
|
274
|
+
If the correct query uses equality checks containing wildcards characters ('%' or '_'),
|
|
275
|
+
the user query is unlikely to be incorrect, so we do not flag it.
|
|
276
|
+
'''
|
|
277
|
+
|
|
278
|
+
results: list[DetectedError] = []
|
|
279
|
+
|
|
280
|
+
# First check the correct solutions
|
|
281
|
+
allow_underscore = False
|
|
282
|
+
allow_percent = False
|
|
283
|
+
|
|
284
|
+
for solution in self.solutions:
|
|
285
|
+
for select in solution.selects:
|
|
286
|
+
ast = select.ast
|
|
287
|
+
|
|
288
|
+
if not ast:
|
|
289
|
+
continue
|
|
290
|
+
|
|
291
|
+
for eq in ast.find_all(exp.EQ):
|
|
292
|
+
left = eq.this
|
|
293
|
+
right = eq.expression
|
|
294
|
+
|
|
295
|
+
if isinstance(left, exp.Literal):
|
|
296
|
+
if has_character(left, '_'):
|
|
297
|
+
allow_underscore = True
|
|
298
|
+
if has_character(left, '%'):
|
|
299
|
+
allow_percent = True
|
|
300
|
+
|
|
301
|
+
if isinstance(right, exp.Literal):
|
|
302
|
+
if has_character(right, '_'):
|
|
303
|
+
allow_underscore = True
|
|
304
|
+
if has_character(right, '%'):
|
|
305
|
+
allow_percent = True
|
|
306
|
+
|
|
307
|
+
for select in self.query.selects:
|
|
308
|
+
ast = select.ast
|
|
309
|
+
|
|
310
|
+
if not ast:
|
|
311
|
+
continue
|
|
312
|
+
|
|
313
|
+
for eq in ast.find_all(exp.EQ):
|
|
314
|
+
left = eq.this
|
|
315
|
+
right = eq.expression
|
|
316
|
+
|
|
317
|
+
if isinstance(left, exp.Literal):
|
|
318
|
+
if not allow_underscore and has_character(left, '_'):
|
|
319
|
+
results.append(DetectedError(SqlErrors.WILDCARDS_WITHOUT_LIKE, (str(eq),)))
|
|
320
|
+
continue
|
|
321
|
+
if not allow_percent and has_character(left, '%'):
|
|
322
|
+
results.append(DetectedError(SqlErrors.WILDCARDS_WITHOUT_LIKE, (str(eq),)))
|
|
323
|
+
continue
|
|
324
|
+
|
|
325
|
+
if isinstance(right, exp.Literal):
|
|
326
|
+
if not allow_underscore and has_character(right, '_'):
|
|
327
|
+
results.append(DetectedError(SqlErrors.WILDCARDS_WITHOUT_LIKE, (str(eq),)))
|
|
328
|
+
continue
|
|
329
|
+
if not allow_percent and has_character(right, '%'):
|
|
330
|
+
results.append(DetectedError(SqlErrors.WILDCARDS_WITHOUT_LIKE, (str(eq),)))
|
|
331
|
+
continue
|
|
332
|
+
|
|
333
|
+
return results
|
|
334
|
+
|
|
335
|
+
def detect_110_111_wrong_invalid_wildcard(self) -> list[DetectedError]:
|
|
336
|
+
'''
|
|
337
|
+
Detect misuse of wildcards, namely:
|
|
338
|
+
- '*' and '?'
|
|
339
|
+
- '_' instead of '%'
|
|
340
|
+
- '%' instead of '_'
|
|
341
|
+
|
|
342
|
+
If the correct solution uses the same character,
|
|
343
|
+
the user query is unlikely to be incorrect, so we do not flag it.
|
|
344
|
+
'''
|
|
345
|
+
|
|
346
|
+
results: list[DetectedError] = []
|
|
347
|
+
|
|
348
|
+
# First check the correct solutions
|
|
349
|
+
underscore_in_solution = False
|
|
350
|
+
percent_in_solution = False
|
|
351
|
+
star_in_solution = False
|
|
352
|
+
question_mark_in_solution = False
|
|
353
|
+
|
|
354
|
+
for solution in self.solutions:
|
|
355
|
+
for select in solution.selects:
|
|
356
|
+
ast = select.ast
|
|
357
|
+
|
|
358
|
+
if not ast:
|
|
359
|
+
continue
|
|
360
|
+
|
|
361
|
+
for like in ast.find_all(exp.Like):
|
|
362
|
+
pattern = like.expression
|
|
363
|
+
if isinstance(pattern, exp.Literal):
|
|
364
|
+
if has_character(pattern, '_'):
|
|
365
|
+
underscore_in_solution = True
|
|
366
|
+
if has_character(pattern, '%'):
|
|
367
|
+
percent_in_solution = True
|
|
368
|
+
if has_character(pattern, '*'):
|
|
369
|
+
star_in_solution = True
|
|
370
|
+
if has_character(pattern, '?'):
|
|
371
|
+
question_mark_in_solution = True
|
|
372
|
+
|
|
373
|
+
# Then check the user query
|
|
374
|
+
for select in self.query.selects:
|
|
375
|
+
ast = select.ast
|
|
376
|
+
|
|
377
|
+
if not ast:
|
|
378
|
+
continue
|
|
379
|
+
|
|
380
|
+
for like in ast.find_all(exp.Like):
|
|
381
|
+
pattern = like.expression
|
|
382
|
+
if isinstance(pattern, exp.Literal):
|
|
383
|
+
# query contains '*' while solution does not
|
|
384
|
+
# most likely an attempt to use '%' wildcard
|
|
385
|
+
if not star_in_solution and has_character(pattern, '*'):
|
|
386
|
+
results.append(DetectedError(SqlErrors.INVALID_WILDCARD, (str(like),)))
|
|
387
|
+
|
|
388
|
+
# query contains '?' while solution does not
|
|
389
|
+
# most likely an attempt to use '_' wildcard
|
|
390
|
+
if not question_mark_in_solution and has_character(pattern, '?'):
|
|
391
|
+
results.append(DetectedError(SqlErrors.INVALID_WILDCARD, (str(like),)))
|
|
392
|
+
|
|
393
|
+
# '_' instead of '%'
|
|
394
|
+
if percent_in_solution and not underscore_in_solution:
|
|
395
|
+
if has_character(pattern, '_') and not has_character(pattern, '%'):
|
|
396
|
+
results.append(DetectedError(SqlErrors.WRONG_WILDCARD, (str(like),)))
|
|
397
|
+
|
|
398
|
+
# '%' instead of '_'
|
|
399
|
+
if underscore_in_solution and not percent_in_solution:
|
|
400
|
+
if has_character(pattern, '%') and not has_character(pattern, '_'):
|
|
401
|
+
results.append(DetectedError(SqlErrors.WRONG_WILDCARD, (str(like),)))
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
return results
|
|
406
|
+
|
|
407
|
+
def detect_70_extraneous_column_in_select(self) -> list[DetectedError]:
|
|
408
|
+
'''
|
|
409
|
+
Flags when an extraneous column is included in the SELECT clause.
|
|
410
|
+
'''
|
|
411
|
+
|
|
412
|
+
results: list[DetectedError] = []
|
|
413
|
+
|
|
414
|
+
# First, check if the number of columns exceeds the maximum required by any solution
|
|
415
|
+
column_number_required_max = max(len(sol.main_query.output.columns) for sol in self.solutions)
|
|
416
|
+
column_number_provided = len(self.query.main_query.output.columns)
|
|
417
|
+
|
|
418
|
+
if column_number_provided > column_number_required_max:
|
|
419
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_COLUMN_IN_SELECT, (column_number_provided, column_number_required_max)))
|
|
420
|
+
|
|
421
|
+
# Then, check for specific extraneous columns
|
|
422
|
+
columns_required = set.union(*[sol.output_columns_source for sol in self.solutions])
|
|
423
|
+
columns_provided = self.query.output_columns_source
|
|
424
|
+
extraneous_columns = columns_provided - columns_required
|
|
425
|
+
|
|
426
|
+
for schema, table, column in extraneous_columns:
|
|
427
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_COLUMN_IN_SELECT, (schema, table, column)))
|
|
428
|
+
|
|
429
|
+
return results
|
|
430
|
+
|
|
431
|
+
def detect_71_missing_column_from_select(self) -> list[DetectedError]:
|
|
432
|
+
'''
|
|
433
|
+
Flags when a required column is missing from the SELECT clause.
|
|
434
|
+
'''
|
|
435
|
+
|
|
436
|
+
results: list[DetectedError] = []
|
|
437
|
+
|
|
438
|
+
# First, check if the number of columns is less than the minimum required by any solution
|
|
439
|
+
column_number_required_min = min(len(sol.main_query.output.columns) for sol in self.solutions)
|
|
440
|
+
column_number_provided = len(self.query.main_query.output.columns)
|
|
441
|
+
|
|
442
|
+
if column_number_provided < column_number_required_min:
|
|
443
|
+
results.append(DetectedError(SqlErrors.MISSING_COLUMN_FROM_SELECT, (column_number_provided, column_number_required_min)))
|
|
444
|
+
|
|
445
|
+
# Then, check for specific missing columns
|
|
446
|
+
columns_required = set.union(*[sol.output_columns_source for sol in self.solutions])
|
|
447
|
+
columns_provided = self.query.output_columns_source
|
|
448
|
+
missing_columns = columns_required - columns_provided
|
|
449
|
+
|
|
450
|
+
for schema, table, column in missing_columns:
|
|
451
|
+
results.append(DetectedError(SqlErrors.MISSING_COLUMN_FROM_SELECT, (schema, table, column)))
|
|
452
|
+
|
|
453
|
+
return results
|
|
454
|
+
|
|
455
|
+
def detect_72_missing_distinct_from_select(self) -> list[DetectedError]:
|
|
456
|
+
'''Flags when DISTINCT is missing from a SELECT that requires it.'''
|
|
457
|
+
|
|
458
|
+
def _is_distinct(so: SetOperation) -> bool:
|
|
459
|
+
output = so.output
|
|
460
|
+
columns = len(output.columns)
|
|
461
|
+
longest_constraint = max(len(c.columns) for c in output.unique_constraints) if output.unique_constraints else 0
|
|
462
|
+
|
|
463
|
+
return longest_constraint >= columns
|
|
464
|
+
|
|
465
|
+
# ensure all solutions are DISTINCT
|
|
466
|
+
requires_distinct = all(_is_distinct(sol.main_query) for sol in self.solutions)
|
|
467
|
+
|
|
468
|
+
# At least one solution doesn't require DISTINCT, so it's not necessary for the query
|
|
469
|
+
# Skip this check
|
|
470
|
+
if not requires_distinct:
|
|
471
|
+
return []
|
|
472
|
+
|
|
473
|
+
if not _is_distinct(self.query.main_query):
|
|
474
|
+
return [DetectedError(SqlErrors.MISSING_DISTINCT_FROM_SELECT)]
|
|
475
|
+
|
|
476
|
+
return []
|
|
477
|
+
|
|
478
|
+
def detect_73_missing_as_from_select(self) -> list[DetectedError]:
|
|
479
|
+
'''
|
|
480
|
+
Flags when AS aliases are missing from required columns in the SELECT clause.
|
|
481
|
+
'''
|
|
482
|
+
|
|
483
|
+
results: list[DetectedError] = []
|
|
484
|
+
|
|
485
|
+
# ensure we have the correct columns in both amount and source
|
|
486
|
+
extraneous_columns = self.detect_70_extraneous_column_in_select()
|
|
487
|
+
missing_columns = self.detect_71_missing_column_from_select()
|
|
488
|
+
|
|
489
|
+
if extraneous_columns or missing_columns:
|
|
490
|
+
return results # skip AS check if column count is already wrong
|
|
491
|
+
|
|
492
|
+
# only consider columns that are actually aliased
|
|
493
|
+
expected_aliases: set[str] = set.intersection(*[set(col.name for col in sol.main_query.output.columns if col.name != col.real_name and not col.name.startswith('_')) for sol in self.solutions])
|
|
494
|
+
provided_aliases: set[str] = set(col.name for col in self.query.main_query.output.columns if col.name != col.real_name and not col.name.startswith('_'))
|
|
495
|
+
|
|
496
|
+
missing_aliases = expected_aliases - provided_aliases
|
|
497
|
+
|
|
498
|
+
for alias in missing_aliases:
|
|
499
|
+
results.append(DetectedError(SqlErrors.MISSING_AS_FROM_SELECT, (alias,)))
|
|
500
|
+
|
|
501
|
+
return results
|
|
502
|
+
|
|
503
|
+
def detect_74_missing_column_from_order_by(self) -> list[DetectedError]:
|
|
504
|
+
'''Flags when a required column is missing from the ORDER BY clause.'''
|
|
505
|
+
results: list[DetectedError] = []
|
|
506
|
+
|
|
507
|
+
# for select in self.query.main_query.main_selects:
|
|
508
|
+
# if not select.order_by:
|
|
509
|
+
# continue
|
|
510
|
+
|
|
511
|
+
# order_by_cols: list[] = []
|
|
512
|
+
|
|
513
|
+
# # 1. Extract columns from the query's ORDER BY clause and map them to referenced tables
|
|
514
|
+
|
|
515
|
+
return results
|
|
516
|
+
|
|
517
|
+
results = []
|
|
518
|
+
if not self.q_ast or not self.s_ast:
|
|
519
|
+
return results
|
|
520
|
+
|
|
521
|
+
q_orderby_cols = self._get_orderby_columns(self.q_ast)
|
|
522
|
+
s_orderby_cols = self._get_orderby_columns(self.s_ast)
|
|
523
|
+
|
|
524
|
+
# Create sets of column names for easy comparison (case-insensitive)
|
|
525
|
+
q_cols_set = {col.lower() for col, direction in q_orderby_cols}
|
|
526
|
+
s_cols_set = {col.lower() for col, direction in s_orderby_cols}
|
|
527
|
+
|
|
528
|
+
# Find columns in the solution's ORDER BY that are not in the user's
|
|
529
|
+
missing_cols = s_cols_set - q_cols_set
|
|
530
|
+
for col_lower in missing_cols:
|
|
531
|
+
# Find the original case from the solution
|
|
532
|
+
original_col = next((col for col, direction in s_orderby_cols if col.lower() == col_lower), col_lower)
|
|
533
|
+
results.append((
|
|
534
|
+
SqlErrors.LOG_74_MISSING_COLUMN_FROM_ORDER_BY,
|
|
535
|
+
f"The column '{original_col}' is missing from the ORDER BY clause."
|
|
536
|
+
))
|
|
537
|
+
return results
|
|
538
|
+
|
|
539
|
+
def detect_75_incorrect_column_in_order_by(self) -> list[DetectedError]:
|
|
540
|
+
'''Flags when a column is incorrectly included in the ORDER BY clause.'''
|
|
541
|
+
return []
|
|
542
|
+
|
|
543
|
+
results = []
|
|
544
|
+
if not self.q_ast or not self.s_ast:
|
|
545
|
+
return results
|
|
546
|
+
|
|
547
|
+
q_orderby_cols = self._get_orderby_columns(self.q_ast)
|
|
548
|
+
s_orderby_cols = self._get_orderby_columns(self.s_ast)
|
|
549
|
+
|
|
550
|
+
# Create sets of column names for easy comparison (case-insensitive)
|
|
551
|
+
q_cols_set = {col.lower() for col, direction in q_orderby_cols}
|
|
552
|
+
s_cols_set = {col.lower() for col, direction in s_orderby_cols}
|
|
553
|
+
|
|
554
|
+
# Find columns in the user's ORDER BY that are not in the solution's
|
|
555
|
+
incorrect_cols = q_cols_set - s_cols_set
|
|
556
|
+
for col_lower in incorrect_cols:
|
|
557
|
+
# Find the original case from the query
|
|
558
|
+
original_col = next((col for col, direction in q_orderby_cols if col.lower() == col_lower), col_lower)
|
|
559
|
+
results.append((
|
|
560
|
+
SqlErrors.LOG_75_INCORRECT_COLUMN_IN_ORDER_BY,
|
|
561
|
+
f"The column '{original_col}' should not be in the ORDER BY clause."
|
|
562
|
+
))
|
|
563
|
+
return results
|
|
564
|
+
|
|
565
|
+
def detect_77_incorrect_ordering_of_rows(self) -> list[DetectedError]:
|
|
566
|
+
return []
|
|
567
|
+
|
|
568
|
+
def detect_112_118_missing_extraneous_where_clause(self) -> list[DetectedError]:
|
|
569
|
+
results: list[DetectedError] = []
|
|
570
|
+
|
|
571
|
+
# If all solutions have a WHERE clause, then the user's query should have one as well
|
|
572
|
+
# If all solutions don't have a WHERE clause, then the user's query shouldn't have one either
|
|
573
|
+
# Otherwise, we cannot be sure if a WHERE clause is required or not, so we skip this check to avoid false positives
|
|
574
|
+
solution_has_where: set[bool] = set()
|
|
575
|
+
for solution in self.solutions:
|
|
576
|
+
solution_has_where.add(any(select.where for select in solution.selects))
|
|
577
|
+
|
|
578
|
+
user_has_where = any(select.where for select in self.query.selects)
|
|
579
|
+
|
|
580
|
+
if solution_has_where == {True} and not user_has_where:
|
|
581
|
+
results.append(DetectedError(SqlErrors.MISSING_WHERE_CLAUSE))
|
|
582
|
+
elif solution_has_where == {False} and user_has_where:
|
|
583
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_WHERE_CLAUSE))
|
|
584
|
+
|
|
585
|
+
return results
|
|
586
|
+
|
|
587
|
+
def detect_113_119_missing_extraneous_group_by_clause(self) -> list[DetectedError]:
|
|
588
|
+
results: list[DetectedError] = []
|
|
589
|
+
|
|
590
|
+
# If all solutions have a GROUP BY clause, then the user's query should have one as well
|
|
591
|
+
# If all solutions don't have a GROUP BY clause, then the user's query shouldn't have one either
|
|
592
|
+
# Otherwise, we cannot be sure if a GROUP BY clause is required or not, so we skip this check to avoid false positives
|
|
593
|
+
solution_has_group_by: set[bool] = set()
|
|
594
|
+
for solution in self.solutions:
|
|
595
|
+
solution_has_group_by.add(any(select.group_by for select in solution.selects))
|
|
596
|
+
|
|
597
|
+
user_has_group_by = any(select.group_by for select in self.query.selects)
|
|
598
|
+
|
|
599
|
+
if solution_has_group_by == {True} and not user_has_group_by:
|
|
600
|
+
results.append(DetectedError(SqlErrors.MISSING_GROUP_BY_CLAUSE))
|
|
601
|
+
elif solution_has_group_by == {False} and user_has_group_by:
|
|
602
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_GROUP_BY_CLAUSE))
|
|
603
|
+
|
|
604
|
+
return results
|
|
605
|
+
|
|
606
|
+
def detect_114_120_missing_extraneous_having_clause(self) -> list[DetectedError]:
|
|
607
|
+
results: list[DetectedError] = []
|
|
608
|
+
|
|
609
|
+
# If all solutions have a HAVING clause, then the user's query should have one as well
|
|
610
|
+
# If all solutions don't have a HAVING clause, then the user's query shouldn't have one either
|
|
611
|
+
# Otherwise, we cannot be sure if a HAVING clause is required or not, so we skip this check to avoid false positives
|
|
612
|
+
solution_has_having: set[bool] = set()
|
|
613
|
+
for solution in self.solutions:
|
|
614
|
+
solution_has_having.add(any(select.having for select in solution.selects))
|
|
615
|
+
|
|
616
|
+
user_has_having = any(select.having for select in self.query.selects)
|
|
617
|
+
|
|
618
|
+
if solution_has_having == {True} and not user_has_having:
|
|
619
|
+
results.append(DetectedError(SqlErrors.MISSING_HAVING_CLAUSE))
|
|
620
|
+
elif solution_has_having == {False} and user_has_having:
|
|
621
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_HAVING_CLAUSE))
|
|
622
|
+
|
|
623
|
+
return results
|
|
624
|
+
|
|
625
|
+
def detect_115_121_missing_extraneous_order_by_clause(self) -> list[DetectedError]:
|
|
626
|
+
results: list[DetectedError] = []
|
|
627
|
+
|
|
628
|
+
# If all solutions have an ORDER BY clause, then the user's query should have one as well
|
|
629
|
+
# If all solutions don't have an ORDER BY clause, then the user's query shouldn't have one either
|
|
630
|
+
# Otherwise, we cannot be sure if an ORDER BY clause is required or not, so we skip this check to avoid false positives
|
|
631
|
+
solution_has_order_by: set[bool] = set()
|
|
632
|
+
for solution in self.solutions:
|
|
633
|
+
solution_has_order_by.add(any(select.order_by for select in solution.selects))
|
|
634
|
+
|
|
635
|
+
user_has_order_by = any(select.order_by for select in self.query.selects)
|
|
636
|
+
|
|
637
|
+
if solution_has_order_by == {True} and not user_has_order_by:
|
|
638
|
+
results.append(DetectedError(SqlErrors.MISSING_ORDER_BY_CLAUSE))
|
|
639
|
+
elif solution_has_order_by == {False} and user_has_order_by:
|
|
640
|
+
results.append(DetectedError(SqlErrors.EXTRANEOUS_ORDER_BY_CLAUSE))
|
|
641
|
+
|
|
642
|
+
return results
|
|
643
|
+
|
|
644
|
+
def detect_116_121_123_missing_extraneous_incorrect_limit_clause(self) -> list[DetectedError]:
    '''
    Compare the LIMIT values of the user's query against those of the solutions.

    Only the main selects of each main query are inspected. All limit values
    found across all solutions are pooled into a single set, so that multiple
    solutions and set operations do not need to be mapped one-to-one.

    Returns:
        - `MISSING_LIMIT_CLAUSE` if solutions have limits and the user's query has none;
        - `EXTRANEOUS_LIMIT_CLAUSE` if solutions have no limits and the user's query has some;
        - `INCORRECT_LIMIT` (with the user and solution limit sets as data) if the
          user's limits are not all among the solution limits;
        - an empty list otherwise, including when some solution selects have a
          limit and others do not (the check is skipped to avoid false positives).
    '''
    # Pool of every limit seen in the solutions; None marks a select without LIMIT
    expected_limits: set[int | None] = {
        select.limit
        for solution in self.solutions
        for select in solution.main_query.main_selects
    }

    # Limits actually used by the user's query (selects without LIMIT are ignored)
    found_limits: set[int] = {
        select.limit
        for select in self.query.main_query.main_selects
        if select.limit is not None
    }

    # Mixed situation: limits are present in some places and absent in others,
    # so we cannot tell whether a limit is required
    if None in expected_limits and len(expected_limits) > 1:
        return []

    # From here on None carries no information: either it was the only entry or it was absent
    expected_limits.discard(None)

    if not expected_limits:
        if found_limits:
            return [DetectedError(SqlErrors.EXTRANEOUS_LIMIT_CLAUSE)]
        return []

    if not found_limits:
        return [DetectedError(SqlErrors.MISSING_LIMIT_CLAUSE)]

    if not found_limits.issubset(expected_limits):
        return [DetectedError(SqlErrors.INCORRECT_LIMIT, (found_limits, expected_limits))]

    return []
|
|
676
|
+
|
|
677
|
+
def detect_117_122_missing_extraneous_incorrect_offset_clause(self) -> list[DetectedError]:
    '''
    Compare the OFFSET values of the user's query against those of the solutions.

    Only the main selects of each main query are inspected. All offset values
    found across all solutions are pooled into a single set, so that multiple
    solutions and set operations do not need to be mapped one-to-one.

    Returns:
        - `MISSING_OFFSET_CLAUSE` if solutions have offsets and the user's query has none;
        - `EXTRANEOUS_OFFSET_CLAUSE` if solutions have no offsets and the user's query has some;
        - `INCORRECT_OFFSET` (with the user and solution offset sets as data) if the
          user's offsets are not all among the solution offsets;
        - an empty list otherwise, including when some solution selects have an
          offset and others do not (the check is skipped to avoid false positives).
    '''
    # Pool of every offset seen in the solutions; None marks a select without OFFSET
    expected_offsets: set[int | None] = {
        select.offset
        for solution in self.solutions
        for select in solution.main_query.main_selects
    }

    # Offsets actually used by the user's query (selects without OFFSET are ignored)
    found_offsets: set[int] = {
        select.offset
        for select in self.query.main_query.main_selects
        if select.offset is not None
    }

    # Mixed situation: offsets are present in some places and absent in others,
    # so we cannot tell whether an offset is required
    if None in expected_offsets and len(expected_offsets) > 1:
        return []

    # From here on None carries no information: either it was the only entry or it was absent
    expected_offsets.discard(None)

    if not expected_offsets:
        if found_offsets:
            return [DetectedError(SqlErrors.EXTRANEOUS_OFFSET_CLAUSE)]
        return []

    if not found_offsets:
        return [DetectedError(SqlErrors.MISSING_OFFSET_CLAUSE)]

    if not found_offsets.issubset(expected_offsets):
        return [DetectedError(SqlErrors.INCORRECT_OFFSET, (found_offsets, expected_offsets))]

    return []
|
|
707
|
+
|
|
708
|
+
def detect_80_incorrect_function(self) -> list[DetectedError]:
    '''Placeholder check: not implemented, always reports no errors.'''
    return []
|
|
710
|
+
|
|
711
|
+
def detect_78_distinct_as_function_parameter_when_not_applicable(self) -> list[DetectedError]:
    '''Placeholder check: not implemented, always reports no errors.'''
    return []
|
|
713
|
+
|
|
714
|
+
def detect_79_missing_distinct_from_function_parameter(self) -> list[DetectedError]:
    '''Placeholder check: not implemented, always reports no errors.'''
    return []
|
|
716
|
+
|
|
717
|
+
def detect_81_incorrect_column_as_function_parameter(self) -> list[DetectedError]:
    '''Placeholder check: not implemented, always reports no errors.'''
    return []
|
|
719
|
+
|
|
720
|
+
# region Helper methods
|
|
721
|
+
def has_character(literal: exp.Literal, chars: str) -> bool:
    '''
    Tell whether the literal's value contains at least one of the given characters.

    Each character of `chars` is tested on its own; a single match is enough.
    Returns False when the literal's value (`literal.this`) is not a string,
    or when `chars` is empty.
    '''
    text = literal.this

    if isinstance(text, str):
        for candidate in chars:
            if candidate in text:
                return True

    return False
|
|
732
|
+
# endregion
|