sql-error-categorizer 0.2.4__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/.gitignore +1 -1
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/PKG-INFO +3 -3
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/pyproject.toml +3 -3
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/requirements.txt +2 -2
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/complications.py +79 -208
- sql_error_categorizer-0.3.1/src/sql_error_categorizer/detectors/logical.py +732 -0
- sql_error_categorizer-0.3.1/src/sql_error_categorizer/detectors/semantic.py +289 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/syntax.py +106 -290
- sql_error_categorizer-0.2.4/tests/1_syn/test_002_ambiguous_column.py → sql_error_categorizer-0.3.1/tests/1_syn/test_001_ambiguous_column.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_004_undefined_column.py → sql_error_categorizer-0.3.1/tests/1_syn/test_003_undefined_column.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_005_undefined_function.py → sql_error_categorizer-0.3.1/tests/1_syn/test_004_undefined_function.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_006_undefined_parameter.py → sql_error_categorizer-0.3.1/tests/1_syn/test_005_undefined_parameter.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_007_undefined_tables.py → sql_error_categorizer-0.3.1/tests/1_syn/test_006_undefined_object.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_008_invalid_schema_names.py → sql_error_categorizer-0.3.1/tests/1_syn/test_007_invalid_schema_names.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_009_misspellings.py → sql_error_categorizer-0.3.1/tests/1_syn/test_008_misspellings.py +1 -1
- sql_error_categorizer-0.2.4/tests/1_syn/test_035_is_where_not_applicable.py → sql_error_categorizer-0.3.1/tests/1_syn/test_012_is_where_not_applicable.py +1 -1
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_013_data_type_mismatch.py +1 -1
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_014_aggregate_function_outside_select_or_having.py +1 -1
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_015_nested_aggregate_functions.py +1 -1
- sql_error_categorizer-0.3.1/tests/1_syn/test_016_extraneous_omitted_grouping_column.py +47 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_017_having_without_group_by.py +59 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_018_too_many_columns_in_subquery.py +42 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_021_using_where_twice.py +53 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_022_omitted_from.py +51 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_024_036_additional_omitted_semicolons.py +60 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_024_comparison_with_null.py +40 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_026_duplicate_clause.py +43 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_029_keywords_order.py +36 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_032_033_curly_square_or_unmatched_brackets.py +68 -0
- sql_error_categorizer-0.3.1/tests/1_syn/test_035_nonstandard_operators.py +31 -0
- sql_error_categorizer-0.2.4/tests/2_sem/test_040_tautological_inconsistent_expressions.py → sql_error_categorizer-0.3.1/tests/2_sem/test_038_tautological_inconsistent_expressions.py +1 -1
- sql_error_categorizer-0.3.1/tests/2_sem/test_039_distinct_sum_avg.py +51 -0
- sql_error_categorizer-0.3.1/tests/2_sem/test_050_constant_column_output.py +35 -0
- sql_error_categorizer-0.3.1/tests/2_sem/test_051_duplicate_column_output.py +58 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_058_join_on_incorrect_table.py +3 -3
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_059_join_when_join_needs_to_be_omitted.py +3 -3
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_062_missing_join.py +3 -3
- sql_error_categorizer-0.3.1/tests/3_log/test_067_wildcards_without_like.py +60 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_068_069_wrong_invalid_wildcard.py +85 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_070_extraneous_column_in_select.py +2 -2
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_071_missing_column_from_select.py +2 -2
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_072_missing_distinct_from_select.py +2 -2
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_073_missing_as_from_select.py +2 -2
- sql_error_categorizer-0.3.1/tests/3_log/test_112_118_missing_extraneous_where_clause.py +74 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_113_119_missing_extraneous_group_by_clause.py +74 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_114_120_missing_extraneous_having_clause.py +77 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_115_121_missing_extraneous_order_by_clause.py +79 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_116_121_123_missing_extraneous_incorrect_limit_clause.py +80 -0
- sql_error_categorizer-0.3.1/tests/3_log/test_117_122_missing_extraneous_incorrect_offset_clause.py +85 -0
- sql_error_categorizer-0.2.4/tests/4_com/test_083_unnecessary_distinct_in_select.py → sql_error_categorizer-0.3.1/tests/4_com/test_096_unnecessary_distinct_in_select.py +15 -6
- sql_error_categorizer-0.3.1/tests/4_com/test_102_like_without_wildcards.py +34 -0
- sql_error_categorizer-0.2.4/tests/4_com/test_092_unnecessary_distinct_in_aggregate_function.py → sql_error_categorizer-0.3.1/tests/4_com/test_106_unnecessary_distinct_in_aggregate_function.py +4 -4
- sql_error_categorizer-0.2.4/tests/4_com/test_095_group_by_with_singleton_groups.py → sql_error_categorizer-0.3.1/tests/4_com/test_109_group_by_with_singleton_groups.py +3 -3
- sql_error_categorizer-0.2.4/tests/4_com/test_097_group_by_can_be_replaced_by_distinct.py → sql_error_categorizer-0.3.1/tests/4_com/test_111_group_by_can_be_replaced_by_distinct.py +3 -3
- sql_error_categorizer-0.3.1/tests/4_com/test_114_order_by_in_subquery.py +42 -0
- sql_error_categorizer-0.3.1/tests/4_com/test_126_unused_cte.py +44 -0
- sql_error_categorizer-0.2.4/src/sql_error_categorizer/detectors/logical.py +0 -770
- sql_error_categorizer-0.2.4/src/sql_error_categorizer/detectors/semantic.py +0 -498
- sql_error_categorizer-0.2.4/tests/1_syn/test_016_extraneous_omitted_grouping_column.py +0 -81
- sql_error_categorizer-0.2.4/tests/1_syn/test_017_having_without_group_by.py +0 -80
- sql_error_categorizer-0.2.4/tests/1_syn/test_019_using_where_twice.py +0 -60
- sql_error_categorizer-0.2.4/tests/1_syn/test_020_missing_from.py +0 -76
- sql_error_categorizer-0.2.4/tests/1_syn/test_021_comparison_with_null.py +0 -43
- sql_error_categorizer-0.2.4/tests/1_syn/test_022_038_additional_omitted_semicolons.py +0 -56
- sql_error_categorizer-0.2.4/tests/1_syn/test_024_duplicate_clause.py +0 -71
- sql_error_categorizer-0.2.4/tests/1_syn/test_026_too_many_columns_in_subquery.py +0 -70
- sql_error_categorizer-0.2.4/tests/1_syn/test_030_keywords_order.py +0 -37
- sql_error_categorizer-0.2.4/tests/1_syn/test_034_curly_square_or_unmatched_brackets.py +0 -59
- sql_error_categorizer-0.2.4/tests/1_syn/test_037_nonstandard_operators.py +0 -29
- sql_error_categorizer-0.2.4/tests/2_sem/test_041_distinct_sum_avg.py +0 -77
- sql_error_categorizer-0.2.4/tests/2_sem/test_043_wildcards_without_like.py +0 -104
- sql_error_categorizer-0.2.4/tests/2_sem/test_044_incorrect_wildcards.py +0 -143
- sql_error_categorizer-0.2.4/tests/4_com/test_088_like_no_wildcards.py +0 -32
- sql_error_categorizer-0.2.4/tests/4_com/test_100_order_by_in_subquery.py +0 -55
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/.readthedocs.yaml +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/LICENSE +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/Makefile +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/README.md +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/catalogs/constraints.json +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/catalogs/miedema.json +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/sql/constraints.sql +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/sql/miedema.sql +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/Makefile +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/conf.py +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/index.rst +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/make.bat +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/requirements.txt +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/__init__.py +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/__init__.py +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/base.py +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/test_detector.py +0 -0
- {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_error_categorizer
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: This project analyses SQL statements and labels possible errors or complications.
|
|
5
5
|
Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
|
|
6
6
|
Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
|
|
@@ -14,10 +14,10 @@ Requires-Python: >=3.11
|
|
|
14
14
|
Requires-Dist: psycopg2
|
|
15
15
|
Requires-Dist: python-dateutil
|
|
16
16
|
Requires-Dist: pyyaml
|
|
17
|
-
Requires-Dist: sql-error-taxonomy
|
|
17
|
+
Requires-Dist: sql-error-taxonomy>=2.0.0
|
|
18
18
|
Requires-Dist: sqlglot
|
|
19
19
|
Requires-Dist: sqlparse
|
|
20
|
-
Requires-Dist: sqlscope>=1.0.
|
|
20
|
+
Requires-Dist: sqlscope>=1.0.16
|
|
21
21
|
Requires-Dist: z3-solver
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
23
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sql_error_categorizer"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
|
|
10
10
|
]
|
|
@@ -21,8 +21,8 @@ dependencies = [
|
|
|
21
21
|
"pyyaml",
|
|
22
22
|
"sqlparse",
|
|
23
23
|
"sqlglot",
|
|
24
|
-
"sqlscope>=1.0.
|
|
25
|
-
"sql_error_taxonomy",
|
|
24
|
+
"sqlscope>=1.0.16",
|
|
25
|
+
"sql_error_taxonomy>=2.0.0",
|
|
26
26
|
"z3-solver",
|
|
27
27
|
"python-dateutil",
|
|
28
28
|
]
|
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
'''Detector for complications in SQL queries.'''
|
|
2
2
|
|
|
3
|
-
import difflib
|
|
4
|
-
import re
|
|
5
|
-
import sqlparse
|
|
6
|
-
import sqlparse.keywords
|
|
7
3
|
from typing import Callable
|
|
8
4
|
from sqlglot import exp
|
|
9
5
|
from sql_error_taxonomy import SqlErrors
|
|
10
|
-
from sqlscope.catalog import ConstraintType, ConstraintColumn
|
|
11
|
-
from sqlscope import Query
|
|
6
|
+
from sqlscope.catalog import ConstraintType, ConstraintColumn
|
|
7
|
+
from sqlscope import Query
|
|
12
8
|
from sqlscope import util
|
|
13
9
|
|
|
14
10
|
from .base import BaseDetector, DetectedError
|
|
@@ -36,29 +32,31 @@ class ComplicationDetector(BaseDetector):
|
|
|
36
32
|
results: list[DetectedError] = super().run()
|
|
37
33
|
|
|
38
34
|
checks = [
|
|
39
|
-
self.
|
|
40
|
-
self.
|
|
41
|
-
self.
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
44
|
-
self.
|
|
45
|
-
self.
|
|
46
|
-
self.
|
|
47
|
-
self.
|
|
48
|
-
self.
|
|
49
|
-
self.
|
|
50
|
-
self.
|
|
51
|
-
self.
|
|
52
|
-
self.
|
|
53
|
-
self.
|
|
54
|
-
self.
|
|
55
|
-
self.
|
|
56
|
-
self.
|
|
57
|
-
self.
|
|
58
|
-
self.
|
|
59
|
-
self.
|
|
60
|
-
self.
|
|
61
|
-
self.
|
|
35
|
+
self.detect_82_unnecessary_complication, # ok
|
|
36
|
+
self.detect_83_unnecessary_distinct_in_select_clause, # ok
|
|
37
|
+
self.detect_84_unnecessary_table_reference, # TODO: refactor/implement
|
|
38
|
+
self.detect_85_unused_correlation_name, # TODO: implement
|
|
39
|
+
self.detect_86_tables_have_same_data, # TODO: implement
|
|
40
|
+
self.detect_125_correlation_name_identical_to_table_name, # TODO: implement
|
|
41
|
+
self.detect_87_unnecessary_general_comparison_operator, # TODO: implement
|
|
42
|
+
self.detect_88_like_without_wildcards, # ok
|
|
43
|
+
self.detect_89_unnecessarily_complicated_select_in_exists_subquery, # TODO: implement
|
|
44
|
+
self.detect_90_in_exists_can_be_replaced_by_comparison, # TODO: implement
|
|
45
|
+
self.detect_91_unnecessary_aggregate_function, # TODO: implement
|
|
46
|
+
self.detect_92_unnecessary_distinct_in_aggregate_function, # ok
|
|
47
|
+
self.detect_93_unnecessary_argument_of_count, # ok
|
|
48
|
+
self.detect_94_unnecessary_group_by_in_exists_subquery, # TODO: implement
|
|
49
|
+
self.detect_95_group_by_with_singleton_groups, # ok
|
|
50
|
+
self.detect_96_group_by_with_only_a_single_group, # TODO: implement
|
|
51
|
+
self.detect_97_group_by_can_be_replaced_by_distinct, # ok
|
|
52
|
+
self.detect_98_union_can_be_replaced_by_or, # TODO: implement
|
|
53
|
+
self.detect_99_unnecessary_column_in_order_by_clause, # TODO: refactor/implement
|
|
54
|
+
self.detect_100_order_by_in_subquery, # TODO: implement
|
|
55
|
+
self.detect_101_inefficient_having, # TODO: implement
|
|
56
|
+
self.detect_102_inefficient_union, # TODO: implement
|
|
57
|
+
self.detect_103_condition_in_the_subquery_can_be_moved_up, # TODO: implement
|
|
58
|
+
self.detect_104_outer_join_can_be_replaced_by_inner_join, # TODO: implement
|
|
59
|
+
self.detect_126_unused_cte, #
|
|
62
60
|
]
|
|
63
61
|
|
|
64
62
|
for chk in checks:
|
|
@@ -66,7 +64,11 @@ class ComplicationDetector(BaseDetector):
|
|
|
66
64
|
|
|
67
65
|
return results
|
|
68
66
|
|
|
69
|
-
def
|
|
67
|
+
def detect_82_unnecessary_complication(self) -> list[DetectedError]:
|
|
68
|
+
'''NOTE: this is an umbrella term, so it can't be directly detected.'''
|
|
69
|
+
return []
|
|
70
|
+
|
|
71
|
+
def detect_83_unnecessary_distinct_in_select_clause(self) -> list[DetectedError]:
|
|
70
72
|
'''
|
|
71
73
|
Flags a SELECT DISTINCT clause that is unnecessary because the selected
|
|
72
74
|
columns are already unique due to existing constraints.
|
|
@@ -81,12 +83,11 @@ class ComplicationDetector(BaseDetector):
|
|
|
81
83
|
constraints = [c for c in select.output.unique_constraints if c.constraint_type != ConstraintType.DISTINCT]
|
|
82
84
|
|
|
83
85
|
if len(constraints) > 0:
|
|
84
|
-
result.append(DetectedError(SqlErrors.
|
|
86
|
+
result.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_SELECT_CLAUSE, (select.sql,)))
|
|
85
87
|
|
|
86
88
|
return result
|
|
87
89
|
|
|
88
|
-
|
|
89
|
-
def com_84_unnecessary_join(self) -> list[DetectedError]:
|
|
90
|
+
def detect_84_unnecessary_table_reference(self) -> list[DetectedError]:
|
|
90
91
|
'''
|
|
91
92
|
Flags a query that joins to a table not present in the correct solution.
|
|
92
93
|
'''
|
|
@@ -110,25 +111,25 @@ class ComplicationDetector(BaseDetector):
|
|
|
110
111
|
# Find the original table name (with alias if it was used) to report back
|
|
111
112
|
original_table_name = next((t for t in original_q_tables if t.lower().startswith(table_name_lower)), table_name_lower)
|
|
112
113
|
results.append((
|
|
113
|
-
SqlErrors.
|
|
114
|
+
SqlErrors.UNNECESSARY_TABLE_REFERENCE,
|
|
114
115
|
f"Unnecessary JOIN: The table '{original_table_name}' is not needed to answer the query."
|
|
115
116
|
))
|
|
116
117
|
|
|
117
118
|
return results
|
|
118
119
|
|
|
119
|
-
|
|
120
|
-
def com_85_unused_correlation_name(self) -> list[DetectedError]:
|
|
120
|
+
def detect_85_unused_correlation_name(self) -> list[DetectedError]:
|
|
121
121
|
return []
|
|
122
122
|
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
def detect_86_tables_have_same_data(self) -> list[DetectedError]:
|
|
124
|
+
return []
|
|
125
|
+
|
|
126
|
+
def detect_125_correlation_name_identical_to_table_name(self) -> list[DetectedError]:
|
|
125
127
|
return []
|
|
126
128
|
|
|
127
|
-
|
|
128
|
-
def com_87_unnecessary_general_comparison_operator(self) -> list[DetectedError]:
|
|
129
|
+
def detect_87_unnecessary_general_comparison_operator(self) -> list[DetectedError]:
|
|
129
130
|
return []
|
|
130
131
|
|
|
131
|
-
def
|
|
132
|
+
def detect_88_like_without_wildcards(self) -> list[DetectedError]:
|
|
132
133
|
'''
|
|
133
134
|
Flags queries where the LIKE operator is used without wildcards ('%' or '_').
|
|
134
135
|
This indicates a potential misunderstanding, where the '=' operator should
|
|
@@ -157,23 +158,20 @@ class ComplicationDetector(BaseDetector):
|
|
|
157
158
|
if '%' not in pattern_value and '_' not in pattern_value:
|
|
158
159
|
full_expression = str(like)
|
|
159
160
|
|
|
160
|
-
results.append(DetectedError(SqlErrors.
|
|
161
|
+
results.append(DetectedError(SqlErrors.LIKE_WITHOUT_WILDCARDS, (full_expression,)))
|
|
161
162
|
|
|
162
163
|
return results
|
|
163
164
|
|
|
164
|
-
|
|
165
|
-
def com_89_unnecessarily_complicated_select_in_exists_subquery(self) -> list[DetectedError]:
|
|
165
|
+
def detect_89_unnecessarily_complicated_select_in_exists_subquery(self) -> list[DetectedError]:
|
|
166
166
|
return []
|
|
167
167
|
|
|
168
|
-
|
|
169
|
-
def com_90_in_exists_can_be_replaced_by_comparison(self) -> list[DetectedError]:
|
|
168
|
+
def detect_90_in_exists_can_be_replaced_by_comparison(self) -> list[DetectedError]:
|
|
170
169
|
return []
|
|
171
170
|
|
|
172
|
-
|
|
173
|
-
def com_91_unnecessary_aggregate_function(self) -> list[DetectedError]:
|
|
171
|
+
def detect_91_unnecessary_aggregate_function(self) -> list[DetectedError]:
|
|
174
172
|
return []
|
|
175
173
|
|
|
176
|
-
def
|
|
174
|
+
def detect_92_unnecessary_distinct_in_aggregate_function(self) -> list[DetectedError]:
|
|
177
175
|
'''MIN and MAX never require DISTINCT. For other aggregate functions, DISTINCT is unnecessary if the argument is unique.'''
|
|
178
176
|
|
|
179
177
|
results: list[DetectedError] = []
|
|
@@ -189,7 +187,7 @@ class ComplicationDetector(BaseDetector):
|
|
|
189
187
|
continue
|
|
190
188
|
|
|
191
189
|
if isinstance(agg_func, (exp.Min, exp.Max)):
|
|
192
|
-
results.append(DetectedError(SqlErrors.
|
|
190
|
+
results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
|
|
193
191
|
continue
|
|
194
192
|
|
|
195
193
|
arg_expr = agg_func.this.expressions # `.this` is the DISTINCT, `.expressions` are the arguments
|
|
@@ -199,7 +197,7 @@ class ComplicationDetector(BaseDetector):
|
|
|
199
197
|
for expr in arg_expr:
|
|
200
198
|
# Check if the argument is a constant literal
|
|
201
199
|
if isinstance(expr, exp.Literal):
|
|
202
|
-
results.append(DetectedError(SqlErrors.
|
|
200
|
+
results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
|
|
203
201
|
continue
|
|
204
202
|
|
|
205
203
|
# Check if the argument is a column
|
|
@@ -210,18 +208,17 @@ class ComplicationDetector(BaseDetector):
|
|
|
210
208
|
unique_constraints = [c for c in select.all_constraints if c.constraint_type == ConstraintType.UNIQUE]
|
|
211
209
|
for constraint in unique_constraints:
|
|
212
210
|
if { ConstraintColumn(column_name, table_idx=select._get_table_idx_for_column(expr)) } == constraint.columns:
|
|
213
|
-
results.append(DetectedError(SqlErrors.
|
|
211
|
+
results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
|
|
214
212
|
break
|
|
215
213
|
return results
|
|
216
214
|
|
|
217
|
-
def
|
|
215
|
+
def detect_93_unnecessary_argument_of_count(self) -> list[DetectedError]:
|
|
218
216
|
return []
|
|
219
217
|
|
|
220
|
-
|
|
221
|
-
def com_94_unnecessary_group_by_in_exists_subquery(self) -> list[DetectedError]:
|
|
218
|
+
def detect_94_unnecessary_group_by_in_exists_subquery(self) -> list[DetectedError]:
|
|
222
219
|
return []
|
|
223
220
|
|
|
224
|
-
def
|
|
221
|
+
def detect_95_group_by_with_singleton_groups(self) -> list[DetectedError]:
|
|
225
222
|
'''
|
|
226
223
|
Flags GROUP BY clauses on singleton groups due to the presence
|
|
227
224
|
of UNIQUE constraints on the grouped columns.
|
|
@@ -241,16 +238,15 @@ class ComplicationDetector(BaseDetector):
|
|
|
241
238
|
|
|
242
239
|
for constraint in constraints:
|
|
243
240
|
if constraint.columns.issubset(group_by_constraint.columns):
|
|
244
|
-
results.append(DetectedError(SqlErrors.
|
|
241
|
+
results.append(DetectedError(SqlErrors.GROUP_BY_WITH_SINGLETON_GROUPS, (group_by_constraint, constraint)))
|
|
245
242
|
break
|
|
246
243
|
|
|
247
244
|
return results
|
|
248
245
|
|
|
249
|
-
|
|
250
|
-
def com_96_group_by_with_only_a_single_group(self) -> list[DetectedError]:
|
|
246
|
+
def detect_96_group_by_with_only_a_single_group(self) -> list[DetectedError]:
|
|
251
247
|
return []
|
|
252
248
|
|
|
253
|
-
def
|
|
249
|
+
def detect_97_group_by_can_be_replaced_by_distinct(self) -> list[DetectedError]:
|
|
254
250
|
'''
|
|
255
251
|
Flags GROUP BY clauses that can be replaced by SELECT DISTINCT.
|
|
256
252
|
This occurs when all selected columns are included in the GROUP BY clause
|
|
@@ -290,18 +286,14 @@ class ComplicationDetector(BaseDetector):
|
|
|
290
286
|
group_by_col_names = {(util.ast.column.get_real_name(col), select._get_table_idx_for_column(col)) for col in group_by_columns}
|
|
291
287
|
|
|
292
288
|
if select_col_names == group_by_col_names:
|
|
293
|
-
results.append(DetectedError(SqlErrors.
|
|
289
|
+
results.append(DetectedError(SqlErrors.GROUP_BY_CAN_BE_REPLACED_WITH_DISTINCT, (select_col_names,)))
|
|
294
290
|
|
|
295
291
|
return results
|
|
296
292
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
# TODO: implement
|
|
300
|
-
def com_98_union_can_be_replaced_by_or(self) -> list[DetectedError]:
|
|
293
|
+
def detect_98_union_can_be_replaced_by_or(self) -> list[DetectedError]:
|
|
301
294
|
return []
|
|
302
295
|
|
|
303
|
-
|
|
304
|
-
def com_99_complication_unnecessary_column_in_order_by_clause(self) -> list[DetectedError]:
|
|
296
|
+
def detect_99_unnecessary_column_in_order_by_clause(self) -> list[DetectedError]:
|
|
305
297
|
'''
|
|
306
298
|
Flags when the ORDER BY clause contains unnecessary columns in addition
|
|
307
299
|
to the required ones.
|
|
@@ -329,8 +321,7 @@ class ComplicationDetector(BaseDetector):
|
|
|
329
321
|
|
|
330
322
|
return results
|
|
331
323
|
|
|
332
|
-
|
|
333
|
-
def com_100_order_by_in_subquery(self) -> list[DetectedError]:
|
|
324
|
+
def detect_100_order_by_in_subquery(self) -> list[DetectedError]:
|
|
334
325
|
'''
|
|
335
326
|
Flags when a subquery contains an ORDER BY clause.
|
|
336
327
|
Subqueries both ORDER BY and LIMIT are considered valid.
|
|
@@ -348,157 +339,37 @@ class ComplicationDetector(BaseDetector):
|
|
|
348
339
|
|
|
349
340
|
checked_subqueries.add(subquery.sql)
|
|
350
341
|
if subquery.order_by and not subquery.limit:
|
|
351
|
-
results.append(DetectedError(SqlErrors.
|
|
342
|
+
results.append(DetectedError(SqlErrors.ORDER_BY_IN_SUBQUERY, (subquery.sql,)))
|
|
352
343
|
|
|
353
344
|
return results
|
|
354
345
|
|
|
355
|
-
|
|
356
|
-
def com_101_inefficient_having(self) -> list[DetectedError]:
|
|
357
|
-
return []
|
|
358
|
-
|
|
359
|
-
# TODO: implement
|
|
360
|
-
def com_102_inefficient_union(self) -> list[DetectedError]:
|
|
346
|
+
def detect_101_inefficient_having(self) -> list[DetectedError]:
|
|
361
347
|
return []
|
|
362
348
|
|
|
363
|
-
|
|
364
|
-
def com_103_condition_in_the_subquery_can_be_moved_up(self) -> list[DetectedError]:
|
|
349
|
+
def detect_102_inefficient_union(self) -> list[DetectedError]:
|
|
365
350
|
return []
|
|
366
351
|
|
|
367
|
-
|
|
368
|
-
def com_104_condition_on_left_table_in_left_outer_join(self) -> list[DetectedError]:
|
|
352
|
+
def detect_103_condition_in_the_subquery_can_be_moved_up(self) -> list[DetectedError]:
|
|
369
353
|
return []
|
|
370
354
|
|
|
371
|
-
|
|
372
|
-
def com_105_outer_join_can_be_replaced_by_inner_join(self) -> list[DetectedError]:
|
|
355
|
+
def detect_104_outer_join_can_be_replaced_by_inner_join(self) -> list[DetectedError]:
|
|
373
356
|
return []
|
|
374
357
|
|
|
358
|
+
def detect_126_unused_cte(self) -> list[DetectedError]:
|
|
359
|
+
results: list[DetectedError] = []
|
|
375
360
|
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
'''
|
|
379
|
-
Extracts a list of simple column names from a SELECT query's AST.
|
|
380
|
-
'''
|
|
381
|
-
columns = []
|
|
382
|
-
if not ast:
|
|
383
|
-
return columns
|
|
384
|
-
|
|
385
|
-
select_expressions = ast.get('args', {}).get('expressions', [])
|
|
386
|
-
|
|
387
|
-
for expr_node in select_expressions:
|
|
388
|
-
col_name = self._find_underlying_column(expr_node)
|
|
389
|
-
if col_name:
|
|
390
|
-
columns.append(col_name)
|
|
391
|
-
|
|
392
|
-
return columns
|
|
393
|
-
def _find_underlying_column(self, node: dict):
|
|
394
|
-
'''
|
|
395
|
-
Recursively traverses an expression node to find the underlying column identifier.
|
|
396
|
-
'''
|
|
397
|
-
if not isinstance(node, dict):
|
|
398
|
-
return None
|
|
399
|
-
|
|
400
|
-
node_class = node.get('class')
|
|
401
|
-
|
|
402
|
-
if node_class == 'Paren':
|
|
403
|
-
return self._find_underlying_column(node.get('args', {}).get('this'))
|
|
404
|
-
|
|
405
|
-
if node_class == 'Column':
|
|
406
|
-
try:
|
|
407
|
-
return node['args']['expression']['args']['this']
|
|
408
|
-
except (KeyError, TypeError):
|
|
409
|
-
try:
|
|
410
|
-
return node['args']['this']['args']['this']
|
|
411
|
-
except (KeyError, TypeError):
|
|
412
|
-
return None
|
|
413
|
-
|
|
414
|
-
if node_class == 'Alias':
|
|
415
|
-
return self._find_underlying_column(node.get('args', {}).get('this'))
|
|
416
|
-
def _get_from_tables(self, ast: dict, with_alias=False) -> list:
|
|
417
|
-
'''
|
|
418
|
-
Extracts a list of all table names from the FROM and JOIN clauses of a query's AST.
|
|
419
|
-
'''
|
|
420
|
-
tables = []
|
|
421
|
-
if not ast:
|
|
422
|
-
return tables
|
|
423
|
-
|
|
424
|
-
args = ast.get('args', {})
|
|
425
|
-
|
|
426
|
-
# 1. Process the main table from the 'from' clause
|
|
427
|
-
from_node = args.get('from')
|
|
428
|
-
if from_node:
|
|
429
|
-
# The actual table data is inside the 'this' argument of the 'From' node
|
|
430
|
-
main_table_node = from_node.get('args', {}).get('this')
|
|
431
|
-
if main_table_node:
|
|
432
|
-
self._collect_tables_recursive(main_table_node, tables, with_alias)
|
|
433
|
-
|
|
434
|
-
# 2. Process all tables from the 'joins' list
|
|
435
|
-
join_nodes = args.get('joins', [])
|
|
436
|
-
for join_node in join_nodes:
|
|
437
|
-
self._collect_tables_recursive(join_node, tables, with_alias)
|
|
438
|
-
|
|
439
|
-
return list(set(tables))
|
|
440
|
-
def _collect_tables_recursive(self, node: dict, tables: list, with_alias=False):
|
|
441
|
-
'''
|
|
442
|
-
Recursively traverses a FROM clause node (including joins) to collect table names.
|
|
443
|
-
'''
|
|
444
|
-
if not isinstance(node, dict):
|
|
445
|
-
return
|
|
446
|
-
|
|
447
|
-
node_class = node.get('class')
|
|
448
|
-
|
|
449
|
-
# This part handles aliased tables (e.g., "customer c") and regular tables
|
|
450
|
-
if node_class == 'Alias':
|
|
451
|
-
underlying_node = node.get('args', {}).get('this')
|
|
452
|
-
# Recurse in case the alias is on a subquery or another join
|
|
453
|
-
self._collect_tables_recursive(underlying_node, tables, with_alias)
|
|
454
|
-
|
|
455
|
-
elif node_class == 'Table':
|
|
456
|
-
try:
|
|
457
|
-
# The AST nests identifiers, so we go deep to get the name
|
|
458
|
-
table_name = node['args']['this']['args']['this']
|
|
459
|
-
alias_node = node.get('args', {}).get('alias')
|
|
460
|
-
if with_alias and alias_node:
|
|
461
|
-
alias_name = alias_node.get('args', {}).get('this', {}).get('args', {}).get('this')
|
|
462
|
-
tables.append(f"{table_name} AS {alias_name}")
|
|
463
|
-
else:
|
|
464
|
-
tables.append(table_name)
|
|
465
|
-
except (KeyError, TypeError):
|
|
466
|
-
pass
|
|
361
|
+
if not self.query.ctes:
|
|
362
|
+
return results
|
|
467
363
|
|
|
468
|
-
|
|
469
|
-
elif node_class == 'Join':
|
|
470
|
-
# The joined table is in the 'this' argument of the Join node
|
|
471
|
-
self._collect_tables_recursive(node.get('args', {}).get('this'), tables, with_alias)
|
|
472
|
-
# The other side of the join is already handled in the 'from' clause,
|
|
473
|
-
# but we check for 'expression' for other potential join structures.
|
|
474
|
-
if 'expression' in node.get('args', {}):
|
|
475
|
-
self._collect_tables_recursive(node.get('args', {}).get('expression'), tables, with_alias)
|
|
476
|
-
def _get_orderby_columns(self, ast: dict) -> list:
|
|
477
|
-
'''
|
|
478
|
-
Extracts a list of columns and their sort direction from an ORDER BY clause.
|
|
479
|
-
'''
|
|
480
|
-
orderby_terms = []
|
|
481
|
-
if not ast:
|
|
482
|
-
return orderby_terms
|
|
364
|
+
used_ctes: dict[int, bool] = {i: False for i in range(len(self.query.ctes))}
|
|
483
365
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
366
|
+
for select in self.query.selects:
|
|
367
|
+
for table in select.referenced_tables:
|
|
368
|
+
if table.cte_idx is not None:
|
|
369
|
+
used_ctes[table.cte_idx] = True
|
|
487
370
|
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
column_node = term_node.get('args', {}).get('this')
|
|
494
|
-
|
|
495
|
-
col_name = self._find_underlying_column(column_node)
|
|
496
|
-
|
|
497
|
-
if col_name:
|
|
498
|
-
direction = term_node.get('args', {}).get('direction', 'ASC').upper()
|
|
499
|
-
orderby_terms.append((col_name, direction))
|
|
500
|
-
except (KeyError, AttributeError):
|
|
501
|
-
return []
|
|
502
|
-
|
|
503
|
-
return orderby_terms
|
|
504
|
-
#endregion Utility methods
|
|
371
|
+
for cte_idx, used in used_ctes.items():
|
|
372
|
+
if not used:
|
|
373
|
+
results.append(DetectedError(SqlErrors.UNUSED_CTE, (self.query.ctes[cte_idx].sql,)))
|
|
374
|
+
|
|
375
|
+
return results
|