sql-error-categorizer 0.2.4__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/.gitignore +1 -1
  2. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/PKG-INFO +3 -3
  3. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/pyproject.toml +3 -3
  4. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/requirements.txt +2 -2
  5. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/complications.py +79 -208
  6. sql_error_categorizer-0.3.1/src/sql_error_categorizer/detectors/logical.py +732 -0
  7. sql_error_categorizer-0.3.1/src/sql_error_categorizer/detectors/semantic.py +289 -0
  8. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/syntax.py +106 -290
  9. sql_error_categorizer-0.2.4/tests/1_syn/test_002_ambiguous_column.py → sql_error_categorizer-0.3.1/tests/1_syn/test_001_ambiguous_column.py +1 -1
  10. sql_error_categorizer-0.2.4/tests/1_syn/test_004_undefined_column.py → sql_error_categorizer-0.3.1/tests/1_syn/test_003_undefined_column.py +1 -1
  11. sql_error_categorizer-0.2.4/tests/1_syn/test_005_undefined_function.py → sql_error_categorizer-0.3.1/tests/1_syn/test_004_undefined_function.py +1 -1
  12. sql_error_categorizer-0.2.4/tests/1_syn/test_006_undefined_parameter.py → sql_error_categorizer-0.3.1/tests/1_syn/test_005_undefined_parameter.py +1 -1
  13. sql_error_categorizer-0.2.4/tests/1_syn/test_007_undefined_tables.py → sql_error_categorizer-0.3.1/tests/1_syn/test_006_undefined_object.py +1 -1
  14. sql_error_categorizer-0.2.4/tests/1_syn/test_008_invalid_schema_names.py → sql_error_categorizer-0.3.1/tests/1_syn/test_007_invalid_schema_names.py +1 -1
  15. sql_error_categorizer-0.2.4/tests/1_syn/test_009_misspellings.py → sql_error_categorizer-0.3.1/tests/1_syn/test_008_misspellings.py +1 -1
  16. sql_error_categorizer-0.2.4/tests/1_syn/test_035_is_where_not_applicable.py → sql_error_categorizer-0.3.1/tests/1_syn/test_012_is_where_not_applicable.py +1 -1
  17. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_013_data_type_mismatch.py +1 -1
  18. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_014_aggregate_function_outside_select_or_having.py +1 -1
  19. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/1_syn/test_015_nested_aggregate_functions.py +1 -1
  20. sql_error_categorizer-0.3.1/tests/1_syn/test_016_extraneous_omitted_grouping_column.py +47 -0
  21. sql_error_categorizer-0.3.1/tests/1_syn/test_017_having_without_group_by.py +59 -0
  22. sql_error_categorizer-0.3.1/tests/1_syn/test_018_too_many_columns_in_subquery.py +42 -0
  23. sql_error_categorizer-0.3.1/tests/1_syn/test_021_using_where_twice.py +53 -0
  24. sql_error_categorizer-0.3.1/tests/1_syn/test_022_omitted_from.py +51 -0
  25. sql_error_categorizer-0.3.1/tests/1_syn/test_024_036_additional_omitted_semicolons.py +60 -0
  26. sql_error_categorizer-0.3.1/tests/1_syn/test_024_comparison_with_null.py +40 -0
  27. sql_error_categorizer-0.3.1/tests/1_syn/test_026_duplicate_clause.py +43 -0
  28. sql_error_categorizer-0.3.1/tests/1_syn/test_029_keywords_order.py +36 -0
  29. sql_error_categorizer-0.3.1/tests/1_syn/test_032_033_curly_square_or_unmatched_brackets.py +68 -0
  30. sql_error_categorizer-0.3.1/tests/1_syn/test_035_nonstandard_operators.py +31 -0
  31. sql_error_categorizer-0.2.4/tests/2_sem/test_040_tautological_inconsistent_expressions.py → sql_error_categorizer-0.3.1/tests/2_sem/test_038_tautological_inconsistent_expressions.py +1 -1
  32. sql_error_categorizer-0.3.1/tests/2_sem/test_039_distinct_sum_avg.py +51 -0
  33. sql_error_categorizer-0.3.1/tests/2_sem/test_050_constant_column_output.py +35 -0
  34. sql_error_categorizer-0.3.1/tests/2_sem/test_051_duplicate_column_output.py +58 -0
  35. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_058_join_on_incorrect_table.py +3 -3
  36. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_059_join_when_join_needs_to_be_omitted.py +3 -3
  37. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_062_missing_join.py +3 -3
  38. sql_error_categorizer-0.3.1/tests/3_log/test_067_wildcards_without_like.py +60 -0
  39. sql_error_categorizer-0.3.1/tests/3_log/test_068_069_wrong_invalid_wildcard.py +85 -0
  40. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_070_extraneous_column_in_select.py +2 -2
  41. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_071_missing_column_from_select.py +2 -2
  42. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_072_missing_distinct_from_select.py +2 -2
  43. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/3_log/test_073_missing_as_from_select.py +2 -2
  44. sql_error_categorizer-0.3.1/tests/3_log/test_112_118_missing_extraneous_where_clause.py +74 -0
  45. sql_error_categorizer-0.3.1/tests/3_log/test_113_119_missing_extraneous_group_by_clause.py +74 -0
  46. sql_error_categorizer-0.3.1/tests/3_log/test_114_120_missing_extraneous_having_clause.py +77 -0
  47. sql_error_categorizer-0.3.1/tests/3_log/test_115_121_missing_extraneous_order_by_clause.py +79 -0
  48. sql_error_categorizer-0.3.1/tests/3_log/test_116_121_123_missing_extraneous_incorrect_limit_clause.py +80 -0
  49. sql_error_categorizer-0.3.1/tests/3_log/test_117_122_missing_extraneous_incorrect_offset_clause.py +85 -0
  50. sql_error_categorizer-0.2.4/tests/4_com/test_083_unnecessary_distinct_in_select.py → sql_error_categorizer-0.3.1/tests/4_com/test_096_unnecessary_distinct_in_select.py +15 -6
  51. sql_error_categorizer-0.3.1/tests/4_com/test_102_like_without_wildcards.py +34 -0
  52. sql_error_categorizer-0.2.4/tests/4_com/test_092_unnecessary_distinct_in_aggregate_function.py → sql_error_categorizer-0.3.1/tests/4_com/test_106_unnecessary_distinct_in_aggregate_function.py +4 -4
  53. sql_error_categorizer-0.2.4/tests/4_com/test_095_group_by_with_singleton_groups.py → sql_error_categorizer-0.3.1/tests/4_com/test_109_group_by_with_singleton_groups.py +3 -3
  54. sql_error_categorizer-0.2.4/tests/4_com/test_097_group_by_can_be_replaced_by_distinct.py → sql_error_categorizer-0.3.1/tests/4_com/test_111_group_by_can_be_replaced_by_distinct.py +3 -3
  55. sql_error_categorizer-0.3.1/tests/4_com/test_114_order_by_in_subquery.py +42 -0
  56. sql_error_categorizer-0.3.1/tests/4_com/test_126_unused_cte.py +44 -0
  57. sql_error_categorizer-0.2.4/src/sql_error_categorizer/detectors/logical.py +0 -770
  58. sql_error_categorizer-0.2.4/src/sql_error_categorizer/detectors/semantic.py +0 -498
  59. sql_error_categorizer-0.2.4/tests/1_syn/test_016_extraneous_omitted_grouping_column.py +0 -81
  60. sql_error_categorizer-0.2.4/tests/1_syn/test_017_having_without_group_by.py +0 -80
  61. sql_error_categorizer-0.2.4/tests/1_syn/test_019_using_where_twice.py +0 -60
  62. sql_error_categorizer-0.2.4/tests/1_syn/test_020_missing_from.py +0 -76
  63. sql_error_categorizer-0.2.4/tests/1_syn/test_021_comparison_with_null.py +0 -43
  64. sql_error_categorizer-0.2.4/tests/1_syn/test_022_038_additional_omitted_semicolons.py +0 -56
  65. sql_error_categorizer-0.2.4/tests/1_syn/test_024_duplicate_clause.py +0 -71
  66. sql_error_categorizer-0.2.4/tests/1_syn/test_026_too_many_columns_in_subquery.py +0 -70
  67. sql_error_categorizer-0.2.4/tests/1_syn/test_030_keywords_order.py +0 -37
  68. sql_error_categorizer-0.2.4/tests/1_syn/test_034_curly_square_or_unmatched_brackets.py +0 -59
  69. sql_error_categorizer-0.2.4/tests/1_syn/test_037_nonstandard_operators.py +0 -29
  70. sql_error_categorizer-0.2.4/tests/2_sem/test_041_distinct_sum_avg.py +0 -77
  71. sql_error_categorizer-0.2.4/tests/2_sem/test_043_wildcards_without_like.py +0 -104
  72. sql_error_categorizer-0.2.4/tests/2_sem/test_044_incorrect_wildcards.py +0 -143
  73. sql_error_categorizer-0.2.4/tests/4_com/test_088_like_no_wildcards.py +0 -32
  74. sql_error_categorizer-0.2.4/tests/4_com/test_100_order_by_in_subquery.py +0 -55
  75. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/.readthedocs.yaml +0 -0
  76. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/LICENSE +0 -0
  77. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/Makefile +0 -0
  78. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/README.md +0 -0
  79. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/catalogs/constraints.json +0 -0
  80. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/catalogs/miedema.json +0 -0
  81. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/sql/constraints.sql +0 -0
  82. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/datasets/sql/miedema.sql +0 -0
  83. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/Makefile +0 -0
  84. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/conf.py +0 -0
  85. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/index.rst +0 -0
  86. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/make.bat +0 -0
  87. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/docs/requirements.txt +0 -0
  88. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/__init__.py +0 -0
  89. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/__init__.py +0 -0
  90. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/src/sql_error_categorizer/detectors/base.py +0 -0
  91. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/test_detector.py +0 -0
  92. {sql_error_categorizer-0.2.4 → sql_error_categorizer-0.3.1}/tests/__init__.py +0 -0
@@ -1,7 +1,7 @@
1
1
  # CUSTOM
2
2
  test_q.sql
3
3
  test_s?.sql
4
-
4
+ .codex
5
5
 
6
6
  # Byte-compiled / optimized / DLL files
7
7
  __pycache__/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_error_categorizer
3
- Version: 0.2.4
3
+ Version: 0.3.1
4
4
  Summary: This project analyses SQL statements and labels possible errors or complications.
5
5
  Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
6
6
  Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
@@ -14,10 +14,10 @@ Requires-Python: >=3.11
14
14
  Requires-Dist: psycopg2
15
15
  Requires-Dist: python-dateutil
16
16
  Requires-Dist: pyyaml
17
- Requires-Dist: sql-error-taxonomy
17
+ Requires-Dist: sql-error-taxonomy>=2.0.0
18
18
  Requires-Dist: sqlglot
19
19
  Requires-Dist: sqlparse
20
- Requires-Dist: sqlscope>=1.0.15
20
+ Requires-Dist: sqlscope>=1.0.16
21
21
  Requires-Dist: z3-solver
22
22
  Description-Content-Type: text/markdown
23
23
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql_error_categorizer"
7
- version = "0.2.4"
7
+ version = "0.3.1"
8
8
  authors = [
9
9
  { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
10
10
  ]
@@ -21,8 +21,8 @@ dependencies = [
21
21
  "pyyaml",
22
22
  "sqlparse",
23
23
  "sqlglot",
24
- "sqlscope>=1.0.15",
25
- "sql_error_taxonomy",
24
+ "sqlscope>=1.0.16",
25
+ "sql_error_taxonomy>=2.0.0",
26
26
  "z3-solver",
27
27
  "python-dateutil",
28
28
  ]
@@ -4,8 +4,8 @@ sqlparse
4
4
  sqlglot==28.0.0
5
5
  z3-solver
6
6
  python-dateutil
7
- sql_error_taxonomy>=1.0.1
8
- sqlscope>=1.0.15
7
+ sql_error_taxonomy>=2.0.0
8
+ sqlscope>=1.0.16
9
9
 
10
10
  # Dependencies for development
11
11
  ipython
@@ -1,14 +1,10 @@
1
1
  '''Detector for complications in SQL queries.'''
2
2
 
3
- import difflib
4
- import re
5
- import sqlparse
6
- import sqlparse.keywords
7
3
  from typing import Callable
8
4
  from sqlglot import exp
9
5
  from sql_error_taxonomy import SqlErrors
10
- from sqlscope.catalog import ConstraintType, ConstraintColumn, Constraint
11
- from sqlscope import Query, Catalog
6
+ from sqlscope.catalog import ConstraintType, ConstraintColumn
7
+ from sqlscope import Query
12
8
  from sqlscope import util
13
9
 
14
10
  from .base import BaseDetector, DetectedError
@@ -36,29 +32,31 @@ class ComplicationDetector(BaseDetector):
36
32
  results: list[DetectedError] = super().run()
37
33
 
38
34
  checks = [
39
- self.com_83_unnecessary_distinct_in_select_clause,
40
- self.com_84_unnecessary_join,
41
- self.com_85_unused_correlation_name,
42
- self.com_86_correlation_names_are_always_identical,
43
- self.com_87_unnecessary_general_comparison_operator,
44
- self.com_88_like_without_wildcards,
45
- self.com_89_unnecessarily_complicated_select_in_exists_subquery,
46
- self.com_90_in_exists_can_be_replaced_by_comparison,
47
- self.com_91_unnecessary_aggregate_function,
48
- self.com_92_unnecessary_distinct_in_aggregate_function,
49
- self.com_93_unnecessary_argument_of_count,
50
- self.com_94_unnecessary_group_by_in_exists_subquery,
51
- self.com_95_group_by_with_singleton_groups,
52
- self.com_96_group_by_with_only_a_single_group,
53
- self.com_97_group_by_can_be_replaced_by_distinct,
54
- self.com_98_union_can_be_replaced_by_or,
55
- self.com_99_complication_unnecessary_column_in_order_by_clause,
56
- self.com_100_order_by_in_subquery,
57
- self.com_101_inefficient_having,
58
- self.com_102_inefficient_union,
59
- self.com_103_condition_in_the_subquery_can_be_moved_up,
60
- self.com_104_condition_on_left_table_in_left_outer_join,
61
- self.com_105_outer_join_can_be_replaced_by_inner_join,
35
+ self.detect_82_unnecessary_complication, # ok
36
+ self.detect_83_unnecessary_distinct_in_select_clause, # ok
37
+ self.detect_84_unnecessary_table_reference, # TODO: refactor/implement
38
+ self.detect_85_unused_correlation_name, # TODO: implement
39
+ self.detect_86_tables_have_same_data, # TODO: implement
40
+ self.detect_125_correlation_name_identical_to_table_name, # TODO: implement
41
+ self.detect_87_unnecessary_general_comparison_operator, # TODO: implement
42
+ self.detect_88_like_without_wildcards, # ok
43
+ self.detect_89_unnecessarily_complicated_select_in_exists_subquery, # TODO: implement
44
+ self.detect_90_in_exists_can_be_replaced_by_comparison, # TODO: implement
45
+ self.detect_91_unnecessary_aggregate_function, # TODO: implement
46
+ self.detect_92_unnecessary_distinct_in_aggregate_function, # ok
47
+ self.detect_93_unnecessary_argument_of_count, # ok
48
+ self.detect_94_unnecessary_group_by_in_exists_subquery, # TODO: implement
49
+ self.detect_95_group_by_with_singleton_groups, # ok
50
+ self.detect_96_group_by_with_only_a_single_group, # TODO: implement
51
+ self.detect_97_group_by_can_be_replaced_by_distinct, # ok
52
+ self.detect_98_union_can_be_replaced_by_or, # TODO: implement
53
+ self.detect_99_unnecessary_column_in_order_by_clause, # TODO: refactor/implement
54
+ self.detect_100_order_by_in_subquery, # TODO: implement
55
+ self.detect_101_inefficient_having, # TODO: implement
56
+ self.detect_102_inefficient_union, # TODO: implement
57
+ self.detect_103_condition_in_the_subquery_can_be_moved_up, # TODO: implement
58
+ self.detect_104_outer_join_can_be_replaced_by_inner_join, # TODO: implement
59
+ self.detect_126_unused_cte, #
62
60
  ]
63
61
 
64
62
  for chk in checks:
@@ -66,7 +64,11 @@ class ComplicationDetector(BaseDetector):
66
64
 
67
65
  return results
68
66
 
69
- def com_83_unnecessary_distinct_in_select_clause(self) -> list[DetectedError]:
67
+ def detect_82_unnecessary_complication(self) -> list[DetectedError]:
68
+ '''NOTE: this is an umbrella term, so it can't be directly detected.'''
69
+ return []
70
+
71
+ def detect_83_unnecessary_distinct_in_select_clause(self) -> list[DetectedError]:
70
72
  '''
71
73
  Flags a SELECT DISTINCT clause that is unnecessary because the selected
72
74
  columns are already unique due to existing constraints.
@@ -81,12 +83,11 @@ class ComplicationDetector(BaseDetector):
81
83
  constraints = [c for c in select.output.unique_constraints if c.constraint_type != ConstraintType.DISTINCT]
82
84
 
83
85
  if len(constraints) > 0:
84
- result.append(DetectedError(SqlErrors.COM_83_UNNECESSARY_DISTINCT_IN_SELECT_CLAUSE, (select.sql,)))
86
+ result.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_SELECT_CLAUSE, (select.sql,)))
85
87
 
86
88
  return result
87
89
 
88
- # TODO: refactor
89
- def com_84_unnecessary_join(self) -> list[DetectedError]:
90
+ def detect_84_unnecessary_table_reference(self) -> list[DetectedError]:
90
91
  '''
91
92
  Flags a query that joins to a table not present in the correct solution.
92
93
  '''
@@ -110,25 +111,25 @@ class ComplicationDetector(BaseDetector):
110
111
  # Find the original table name (with alias if it was used) to report back
111
112
  original_table_name = next((t for t in original_q_tables if t.lower().startswith(table_name_lower)), table_name_lower)
112
113
  results.append((
113
- SqlErrors.COM_84_UNNECESSARY_JOIN,
114
+ SqlErrors.UNNECESSARY_TABLE_REFERENCE,
114
115
  f"Unnecessary JOIN: The table '{original_table_name}' is not needed to answer the query."
115
116
  ))
116
117
 
117
118
  return results
118
119
 
119
- # TODO: implement
120
- def com_85_unused_correlation_name(self) -> list[DetectedError]:
120
+ def detect_85_unused_correlation_name(self) -> list[DetectedError]:
121
121
  return []
122
122
 
123
- # TODO: implement
124
- def com_86_correlation_names_are_always_identical(self) -> list[DetectedError]:
123
+ def detect_86_tables_have_same_data(self) -> list[DetectedError]:
124
+ return []
125
+
126
+ def detect_125_correlation_name_identical_to_table_name(self) -> list[DetectedError]:
125
127
  return []
126
128
 
127
- # TODO: implement
128
- def com_87_unnecessary_general_comparison_operator(self) -> list[DetectedError]:
129
+ def detect_87_unnecessary_general_comparison_operator(self) -> list[DetectedError]:
129
130
  return []
130
131
 
131
- def com_88_like_without_wildcards(self) -> list[DetectedError]:
132
+ def detect_88_like_without_wildcards(self) -> list[DetectedError]:
132
133
  '''
133
134
  Flags queries where the LIKE operator is used without wildcards ('%' or '_').
134
135
  This indicates a potential misunderstanding, where the '=' operator should
@@ -157,23 +158,20 @@ class ComplicationDetector(BaseDetector):
157
158
  if '%' not in pattern_value and '_' not in pattern_value:
158
159
  full_expression = str(like)
159
160
 
160
- results.append(DetectedError(SqlErrors.COM_88_LIKE_WITHOUT_WILDCARDS, (full_expression,)))
161
+ results.append(DetectedError(SqlErrors.LIKE_WITHOUT_WILDCARDS, (full_expression,)))
161
162
 
162
163
  return results
163
164
 
164
- # TODO: implement
165
- def com_89_unnecessarily_complicated_select_in_exists_subquery(self) -> list[DetectedError]:
165
+ def detect_89_unnecessarily_complicated_select_in_exists_subquery(self) -> list[DetectedError]:
166
166
  return []
167
167
 
168
- # TODO: implement
169
- def com_90_in_exists_can_be_replaced_by_comparison(self) -> list[DetectedError]:
168
+ def detect_90_in_exists_can_be_replaced_by_comparison(self) -> list[DetectedError]:
170
169
  return []
171
170
 
172
- # TODO: implement
173
- def com_91_unnecessary_aggregate_function(self) -> list[DetectedError]:
171
+ def detect_91_unnecessary_aggregate_function(self) -> list[DetectedError]:
174
172
  return []
175
173
 
176
- def com_92_unnecessary_distinct_in_aggregate_function(self) -> list[DetectedError]:
174
+ def detect_92_unnecessary_distinct_in_aggregate_function(self) -> list[DetectedError]:
177
175
  '''MIN and MAX never require DISTINCT. For other aggregate functions, DISTINCT is unnecessary if the argument is unique.'''
178
176
 
179
177
  results: list[DetectedError] = []
@@ -189,7 +187,7 @@ class ComplicationDetector(BaseDetector):
189
187
  continue
190
188
 
191
189
  if isinstance(agg_func, (exp.Min, exp.Max)):
192
- results.append(DetectedError(SqlErrors.COM_92_UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
190
+ results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
193
191
  continue
194
192
 
195
193
  arg_expr = agg_func.this.expressions # `.this` is the DISTINCT, `.expressions` are the arguments
@@ -199,7 +197,7 @@ class ComplicationDetector(BaseDetector):
199
197
  for expr in arg_expr:
200
198
  # Check if the argument is a constant literal
201
199
  if isinstance(expr, exp.Literal):
202
- results.append(DetectedError(SqlErrors.COM_92_UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
200
+ results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
203
201
  continue
204
202
 
205
203
  # Check if the argument is a column
@@ -210,18 +208,17 @@ class ComplicationDetector(BaseDetector):
210
208
  unique_constraints = [c for c in select.all_constraints if c.constraint_type == ConstraintType.UNIQUE]
211
209
  for constraint in unique_constraints:
212
210
  if { ConstraintColumn(column_name, table_idx=select._get_table_idx_for_column(expr)) } == constraint.columns:
213
- results.append(DetectedError(SqlErrors.COM_92_UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
211
+ results.append(DetectedError(SqlErrors.UNNECESSARY_DISTINCT_IN_AGGREGATE_FUNCTION, (str(agg_func),)))
214
212
  break
215
213
  return results
216
214
 
217
- def com_93_unnecessary_argument_of_count(self) -> list[DetectedError]:
215
+ def detect_93_unnecessary_argument_of_count(self) -> list[DetectedError]:
218
216
  return []
219
217
 
220
- # TODO: implement
221
- def com_94_unnecessary_group_by_in_exists_subquery(self) -> list[DetectedError]:
218
+ def detect_94_unnecessary_group_by_in_exists_subquery(self) -> list[DetectedError]:
222
219
  return []
223
220
 
224
- def com_95_group_by_with_singleton_groups(self) -> list[DetectedError]:
221
+ def detect_95_group_by_with_singleton_groups(self) -> list[DetectedError]:
225
222
  '''
226
223
  Flags GROUP BY clauses on singleton groups due to the presence
227
224
  of UNIQUE constraints on the grouped columns.
@@ -241,16 +238,15 @@ class ComplicationDetector(BaseDetector):
241
238
 
242
239
  for constraint in constraints:
243
240
  if constraint.columns.issubset(group_by_constraint.columns):
244
- results.append(DetectedError(SqlErrors.COM_95_GROUP_BY_WITH_SINGLETON_GROUPS, (group_by_constraint, constraint)))
241
+ results.append(DetectedError(SqlErrors.GROUP_BY_WITH_SINGLETON_GROUPS, (group_by_constraint, constraint)))
245
242
  break
246
243
 
247
244
  return results
248
245
 
249
- # TODO: implement
250
- def com_96_group_by_with_only_a_single_group(self) -> list[DetectedError]:
246
+ def detect_96_group_by_with_only_a_single_group(self) -> list[DetectedError]:
251
247
  return []
252
248
 
253
- def com_97_group_by_can_be_replaced_by_distinct(self) -> list[DetectedError]:
249
+ def detect_97_group_by_can_be_replaced_by_distinct(self) -> list[DetectedError]:
254
250
  '''
255
251
  Flags GROUP BY clauses that can be replaced by SELECT DISTINCT.
256
252
  This occurs when all selected columns are included in the GROUP BY clause
@@ -290,18 +286,14 @@ class ComplicationDetector(BaseDetector):
290
286
  group_by_col_names = {(util.ast.column.get_real_name(col), select._get_table_idx_for_column(col)) for col in group_by_columns}
291
287
 
292
288
  if select_col_names == group_by_col_names:
293
- results.append(DetectedError(SqlErrors.COM_97_GROUP_BY_CAN_BE_REPLACED_WITH_DISTINCT, (select_col_names,)))
289
+ results.append(DetectedError(SqlErrors.GROUP_BY_CAN_BE_REPLACED_WITH_DISTINCT, (select_col_names,)))
294
290
 
295
291
  return results
296
292
 
297
-
298
-
299
- # TODO: implement
300
- def com_98_union_can_be_replaced_by_or(self) -> list[DetectedError]:
293
+ def detect_98_union_can_be_replaced_by_or(self) -> list[DetectedError]:
301
294
  return []
302
295
 
303
- # TODO: refactor
304
- def com_99_complication_unnecessary_column_in_order_by_clause(self) -> list[DetectedError]:
296
+ def detect_99_unnecessary_column_in_order_by_clause(self) -> list[DetectedError]:
305
297
  '''
306
298
  Flags when the ORDER BY clause contains unnecessary columns in addition
307
299
  to the required ones.
@@ -329,8 +321,7 @@ class ComplicationDetector(BaseDetector):
329
321
 
330
322
  return results
331
323
 
332
- # TODO: implement
333
- def com_100_order_by_in_subquery(self) -> list[DetectedError]:
324
+ def detect_100_order_by_in_subquery(self) -> list[DetectedError]:
334
325
  '''
335
326
  Flags when a subquery contains an ORDER BY clause.
336
327
  Subqueries both ORDER BY and LIMIT are considered valid.
@@ -348,157 +339,37 @@ class ComplicationDetector(BaseDetector):
348
339
 
349
340
  checked_subqueries.add(subquery.sql)
350
341
  if subquery.order_by and not subquery.limit:
351
- results.append(DetectedError(SqlErrors.COM_100_ORDER_BY_IN_SUBQUERY, (subquery.sql,)))
342
+ results.append(DetectedError(SqlErrors.ORDER_BY_IN_SUBQUERY, (subquery.sql,)))
352
343
 
353
344
  return results
354
345
 
355
- # TODO: implement
356
- def com_101_inefficient_having(self) -> list[DetectedError]:
357
- return []
358
-
359
- # TODO: implement
360
- def com_102_inefficient_union(self) -> list[DetectedError]:
346
+ def detect_101_inefficient_having(self) -> list[DetectedError]:
361
347
  return []
362
348
 
363
- # TODO: implement
364
- def com_103_condition_in_the_subquery_can_be_moved_up(self) -> list[DetectedError]:
349
+ def detect_102_inefficient_union(self) -> list[DetectedError]:
365
350
  return []
366
351
 
367
- # TODO: implement
368
- def com_104_condition_on_left_table_in_left_outer_join(self) -> list[DetectedError]:
352
+ def detect_103_condition_in_the_subquery_can_be_moved_up(self) -> list[DetectedError]:
369
353
  return []
370
354
 
371
- # TODO: implement
372
- def com_105_outer_join_can_be_replaced_by_inner_join(self) -> list[DetectedError]:
355
+ def detect_104_outer_join_can_be_replaced_by_inner_join(self) -> list[DetectedError]:
373
356
  return []
374
357
 
358
+ def detect_126_unused_cte(self) -> list[DetectedError]:
359
+ results: list[DetectedError] = []
375
360
 
376
- #region Utility methods
377
- def _get_select_columns(self, ast: dict) -> list:
378
- '''
379
- Extracts a list of simple column names from a SELECT query's AST.
380
- '''
381
- columns = []
382
- if not ast:
383
- return columns
384
-
385
- select_expressions = ast.get('args', {}).get('expressions', [])
386
-
387
- for expr_node in select_expressions:
388
- col_name = self._find_underlying_column(expr_node)
389
- if col_name:
390
- columns.append(col_name)
391
-
392
- return columns
393
- def _find_underlying_column(self, node: dict):
394
- '''
395
- Recursively traverses an expression node to find the underlying column identifier.
396
- '''
397
- if not isinstance(node, dict):
398
- return None
399
-
400
- node_class = node.get('class')
401
-
402
- if node_class == 'Paren':
403
- return self._find_underlying_column(node.get('args', {}).get('this'))
404
-
405
- if node_class == 'Column':
406
- try:
407
- return node['args']['expression']['args']['this']
408
- except (KeyError, TypeError):
409
- try:
410
- return node['args']['this']['args']['this']
411
- except (KeyError, TypeError):
412
- return None
413
-
414
- if node_class == 'Alias':
415
- return self._find_underlying_column(node.get('args', {}).get('this'))
416
- def _get_from_tables(self, ast: dict, with_alias=False) -> list:
417
- '''
418
- Extracts a list of all table names from the FROM and JOIN clauses of a query's AST.
419
- '''
420
- tables = []
421
- if not ast:
422
- return tables
423
-
424
- args = ast.get('args', {})
425
-
426
- # 1. Process the main table from the 'from' clause
427
- from_node = args.get('from')
428
- if from_node:
429
- # The actual table data is inside the 'this' argument of the 'From' node
430
- main_table_node = from_node.get('args', {}).get('this')
431
- if main_table_node:
432
- self._collect_tables_recursive(main_table_node, tables, with_alias)
433
-
434
- # 2. Process all tables from the 'joins' list
435
- join_nodes = args.get('joins', [])
436
- for join_node in join_nodes:
437
- self._collect_tables_recursive(join_node, tables, with_alias)
438
-
439
- return list(set(tables))
440
- def _collect_tables_recursive(self, node: dict, tables: list, with_alias=False):
441
- '''
442
- Recursively traverses a FROM clause node (including joins) to collect table names.
443
- '''
444
- if not isinstance(node, dict):
445
- return
446
-
447
- node_class = node.get('class')
448
-
449
- # This part handles aliased tables (e.g., "customer c") and regular tables
450
- if node_class == 'Alias':
451
- underlying_node = node.get('args', {}).get('this')
452
- # Recurse in case the alias is on a subquery or another join
453
- self._collect_tables_recursive(underlying_node, tables, with_alias)
454
-
455
- elif node_class == 'Table':
456
- try:
457
- # The AST nests identifiers, so we go deep to get the name
458
- table_name = node['args']['this']['args']['this']
459
- alias_node = node.get('args', {}).get('alias')
460
- if with_alias and alias_node:
461
- alias_name = alias_node.get('args', {}).get('this', {}).get('args', {}).get('this')
462
- tables.append(f"{table_name} AS {alias_name}")
463
- else:
464
- tables.append(table_name)
465
- except (KeyError, TypeError):
466
- pass
361
+ if not self.query.ctes:
362
+ return results
467
363
 
468
- # This part handles Join nodes found in the 'joins' list
469
- elif node_class == 'Join':
470
- # The joined table is in the 'this' argument of the Join node
471
- self._collect_tables_recursive(node.get('args', {}).get('this'), tables, with_alias)
472
- # The other side of the join is already handled in the 'from' clause,
473
- # but we check for 'expression' for other potential join structures.
474
- if 'expression' in node.get('args', {}):
475
- self._collect_tables_recursive(node.get('args', {}).get('expression'), tables, with_alias)
476
- def _get_orderby_columns(self, ast: dict) -> list:
477
- '''
478
- Extracts a list of columns and their sort direction from an ORDER BY clause.
479
- '''
480
- orderby_terms = []
481
- if not ast:
482
- return orderby_terms
364
+ used_ctes: dict[int, bool] = {i: False for i in range(len(self.query.ctes))}
483
365
 
484
- orderby_node = ast.get('args', {}).get('order')
485
- if not orderby_node:
486
- return orderby_terms
366
+ for select in self.query.selects:
367
+ for table in select.referenced_tables:
368
+ if table.cte_idx is not None:
369
+ used_ctes[table.cte_idx] = True
487
370
 
488
- try:
489
- for term_node in orderby_node['args']['expressions']:
490
- if term_node.get('class') != 'Ordered':
491
- continue
492
-
493
- column_node = term_node.get('args', {}).get('this')
494
-
495
- col_name = self._find_underlying_column(column_node)
496
-
497
- if col_name:
498
- direction = term_node.get('args', {}).get('direction', 'ASC').upper()
499
- orderby_terms.append((col_name, direction))
500
- except (KeyError, AttributeError):
501
- return []
502
-
503
- return orderby_terms
504
- #endregion Utility methods
371
+ for cte_idx, used in used_ctes.items():
372
+ if not used:
373
+ results.append(DetectedError(SqlErrors.UNUSED_CTE, (self.query.ctes[cte_idx].sql,)))
374
+
375
+ return results