sql-error-categorizer 0.1.8__tar.gz → 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/Makefile +2 -5
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/PKG-INFO +1 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/conf.py +3 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/pyproject.toml +1 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/logical.py +2 -2
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/syntax.py +41 -18
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/__init__.py +4 -8
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/binary_set_operation.py +13 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/set_operation.py +0 -12
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/smt.py +1 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/base.py +1 -2
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/binary_ops.py +8 -4
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/functions.py +6 -5
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/predicates.py +11 -7
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/primitives.py +9 -3
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/types.py +3 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/unary_ops.py +8 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/util.py +2 -1
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/sql_errors.py +2 -2
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/__init__.py +27 -6
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/table.py +0 -2
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_016_extraneous_omitted_grouping_column.py +26 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/2_sem/test_040_tautological_inconsistent_expressions.py +16 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/query/test_typechecking.py +0 -7
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/.gitignore +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/.readthedocs.yaml +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/LICENSE +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/README.md +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/datasets/catalogs/constraints.json +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/datasets/catalogs/miedema.json +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/datasets/sql/constraints.sql +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/datasets/sql/miedema.sql +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/Makefile +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/index.rst +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/make.bat +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/requirements.txt +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/requirements.txt +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/builder/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/builder/queries.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/catalog.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/column.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/constraint.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/schema.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/catalog/table.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/base.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/complications.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/semantic.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/extractors.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/query.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/tokenized_sql.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/queries.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/column.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/function.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/subquery.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/sql.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/tokens.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/test_detector.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/__init__.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_002_ambiguous_column.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_004_undefined_column.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_005_undefined_function.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_006_undefined_parameter.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_007_undefined_tables.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_008_invalid_schema_names.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_009_misspellings.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_013_data_type_mismatch.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_014_aggregate_function_outside_select_or_having.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_015_nested_aggregate_functions.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_017_having_without_group_by.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_019_using_where_twice.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_020_missing_from.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_021_comparison_with_null.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_022_038_additional_omitted_semicolons.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_024_duplicate_clause.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_026_too_many_columns_in_subquery.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_030_keywords_order.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_034_curly_square_or_unmatched_brackets.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_035_is_where_not_applicable.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_037_nonstandard_operators.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/2_sem/test_041_distinct_sum_avg.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/2_sem/test_043_wildcards_without_like.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/2_sem/test_044_incorrect_wildcards.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_058_join_on_incorrect_table.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_059_join_when_join_needs_to_be_omitted.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_062_missing_join.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_070_extraneous_column_in_select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_071_missing_column_from_select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_072_missing_distinct_from_select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/3_log/test_073_missing_as_from_select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_083_unnecessary_distinct_in_select.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_088_like_no_wildcards.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_092_unnecessary_distinct_in_aggregate_function.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_095_group_by_with_singleton_groups.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_097_group_by_can_be_replaced_by_distinct.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/4_com/test_100_order_by_in_subquery.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/query/test_extractors.py +0 -0
- {sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/query/test_query.py +0 -0
|
@@ -12,7 +12,7 @@ else
|
|
|
12
12
|
VENV_BIN=$(VENV)/bin
|
|
13
13
|
endif
|
|
14
14
|
|
|
15
|
-
.PHONY: install build uninstall documentation test upload download clean
|
|
15
|
+
.PHONY: install build uninstall documentation test upload download clean coverage
|
|
16
16
|
|
|
17
17
|
$(VENV):
|
|
18
18
|
python -m venv --clear $(VENV)
|
|
@@ -34,7 +34,7 @@ uninstall: $(VENV)
|
|
|
34
34
|
$(VENV_BIN)/python -m pip uninstall -y $(NAME)
|
|
35
35
|
|
|
36
36
|
documentation:
|
|
37
|
-
make html -C docs/
|
|
37
|
+
make html SPHINXBUILD="../$(VENV_BIN)/sphinx-build" -C docs/
|
|
38
38
|
|
|
39
39
|
test: install
|
|
40
40
|
$(VENV_BIN)/python -m pytest
|
|
@@ -54,7 +54,4 @@ clean:
|
|
|
54
54
|
find . -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
|
|
55
55
|
rm -rf dist docs/_build .pytest_cache .coverage tests/htmlcov
|
|
56
56
|
|
|
57
|
-
ipython:
|
|
58
|
-
$(VENV_BIN)/ipython
|
|
59
|
-
|
|
60
57
|
########## Makefile end ##########
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_error_categorizer
|
|
3
|
-
Version: 0.1.8
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: This project analyses SQL statements and labels possible errors or complications.
|
|
5
5
|
Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
|
|
6
6
|
Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
|
|
@@ -29,7 +29,9 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
|
|
29
29
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
|
30
30
|
|
|
31
31
|
html_theme = 'alabaster'
|
|
32
|
-
html_static_path = [
|
|
32
|
+
html_static_path = [
|
|
33
|
+
# '_static',
|
|
34
|
+
]
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
# -- Autoapi -----------------------------------------------------------------
|
|
@@ -711,14 +711,14 @@ class LogicalErrorDetector(BaseDetector):
|
|
|
711
711
|
|
|
712
712
|
def _selects_star(self, ast: dict) -> bool:
|
|
713
713
|
'''
|
|
714
|
-
Checks if a
|
|
714
|
+
Checks if a `SELECT *` is used in the query by looking for a 'Star'
|
|
715
715
|
node in the AST's expression list.
|
|
716
716
|
|
|
717
717
|
Args:
|
|
718
718
|
ast: The Abstract Syntax Tree of the query.
|
|
719
719
|
|
|
720
720
|
Returns:
|
|
721
|
-
True if
|
|
721
|
+
True if `SELECT *` is found, otherwise False.
|
|
722
722
|
'''
|
|
723
723
|
if not ast:
|
|
724
724
|
return False
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
'''Detector for syntax errors in SQL queries.'''
|
|
2
2
|
|
|
3
|
+
from dataclasses import dataclass
|
|
3
4
|
import difflib
|
|
4
5
|
import re
|
|
5
6
|
import sqlparse
|
|
@@ -752,10 +753,23 @@ class SyntaxErrorDetector(BaseDetector):
|
|
|
752
753
|
|
|
753
754
|
def syn_16_extraneous_or_omitted_grouping_column(self) -> list[DetectedError]:
|
|
754
755
|
'''
|
|
755
|
-
|
|
756
|
-
|
|
756
|
+
All columns in SELECT must be either included in the GROUP BY clause or aggregated.
|
|
757
|
+
|
|
758
|
+
All non-aggregated columns in HAVING must not be included in the GROUP BY clause.
|
|
757
759
|
'''
|
|
758
760
|
|
|
761
|
+
@dataclass(frozen=True)
|
|
762
|
+
class ColumnInfo:
|
|
763
|
+
name: str
|
|
764
|
+
alias: str
|
|
765
|
+
is_aggregated: bool = False
|
|
766
|
+
|
|
767
|
+
def get_column_name(col: exp.Column | exp.Alias) -> ColumnInfo:
|
|
768
|
+
'''Return normalized column name and alias. If no alias, both are the same.'''
|
|
769
|
+
col_name = util.ast.column.get_real_name(col)
|
|
770
|
+
col_alias = util.ast.column.get_name(col)
|
|
771
|
+
return ColumnInfo(col_name, col_alias)
|
|
772
|
+
|
|
759
773
|
results: list[DetectedError] = []
|
|
760
774
|
|
|
761
775
|
for select in self.query.selects:
|
|
@@ -765,32 +779,29 @@ class SyntaxErrorDetector(BaseDetector):
|
|
|
765
779
|
if not select.group_by:
|
|
766
780
|
continue # no GROUP BY, skip
|
|
767
781
|
|
|
768
|
-
select_columns: list[
|
|
769
|
-
|
|
770
|
-
def get_column_name(col: exp.Column | exp.Alias) -> tuple[str, str]:
|
|
771
|
-
'''Return normalized column name and alias. If no alias, both are the same.'''
|
|
772
|
-
col_name = util.ast.column.get_real_name(col)
|
|
773
|
-
col_alias = util.ast.column.get_name(col)
|
|
774
|
-
return col_name, col_alias
|
|
782
|
+
select_columns: list[ColumnInfo] = [] # we need a list for positional GROUP BY handling
|
|
775
783
|
|
|
784
|
+
# Gather non-aggregated columns from SELECT
|
|
776
785
|
for col in select.ast.expressions:
|
|
777
786
|
if isinstance(col, exp.Star):
|
|
778
787
|
# SELECT * case: expand to all columns from all referenced tables
|
|
779
788
|
for table in select.referenced_tables:
|
|
780
789
|
for table_col in table.columns:
|
|
781
|
-
select_columns.append((table_col.name, table_col.name))
|
|
790
|
+
select_columns.append(ColumnInfo(table_col.name, table_col.name))
|
|
782
791
|
if isinstance(col, exp.Column) or isinstance(col, exp.Alias):
|
|
783
792
|
col_name = get_column_name(col)
|
|
784
793
|
select_columns.append(col_name)
|
|
785
794
|
elif isinstance(col, exp.Func):
|
|
786
|
-
|
|
795
|
+
# aggregated, add the column but skip it later
|
|
796
|
+
select_columns.append(ColumnInfo(col.sql(), col.sql(), is_aggregated=True))
|
|
787
797
|
else:
|
|
788
798
|
# Complex expression: try to extract columns
|
|
789
799
|
for c in col.find_all(exp.Column):
|
|
790
800
|
col_name = get_column_name(c)
|
|
791
801
|
select_columns.append(col_name)
|
|
792
802
|
|
|
793
|
-
|
|
803
|
+
# Gather columns from GROUP BY
|
|
804
|
+
group_by_columns: set[ColumnInfo] = set()
|
|
794
805
|
for gb in select.group_by:
|
|
795
806
|
if isinstance(gb, exp.Column):
|
|
796
807
|
gb_name = get_column_name(gb)
|
|
@@ -803,21 +814,33 @@ class SyntaxErrorDetector(BaseDetector):
|
|
|
803
814
|
group_by_columns.add(select_columns[val - 1])
|
|
804
815
|
except ValueError:
|
|
805
816
|
continue
|
|
817
|
+
elif isinstance(gb, exp.AggFunc):
|
|
818
|
+
group_by_columns.add(ColumnInfo(gb.sql(), gb.sql(), is_aggregated=True))
|
|
806
819
|
else:
|
|
807
820
|
# Complex expression in GROUP BY: try to extract columns
|
|
808
821
|
for c in gb.find_all(exp.Column):
|
|
809
822
|
gb_name = get_column_name(c)
|
|
810
823
|
group_by_columns.add(gb_name)
|
|
811
824
|
|
|
812
|
-
|
|
813
|
-
|
|
825
|
+
|
|
826
|
+
# Ensure all non-aggregated columns in SELECT are in GROUP BY
|
|
827
|
+
for select_col in set(select_columns): # convert to set to avoid outputting the same error multiple times
|
|
828
|
+
if select_col.is_aggregated:
|
|
829
|
+
continue # aggregated, skip
|
|
830
|
+
if any(select_col.name == group_col.name or select_col.alias == group_col.alias for group_col in group_by_columns):
|
|
814
831
|
continue # valid: in GROUP BY
|
|
815
|
-
results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(
|
|
832
|
+
results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(select_col.name, 'ONLY IN SELECT')))
|
|
816
833
|
|
|
817
|
-
|
|
818
|
-
|
|
834
|
+
# Ensure all non-aggregated columns in GROUP BY are in SELECT
|
|
835
|
+
# (Note: aggregated columns in GROUP BY are invalid)
|
|
836
|
+
for group_col in group_by_columns:
|
|
837
|
+
if group_col.is_aggregated:
|
|
838
|
+
results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col.name, 'AGGREGATED IN GROUP BY')))
|
|
839
|
+
continue
|
|
840
|
+
if any(group_col.name == select_col.name or group_col.alias == select_col.alias for select_col in select_columns):
|
|
819
841
|
continue # valid: in SELECT
|
|
820
|
-
results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col, 'ONLY IN GROUP BY')))
|
|
842
|
+
results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col.name, 'ONLY IN GROUP BY')))
|
|
843
|
+
# Ensure all non-aggregated columns in HAVING are in GROUP BY
|
|
821
844
|
|
|
822
845
|
return results
|
|
823
846
|
|
|
@@ -78,11 +78,9 @@ def create_set_operation_tree(sql: str, catalog: Catalog = Catalog(), search_pat
|
|
|
78
78
|
def parse_op_token(tok: sqlparse.sql.Token) -> tuple[str, bool | None] | None:
|
|
79
79
|
'''
|
|
80
80
|
Parse "UNION", "INTERSECT", "EXCEPT" with optional inline ALL/DISTINCT.
|
|
81
|
+
|
|
81
82
|
Returns:
|
|
82
|
-
tuple: `(op, all_flag)` where all_flag is:
|
|
83
|
-
- True if ALL inline (e.g., "UNION ALL")
|
|
84
|
-
- False if DISTINCT inline (e.g., "EXCEPT DISTINCT")
|
|
85
|
-
- None if no modifier inline (so caller may look right).
|
|
83
|
+
tuple: `(op, all_flag)` where all_flag is: True if ALL inline (e.g., "UNION ALL"); False if DISTINCT inline (e.g., "EXCEPT DISTINCT"); None if no modifier inline (so caller may look right).
|
|
86
84
|
'''
|
|
87
85
|
if tok.ttype is not Keyword:
|
|
88
86
|
return None
|
|
@@ -106,11 +104,9 @@ def split_on(tokens: list[sqlparse.sql.Token], idx: int, all_in_token: bool | No
|
|
|
106
104
|
'''
|
|
107
105
|
Splits around the operator at idx. If the modifier wasn't inline,
|
|
108
106
|
consume a single immediate ALL/DISTINCT to the right.
|
|
107
|
+
|
|
109
108
|
Returns:
|
|
110
|
-
tuple: A tuple containing:
|
|
111
|
-
- left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator.
|
|
112
|
-
- right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator
|
|
113
|
-
- all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
|
|
109
|
+
tuple: A tuple containing: left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator; right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator; all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
|
|
114
110
|
'''
|
|
115
111
|
left_tokens = tokens[:idx]
|
|
116
112
|
right_tokens = tokens[idx + 1:]
|
|
@@ -3,6 +3,7 @@ from ...catalog import Table, Constraint, ConstraintType, ConstraintColumn
|
|
|
3
3
|
|
|
4
4
|
from abc import ABC
|
|
5
5
|
from copy import deepcopy
|
|
6
|
+
import sqlglot
|
|
6
7
|
from sqlglot import exp
|
|
7
8
|
|
|
8
9
|
from typing import TYPE_CHECKING
|
|
@@ -44,6 +45,18 @@ class BinarySetOperation(SetOperation, ABC):
|
|
|
44
45
|
|
|
45
46
|
return result
|
|
46
47
|
|
|
48
|
+
@property
|
|
49
|
+
def trailing_ast(self) -> exp.Expression | None:
|
|
50
|
+
'''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
|
|
51
|
+
if self.trailing_sql is None:
|
|
52
|
+
return None
|
|
53
|
+
if self._trailing_ast is None:
|
|
54
|
+
# Parse trailing SQL with a fake SELECT to get valid AST
|
|
55
|
+
fake_sql = f'SELECT 1 {self.trailing_sql}'
|
|
56
|
+
parsed = sqlglot.parse_one(fake_sql)
|
|
57
|
+
self._trailing_ast = parsed
|
|
58
|
+
return self._trailing_ast
|
|
59
|
+
|
|
47
60
|
@property
|
|
48
61
|
def output(self) -> Table:
|
|
49
62
|
# Assume the output schema is the same as the left input
|
|
@@ -40,18 +40,6 @@ class SetOperation(ABC):
|
|
|
40
40
|
@abstractmethod
|
|
41
41
|
def print_tree(self, pre: str = '') -> None:
|
|
42
42
|
pass
|
|
43
|
-
|
|
44
|
-
@property
|
|
45
|
-
def trailing_ast(self) -> exp.Expression | None:
|
|
46
|
-
'''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
|
|
47
|
-
if self.trailing_sql is None:
|
|
48
|
-
return None
|
|
49
|
-
if self._trailing_ast is None:
|
|
50
|
-
# Parse trailing SQL with a fake SELECT to get valid AST
|
|
51
|
-
fake_sql = f'SELECT 1 {self.trailing_sql}'
|
|
52
|
-
parsed = sqlglot.parse_one(fake_sql)
|
|
53
|
-
self._trailing_ast = parsed
|
|
54
|
-
return self._trailing_ast
|
|
55
43
|
|
|
56
44
|
@property
|
|
57
45
|
@abstractmethod
|
{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/smt.py
RENAMED
|
@@ -244,8 +244,8 @@ def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
|
|
|
244
244
|
if wildcard_count > 2:
|
|
245
245
|
return target == StringVal(pattern)
|
|
246
246
|
|
|
247
|
-
# PREFIX pattern: abc%
|
|
248
247
|
if '%' in pattern and '_' not in pattern:
|
|
248
|
+
# PREFIX pattern: abc%
|
|
249
249
|
if pattern.endswith('%') and pattern.count('%') == 1:
|
|
250
250
|
prefix = pattern[:-1]
|
|
251
251
|
return PrefixOf(StringVal(prefix), target)
|
|
@@ -2,9 +2,8 @@ import sqlglot.expressions as exp
|
|
|
2
2
|
from .types import AtomicType, ResultType
|
|
3
3
|
from ...catalog import Catalog
|
|
4
4
|
from functools import singledispatch
|
|
5
|
-
from .util import error_message
|
|
6
5
|
|
|
7
6
|
@singledispatch
|
|
8
7
|
def get_type(expression: exp.Expression, catalog: Catalog, search_path: str) -> ResultType:
|
|
9
8
|
'''Returns the type of the given SQL expression.'''
|
|
10
|
-
return AtomicType(
|
|
9
|
+
return AtomicType() # Default to unhandled expression
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from .base import get_type
|
|
2
2
|
from ...catalog import Catalog
|
|
3
3
|
from sqlglot import exp
|
|
4
|
-
from .types import ResultType, AtomicType
|
|
4
|
+
from .types import ResultType, AtomicType
|
|
5
|
+
from sqlglot.expressions import DataType
|
|
5
6
|
from .util import is_number, to_number, to_date, error_message
|
|
6
7
|
|
|
7
8
|
@get_type.register
|
|
@@ -18,13 +19,13 @@ def _(expression: exp.Binary, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
18
19
|
|
|
19
20
|
if left_type != right_type:
|
|
20
21
|
|
|
21
|
-
if not to_number(left_type) and left_type.data_type != DataType.Type.NULL:
|
|
22
|
+
if left_type.data_type != DataType.Type.UNKNOWN and not to_number(left_type) and left_type.data_type != DataType.Type.NULL:
|
|
22
23
|
old_messages.append(error_message(expression, left_type, "numeric"))
|
|
23
24
|
|
|
24
|
-
if not to_number(right_type) and right_type.data_type != DataType.Type.NULL:
|
|
25
|
+
if right_type.data_type != DataType.Type.UNKNOWN and not to_number(right_type) and right_type.data_type != DataType.Type.NULL:
|
|
25
26
|
old_messages.append(error_message(expression, right_type, "numeric"))
|
|
26
27
|
|
|
27
|
-
elif not is_number(left_type.data_type) and not is_number(right_type.data_type):
|
|
28
|
+
elif DataType.Type.UNKNOWN != left_type.data_type and not is_number(left_type.data_type) and not is_number(right_type.data_type):
|
|
28
29
|
if left_type.data_type != DataType.Type.NULL or right_type.data_type != DataType.Type.NULL:
|
|
29
30
|
old_messages.append(error_message(expression, left_type, "numeric"))
|
|
30
31
|
|
|
@@ -33,6 +34,9 @@ def _(expression: exp.Binary, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
33
34
|
# handle comparison typechecking (e.g =, <, >, etc.)
|
|
34
35
|
def typecheck_comparisons(left_type: ResultType, right_type: ResultType, expression: exp.Binary, old_messages: list) -> ResultType:
|
|
35
36
|
|
|
37
|
+
if DataType.Type.UNKNOWN in (left_type.data_type, right_type.data_type):
|
|
38
|
+
return AtomicType(data_type=expression.type.this,messages=old_messages)
|
|
39
|
+
|
|
36
40
|
# for boolean comparisons we can have only equality/inequality
|
|
37
41
|
if DataType.Type.BOOLEAN == left_type.data_type == right_type.data_type:
|
|
38
42
|
if not isinstance(expression, (exp.EQ, exp.NEQ)):
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from .base import get_type
|
|
2
2
|
from ...catalog import Catalog
|
|
3
3
|
from sqlglot import exp
|
|
4
|
-
from .types import ResultType, AtomicType
|
|
4
|
+
from .types import ResultType, AtomicType
|
|
5
|
+
from sqlglot.expressions import DataType
|
|
5
6
|
from .util import is_number, error_message
|
|
6
7
|
|
|
7
8
|
@get_type.register
|
|
@@ -16,7 +17,7 @@ def _(expression: exp.Avg, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
16
17
|
|
|
17
18
|
old_messages = inner_type.messages
|
|
18
19
|
|
|
19
|
-
if not is_number(inner_type.data_type):
|
|
20
|
+
if inner_type.data_type != DataType.Type.UNKNOWN and not is_number(inner_type.data_type):
|
|
20
21
|
old_messages.append(error_message(expression, inner_type, "NUMERIC"))
|
|
21
22
|
|
|
22
23
|
return AtomicType(data_type=expression.type.this, nullable=True, constant=True, messages=old_messages)
|
|
@@ -27,7 +28,7 @@ def _(expression: exp.Sum, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
27
28
|
|
|
28
29
|
old_messages = inner_type.messages
|
|
29
30
|
|
|
30
|
-
if not is_number(inner_type.data_type):
|
|
31
|
+
if inner_type.data_type != DataType.Type.UNKNOWN and not is_number(inner_type.data_type):
|
|
31
32
|
old_messages.append(error_message(expression, inner_type, "NUMERIC"))
|
|
32
33
|
|
|
33
34
|
return AtomicType(data_type=expression.type.this, nullable=True, constant=True, messages=old_messages)
|
|
@@ -38,7 +39,7 @@ def _(expression: exp.Min, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
38
39
|
|
|
39
40
|
old_messages = inner_type.messages
|
|
40
41
|
|
|
41
|
-
if inner_type.data_type
|
|
42
|
+
if inner_type.data_type != DataType.Type.UNKNOWN and inner_type.data_type == DataType.Type.BOOLEAN:
|
|
42
43
|
old_messages.append(error_message(expression, inner_type))
|
|
43
44
|
|
|
44
45
|
return AtomicType(data_type=inner_type.data_type, nullable=inner_type.nullable, constant=True, messages=old_messages)
|
|
@@ -49,7 +50,7 @@ def _(expression: exp.Max, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
49
50
|
|
|
50
51
|
old_messages = inner_type.messages
|
|
51
52
|
|
|
52
|
-
if inner_type.data_type
|
|
53
|
+
if inner_type.data_type != DataType.Type.UNKNOWN and inner_type.data_type == DataType.Type.BOOLEAN:
|
|
53
54
|
old_messages.append(error_message(expression, inner_type))
|
|
54
55
|
|
|
55
56
|
return AtomicType(data_type=inner_type.data_type, nullable=inner_type.nullable, constant=True, messages=old_messages)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from .base import get_type
|
|
2
2
|
from ...catalog import Catalog
|
|
3
3
|
from sqlglot import exp
|
|
4
|
-
from .types import ResultType, AtomicType
|
|
4
|
+
from .types import ResultType, AtomicType
|
|
5
|
+
from sqlglot.expressions import DataType
|
|
5
6
|
from .util import is_string, to_number, to_date, error_message
|
|
6
7
|
|
|
7
8
|
@get_type.register
|
|
@@ -11,10 +12,10 @@ def _(expression: exp.Like, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
11
12
|
|
|
12
13
|
old_messages = left_type.messages + right_type.messages
|
|
13
14
|
|
|
14
|
-
if not is_string(left_type.data_type) and left_type.data_type != DataType.Type.NULL:
|
|
15
|
+
if left_type.data_type != DataType.Type.UNKNOWN and not is_string(left_type.data_type) and left_type.data_type != DataType.Type.NULL:
|
|
15
16
|
old_messages.append(error_message(expression, left_type, 'string'))
|
|
16
17
|
|
|
17
|
-
if not is_string(right_type.data_type) and right_type.data_type != DataType.Type.NULL:
|
|
18
|
+
if right_type.data_type != DataType.Type.UNKNOWN and not is_string(right_type.data_type) and right_type.data_type != DataType.Type.NULL:
|
|
18
19
|
old_messages.append(error_message(expression, right_type, 'string'))
|
|
19
20
|
|
|
20
21
|
# Always returns boolean
|
|
@@ -32,7 +33,7 @@ def _(expression: exp.Is, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
32
33
|
old_messages.append(error_message(expression, right_type, 'boolean|null'))
|
|
33
34
|
|
|
34
35
|
# if right is BOOLEAN and left is not NULL, left must be BOOLEAN
|
|
35
|
-
if right_type.data_type == DataType.Type.BOOLEAN and left_type.data_type != DataType.Type.NULL:
|
|
36
|
+
if left_type.data_type != DataType.Type.UNKNOWN and right_type.data_type == DataType.Type.BOOLEAN and left_type.data_type != DataType.Type.NULL:
|
|
36
37
|
if left_type.data_type != DataType.Type.BOOLEAN:
|
|
37
38
|
old_messages.append(error_message(expression, left_type, 'boolean'))
|
|
38
39
|
|
|
@@ -48,16 +49,16 @@ def _(expression: exp.Between, catalog: Catalog, search_path: str) -> ResultType
|
|
|
48
49
|
old_messages = target_type.messages + low_type.messages + high_type.messages
|
|
49
50
|
|
|
50
51
|
# if the target is NULL, the result will always be NULL (no matter the bounds)
|
|
51
|
-
if target_type.data_type == DataType.Type.NULL:
|
|
52
|
+
if target_type.data_type == DataType.Type.UNKNOWN or target_type.data_type == DataType.Type.NULL:
|
|
52
53
|
return AtomicType(data_type=expression.type.this, constant=True, messages=old_messages)
|
|
53
54
|
|
|
54
|
-
if low_type.data_type != target_type.data_type and low_type.data_type != DataType.Type.NULL:
|
|
55
|
+
if low_type.data_type != DataType.Type.UNKNOWN and low_type.data_type != target_type.data_type and low_type.data_type != DataType.Type.NULL:
|
|
55
56
|
|
|
56
57
|
# check for implicit casts
|
|
57
58
|
if (to_number(target_type) and not to_number(low_type)) or (to_date(target_type) and not to_date(low_type)):
|
|
58
59
|
old_messages.append(error_message(expression, low_type, target_type))
|
|
59
60
|
|
|
60
|
-
if high_type.data_type != target_type.data_type and high_type.data_type != DataType.Type.NULL:
|
|
61
|
+
if high_type.data_type != DataType.Type.UNKNOWN and high_type.data_type != target_type.data_type and high_type.data_type != DataType.Type.NULL:
|
|
61
62
|
|
|
62
63
|
# check for implicit casts
|
|
63
64
|
if (to_number(target_type) and not to_number(high_type)) or (to_date(target_type) and not to_date(high_type)):
|
|
@@ -73,6 +74,9 @@ def _(expression: exp.In, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
73
74
|
|
|
74
75
|
old_messages = target_type.messages
|
|
75
76
|
|
|
77
|
+
if target_type.data_type == DataType.Type.UNKNOWN:
|
|
78
|
+
return AtomicType(data_type=expression.type.this, messages=old_messages)
|
|
79
|
+
|
|
76
80
|
# Case IN (<list>)
|
|
77
81
|
for item in expression.expressions:
|
|
78
82
|
item_type = get_type(item, catalog, search_path)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from .base import get_type
|
|
2
2
|
from ...catalog import Catalog
|
|
3
3
|
from sqlglot import exp
|
|
4
|
-
from .types import ResultType, AtomicType,
|
|
4
|
+
from .types import ResultType, AtomicType, TupleType
|
|
5
|
+
from sqlglot.expressions import DataType
|
|
5
6
|
from .util import is_number, is_date, to_number, to_date, error_message
|
|
6
7
|
from ...util.ast.column import get_real_name, get_schema
|
|
7
8
|
|
|
@@ -42,8 +43,13 @@ def _(expression: exp.Cast, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
42
43
|
|
|
43
44
|
old_messages = original_type.messages
|
|
44
45
|
|
|
45
|
-
if
|
|
46
|
+
# if casting to unknown type, return error
|
|
47
|
+
if new_type == DataType.Type.USERDEFINED:
|
|
46
48
|
old_messages.append(error_message(expression, "Invalid type."))
|
|
49
|
+
return AtomicType(data_type=original_type.data_type, nullable=original_type.nullable, constant=original_type.constant, messages=old_messages, value=original_type.value)
|
|
50
|
+
|
|
51
|
+
if original_type.data_type == DataType.Type.UNKNOWN:
|
|
52
|
+
return AtomicType(data_type=new_type, messages=old_messages)
|
|
47
53
|
|
|
48
54
|
# handle cast to numeric types
|
|
49
55
|
if is_number(new_type) and not to_number(original_type):
|
|
@@ -65,7 +71,7 @@ def _(expression: exp.CurrentTimestamp, catalog: Catalog, search_path: str) -> R
|
|
|
65
71
|
@get_type.register
|
|
66
72
|
def _(expression: exp.Column, catalog: Catalog, search_path: str) -> ResultType:
|
|
67
73
|
if expression.type.this in (DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
|
|
68
|
-
return AtomicType(
|
|
74
|
+
return AtomicType() # unknown column
|
|
69
75
|
else:
|
|
70
76
|
schema = get_schema(expression) or search_path
|
|
71
77
|
table = get_real_name(expression)
|
|
@@ -25,6 +25,9 @@ class AtomicType(ResultType):
|
|
|
25
25
|
def __eq__(self, other):
|
|
26
26
|
if not isinstance(other, AtomicType):
|
|
27
27
|
return False
|
|
28
|
+
|
|
29
|
+
if other.data_type == DataType.Type.UNKNOWN or self.data_type == DataType.Type.UNKNOWN:
|
|
30
|
+
return True
|
|
28
31
|
|
|
29
32
|
# handle numeric equivalence (e.g. INT and FLOAT are compatible)
|
|
30
33
|
if self.data_type in DataType.NUMERIC_TYPES:
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from .base import get_type
|
|
2
2
|
from ...catalog import Catalog
|
|
3
3
|
from sqlglot import exp
|
|
4
|
-
from .types import ResultType, AtomicType
|
|
4
|
+
from .types import ResultType, AtomicType
|
|
5
|
+
from sqlglot.expressions import DataType
|
|
5
6
|
from .util import is_number, error_message
|
|
6
7
|
|
|
7
8
|
@get_type.register
|
|
@@ -10,6 +11,9 @@ def _(expression: exp.Neg, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
10
11
|
|
|
11
12
|
old_messages = inner_type.messages
|
|
12
13
|
|
|
14
|
+
if inner_type.data_type == DataType.Type.UNKNOWN:
|
|
15
|
+
return AtomicType(data_type=expression.type.this, messages=old_messages)
|
|
16
|
+
|
|
13
17
|
if not is_number(expression.type.this):
|
|
14
18
|
old_messages.append(error_message(expression, 'numeric', inner_type))
|
|
15
19
|
|
|
@@ -21,6 +25,9 @@ def _(expression: exp.Not, catalog: Catalog, search_path: str) -> ResultType:
|
|
|
21
25
|
|
|
22
26
|
old_messages = inner_type.messages
|
|
23
27
|
|
|
28
|
+
if inner_type.data_type == DataType.Type.UNKNOWN:
|
|
29
|
+
return AtomicType(data_type=expression.type.this, messages=old_messages)
|
|
30
|
+
|
|
24
31
|
if inner_type.data_type != DataType.Type.BOOLEAN:
|
|
25
32
|
old_messages.append(error_message(expression, 'boolean', inner_type))
|
|
26
33
|
|
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
'''Utility functions for processing SQL ASTs made with sqlglot.'''
|
|
2
2
|
|
|
3
|
-
from .
|
|
4
|
-
from .function import *
|
|
5
|
-
from .subquery import *
|
|
6
|
-
from .table import *
|
|
3
|
+
from . import column, function, subquery, table
|
|
7
4
|
|
|
8
5
|
import sqlglot.optimizer.normalize
|
|
9
6
|
from sqlglot import exp
|
|
@@ -13,22 +10,46 @@ def extract_DNF(expr) -> list[exp.Expression]:
|
|
|
13
10
|
'''Given a boolean expression, extract its Disjunctive Normal Form (DNF)'''
|
|
14
11
|
expr = deepcopy(expr) # Avoid modifying the original expression
|
|
15
12
|
|
|
13
|
+
# Remove outer parentheses
|
|
14
|
+
while isinstance(expr, exp.Paren):
|
|
15
|
+
expr = expr.this
|
|
16
|
+
|
|
16
17
|
dnf_expr = sqlglot.optimizer.normalize.normalize(expr, dnf=True)
|
|
17
18
|
|
|
18
19
|
if not isinstance(dnf_expr, exp.Or):
|
|
19
20
|
return [dnf_expr]
|
|
20
21
|
|
|
21
22
|
disjuncts = dnf_expr.flatten() # list Di (A1 OR A2 OR ... OR Dn)
|
|
22
|
-
|
|
23
|
+
|
|
24
|
+
result: list[exp.Expression] = []
|
|
25
|
+
for disj in disjuncts:
|
|
26
|
+
# Remove outer parentheses from each disjunct
|
|
27
|
+
while isinstance(disj, exp.Paren):
|
|
28
|
+
disj = disj.this
|
|
29
|
+
result.append(disj)
|
|
30
|
+
|
|
31
|
+
return result
|
|
23
32
|
|
|
24
33
|
def extract_CNF(expr) -> list[exp.Expression]:
|
|
25
34
|
'''Given a boolean expression, extract its Conjunctive Normal Form (CNF)'''
|
|
26
35
|
expr = deepcopy(expr) # Avoid modifying the original expression
|
|
27
36
|
|
|
37
|
+
# Remove outer parentheses
|
|
38
|
+
while isinstance(expr, exp.Paren):
|
|
39
|
+
expr = expr.this
|
|
40
|
+
|
|
28
41
|
cnf_expr = sqlglot.optimizer.normalize.normalize(expr, dnf=False)
|
|
29
42
|
|
|
30
43
|
if not isinstance(cnf_expr, exp.And):
|
|
31
44
|
return [cnf_expr]
|
|
32
45
|
|
|
33
46
|
conjuncts = cnf_expr.flatten() # list Ci (A1 AND A2 AND ... AND Cn)
|
|
34
|
-
|
|
47
|
+
|
|
48
|
+
result: list[exp.Expression] = []
|
|
49
|
+
for conj in conjuncts:
|
|
50
|
+
# Remove outer parentheses from each conjunct
|
|
51
|
+
while isinstance(conj, exp.Paren):
|
|
52
|
+
conj = conj.this
|
|
53
|
+
result.append(conj)
|
|
54
|
+
|
|
55
|
+
return result
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
'''Utility functions related to SQL tables in ASTs made with sqlglot.'''
|
|
2
2
|
|
|
3
|
-
import sqlglot.optimizer.normalize
|
|
4
3
|
from sqlglot import exp
|
|
5
|
-
from copy import deepcopy
|
|
6
4
|
|
|
7
5
|
def get_real_name(table: exp.Table) -> str:
|
|
8
6
|
'''Returns the table real name, in lowercase if unquoted.'''
|
|
@@ -21,6 +21,32 @@ def test_extraneous_grouping_column():
|
|
|
21
21
|
('col2', 'ONLY IN GROUP BY'),
|
|
22
22
|
)
|
|
23
23
|
|
|
24
|
+
def test_aggregated_column_in_group_by():
|
|
25
|
+
detected_errors = run_test(
|
|
26
|
+
query='SELECT id, SUM(col2) FROM store GROUP BY 1, 2',
|
|
27
|
+
detectors=[SyntaxErrorDetector],
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
assert count_errors(detected_errors, SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN) == 1
|
|
31
|
+
assert has_error(
|
|
32
|
+
detected_errors,
|
|
33
|
+
SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,
|
|
34
|
+
('SUM(col2)', 'AGGREGATED IN GROUP BY'),
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
def test_aggregated_column_in_group2():
|
|
38
|
+
detected_errors = run_test(
|
|
39
|
+
query='SELECT id, SUM(col2) FROM store GROUP BY id, SUM(col2)',
|
|
40
|
+
detectors=[SyntaxErrorDetector],
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
assert count_errors(detected_errors, SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN) == 1
|
|
44
|
+
assert has_error(
|
|
45
|
+
detected_errors,
|
|
46
|
+
SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,
|
|
47
|
+
('SUM(col2)', 'AGGREGATED IN GROUP BY'),
|
|
48
|
+
)
|
|
49
|
+
|
|
24
50
|
def test_omitted_grouping_column():
|
|
25
51
|
detected_errors = run_test(
|
|
26
52
|
query='SELECT id, col2, sum(col3) FROM store GROUP BY id',
|
|
@@ -10,11 +10,27 @@ ERROR = SqlErrors.SEM_40_TAUTOLOGICAL_OR_INCONSISTENT_EXPRESSION
|
|
|
10
10
|
[('tautology',)],
|
|
11
11
|
None
|
|
12
12
|
),
|
|
13
|
+
(
|
|
14
|
+
"SELECT * FROM orders WHERE (a = a)",
|
|
15
|
+
[('tautology',)],
|
|
16
|
+
None
|
|
17
|
+
),
|
|
13
18
|
(
|
|
14
19
|
"SELECT * FROM orders WHERE 1 = 0",
|
|
15
20
|
[('contradiction',), ('redundant_disjunct', '1 = 0')],
|
|
16
21
|
None
|
|
17
22
|
),
|
|
23
|
+
(
|
|
24
|
+
"SELECT * FROM orders WHERE a = b OR a <> a",
|
|
25
|
+
[('redundant_disjunct', 'a <> a')],
|
|
26
|
+
None
|
|
27
|
+
),
|
|
28
|
+
(
|
|
29
|
+
"SELECT * FROM orders WHERE (a = b OR a <> a)",
|
|
30
|
+
[('redundant_disjunct', 'a <> a')],
|
|
31
|
+
None
|
|
32
|
+
),
|
|
33
|
+
|
|
18
34
|
(
|
|
19
35
|
"SELECT * FROM orders WHERE (sal < 500 AND comm > 1000) OR sal >= 500",
|
|
20
36
|
[('redundant_conjunct', ('sal < 500 AND comm > 1000', 'sal < 500'))],
|
{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/query/test_typechecking.py
RENAMED
|
@@ -24,13 +24,6 @@ def test_type_columns(make_query):
|
|
|
24
24
|
|
|
25
25
|
assert result == ['decimal', 'varchar', 'varchar', 'varchar']
|
|
26
26
|
|
|
27
|
-
def test_wrong_column_reference(make_query):
|
|
28
|
-
sql = "SELECT pippo FROM store;"
|
|
29
|
-
query = make_query(sql, 'miedema')
|
|
30
|
-
|
|
31
|
-
messages = collect_errors(query.main_query.typed_ast, query.catalog, query.search_path)
|
|
32
|
-
assert messages == [("pippo", "unknown column type", None)]
|
|
33
|
-
|
|
34
27
|
@pytest.mark.parametrize('sql, expected_types', [
|
|
35
28
|
("SELECT 1 + (2 - '4') AS sum_col;", []),
|
|
36
29
|
("SELECT sid FROM store WHERE sid > '3';", []),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/datasets/catalogs/constraints.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/sql.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|