PyPI - sql-error-categorizer - Versions diffs - 0.1.8__tar.gz → 0.1.10__tar.gz - Mend

sql-error-categorizer 0.1.8.tar.gz → 0.1.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/Makefile RENAMED Viewed

@@ -12,7 +12,7 @@ else
 	VENV_BIN=$(VENV)/bin
 endif
-.PHONY: install build uninstall documentation test upload download clean ipython coverage
+.PHONY: install build uninstall documentation test upload download clean coverage
 $(VENV):
 	python -m venv --clear $(VENV)
@@ -34,7 +34,7 @@ uninstall: $(VENV)
 	$(VENV_BIN)/python -m pip uninstall -y $(NAME)
 documentation:
-	make html -C docs/
+	make html SPHINXBUILD="../$(VENV_BIN)/sphinx-build" -C docs/
 test: install
 	$(VENV_BIN)/python -m pytest
@@ -54,7 +54,4 @@ clean:
 	find . -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
 	rm -rf dist docs/_build .pytest_cache .coverage tests/htmlcov
-ipython:
-	$(VENV_BIN)/ipython
 ########## Makefile end ##########

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql_error_categorizer
-Version: 0.1.8
+Version: 0.1.10
 Summary: This project analyses SQL statements and labels possible errors or complications.
 Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
 Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/docs/conf.py RENAMED Viewed

@@ -29,7 +29,9 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 html_theme = 'alabaster'
-html_static_path = ['_static']
+html_static_path = [
+    # '_static',
+]
 # -- Autoapi -----------------------------------------------------------------

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sql_error_categorizer"
-version = "0.1.8"
+version = "0.1.10"
 authors = [
   { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
 ]

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/logical.py RENAMED Viewed

@@ -711,14 +711,14 @@ class LogicalErrorDetector(BaseDetector):
     def _selects_star(self, ast: dict) -> bool:
         '''
-        Checks if a 'SELECT *' is used in the query by looking for a 'Star'
+        Checks if a `SELECT *` is used in the query by looking for a 'Star'
         node in the AST's expression list.
         Args:
             ast: The Abstract Syntax Tree of the query.
         Returns:
-            True if 'SELECT *' is found, otherwise False.
+            True if `SELECT *` is found, otherwise False.
         '''
         if not ast:
             return False

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/detectors/syntax.py RENAMED Viewed

@@ -1,5 +1,6 @@
 '''Detector for syntax errors in SQL queries.'''
+from dataclasses import dataclass
 import difflib
 import re
 import sqlparse
@@ -752,10 +753,23 @@ class SyntaxErrorDetector(BaseDetector):
     def syn_16_extraneous_or_omitted_grouping_column(self) -> list[DetectedError]:
         '''
-        Enforces the SQL "single-value rule":
-        All selected columns must be either included in the GROUP BY clause or aggregated.
+            All columns in SELECT must be either included in the GROUP BY clause or aggregated.
+            All non-aggregated columns in HAVING must not be included in the GROUP BY clause.
         '''
+        @dataclass(frozen=True)
+        class ColumnInfo:
+            name: str
+            alias: str
+            is_aggregated: bool = False
+        def get_column_name(col: exp.Column | exp.Alias) -> ColumnInfo:
+            '''Return normalized column name and alias. If no alias, both are the same.'''
+            col_name = util.ast.column.get_real_name(col)
+            col_alias = util.ast.column.get_name(col)
+            return ColumnInfo(col_name, col_alias)
         results: list[DetectedError] = []
         for select in self.query.selects:
@@ -765,32 +779,29 @@ class SyntaxErrorDetector(BaseDetector):
             if not select.group_by:
                 continue    # no GROUP BY, skip
-            select_columns: list[tuple[str, str]] = [] # we need a list for positional GROUP BY handling
-            def get_column_name(col: exp.Column | exp.Alias) -> tuple[str, str]:
-                '''Return normalized column name and alias. If no alias, both are the same.'''
-                col_name = util.ast.column.get_real_name(col)
-                col_alias = util.ast.column.get_name(col)
-                return col_name, col_alias
+            select_columns: list[ColumnInfo] = [] # we need a list for positional GROUP BY handling
+            # Gather non-aggregated columns from SELECT
             for col in select.ast.expressions:
                 if isinstance(col, exp.Star):
                     # SELECT * case: expand to all columns from all referenced tables
                     for table in select.referenced_tables:
                         for table_col in table.columns:
-                            select_columns.append((table_col.name, table_col.name))
+                            select_columns.append(ColumnInfo(table_col.name, table_col.name))
                 if isinstance(col, exp.Column) or isinstance(col, exp.Alias):
                     col_name = get_column_name(col)
                     select_columns.append(col_name)
                 elif isinstance(col, exp.Func):
-                    continue  # aggregated, skip
+                    # aggregated, add the column but skip it later
+                    select_columns.append(ColumnInfo(col.sql(), col.sql(), is_aggregated=True))
                 else:
                     # Complex expression: try to extract columns
                     for c in col.find_all(exp.Column):
                         col_name = get_column_name(c)
                         select_columns.append(col_name)
-            group_by_columns = set()
+            # Gather columns from GROUP BY
+            group_by_columns: set[ColumnInfo] = set()
             for gb in select.group_by:
                 if isinstance(gb, exp.Column):
                     gb_name = get_column_name(gb)
@@ -803,21 +814,33 @@ class SyntaxErrorDetector(BaseDetector):
                             group_by_columns.add(select_columns[val - 1])
                     except ValueError:
                         continue
+                elif isinstance(gb, exp.AggFunc):
+                    group_by_columns.add(ColumnInfo(gb.sql(), gb.sql(), is_aggregated=True))
                 else:
                     # Complex expression in GROUP BY: try to extract columns
                     for c in gb.find_all(exp.Column):
                         gb_name = get_column_name(c)
                         group_by_columns.add(gb_name)
-            for sel_col, sel_alias in set(select_columns):  # convert to set to avoid outputting the same error multiple times
-                if any(sel_col == group_col or sel_alias == group_alias for group_col, group_alias in group_by_columns):
+            # Ensure all non-aggregated columns in SELECT are in GROUP BY
+            for select_col in set(select_columns):  # convert to set to avoid outputting the same error multiple times
+                if select_col.is_aggregated:
+                    continue    # aggregated, skip
+                if any(select_col.name == group_col.name or select_col.alias == group_col.alias for group_col in group_by_columns):
                     continue    # valid: in GROUP BY
-                results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(sel_col, 'ONLY IN SELECT')))
+                results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(select_col.name, 'ONLY IN SELECT')))
-            for group_col, group_alias in group_by_columns:
-                if any(group_col == select_col or group_alias == select_alias for select_col, select_alias in select_columns):
+            # Ensure all non-aggregated columns in GROUP BY are in SELECT
+            # (Note: aggregated columns in GROUP BY are invalid)
+            for group_col in group_by_columns:
+                if group_col.is_aggregated:
+                    results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col.name, 'AGGREGATED IN GROUP BY')))
+                    continue
+                if any(group_col.name == select_col.name or group_col.alias == select_col.alias for select_col in select_columns):
                     continue # valid: in SELECT
-                results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col, 'ONLY IN GROUP BY')))
+                results.append(DetectedError(SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,(group_col.name, 'ONLY IN GROUP BY')))
+            # Ensure all non-aggregated columns in HAVING are in GROUP BY
         return results

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/__init__.py RENAMED Viewed

@@ -78,11 +78,9 @@ def create_set_operation_tree(sql: str, catalog: Catalog = Catalog(), search_pat
 def parse_op_token(tok: sqlparse.sql.Token) -> tuple[str, bool | None] | None:
     '''
     Parse "UNION", "INTERSECT", "EXCEPT" with optional inline ALL/DISTINCT.
     Returns:
-        tuple: `(op, all_flag)` where all_flag is:
-        - True  if ALL inline (e.g., "UNION ALL")
-        - False if DISTINCT inline (e.g., "EXCEPT DISTINCT")
-        - None  if no modifier inline (so caller may look right).
+        tuple: `(op, all_flag)` where all_flag is: True  if ALL inline (e.g., "UNION ALL"); False if DISTINCT inline (e.g., "EXCEPT DISTINCT"); None  if no modifier inline (so caller may look right).
     '''
     if tok.ttype is not Keyword:
         return None
@@ -106,11 +104,9 @@ def split_on(tokens: list[sqlparse.sql.Token], idx: int, all_in_token: bool | No
     '''
     Splits around the operator at idx. If the modifier wasn't inline,
     consume a single immediate ALL/DISTINCT to the right.
     Returns:
-        tuple: A tuple containing:
-            - left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator.
-            - right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator
-            - all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
+        tuple: A tuple containing: left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator; right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator; all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
     '''
     left_tokens = tokens[:idx]
     right_tokens = tokens[idx + 1:]

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/binary_set_operation.py RENAMED Viewed

@@ -3,6 +3,7 @@ from ...catalog import Table, Constraint, ConstraintType, ConstraintColumn
 from abc import ABC
 from copy import deepcopy
+import sqlglot
 from sqlglot import exp
 from typing import TYPE_CHECKING
@@ -44,6 +45,18 @@ class BinarySetOperation(SetOperation, ABC):
         return result
+    @property
+    def trailing_ast(self) -> exp.Expression | None:
+        '''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
+        if self.trailing_sql is None:
+            return None
+        if self._trailing_ast is None:
+            # Parse trailing SQL with a fake SELECT to get valid AST
+            fake_sql = f'SELECT 1 {self.trailing_sql}'
+            parsed = sqlglot.parse_one(fake_sql)
+            self._trailing_ast = parsed
+        return self._trailing_ast
     @property
     def output(self) -> Table:
         # Assume the output schema is the same as the left input

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/set_operations/set_operation.py RENAMED Viewed

@@ -40,18 +40,6 @@ class SetOperation(ABC):
     @abstractmethod
     def print_tree(self, pre: str = '') -> None:
         pass
-    @property
-    def trailing_ast(self) -> exp.Expression | None:
-        '''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
-        if self.trailing_sql is None:
-            return None
-        if self._trailing_ast is None:
-            # Parse trailing SQL with a fake SELECT to get valid AST
-            fake_sql = f'SELECT 1 {self.trailing_sql}'
-            parsed = sqlglot.parse_one(fake_sql)
-            self._trailing_ast = parsed
-        return self._trailing_ast
     @property
     @abstractmethod

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/smt.py RENAMED Viewed

@@ -244,8 +244,8 @@ def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
         if wildcard_count > 2:
             return target == StringVal(pattern)
-        # PREFIX pattern: abc%
         if '%' in pattern and '_' not in pattern:
+            # PREFIX pattern: abc%
             if pattern.endswith('%') and pattern.count('%') == 1:
                 prefix = pattern[:-1]
                 return PrefixOf(StringVal(prefix), target)

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/base.py RENAMED Viewed

@@ -2,9 +2,8 @@ import sqlglot.expressions as exp
 from .types import AtomicType, ResultType
 from ...catalog import Catalog
 from functools import singledispatch
-from .util import error_message
 @singledispatch
 def get_type(expression: exp.Expression, catalog: Catalog, search_path: str) -> ResultType:
     '''Returns the type of the given SQL expression.'''
-    return AtomicType(messages=[error_message(expression, "Unknown expression type")])
+    return AtomicType() # Default to unhandled expression

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/binary_ops.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from .base import get_type
 from ...catalog import Catalog
 from sqlglot import exp
-from .types import ResultType, AtomicType, DataType
+from .types import ResultType, AtomicType
+from sqlglot.expressions import DataType
 from .util import is_number, to_number, to_date, error_message
 @get_type.register
@@ -18,13 +19,13 @@ def _(expression: exp.Binary, catalog: Catalog, search_path: str) -> ResultType:
     if left_type != right_type:
-        if not to_number(left_type) and left_type.data_type != DataType.Type.NULL:
+        if left_type.data_type != DataType.Type.UNKNOWN and not to_number(left_type) and left_type.data_type != DataType.Type.NULL:
             old_messages.append(error_message(expression, left_type, "numeric"))
-        if not to_number(right_type) and right_type.data_type != DataType.Type.NULL:
+        if right_type.data_type != DataType.Type.UNKNOWN and not to_number(right_type) and right_type.data_type != DataType.Type.NULL:
             old_messages.append(error_message(expression, right_type, "numeric"))
-    elif not is_number(left_type.data_type) and not is_number(right_type.data_type):
+    elif DataType.Type.UNKNOWN != left_type.data_type and not is_number(left_type.data_type) and not is_number(right_type.data_type):
         if left_type.data_type != DataType.Type.NULL or right_type.data_type != DataType.Type.NULL:
             old_messages.append(error_message(expression, left_type, "numeric"))
@@ -33,6 +34,9 @@ def _(expression: exp.Binary, catalog: Catalog, search_path: str) -> ResultType:
 # handle comparison typechecking (e.g =, <, >, etc.)
 def typecheck_comparisons(left_type: ResultType, right_type: ResultType, expression: exp.Binary, old_messages: list) -> ResultType:
+    if DataType.Type.UNKNOWN in (left_type.data_type, right_type.data_type):
+        return AtomicType(data_type=expression.type.this,messages=old_messages)
     # for boolean comparisons we can have only equality/inequality
     if DataType.Type.BOOLEAN == left_type.data_type == right_type.data_type:
         if not isinstance(expression, (exp.EQ, exp.NEQ)):

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/functions.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from .base import get_type
 from ...catalog import Catalog
 from sqlglot import exp
-from .types import ResultType, AtomicType, DataType
+from .types import ResultType, AtomicType
+from sqlglot.expressions import DataType
 from .util import is_number, error_message
 @get_type.register
@@ -16,7 +17,7 @@ def _(expression: exp.Avg, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
-    if not is_number(inner_type.data_type):
+    if inner_type.data_type != DataType.Type.UNKNOWN and not is_number(inner_type.data_type):
         old_messages.append(error_message(expression, inner_type, "NUMERIC"))
     return AtomicType(data_type=expression.type.this, nullable=True, constant=True, messages=old_messages)
@@ -27,7 +28,7 @@ def _(expression: exp.Sum, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
-    if not is_number(inner_type.data_type):
+    if inner_type.data_type != DataType.Type.UNKNOWN and not is_number(inner_type.data_type):
         old_messages.append(error_message(expression, inner_type, "NUMERIC"))
     return AtomicType(data_type=expression.type.this, nullable=True, constant=True, messages=old_messages)
@@ -38,7 +39,7 @@ def _(expression: exp.Min, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
-    if inner_type.data_type in (DataType.Type.BOOLEAN, DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
+    if inner_type.data_type != DataType.Type.UNKNOWN and inner_type.data_type == DataType.Type.BOOLEAN:
         old_messages.append(error_message(expression, inner_type))
     return AtomicType(data_type=inner_type.data_type, nullable=inner_type.nullable, constant=True, messages=old_messages)
@@ -49,7 +50,7 @@ def _(expression: exp.Max, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
-    if inner_type.data_type in (DataType.Type.BOOLEAN, DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
+    if inner_type.data_type != DataType.Type.UNKNOWN and inner_type.data_type == DataType.Type.BOOLEAN:
         old_messages.append(error_message(expression, inner_type))
     return AtomicType(data_type=inner_type.data_type, nullable=inner_type.nullable, constant=True, messages=old_messages)

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/predicates.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from .base import get_type
 from ...catalog import Catalog
 from sqlglot import exp
-from .types import ResultType, AtomicType, DataType
+from .types import ResultType, AtomicType
+from sqlglot.expressions import DataType
 from .util import is_string, to_number, to_date, error_message
 @get_type.register
@@ -11,10 +12,10 @@ def _(expression: exp.Like, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = left_type.messages + right_type.messages
-    if not is_string(left_type.data_type) and left_type.data_type != DataType.Type.NULL:
+    if left_type.data_type != DataType.Type.UNKNOWN and not is_string(left_type.data_type) and left_type.data_type != DataType.Type.NULL:
         old_messages.append(error_message(expression, left_type, 'string'))
-    if not is_string(right_type.data_type) and right_type.data_type != DataType.Type.NULL:
+    if right_type.data_type != DataType.Type.UNKNOWN and not is_string(right_type.data_type) and right_type.data_type != DataType.Type.NULL:
         old_messages.append(error_message(expression, right_type, 'string'))
     # Always returns boolean
@@ -32,7 +33,7 @@ def _(expression: exp.Is, catalog: Catalog, search_path: str) -> ResultType:
         old_messages.append(error_message(expression, right_type, 'boolean|null'))
     # if right is BOOLEAN and left is not NULL, left must be BOOLEAN
-    if right_type.data_type == DataType.Type.BOOLEAN and left_type.data_type != DataType.Type.NULL:
+    if left_type.data_type != DataType.Type.UNKNOWN and right_type.data_type == DataType.Type.BOOLEAN and left_type.data_type != DataType.Type.NULL:
         if left_type.data_type != DataType.Type.BOOLEAN:
             old_messages.append(error_message(expression, left_type, 'boolean'))
@@ -48,16 +49,16 @@ def _(expression: exp.Between, catalog: Catalog, search_path: str) -> ResultType
     old_messages = target_type.messages + low_type.messages + high_type.messages
     # if the target is NULL, the result will always be NULL (no matter the bounds)
-    if target_type.data_type == DataType.Type.NULL:
+    if target_type.data_type == DataType.Type.UNKNOWN or target_type.data_type == DataType.Type.NULL:
         return AtomicType(data_type=expression.type.this, constant=True, messages=old_messages)
-    if low_type.data_type != target_type.data_type and low_type.data_type != DataType.Type.NULL:
+    if low_type.data_type != DataType.Type.UNKNOWN and low_type.data_type != target_type.data_type and low_type.data_type != DataType.Type.NULL:
         # check for implicit casts
         if (to_number(target_type) and not to_number(low_type)) or (to_date(target_type) and not to_date(low_type)):
             old_messages.append(error_message(expression, low_type, target_type))
-    if high_type.data_type != target_type.data_type and high_type.data_type != DataType.Type.NULL:
+    if high_type.data_type != DataType.Type.UNKNOWN and high_type.data_type != target_type.data_type and high_type.data_type != DataType.Type.NULL:
         # check for implicit casts
         if (to_number(target_type) and not to_number(high_type)) or (to_date(target_type) and not to_date(high_type)):
@@ -73,6 +74,9 @@ def _(expression: exp.In, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = target_type.messages
+    if target_type.data_type == DataType.Type.UNKNOWN:
+        return AtomicType(data_type=expression.type.this, messages=old_messages)
     # Case IN (<list>)
     for item in expression.expressions:
         item_type = get_type(item, catalog, search_path)

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/primitives.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from .base import get_type
 from ...catalog import Catalog
 from sqlglot import exp
-from .types import ResultType, AtomicType, DataType, TupleType
+from .types import ResultType, AtomicType, TupleType
+from sqlglot.expressions import DataType
 from .util import is_number, is_date, to_number, to_date, error_message
 from ...util.ast.column import get_real_name, get_schema
@@ -42,8 +43,13 @@ def _(expression: exp.Cast, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = original_type.messages
-    if new_type in (DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
+    # if casting to unknown type, return error
+    if new_type == DataType.Type.USERDEFINED:
         old_messages.append(error_message(expression, "Invalid type."))
+        return AtomicType(data_type=original_type.data_type, nullable=original_type.nullable, constant=original_type.constant, messages=old_messages, value=original_type.value)
+    if original_type.data_type == DataType.Type.UNKNOWN:
+        return AtomicType(data_type=new_type, messages=old_messages)
     # handle cast to numeric types
     if is_number(new_type) and not to_number(original_type):
@@ -65,7 +71,7 @@ def _(expression: exp.CurrentTimestamp, catalog: Catalog, search_path: str) -> R
 @get_type.register
 def _(expression: exp.Column, catalog: Catalog, search_path: str) -> ResultType:
     if expression.type.this in (DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
-        return AtomicType(messages=[error_message(expression.name, "Unknown column type")])
+        return AtomicType() # unknown column
     else:
         schema = get_schema(expression) or search_path
         table = get_real_name(expression)

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/types.py RENAMED Viewed

@@ -25,6 +25,9 @@ class AtomicType(ResultType):
     def __eq__(self, other):
         if not isinstance(other, AtomicType):
             return False
+        if other.data_type == DataType.Type.UNKNOWN or self.data_type == DataType.Type.UNKNOWN:
+            return True
         # handle numeric equivalence (e.g. INT and FLOAT are compatible)
         if self.data_type in DataType.NUMERIC_TYPES:

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/unary_ops.py RENAMED Viewed

@@ -1,7 +1,8 @@
 from .base import get_type
 from ...catalog import Catalog
 from sqlglot import exp
-from .types import ResultType, AtomicType, DataType
+from .types import ResultType, AtomicType
+from sqlglot.expressions import DataType
 from .util import is_number, error_message
 @get_type.register
@@ -10,6 +11,9 @@ def _(expression: exp.Neg, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
+    if inner_type.data_type == DataType.Type.UNKNOWN:
+        return AtomicType(data_type=expression.type.this, messages=old_messages)
     if not is_number(expression.type.this):
         old_messages.append(error_message(expression, 'numeric', inner_type))
@@ -21,6 +25,9 @@ def _(expression: exp.Not, catalog: Catalog, search_path: str) -> ResultType:
     old_messages = inner_type.messages
+    if inner_type.data_type == DataType.Type.UNKNOWN:
+        return AtomicType(data_type=expression.type.this, messages=old_messages)
     if inner_type.data_type != DataType.Type.BOOLEAN:
         old_messages.append(error_message(expression, 'boolean', inner_type))

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/query/typechecking/util.py RENAMED Viewed

@@ -1,4 +1,5 @@
-from .types import ResultType, DataType
+from .types import ResultType
+from sqlglot.expressions import DataType
 from dateutil.parser import parse
 from sqlglot import exp

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/sql_errors.py RENAMED Viewed

@@ -1,7 +1,7 @@
-from enum import Enum
+from enum import IntEnum
-class SqlErrors(Enum):
+class SqlErrors(IntEnum):
     '''Enumeration of SQL error types with unique identifiers.'''
     SYN_1_OMITTING_CORRELATION_NAMES                                    = 1
     SYN_2_AMBIGUOUS_COLUMN                                              = 2

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/__init__.py RENAMED Viewed

@@ -1,9 +1,6 @@
 '''Utility functions for processing SQL ASTs made with sqlglot.'''
-from .column import *
-from .function import *
-from .subquery import *
-from .table import *
+from . import column, function, subquery, table
 import sqlglot.optimizer.normalize
 from sqlglot import exp
@@ -13,22 +10,46 @@ def extract_DNF(expr) -> list[exp.Expression]:
     '''Given a boolean expression, extract its Disjunctive Normal Form (DNF)'''
     expr = deepcopy(expr)       # Avoid modifying the original expression
+    # Remove outer parentheses
+    while isinstance(expr, exp.Paren):
+        expr = expr.this
     dnf_expr = sqlglot.optimizer.normalize.normalize(expr, dnf=True)
     if not isinstance(dnf_expr, exp.Or):
         return [dnf_expr]
     disjuncts = dnf_expr.flatten()  # list Di (A1 OR A2 OR ... OR Dn)
-    return list(disjuncts)
+    result: list[exp.Expression] = []
+    for disj in disjuncts:
+        # Remove outer parentheses from each disjunct
+        while isinstance(disj, exp.Paren):
+            disj = disj.this
+        result.append(disj)
+    return result
 def extract_CNF(expr) -> list[exp.Expression]:
     '''Given a boolean expression, extract its Conjunctive Normal Form (CNF)'''
     expr = deepcopy(expr)       # Avoid modifying the original expression
+    # Remove outer parentheses
+    while isinstance(expr, exp.Paren):
+        expr = expr.this
     cnf_expr = sqlglot.optimizer.normalize.normalize(expr, dnf=False)
     if not isinstance(cnf_expr, exp.And):
         return [cnf_expr]
     conjuncts = cnf_expr.flatten()  # list Ci (A1 AND A2 AND ... AND Cn)
-    return list(conjuncts)
+    result: list[exp.Expression] = []
+    for conj in conjuncts:
+        # Remove outer parentheses from each conjunct
+        while isinstance(conj, exp.Paren):
+            conj = conj.this
+        result.append(conj)
+    return result

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/src/sql_error_categorizer/util/ast/table.py RENAMED Viewed

@@ -1,8 +1,6 @@
 '''Utility functions related to SQL tables in ASTs made with sqlglot.'''
-import sqlglot.optimizer.normalize
 from sqlglot import exp
-from copy import deepcopy
 def get_real_name(table: exp.Table) -> str:
     '''Returns the table real name, in lowercase if unquoted.'''

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/1_syn/test_016_extraneous_omitted_grouping_column.py RENAMED Viewed

@@ -21,6 +21,32 @@ def test_extraneous_grouping_column():
         ('col2', 'ONLY IN GROUP BY'),
     )
+def test_aggregated_column_in_group_by():
+    detected_errors = run_test(
+        query='SELECT id, SUM(col2) FROM store GROUP BY 1, 2',
+        detectors=[SyntaxErrorDetector],
+    )
+    assert count_errors(detected_errors, SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN) == 1
+    assert has_error(
+        detected_errors,
+        SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,
+        ('SUM(col2)', 'AGGREGATED IN GROUP BY'),
+    )
+def test_aggregated_column_in_group2():
+    detected_errors = run_test(
+        query='SELECT id, SUM(col2) FROM store GROUP BY id, SUM(col2)',
+        detectors=[SyntaxErrorDetector],
+    )
+    assert count_errors(detected_errors, SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN) == 1
+    assert has_error(
+        detected_errors,
+        SqlErrors.SYN_16_EXTRANEOUS_OR_OMITTED_GROUPING_COLUMN,
+        ('SUM(col2)', 'AGGREGATED IN GROUP BY'),
+    )
 def test_omitted_grouping_column():
     detected_errors = run_test(
         query='SELECT id, col2, sum(col3) FROM store GROUP BY id',

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/detectors/2_sem/test_040_tautological_inconsistent_expressions.py RENAMED Viewed

@@ -10,11 +10,27 @@ ERROR = SqlErrors.SEM_40_TAUTOLOGICAL_OR_INCONSISTENT_EXPRESSION
         [('tautology',)],
         None
     ),
+    (
+        "SELECT * FROM orders WHERE (a = a)",
+        [('tautology',)],
+        None
+    ),
     (
         "SELECT * FROM orders WHERE 1 = 0",
         [('contradiction',), ('redundant_disjunct', '1 = 0')],
         None
     ),
+    (
+        "SELECT * FROM orders WHERE a = b OR a <> a",
+        [('redundant_disjunct', 'a <> a')],
+        None
+    ),
+    (
+        "SELECT * FROM orders WHERE (a = b OR a <> a)",
+        [('redundant_disjunct', 'a <> a')],
+        None
+    ),
     (
         "SELECT * FROM orders WHERE (sal < 500 AND comm > 1000) OR sal >= 500",
         [('redundant_conjunct', ('sal < 500 AND comm > 1000', 'sal < 500'))],

{sql_error_categorizer-0.1.8 → sql_error_categorizer-0.1.10}/tests/query/test_typechecking.py RENAMED Viewed

@@ -24,13 +24,6 @@ def test_type_columns(make_query):
     assert result == ['decimal', 'varchar', 'varchar', 'varchar']
-def test_wrong_column_reference(make_query):
-    sql = "SELECT pippo FROM store;"
-    query = make_query(sql, 'miedema')
-    messages = collect_errors(query.main_query.typed_ast, query.catalog, query.search_path)
-    assert messages == [("pippo", "unknown column type", None)]
 @pytest.mark.parametrize('sql, expected_types', [
     ("SELECT 1 + (2 - '4') AS sum_col;", []),
     ("SELECT sid FROM store WHERE sid > '3';", []),