sql-error-categorizer 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/.gitignore +5 -1
  2. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/PKG-INFO +1 -1
  3. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/pyproject.toml +1 -1
  4. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/semantic.py +3 -1
  5. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/syntax.py +74 -75
  6. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/query.py +1 -15
  7. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/set_operations/binary_set_operation.py +4 -0
  8. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/set_operations/select.py +5 -9
  9. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/set_operations/set_operation.py +6 -0
  10. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/smt.py +161 -35
  11. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/predicates.py +2 -0
  12. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/primitives.py +3 -2
  13. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/queries.py +3 -3
  14. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/ast/column.py +11 -0
  15. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_002_ambiguous_column.py +5 -3
  16. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_004_undefined_column.py +5 -3
  17. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_005_undefined_function.py +5 -3
  18. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_006_undefined_parameter.py +5 -3
  19. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_007_undefined_tables.py +5 -3
  20. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_008_invalid_schema_names.py +5 -3
  21. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_009_misspellings.py +5 -3
  22. sql_error_categorizer-0.1.8/tests/detectors/1_syn/test_013_data_type_mismatch.py +50 -0
  23. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_014_aggregate_function_outside_select_or_having.py +5 -3
  24. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_015_nested_aggregate_functions.py +5 -4
  25. sql_error_categorizer-0.1.8/tests/detectors/1_syn/test_035_is_where_not_applicable.py +49 -0
  26. sql_error_categorizer-0.1.8/tests/detectors/2_sem/test_040_tautological_inconsistent_expressions.py +101 -0
  27. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_092_unnecessary_distinct_in_aggregate_function.py +5 -3
  28. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_095_group_by_with_singleton_groups.py +5 -3
  29. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_097_group_by_can_be_replaced_by_distinct.py +5 -3
  30. sql_error_categorizer-0.1.7/tests/detectors/2_sem/test_040_tautological_inconsistent_expressions.py +0 -135
  31. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/.readthedocs.yaml +0 -0
  32. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/LICENSE +0 -0
  33. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/Makefile +0 -0
  34. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/README.md +0 -0
  35. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/datasets/catalogs/constraints.json +0 -0
  36. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/datasets/catalogs/miedema.json +0 -0
  37. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/datasets/sql/constraints.sql +0 -0
  38. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/datasets/sql/miedema.sql +0 -0
  39. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/docs/Makefile +0 -0
  40. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/docs/conf.py +0 -0
  41. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/docs/index.rst +0 -0
  42. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/docs/make.bat +0 -0
  43. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/docs/requirements.txt +0 -0
  44. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/requirements.txt +0 -0
  45. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/__init__.py +0 -0
  46. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/__init__.py +0 -0
  47. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/builder/__init__.py +0 -0
  48. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/builder/queries.py +0 -0
  49. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/catalog.py +0 -0
  50. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/column.py +0 -0
  51. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/constraint.py +0 -0
  52. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/schema.py +0 -0
  53. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/catalog/table.py +0 -0
  54. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/__init__.py +0 -0
  55. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/base.py +0 -0
  56. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/complications.py +0 -0
  57. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/detectors/logical.py +0 -0
  58. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/__init__.py +0 -0
  59. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/extractors.py +0 -0
  60. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/set_operations/__init__.py +0 -0
  61. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/tokenized_sql.py +0 -0
  62. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/__init__.py +0 -0
  63. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/base.py +0 -0
  64. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/binary_ops.py +0 -0
  65. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/functions.py +0 -0
  66. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/types.py +0 -0
  67. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/unary_ops.py +0 -0
  68. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/query/typechecking/util.py +0 -0
  69. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/sql_errors.py +0 -0
  70. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/__init__.py +0 -0
  71. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/ast/__init__.py +0 -0
  72. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/ast/function.py +0 -0
  73. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/ast/subquery.py +0 -0
  74. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/ast/table.py +0 -0
  75. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/sql.py +0 -0
  76. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/src/sql_error_categorizer/util/tokens.py +0 -0
  77. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/test_detector.py +0 -0
  78. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/__init__.py +0 -0
  79. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_016_extraneous_omitted_grouping_column.py +0 -0
  80. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_017_having_without_group_by.py +0 -0
  81. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_019_using_where_twice.py +0 -0
  82. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_020_missing_from.py +0 -0
  83. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_021_comparison_with_null.py +0 -0
  84. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_022_038_additional_omitted_semicolons.py +0 -0
  85. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_024_duplicate_clause.py +0 -0
  86. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_026_too_many_columns_in_subquery.py +0 -0
  87. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_030_keywords_order.py +0 -0
  88. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_034_curly_square_or_unmatched_brackets.py +0 -0
  89. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/1_syn/test_037_nonstandard_operators.py +0 -0
  90. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/2_sem/test_041_distinct_sum_avg.py +0 -0
  91. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/2_sem/test_043_wildcards_without_like.py +0 -0
  92. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/2_sem/test_044_incorrect_wildcards.py +0 -0
  93. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_058_join_on_incorrect_table.py +0 -0
  94. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_059_join_when_join_needs_to_be_omitted.py +0 -0
  95. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_062_missing_join.py +0 -0
  96. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_070_extraneous_column_in_select.py +0 -0
  97. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_071_missing_column_from_select.py +0 -0
  98. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_072_missing_distinct_from_select.py +0 -0
  99. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/3_log/test_073_missing_as_from_select.py +0 -0
  100. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_083_unnecessary_distinct_in_select.py +0 -0
  101. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_088_like_no_wildcards.py +0 -0
  102. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/detectors/4_com/test_100_order_by_in_subquery.py +0 -0
  103. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/query/test_extractors.py +0 -0
  104. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/query/test_query.py +0 -0
  105. {sql_error_categorizer-0.1.7 → sql_error_categorizer-0.1.8}/tests/query/test_typechecking.py +0 -0
@@ -178,5 +178,9 @@ cython_debug/
178
178
  # PyPI configuration file
179
179
  .pypirc
180
180
 
181
+ <<<<<<< Updated upstream
181
182
  # VS Code
182
- .vscode/
183
+ .vscode/
184
+ =======
185
+ .vscode
186
+ >>>>>>> Stashed changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_error_categorizer
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: This project analyses SQL statements and labels possible errors or complications.
5
5
  Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
6
6
  Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql_error_categorizer"
7
- version = "0.1.7"
7
+ version = "0.1.8"
8
8
  authors = [
9
9
  { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
10
10
  ]
@@ -74,10 +74,12 @@ class SemanticErrorDetector(BaseDetector):
74
74
 
75
75
  dnf = util.ast.extract_DNF(where)
76
76
 
77
+
77
78
  # Refer to Brass & Goldberg, 2006 for these checks (error #8)
78
79
  # (1) whole formula
79
80
  try:
80
- whole = Or(*[smt.sql_to_z3(C, variables) for C in dnf])
81
+ whole_clauses = [smt.sql_to_z3(C, variables) for C in dnf]
82
+ whole = Or(*whole_clauses)
81
83
  except Exception:
82
84
  continue # skip if cannot convert to z3
83
85
 
@@ -7,6 +7,9 @@ from sqlglot import exp
7
7
  from typing import Callable
8
8
  from copy import deepcopy
9
9
 
10
+ from sql_error_categorizer.query.set_operations.set_operation import SetOperation
11
+ from ..query.typechecking import get_type, collect_errors
12
+
10
13
  from .base import BaseDetector, DetectedError
11
14
  from ..query import Query
12
15
  from ..sql_errors import SqlErrors
@@ -659,90 +662,51 @@ class SyntaxErrorDetector(BaseDetector):
659
662
  def syn_12_failure_to_specify_column_name_twice(self) -> list[DetectedError]:
660
663
  return []
661
664
 
662
- # TODO: refactor, needs AST
663
665
  def syn_13_data_type_mismatch(self) -> list[DetectedError]:
664
666
  '''
665
667
  Checks for data type mismatches in comparisons within the query.
666
668
  '''
667
669
 
668
- return []
669
-
670
- # Check for data type mismatches in the query.
671
- results: list[DetectedError] = []
670
+ def parse_set_operation(set_op: 'SetOperation', location: str) -> list[DetectedError]:
672
671
 
673
- comparison_operators = {'=', '<>', '!=', '<', '>', '<=', '>='}
672
+ '''
673
+ Util function to parse a SetOperation and check for data type mismatches among its main selects.
674
+ '''
675
+ errors: list[DetectedError] = []
676
+ expected_output = None # type of the first select's output
677
+ for select in set_op.main_selects:
674
678
 
675
-
676
- tokens = self.tokens
677
- alias_map = self.query_map.get('alias_mapping', {})
678
- all_table_columns = self.catalog.get("table_columns", {})
679
- column_metadata = self.catalog.get("column_metadata", {})
679
+ typed_ast = select.typed_ast
680
+
681
+ if typed_ast is None:
682
+ continue
680
683
 
681
- # Build reverse alias map for resolving columns to tables
682
- column_to_table = {}
683
- for table, columns in all_table_columns.items():
684
- for col in columns:
685
- column_to_table.setdefault(col.lower(), set()).add(table)
684
+ columns_type = get_type(typed_ast, select.catalog, select.search_path)
686
685
 
687
- i = 0
688
- while i < len(tokens):
689
- tt, val = tokens[i]
690
- if val in comparison_operators:
691
- lhs_token = tokens[i - 1] if i - 1 >= 0 else None
692
- rhs_token = tokens[i + 1] if i + 1 < len(tokens) else None
693
-
694
- lhs_type = rhs_type = None
695
-
696
- # --- LHS type resolution ---
697
- if lhs_token:
698
- lhs_val = lhs_token[1].strip('"`')
699
- if '.' in lhs_val:
700
- tbl, col = lhs_val.split('.', 1)
701
- tbl = alias_map.get(tbl, [tbl])[0] if tbl in alias_map else tbl
702
- lhs_type = column_metadata.get(tbl, {}).get(col, {}).get("type")
703
- elif lhs_val.lower() in column_to_table:
704
- for tbl in column_to_table[lhs_val.lower()]:
705
- t = column_metadata.get(tbl, {}).get(lhs_val, {}).get("type")
706
- if t:
707
- lhs_type = t
708
- break
709
- elif lhs_val.startswith("'") and lhs_val.endswith("'"):
710
- lhs_type = "text"
711
- elif re.match(r'^\d+\.\d+$', lhs_val):
712
- lhs_type = "float"
713
- elif lhs_val.isdigit():
714
- lhs_type = "int"
715
-
716
- # --- RHS type resolution ---
717
- if rhs_token:
718
- rhs_val = rhs_token[1].strip('"`')
719
- if '.' in rhs_val:
720
- tbl, col = rhs_val.split('.', 1)
721
- tbl = alias_map.get(tbl, [tbl])[0] if tbl in alias_map else tbl
722
- rhs_type = column_metadata.get(tbl, {}).get(col, {}).get("type")
723
- elif rhs_val.lower() in column_to_table:
724
- for tbl in column_to_table[rhs_val.lower()]:
725
- t = column_metadata.get(tbl, {}).get(rhs_val, {}).get("type")
726
- if t:
727
- rhs_type = t
728
- break
729
- elif rhs_val.startswith("'") and rhs_val.endswith("'"):
730
- rhs_type = "text"
731
- elif re.match(r'^\d+\.\d+$', rhs_val):
732
- rhs_type = "float"
733
- elif rhs_val.isdigit():
734
- rhs_type = "int"
735
-
736
- # --- Check mismatch ---
737
- if lhs_type and rhs_type and not self._are_types_compatible(lhs_type, rhs_type):
738
- results.append((
739
- SqlErrors.SYN_13_DATA_TYPE_MISMATCH,
740
- f"Comparison type mismatch: {lhs_token[1]} ({lhs_type}) {val} {rhs_token[1]} ({rhs_type})"
741
- ))
686
+ # 1st select: set expected output type
687
+ if expected_output is None:
688
+ expected_output = columns_type
689
+ else:
690
+ # compare with expected output type
691
+ if expected_output != columns_type:
692
+ errors.append(DetectedError(SqlErrors.SYN_13_DATA_TYPE_MISMATCH, (location,"setop types inconsistent")))
742
693
 
743
- i += 1
694
+ # load found messages
695
+ for message in columns_type.messages:
696
+ errors.append(DetectedError(SqlErrors.SYN_13_DATA_TYPE_MISMATCH, message))
697
+
698
+ return errors
699
+
700
+ results: list[DetectedError] = []
701
+
702
+ # CTEs
703
+ for cte in self.query.ctes:
704
+ results.extend(parse_set_operation(cte, f"CTE {cte.output.name}"))
744
705
 
745
- return results
706
+ # Main Query
707
+ results.extend(parse_set_operation(self.query.main_query, "Main Query"))
708
+
709
+ return results
746
710
 
747
711
  def syn_14_aggregate_function_outside_select_or_having(self) -> list[DetectedError]:
748
712
  '''
@@ -1284,9 +1248,44 @@ class SyntaxErrorDetector(BaseDetector):
1284
1248
 
1285
1249
  return results
1286
1250
 
1287
- #TODO: implement
1251
+
1288
1252
  def syn_35_is_where_not_applicable(self) -> list[DetectedError]:
1289
- return []
1253
+ '''
1254
+ Find all erroneous usages of IS where it is not applicable
1255
+ '''
1256
+
1257
+ def parse_set_operation(set_operation: 'SetOperation') -> list[DetectedError]:
1258
+ '''
1259
+ Util function to parse a SetOperation and check for invalid usage of IS in all its main selects.
1260
+ '''
1261
+
1262
+ errors: list[DetectedError] = []
1263
+ for select in set_operation.main_selects:
1264
+
1265
+ typed_ast = select.typed_ast
1266
+
1267
+ if typed_ast is None:
1268
+ continue
1269
+
1270
+ for is_expr in typed_ast.find_all(exp.Is):
1271
+ for error in collect_errors(is_expr, select.catalog, select.search_path):
1272
+
1273
+ # if the expected type is boolean|null, it means that the part after IS is not valid
1274
+ if error[2] == 'boolean|null':
1275
+ errors.append(DetectedError(SqlErrors.SYN_35_IS_WHERE_NOT_APPLICABLE, error))
1276
+
1277
+ return errors
1278
+
1279
+ results: list[DetectedError] = []
1280
+
1281
+ # CTEs
1282
+ for cte in self.query.ctes:
1283
+ results.extend(parse_set_operation(cte))
1284
+
1285
+ # Main Query
1286
+ results.extend(parse_set_operation(self.query.main_query))
1287
+
1288
+ return results
1290
1289
 
1291
1290
  #TODO: implement
1292
1291
  def syn_36_nonstandard_keywords_or_standard_keywords_in_wrong_context(self) -> list[DetectedError]:
@@ -121,21 +121,7 @@ class Query(TokenizedSQL):
121
121
 
122
122
  result.extend(self.main_query.selects)
123
123
 
124
- return result
125
-
126
-
127
- @property
128
- def main_selects(self) -> list[Select]:
129
-
130
- def _gather_selects_from_set_operation(so: SetOperation) -> list[Select]:
131
- if isinstance(so, Select):
132
- return [so]
133
- elif isinstance(so, BinarySetOperation):
134
- return _gather_selects_from_set_operation(so.left) + _gather_selects_from_set_operation(so.right)
135
- else:
136
- return []
137
-
138
- return _gather_selects_from_set_operation(self.main_query)
124
+ return result
139
125
 
140
126
  @property
141
127
  def output_columns_source(self) -> set[tuple[str, str | None, str]]:
@@ -74,6 +74,10 @@ class BinarySetOperation(SetOperation, ABC):
74
74
  print( f'{pre}`- Right:')
75
75
  self.right.print_tree(pre= f'{pre} ')
76
76
 
77
+ @property
78
+ def main_selects(self) -> list['Select']:
79
+ return self.left.main_selects + self.right.main_selects
80
+
77
81
  @property
78
82
  def selects(self) -> list['Select']:
79
83
  return self.left.selects + self.right.selects
@@ -84,18 +84,10 @@ class Select(SetOperation, TokenizedSQL):
84
84
  # Table: look it up in the IN catalog
85
85
  elif isinstance(expr, exp.Table):
86
86
  # schema name
87
- schema_name = util.ast.table.get_schema(expr)
87
+ schema_name = util.ast.table.get_schema(expr) or self.search_path
88
88
  table_name_in = util.ast.table.get_real_name(expr)
89
89
  table_name_out = util.ast.table.get_name(expr)
90
90
 
91
- if schema_name is None:
92
- # If no schema is specified, try to find the table in the CTEs
93
- if self.catalog.has_table(schema_name='', table_name=table_name_in):
94
- schema_name = ''
95
- # If not found in CTEs, use the search path
96
- else:
97
- schema_name = self.search_path
98
-
99
91
  # check if the table exists in the catalog
100
92
  if self.catalog.has_table(schema_name=schema_name, table_name=table_name_in):
101
93
  # Table exists
@@ -651,6 +643,10 @@ class Select(SetOperation, TokenizedSQL):
651
643
  return int(offset_exp.expression.this)
652
644
  except ValueError:
653
645
  return None
646
+
647
+ @property
648
+ def main_selects(self) -> list['Select']:
649
+ return [self]
654
650
 
655
651
  @property
656
652
  def selects(self) -> list['Select']:
@@ -52,6 +52,12 @@ class SetOperation(ABC):
52
52
  parsed = sqlglot.parse_one(fake_sql)
53
53
  self._trailing_ast = parsed
54
54
  return self._trailing_ast
55
+
56
+ @property
57
+ @abstractmethod
58
+ def main_selects(self) -> list['Select']:
59
+ '''Returns a list of selects that are part of a set operation.'''
60
+ return []
55
61
 
56
62
  @property
57
63
  @abstractmethod
@@ -12,31 +12,93 @@ from z3 import (
12
12
  unsat,
13
13
  is_expr,
14
14
  BoolSort,
15
- ExprRef
15
+ ExprRef,
16
+ Re,
17
+ AllChar,
18
+ Concat,
19
+ InRe,
20
+ PrefixOf,
21
+ SuffixOf,
22
+ Contains,
16
23
  )
17
24
 
18
25
  from ..catalog import Table
19
26
 
20
- def create_z3_var(variables: dict[str, Any], table_name: str | None, col_name: str, col_type: Callable[[str], ExprRef] | None = None) -> None:
27
+
28
+ # ----------------------------------------------------------------------
29
+ # Z3 variable creation
30
+ # ----------------------------------------------------------------------
31
+
32
+ def create_z3_var(variables: dict[str, Any], table_name: str | None,
33
+ col_name: str, col_type: Callable[[str], ExprRef] | None = None) -> None:
21
34
  '''
22
- Create a Z3 variable for the given column name and type, and add it to the variables dictionary.
23
- If col_type is None, default to Int.
35
+ Create a Z3 variable for the given column name and type, and add it to the
36
+ variables dictionary. If col_type is None, default to Int.
24
37
  '''
25
-
26
38
  if col_type is None:
27
39
  col_type = Int # default type
28
40
 
29
- # Add both unqualified and qualified names and null flags
41
+ # unqualified
30
42
  variables[col_name] = col_type(col_name)
31
43
  variables[f'{col_name}_isnull'] = Bool(f'{col_name}_isnull')
32
44
 
45
+ # qualified
33
46
  if table_name:
34
47
  variables[f'{table_name}.{col_name}'] = col_type(f'{table_name}.{col_name}')
35
48
  variables[f'{table_name}.{col_name}_isnull'] = Bool(f'{table_name}.{col_name}_isnull')
36
49
 
50
+
51
+ def fresh_symbol(prefix: str, sort: str):
52
+ '''Generate a fresh Z3 symbol with the given prefix and sort.'''
53
+ if sort == 'int':
54
+ return Int(f'{prefix}_{id(prefix)}')
55
+ if sort == 'real':
56
+ return Real(f'{prefix}_{id(prefix)}')
57
+ if sort == 'bool':
58
+ return Bool(f'{prefix}_{id(prefix)}')
59
+ return String(f'{prefix}_{id(prefix)}')
60
+
61
+
62
+ # ----------------------------------------------------------------------
63
+ # Infer expected type of a subquery based on parent expression
64
+ # ----------------------------------------------------------------------
65
+
66
+ def infer_subquery_sort_from_parent(expr) -> str:
67
+ '''
68
+ Infer the expected Z3 sort of a subquery based on its parent expression.
69
+ '''
70
+ parent = expr.parent
71
+
72
+ # Arithmetic context → numeric
73
+ if isinstance(parent, (exp.Add, exp.Sub, exp.Mul, exp.Div, exp.Mod, exp.Pow)):
74
+ return 'real'
75
+
76
+ # Comparison context → numeric
77
+ if isinstance(parent, (exp.GT, exp.GTE, exp.LT, exp.LTE)):
78
+ return 'real'
79
+
80
+ # BETWEEN → numeric
81
+ if isinstance(parent, exp.Between):
82
+ return 'real'
83
+
84
+ # LIKE → string
85
+ if isinstance(parent, exp.Like):
86
+ return 'string'
87
+
88
+ # String concatenation (|| operator)
89
+ if isinstance(parent, exp.Concat):
90
+ return 'string'
91
+
92
+ # Default: boolean (EXISTS, WHERE (...))
93
+ return 'bool'
94
+
95
+
96
+ # ----------------------------------------------------------------------
97
+ # Catalog → Z3 vars
98
+ # ----------------------------------------------------------------------
99
+
37
100
  def catalog_table_to_z3_vars(table: Table) -> dict[str, ExprRef]:
38
101
  '''Convert catalog table columns to Z3 variables.'''
39
-
40
102
  variables = {}
41
103
  for column in table.columns:
42
104
  col_name = column.name
@@ -52,11 +114,16 @@ def catalog_table_to_z3_vars(table: Table) -> dict[str, ExprRef]:
52
114
  create_z3_var(variables, table.name, col_name, String)
53
115
  else:
54
116
  create_z3_var(variables, table.name, col_name)
55
-
56
117
  return variables
57
118
 
119
+
120
+ # ----------------------------------------------------------------------
121
+ # SQL → Z3 conversion
122
+ # ----------------------------------------------------------------------
123
+
58
124
  def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
59
125
  '''Convert a SQLGlot expression to a Z3 expression.'''
126
+
60
127
  # --- Columns ---
61
128
  if isinstance(expr, exp.Column):
62
129
  name = expr.name.lower()
@@ -76,7 +143,6 @@ def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
76
143
  elif val.upper() in ('TRUE', 'FALSE'):
77
144
  return BoolVal(val.upper() == 'TRUE')
78
145
  elif val.upper() == 'NULL':
79
- # Represent NULL as a special None (handled by IS NULL)
80
146
  return None
81
147
  else:
82
148
  raise NotImplementedError(f"Unsupported literal: {val}")
@@ -122,21 +188,24 @@ def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
122
188
  elif isinstance(expr, exp.Pow):
123
189
  return sql_to_z3(expr.left, variables) ** sql_to_z3(expr.right, variables)
124
190
 
125
- # --- BETWEEN a AND b ---String
191
+ # --- BETWEEN ---
126
192
  elif isinstance(expr, exp.Between):
127
193
  target = sql_to_z3(expr.this, variables)
128
194
  low = sql_to_z3(expr.args['low'], variables)
129
195
  high = sql_to_z3(expr.args['high'], variables)
130
196
  return And(target >= low, target <= high)
131
197
 
132
- # --- IN (list) ---
198
+ # --- IN ---
133
199
  elif isinstance(expr, exp.In):
134
200
  target = sql_to_z3(expr.this, variables)
135
- if isinstance(expr.args['expressions'], exp.Subquery):
136
- # Subquery handling can be complex; skipping for now
137
- return BoolVal(True)
138
-
201
+
202
+ if isinstance(expr.args.get('query'), exp.Subquery):
203
+ # subquery → symbolic value
204
+ sym = fresh_symbol('subq_in', 'string')
205
+ return target == sym
206
+
139
207
  options = [sql_to_z3(e, variables) for e in expr.expressions]
208
+
140
209
  return Or(*[target == o for o in options])
141
210
 
142
211
  # --- IS / IS NOT ---
@@ -144,65 +213,122 @@ def sql_to_z3(expr, variables: dict[str, ExprRef] = {}) -> Any:
144
213
  target_expr = expr.this
145
214
  right_expr = expr.args.get('expression')
146
215
 
147
- # handle IS NULL and IS NOT NULL
148
216
  if isinstance(right_expr, exp.Null):
149
- # x IS NULL → x_isnull = True
150
217
  if isinstance(target_expr, exp.Column):
151
218
  name = target_expr.name.lower()
152
219
  flag = variables.setdefault(f'{name}_isnull', Bool(f'{name}_isnull'))
153
220
  return flag
154
- else:
155
- return BoolVal(False)
221
+ return BoolVal(False)
156
222
 
157
- elif isinstance(right_expr, exp.Not) and isinstance(right_expr.this, exp.Null):
158
- # x IS NOT NULL → ¬x_isnull
223
+ if isinstance(right_expr, exp.Not) and isinstance(right_expr.this, exp.Null):
159
224
  if isinstance(target_expr, exp.Column):
160
225
  name = target_expr.name.lower()
161
226
  flag = variables.setdefault(f'{name}_isnull', Bool(f'{name}_isnull'))
162
227
  return Not(flag)
163
- else:
164
- return BoolVal(True)
228
+ return BoolVal(True)
229
+
230
+ return sql_to_z3(target_expr, variables) == sql_to_z3(right_expr, variables)
231
+
232
+ # --- LIKE ---
233
+ elif isinstance(expr, exp.Like):
234
+ target = sql_to_z3(expr.this, variables)
235
+ pattern_expr = sql_to_z3(expr.expression, variables)
236
+
237
+ # If pattern is a variable → fallback
238
+ if not isinstance(expr.expression, exp.Literal):
239
+ return target == pattern_expr
240
+
241
+ pattern = expr.expression.this.strip("'")
242
+ wildcard_count = pattern.count('%') + pattern.count('_')
243
+
244
+ if wildcard_count > 2:
245
+ return target == StringVal(pattern)
246
+
247
+ # PREFIX pattern: abc%
248
+ if '%' in pattern and '_' not in pattern:
249
+ if pattern.endswith('%') and pattern.count('%') == 1:
250
+ prefix = pattern[:-1]
251
+ return PrefixOf(StringVal(prefix), target)
252
+
253
+ # CONTAINS: %abc%
254
+ if pattern.startswith('%') and pattern.endswith('%') and pattern.count('%') == 2:
255
+ mid = pattern[1:-1]
256
+ return Contains(target, StringVal(mid))
257
+
258
+ # SUFFIX: %abc
259
+ if pattern.startswith('%') and pattern.count('%') == 1:
260
+ suffix = pattern[1:]
261
+ return SuffixOf(StringVal(suffix), target)
262
+
263
+ # EXACTLY ONE '_' wildcard
264
+ if '_' in pattern and '%' not in pattern and wildcard_count == 1:
265
+ parts = pattern.split('_')
266
+ regex = None
267
+ for i, p in enumerate(parts):
268
+ r = Re(StringVal(p))
269
+ regex = r if regex is None else Concat(regex, r)
270
+ if i < len(parts) - 1:
271
+ regex = Concat(regex, AllChar(r.sort()))
272
+ return InRe(target, regex)
165
273
 
274
+ return target == StringVal(pattern)
275
+
276
+ # --- EXISTS ---
277
+ elif isinstance(expr, exp.Exists):
278
+ return fresh_symbol('subq_exists', 'bool')
279
+
280
+ # --- SUBQUERY ---
281
+ elif isinstance(expr, exp.Subquery):
282
+ sort = infer_subquery_sort_from_parent(expr)
283
+ if sort == 'int':
284
+ return fresh_symbol('subq_val', 'int')
285
+ elif sort == 'real':
286
+ return fresh_symbol('subq_val', 'real')
287
+ elif sort == 'string':
288
+ return fresh_symbol('subq_val', 'string')
166
289
  else:
167
- # generic IS (e.g., IS TRUE, IS FALSE)
168
- return sql_to_z3(target_expr, variables) == sql_to_z3(right_expr, variables)
290
+ return fresh_symbol('subq_bool', 'bool')
169
291
 
170
- # Fallback: skip unsupported expressions
292
+ # --- Fallback ---
171
293
  return BoolVal(True)
172
294
 
295
+
296
+ # ----------------------------------------------------------------------
297
+ # Formula checking
298
+ # ----------------------------------------------------------------------
299
+
173
300
  def check_formula(expr) -> str:
174
301
  '''Check if the given SQLGlot expression is a tautology, contradiction, or contingent.'''
302
+
175
303
  formula = sql_to_z3(expr, {})
304
+
176
305
  if formula is None:
177
306
  return 'unknown'
178
307
 
179
308
  solver = Solver()
180
309
 
181
- # Check for contradiction
182
310
  solver.push()
183
311
  solver.add(formula)
312
+
184
313
  if solver.check() == unsat:
185
- solver.pop()
186
314
  return 'contradiction'
187
- solver.pop()
188
315
 
189
- # Check for tautology
316
+ solver.pop()
190
317
  solver.push()
191
318
  solver.add(Not(formula))
319
+
192
320
  if solver.check() == unsat:
193
- solver.pop()
194
321
  return 'tautology'
195
- solver.pop()
196
322
 
197
323
  return 'contingent'
198
324
 
199
325
  def is_satisfiable(expr_z3) -> bool:
200
- '''Check if the given Z3 expression is satisfiable.'''
326
+
201
327
  solver = Solver()
202
328
  solver.add(expr_z3)
329
+ result = solver.check() != unsat
203
330
 
204
- return solver.check() != unsat
331
+ return result
205
332
 
206
333
  def is_bool_expr(e) -> bool:
207
- '''Check if the given Z3 expression is boolean.'''
208
334
  return is_expr(e) and e.sort().kind() == BoolSort().kind()
@@ -76,12 +76,14 @@ def _(expression: exp.In, catalog: Catalog, search_path: str) -> ResultType:
76
76
  # Case IN (<list>)
77
77
  for item in expression.expressions:
78
78
  item_type = get_type(item, catalog, search_path)
79
+ old_messages.extend(item_type.messages)
79
80
  if target_type != item_type:
80
81
  old_messages.append(error_message(expression, item_type, target_type))
81
82
 
82
83
  # Case IN (subquery)
83
84
  if expression.args.get("query"):
84
85
  subquery_type = get_type(expression.args.get("query"), catalog, search_path)
86
+ old_messages.extend(subquery_type.messages)
85
87
  if target_type != subquery_type:
86
88
  old_messages.append(error_message(expression, subquery_type, target_type))
87
89
 
@@ -3,6 +3,7 @@ from ...catalog import Catalog
3
3
  from sqlglot import exp
4
4
  from .types import ResultType, AtomicType, DataType, TupleType
5
5
  from .util import is_number, is_date, to_number, to_date, error_message
6
+ from ...util.ast.column import get_real_name, get_schema
6
7
 
7
8
  @get_type.register
8
9
  def _(expression: exp.Literal, catalog: Catalog, search_path: str) -> ResultType:
@@ -66,8 +67,8 @@ def _(expression: exp.Column, catalog: Catalog, search_path: str) -> ResultType:
66
67
  if expression.type.this in (DataType.Type.UNKNOWN, DataType.Type.USERDEFINED):
67
68
  return AtomicType(messages=[error_message(expression.name, "Unknown column type")])
68
69
  else:
69
- schema = expression.args.get("db") or search_path
70
- table = expression.args.get("table")
70
+ schema = get_schema(expression) or search_path
71
+ table = get_real_name(expression)
71
72
 
72
73
  nullable = catalog[schema][table][expression.name].is_nullable
73
74
  return AtomicType(data_type=expression.type.this, constant=False, nullable=nullable)
@@ -24,11 +24,11 @@ def _(expression: exp.Select, catalog: Catalog, search_path: str) -> ResultType:
24
24
 
25
25
  having = expression.args.get("having")
26
26
  if having:
27
- old_messages.extend(get_type(having, catalog, search_path).messages)
27
+ old_messages.extend(get_type(having.this, catalog, search_path).messages)
28
28
  if len(types) == 1:
29
- return AtomicType(data_type=types[0].data_type, messages=old_messages, nullable=types[0].nullable, constant=types[0].constant)
29
+ return AtomicType(data_type=types[0].data_type, messages=old_messages, nullable=types[0].nullable)
30
30
 
31
- return TupleType(types=types, messages=old_messages, nullable=any(t.nullable for t in types), constant=all(t.constant for t in types))
31
+ return TupleType(types=types, messages=old_messages, nullable=any(t.nullable for t in types))
32
32
 
33
33
  @get_type.register
34
34
  def _(expression: exp.Subquery, catalog: Catalog, search_path: str) -> ResultType: