sql-error-categorizer 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/Makefile +1 -1
  2. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/PKG-INFO +1 -1
  3. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/docs/conf.py +3 -1
  4. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/pyproject.toml +1 -1
  5. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/__init__.py +1 -1
  6. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/__init__.py +1 -1
  7. sql_error_categorizer-0.1.11/src/sql_error_categorizer/catalog/builder/__init__.py +8 -0
  8. sql_error_categorizer-0.1.9/src/sql_error_categorizer/catalog/builder/__init__.py → sql_error_categorizer-0.1.11/src/sql_error_categorizer/catalog/builder/postgres.py +73 -9
  9. sql_error_categorizer-0.1.11/src/sql_error_categorizer/catalog/builder/sql.py +219 -0
  10. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/complications.py +1 -1
  11. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/logical.py +2 -2
  12. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/syntax.py +1 -1
  13. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/set_operations/__init__.py +4 -8
  14. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/set_operations/binary_set_operation.py +13 -0
  15. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/set_operations/set_operation.py +0 -12
  16. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/binary_ops.py +2 -1
  17. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/functions.py +2 -1
  18. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/predicates.py +2 -1
  19. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/primitives.py +2 -1
  20. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/unary_ops.py +2 -1
  21. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/util.py +2 -1
  22. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/sql_errors.py +2 -2
  23. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/ast/__init__.py +1 -4
  24. sql_error_categorizer-0.1.9/src/sql_error_categorizer/catalog/builder/queries.py +0 -60
  25. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/.gitignore +0 -0
  26. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/.readthedocs.yaml +0 -0
  27. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/LICENSE +0 -0
  28. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/README.md +0 -0
  29. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/datasets/catalogs/constraints.json +0 -0
  30. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/datasets/catalogs/miedema.json +0 -0
  31. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/datasets/sql/constraints.sql +0 -0
  32. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/datasets/sql/miedema.sql +0 -0
  33. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/docs/Makefile +0 -0
  34. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/docs/index.rst +0 -0
  35. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/docs/make.bat +0 -0
  36. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/docs/requirements.txt +0 -0
  37. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/requirements.txt +0 -0
  38. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/catalog.py +0 -0
  39. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/column.py +0 -0
  40. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/constraint.py +0 -0
  41. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/schema.py +0 -0
  42. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/catalog/table.py +0 -0
  43. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/__init__.py +0 -0
  44. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/base.py +0 -0
  45. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/detectors/semantic.py +0 -0
  46. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/__init__.py +0 -0
  47. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/extractors.py +0 -0
  48. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/query.py +0 -0
  49. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/set_operations/select.py +0 -0
  50. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/smt.py +0 -0
  51. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/tokenized_sql.py +0 -0
  52. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/__init__.py +0 -0
  53. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/base.py +0 -0
  54. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/queries.py +0 -0
  55. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/query/typechecking/types.py +0 -0
  56. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/__init__.py +0 -0
  57. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/ast/column.py +0 -0
  58. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/ast/function.py +0 -0
  59. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/ast/subquery.py +0 -0
  60. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/ast/table.py +0 -0
  61. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/sql.py +0 -0
  62. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/src/sql_error_categorizer/util/tokens.py +0 -0
  63. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/test_detector.py +0 -0
  64. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/__init__.py +0 -0
  65. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_002_ambiguous_column.py +0 -0
  66. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_004_undefined_column.py +0 -0
  67. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_005_undefined_function.py +0 -0
  68. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_006_undefined_parameter.py +0 -0
  69. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_007_undefined_tables.py +0 -0
  70. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_008_invalid_schema_names.py +0 -0
  71. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_009_misspellings.py +0 -0
  72. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_013_data_type_mismatch.py +0 -0
  73. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_014_aggregate_function_outside_select_or_having.py +0 -0
  74. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_015_nested_aggregate_functions.py +0 -0
  75. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_016_extraneous_omitted_grouping_column.py +0 -0
  76. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_017_having_without_group_by.py +0 -0
  77. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_019_using_where_twice.py +0 -0
  78. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_020_missing_from.py +0 -0
  79. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_021_comparison_with_null.py +0 -0
  80. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_022_038_additional_omitted_semicolons.py +0 -0
  81. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_024_duplicate_clause.py +0 -0
  82. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_026_too_many_columns_in_subquery.py +0 -0
  83. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_030_keywords_order.py +0 -0
  84. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_034_curly_square_or_unmatched_brackets.py +0 -0
  85. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_035_is_where_not_applicable.py +0 -0
  86. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/1_syn/test_037_nonstandard_operators.py +0 -0
  87. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/2_sem/test_040_tautological_inconsistent_expressions.py +0 -0
  88. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/2_sem/test_041_distinct_sum_avg.py +0 -0
  89. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/2_sem/test_043_wildcards_without_like.py +0 -0
  90. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/2_sem/test_044_incorrect_wildcards.py +0 -0
  91. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_058_join_on_incorrect_table.py +0 -0
  92. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_059_join_when_join_needs_to_be_omitted.py +0 -0
  93. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_062_missing_join.py +0 -0
  94. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_070_extraneous_column_in_select.py +0 -0
  95. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_071_missing_column_from_select.py +0 -0
  96. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_072_missing_distinct_from_select.py +0 -0
  97. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/3_log/test_073_missing_as_from_select.py +0 -0
  98. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_083_unnecessary_distinct_in_select.py +0 -0
  99. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_088_like_no_wildcards.py +0 -0
  100. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_092_unnecessary_distinct_in_aggregate_function.py +0 -0
  101. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_095_group_by_with_singleton_groups.py +0 -0
  102. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_097_group_by_can_be_replaced_by_distinct.py +0 -0
  103. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/detectors/4_com/test_100_order_by_in_subquery.py +0 -0
  104. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/query/test_extractors.py +0 -0
  105. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/query/test_query.py +0 -0
  106. {sql_error_categorizer-0.1.9 → sql_error_categorizer-0.1.11}/tests/query/test_typechecking.py +0 -0
@@ -34,7 +34,7 @@ uninstall: $(VENV)
34
34
  $(VENV_BIN)/python -m pip uninstall -y $(NAME)
35
35
 
36
36
  documentation:
37
- make html -C docs/
37
+ make html SPHINXBUILD="../$(VENV_BIN)/sphinx-build" -C docs/
38
38
 
39
39
  test: install
40
40
  $(VENV_BIN)/python -m pytest
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_error_categorizer
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: This project analyses SQL statements and labels possible errors or complications.
5
5
  Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
6
6
  Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
@@ -29,7 +29,9 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
29
29
  # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
30
30
 
31
31
  html_theme = 'alabaster'
32
- html_static_path = ['_static']
32
+ html_static_path = [
33
+ # '_static',
34
+ ]
33
35
 
34
36
 
35
37
  # -- Autoapi -----------------------------------------------------------------
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql_error_categorizer"
7
- version = "0.1.9"
7
+ version = "0.1.11"
8
8
  authors = [
9
9
  { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
10
10
  ]
@@ -5,7 +5,7 @@ from .detectors import BaseDetector as _BaseDetector, Detector as _Detector
5
5
 
6
6
  # Public API
7
7
  from .sql_errors import SqlErrors
8
- from .catalog import Catalog, build_catalog, load_catalog, build_catalog_from_postgres
8
+ from .catalog import Catalog, build_catalog, load_catalog, build_catalog_from_postgres, build_catalog_from_sql
9
9
  from .detectors import SyntaxErrorDetector, SemanticErrorDetector, LogicalErrorDetector, ComplicationDetector, DetectedError
10
10
 
11
11
  def get_errors(query_str: str,
@@ -6,7 +6,7 @@ from .column import Column
6
6
  from .table import Table
7
7
  from .schema import Schema
8
8
  from .catalog import Catalog
9
- from .builder import CatalogColumnInfo, CatalogUniqueConstraintInfo, build_catalog, build_catalog_from_postgres, load_catalog
9
+ from .builder import CatalogColumnInfo, CatalogUniqueConstraintInfo, build_catalog, build_catalog_from_postgres, load_catalog, build_catalog_from_sql
10
10
 
11
11
 
12
12
 
@@ -0,0 +1,8 @@
1
+ from ..catalog import Catalog
2
+ from .postgres import build_catalog, build_catalog_from_postgres, CatalogColumnInfo, CatalogUniqueConstraintInfo
3
+ from .sql import build_catalog_from_sql
4
+
5
+ def load_catalog(path: str) -> Catalog:
6
+ '''Loads a catalog from a JSON file.'''
7
+ return Catalog.load_json(path)
8
+
@@ -1,10 +1,11 @@
1
- from dataclasses import dataclass
2
- from . import queries
3
1
  from ..catalog import Catalog
4
2
  from ..constraint import ConstraintType
3
+
5
4
  import psycopg2
6
5
  import time
6
+ from dataclasses import dataclass
7
7
 
8
+ # region Data Classes
8
9
  @dataclass(frozen=True)
9
10
  class CatalogColumnInfo:
10
11
  '''Holds information about a database column.'''
@@ -48,7 +49,9 @@ class CatalogUniqueConstraintInfo:
48
49
  'constraint_type': self.constraint_type,
49
50
  'columns': self.columns,
50
51
  }
51
-
52
+ # endregion
53
+
54
+ # region Catalog Builder
52
55
  def build_catalog(columns_info: list[CatalogColumnInfo], unique_constraints_info: list[CatalogUniqueConstraintInfo]) -> Catalog:
53
56
  '''Builds a catalog from the provided column and unique constraint information.'''
54
57
  result = Catalog()
@@ -75,6 +78,7 @@ def build_catalog(columns_info: list[CatalogColumnInfo], unique_constraints_info
75
78
 
76
79
  return result
77
80
 
81
+
78
82
  def build_catalog_from_postgres(sql_string: str, *, hostname: str, port: int, user: str, password: str, schema: str | None = None, create_temp_schema: bool = False) -> Catalog:
79
83
  '''Builds a catalog by executing the provided SQL string in a temporary PostgreSQL database.'''
80
84
  if sql_string.strip() == '':
@@ -98,7 +102,7 @@ def build_catalog_from_postgres(sql_string: str, *, hostname: str, port: int, us
98
102
  cur.execute(sql_string)
99
103
 
100
104
  # Fetch the catalog information
101
- cur.execute(queries.COLUMNS(schema_name))
105
+ cur.execute(COLUMNS(schema_name))
102
106
  columns_info = cur.fetchall()
103
107
 
104
108
  columns_data = [
@@ -118,7 +122,7 @@ def build_catalog_from_postgres(sql_string: str, *, hostname: str, port: int, us
118
122
  ]
119
123
 
120
124
  # Fetch unique constraints (including primary keys)
121
- cur.execute(queries.UNIQUE_COLUMNS(schema_name))
125
+ cur.execute(UNIQUE_COLUMNS(schema_name))
122
126
  unique_constraints_info = cur.fetchall()
123
127
 
124
128
  unique_constraints_data = [
@@ -137,7 +141,67 @@ def build_catalog_from_postgres(sql_string: str, *, hostname: str, port: int, us
137
141
  conn.rollback() # no need to save anything
138
142
 
139
143
  return build_catalog(columns_data, unique_constraints_data)
140
-
141
- def load_catalog(path: str) -> Catalog:
142
- '''Loads a catalog from a JSON file.'''
143
- return Catalog.load_json(path)
144
+ # endregion
145
+
146
+ # region SQL Queries
147
+ def UNIQUE_COLUMNS(schema_name: str = '%') -> str:
148
+ return f'''
149
+ SELECT
150
+ kcu.table_schema AS schema_name,
151
+ kcu.table_name,
152
+ tc.constraint_type,
153
+ array_agg(kcu.column_name ORDER BY kcu.ordinal_position) AS columns
154
+ FROM information_schema.table_constraints tc
155
+ JOIN information_schema.key_column_usage kcu
156
+ ON tc.constraint_name = kcu.constraint_name
157
+ AND tc.constraint_schema = kcu.constraint_schema
158
+ WHERE tc.constraint_type IN ('UNIQUE', 'PRIMARY KEY')
159
+ AND kcu.table_schema LIKE '{schema_name}'
160
+ GROUP BY
161
+ kcu.table_schema,
162
+ kcu.table_name,
163
+ kcu.constraint_name,
164
+ tc.constraint_type;
165
+ '''
166
+
167
+ def COLUMNS(schema_name: str = '%') -> str:
168
+ return f'''
169
+ SELECT
170
+ cols.table_schema AS schema_name,
171
+ cols.table_name,
172
+ cols.column_name,
173
+ cols.data_type AS column_type,
174
+ cols.numeric_precision,
175
+ cols.numeric_scale,
176
+ (cols.is_nullable = 'YES') AS is_nullable,
177
+ fk.foreign_table_schema AS foreign_key_schema,
178
+ fk.foreign_table_name AS foreign_key_table,
179
+ fk.foreign_column_name AS foreign_key_column
180
+ FROM information_schema.columns AS cols
181
+
182
+ -- Foreign Key
183
+ LEFT JOIN (
184
+ SELECT
185
+ kcu.table_schema,
186
+ kcu.table_name,
187
+ kcu.column_name,
188
+ ccu.table_schema AS foreign_table_schema,
189
+ ccu.table_name AS foreign_table_name,
190
+ ccu.column_name AS foreign_column_name
191
+ FROM information_schema.table_constraints AS tc
192
+ JOIN information_schema.key_column_usage AS kcu
193
+ ON tc.constraint_name = kcu.constraint_name
194
+ AND tc.constraint_schema = kcu.constraint_schema
195
+ AND tc.table_schema = kcu.table_schema
196
+ AND tc.table_name = kcu.table_name
197
+ JOIN information_schema.constraint_column_usage AS ccu
198
+ ON tc.constraint_name = ccu.constraint_name
199
+ AND tc.constraint_schema = ccu.constraint_schema
200
+ WHERE tc.constraint_type = 'FOREIGN KEY'
201
+ ) fk ON fk.table_schema = cols.table_schema
202
+ AND fk.table_name = cols.table_name
203
+ AND fk.column_name = cols.column_name
204
+
205
+ WHERE cols.table_schema LIKE '{schema_name}'
206
+ '''
207
+ # endregion
@@ -0,0 +1,219 @@
1
+ import sqlglot
2
+ from sqlglot import exp
3
+ from ..catalog import Catalog
4
+ from ..constraint import ConstraintType
5
+
6
+ def _get_identifier_name(identifier_exp: exp.Identifier) -> str:
7
+ '''Returns the normalized name from an Identifier expression.'''
8
+
9
+ if identifier_exp.quoted:
10
+ return identifier_exp.name
11
+ return identifier_exp.name.lower()
12
+
13
+ def _get_table_name(table_exp: exp.Table) -> str:
14
+ '''Returns the normalized table name from a Table expression.'''
15
+
16
+ if isinstance(table_exp.this, exp.Identifier):
17
+ return _get_identifier_name(table_exp.this)
18
+ return str(table_exp.this).lower()
19
+
20
+ def _get_schema_name(table_exp: exp.Table, default_schema: str) -> str:
21
+ '''Returns the normalized schema name from a Table expression, or the default schema if not specified.'''
22
+
23
+ if table_exp.db:
24
+ if isinstance(table_exp.db, exp.Identifier):
25
+ return _get_identifier_name(table_exp.db)
26
+ return str(table_exp.db).lower()
27
+ return default_schema
28
+
29
+ def _get_column_name(column_exp: exp.ColumnDef) -> str:
30
+ '''Returns the normalized column name from a ColumnDef expression.'''
31
+
32
+ if isinstance(column_exp.this, exp.Identifier):
33
+ return _get_identifier_name(column_exp.this)
34
+ return str(column_exp.this).lower()
35
+
36
+ def _extract_datatype(column_exp: exp.ColumnDef) -> tuple[str, int | None, int | None]:
37
+ '''Extracts datatype information from a ColumnDef expression.'''
38
+
39
+ datatype_exp = column_exp.kind
40
+ assert isinstance(datatype_exp, exp.DataType), 'Expected DataType expression in ColumnDef'
41
+
42
+ datatype = datatype_exp.this.value
43
+
44
+ numeric_precision = None
45
+ numeric_scale = None
46
+
47
+ if datatype_exp.expressions:
48
+ if datatype in {'DECIMAL', 'NUMERIC', 'NUMBER', 'FLOAT'}:
49
+ if len(datatype_exp.expressions) >= 1:
50
+ precision_exp = datatype_exp.expressions[0]
51
+ if isinstance(precision_exp, exp.Literal) and precision_exp.is_int:
52
+ numeric_precision = int(precision_exp.name)
53
+ if len(datatype_exp.expressions) == 2:
54
+ scale_exp = datatype_exp.expressions[1]
55
+ if isinstance(scale_exp, exp.Literal) and scale_exp.is_int:
56
+ numeric_scale = int(scale_exp.name)
57
+
58
+ return datatype, numeric_precision, numeric_scale
59
+
60
+ def build_catalog_from_sql(sql_string: str, search_path: str = 'public') -> Catalog:
61
+ '''Builds a catalog from the provided SQL string without executing it in a database.'''
62
+
63
+ statements = sqlglot.parse(sql_string)
64
+
65
+ # Filter to only CREATE TABLE statements
66
+ statements = [stmt for stmt in statements if isinstance(stmt, exp.Create) and stmt.kind and stmt.kind.upper() == 'TABLE']
67
+
68
+ catalog = Catalog()
69
+
70
+ for statement in statements:
71
+ table_exp = statement.find(exp.Table)
72
+
73
+ assert table_exp is not None, 'Expected Table expression in CREATE TABLE statement'
74
+
75
+ # "CREATE TABLE <schema_name>.<table_name>" handling
76
+ table_name = _get_table_name(table_exp)
77
+ schema_name = _get_schema_name(table_exp, search_path)
78
+
79
+ # Extract other relevant information
80
+ column_exps: list[exp.ColumnDef] = list(statement.find_all(exp.ColumnDef))
81
+ '''Column definitions'''
82
+
83
+ pk_exp: exp.PrimaryKey | None = statement.find(exp.PrimaryKey)
84
+ '''PRIMARY KEY defined at table level, e.g., PRIMARY KEY (col1, col2)'''
85
+
86
+ fk_exps: list[exp.ForeignKey] = list(statement.find_all(exp.ForeignKey))
87
+ '''FOREIGN KEY defined at table level, e.g., FOREIGN KEY (col1) REFERENCES other_table(other_col)'''
88
+
89
+ fks: dict[str, tuple[str, str, str]] = {}
90
+ '''Mapping of foreign key column names to (schema, table, column) tuples'''
91
+ # NOTE: this needs to be filled in before adding columns to the catalog
92
+
93
+ unique_exps: list[exp.UniqueColumnConstraint] = list(statement.find_all(exp.UniqueColumnConstraint))
94
+ '''UNIQUE constraints defined at table level, e.g., UNIQUE (col1, col2)'''
95
+
96
+ pk_col_names: set[str] = set()
97
+ '''Set to keep track of primary key column names'''
98
+
99
+ unique_col_names: list[set[str]] = []
100
+ '''List to keep track of unique constraint column name sets'''
101
+
102
+ # Process table-level Foreign Key constraints
103
+ for fk_exp in fk_exps:
104
+ fk_id_exps = fk_exp.expressions
105
+ fk_column_names = [_get_identifier_name(col_exp) for col_exp in fk_id_exps]
106
+
107
+ ref_exp = fk_exp.find(exp.Reference)
108
+ assert ref_exp is not None, 'Expected Reference expression in Foreign Key definition'
109
+
110
+ ref_schema_exp = ref_exp.this
111
+ assert isinstance(ref_schema_exp, exp.Schema), 'Expected Schema expression in Foreign Key reference'
112
+
113
+ ref_table_exp = ref_schema_exp.this
114
+ assert isinstance(ref_table_exp, exp.Table), 'Expected Table expression in Foreign Key reference'
115
+
116
+ ref_schema_name = _get_schema_name(ref_table_exp, search_path)
117
+ ref_table_name = _get_table_name(ref_table_exp)
118
+
119
+ ref_id_exps = ref_schema_exp.expressions
120
+ ref_column_names = [_get_identifier_name(col_exp) for col_exp in ref_id_exps]
121
+
122
+ # e.g. "FOREIGN KEY (tenant_id, order_id) REFERENCES orders (tenant_id, order_id)"
123
+ for fk_col_name, ref_col_name in zip(fk_column_names, ref_column_names):
124
+ fks[fk_col_name] = (ref_schema_name, ref_table_name, ref_col_name)
125
+
126
+ # Process columns
127
+ for column_exp in column_exps:
128
+ column_name = _get_column_name(column_exp)
129
+
130
+ # Primary Key handling
131
+ is_pk = any(isinstance(c.kind, exp.PrimaryKeyColumnConstraint) for c in column_exp.constraints)
132
+ if is_pk:
133
+ pk_col_names.add(column_name)
134
+
135
+ # Unique handling
136
+ is_unique = any(isinstance(c.kind, exp.UniqueColumnConstraint) for c in column_exp.constraints)
137
+ if is_unique:
138
+ unique_col_names.append({column_name})
139
+
140
+ # Not Null handling
141
+ is_not_null = any(isinstance(c.kind, exp.NotNullColumnConstraint) for c in column_exp.constraints)
142
+
143
+ # Foreign Key handling
144
+ fk_constraint = next((c for c in column_exp.constraints if isinstance(c.kind, exp.Reference)), None)
145
+
146
+ if fk_constraint:
147
+ fk_reference = fk_constraint.kind
148
+ assert isinstance(fk_reference, exp.Reference), 'Expected Reference expression in Foreign Key constraint'
149
+
150
+ fk_schema_exp = fk_reference.this
151
+ assert isinstance(fk_schema_exp, exp.Schema), 'Expected Schema expression in Foreign Key constraint'
152
+
153
+ fk_table_exp = fk_schema_exp.this
154
+ assert isinstance(fk_table_exp, exp.Table), 'Expected Table expression in Foreign Key constraint'
155
+
156
+ fk_schema_name = _get_schema_name(fk_table_exp, search_path)
157
+ fk_table_name = _get_table_name(fk_table_exp)
158
+
159
+ fk_column_exp = fk_schema_exp.expressions[0]
160
+ assert isinstance(fk_column_exp, exp.Identifier), 'Expected Identifier expression in Foreign Key column'
161
+ fk_column_name = _get_identifier_name(fk_column_exp)
162
+ elif column_name in fks:
163
+ fk_schema_name, fk_table_name, fk_column_name = fks[column_name]
164
+ else:
165
+ fk_schema_name = None
166
+ fk_table_name = None
167
+ fk_column_name = None
168
+
169
+ # Datatype handling
170
+ column_type, numeric_precision, numeric_scale = _extract_datatype(column_exp)
171
+
172
+ # Add column to catalog
173
+ catalog[schema_name][table_name].add_column(
174
+ name=column_name,
175
+ column_type=column_type,
176
+ real_name=column_name,
177
+ numeric_precision=numeric_precision,
178
+ numeric_scale=numeric_scale,
179
+ is_nullable=not is_not_null,
180
+ fk_schema=fk_schema_name,
181
+ fk_table=fk_table_name,
182
+ fk_column=fk_column_name)
183
+
184
+ # Process table-level Primary Key constraint
185
+ if pk_exp:
186
+ for ordered_exp in pk_exp.expressions:
187
+ col_exp = ordered_exp.find(exp.Column)
188
+ assert col_exp is not None, 'Expected Column expression in Primary Key definition'
189
+ col_name = _get_column_name(col_exp)
190
+ pk_col_names.add(col_name)
191
+
192
+ # Process table-level Unique constraints
193
+ for unique_exp in unique_exps:
194
+ unique_schema_exp = unique_exp.this
195
+ assert isinstance(unique_schema_exp, exp.Schema), 'Expected Schema expression in Unique constraint'
196
+
197
+ unique_column_names = set()
198
+ for col_id_exp in unique_exp.expressions:
199
+ col_name = _get_identifier_name(col_id_exp)
200
+ unique_column_names.add(col_name)
201
+ unique_col_names.append(unique_column_names)
202
+
203
+ # Add Primary Key constraint to catalog
204
+ # NOTE: needs to be performed after all columns have been added, since PKs can be defined at both column and table level
205
+ assert len(pk_col_names) > 0, 'Primary Key columns should have been identified'
206
+ catalog[schema_name][table_name].add_unique_constraint(
207
+ columns=pk_col_names,
208
+ constraint_type=ConstraintType.PRIMARY_KEY
209
+ )
210
+
211
+ # Add Unique constraints to catalog
212
+ # NOTE: needs to be performed after all columns have been added, since Unique constraints can be defined at both column and table level
213
+ for unique_col_name_set in unique_col_names:
214
+ catalog[schema_name][table_name].add_unique_constraint(
215
+ columns=unique_col_name_set,
216
+ constraint_type=ConstraintType.UNIQUE
217
+ )
218
+
219
+ return catalog
@@ -7,7 +7,7 @@ import sqlparse.keywords
7
7
  from typing import Callable
8
8
  from sqlglot import exp
9
9
 
10
- from sql_error_categorizer.catalog import ConstraintType, ConstraintColumn, Constraint
10
+ from ..catalog import ConstraintType, ConstraintColumn, Constraint
11
11
 
12
12
  from .base import BaseDetector, DetectedError
13
13
  from ..query import Query
@@ -711,14 +711,14 @@ class LogicalErrorDetector(BaseDetector):
711
711
 
712
712
  def _selects_star(self, ast: dict) -> bool:
713
713
  '''
714
- Checks if a 'SELECT *' is used in the query by looking for a 'Star'
714
+ Checks if a `SELECT *` is used in the query by looking for a 'Star'
715
715
  node in the AST's expression list.
716
716
 
717
717
  Args:
718
718
  ast: The Abstract Syntax Tree of the query.
719
719
 
720
720
  Returns:
721
- True if 'SELECT *' is found, otherwise False.
721
+ True if `SELECT *` is found, otherwise False.
722
722
  '''
723
723
  if not ast:
724
724
  return False
@@ -8,7 +8,7 @@ from sqlglot import exp
8
8
  from typing import Callable
9
9
  from copy import deepcopy
10
10
 
11
- from sql_error_categorizer.query.set_operations.set_operation import SetOperation
11
+ from ..query.set_operations.set_operation import SetOperation
12
12
  from ..query.typechecking import get_type, collect_errors
13
13
 
14
14
  from .base import BaseDetector, DetectedError
@@ -78,11 +78,9 @@ def create_set_operation_tree(sql: str, catalog: Catalog = Catalog(), search_pat
78
78
  def parse_op_token(tok: sqlparse.sql.Token) -> tuple[str, bool | None] | None:
79
79
  '''
80
80
  Parse "UNION", "INTERSECT", "EXCEPT" with optional inline ALL/DISTINCT.
81
+
81
82
  Returns:
82
- tuple: `(op, all_flag)` where all_flag is:
83
- - True if ALL inline (e.g., "UNION ALL")
84
- - False if DISTINCT inline (e.g., "EXCEPT DISTINCT")
85
- - None if no modifier inline (so caller may look right).
83
+ tuple: `(op, all_flag)` where all_flag is: True if ALL inline (e.g., "UNION ALL"); False if DISTINCT inline (e.g., "EXCEPT DISTINCT"); None if no modifier inline (so caller may look right).
86
84
  '''
87
85
  if tok.ttype is not Keyword:
88
86
  return None
@@ -106,11 +104,9 @@ def split_on(tokens: list[sqlparse.sql.Token], idx: int, all_in_token: bool | No
106
104
  '''
107
105
  Splits around the operator at idx. If the modifier wasn't inline,
108
106
  consume a single immediate ALL/DISTINCT to the right.
107
+
109
108
  Returns:
110
- tuple: A tuple containing:
111
- - left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator.
112
- - right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator
113
- - all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
109
+ tuple: A tuple containing: left_tokens (list[sqlparse.sql.Token]): Tokens to the left of the operator; right_tokens (list[sqlparse.sql.Token]): Tokens to the right of the operator; all_flag (bool | None): True if ALL, False if DISTINCT, None if unspecified.
114
110
  '''
115
111
  left_tokens = tokens[:idx]
116
112
  right_tokens = tokens[idx + 1:]
@@ -3,6 +3,7 @@ from ...catalog import Table, Constraint, ConstraintType, ConstraintColumn
3
3
 
4
4
  from abc import ABC
5
5
  from copy import deepcopy
6
+ import sqlglot
6
7
  from sqlglot import exp
7
8
 
8
9
  from typing import TYPE_CHECKING
@@ -44,6 +45,18 @@ class BinarySetOperation(SetOperation, ABC):
44
45
 
45
46
  return result
46
47
 
48
+ @property
49
+ def trailing_ast(self) -> exp.Expression | None:
50
+ '''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
51
+ if self.trailing_sql is None:
52
+ return None
53
+ if self._trailing_ast is None:
54
+ # Parse trailing SQL with a fake SELECT to get valid AST
55
+ fake_sql = f'SELECT 1 {self.trailing_sql}'
56
+ parsed = sqlglot.parse_one(fake_sql)
57
+ self._trailing_ast = parsed
58
+ return self._trailing_ast
59
+
47
60
  @property
48
61
  def output(self) -> Table:
49
62
  # Assume the output schema is the same as the left input
@@ -40,18 +40,6 @@ class SetOperation(ABC):
40
40
  @abstractmethod
41
41
  def print_tree(self, pre: str = '') -> None:
42
42
  pass
43
-
44
- @property
45
- def trailing_ast(self) -> exp.Expression | None:
46
- '''Parses and returns the AST of the trailing SQL clauses (e.g., ORDER BY, LIMIT) if present, with a fake `SELECT 1` prefix.'''
47
- if self.trailing_sql is None:
48
- return None
49
- if self._trailing_ast is None:
50
- # Parse trailing SQL with a fake SELECT to get valid AST
51
- fake_sql = f'SELECT 1 {self.trailing_sql}'
52
- parsed = sqlglot.parse_one(fake_sql)
53
- self._trailing_ast = parsed
54
- return self._trailing_ast
55
43
 
56
44
  @property
57
45
  @abstractmethod
@@ -1,7 +1,8 @@
1
1
  from .base import get_type
2
2
  from ...catalog import Catalog
3
3
  from sqlglot import exp
4
- from .types import ResultType, AtomicType, DataType
4
+ from .types import ResultType, AtomicType
5
+ from sqlglot.expressions import DataType
5
6
  from .util import is_number, to_number, to_date, error_message
6
7
 
7
8
  @get_type.register
@@ -1,7 +1,8 @@
1
1
  from .base import get_type
2
2
  from ...catalog import Catalog
3
3
  from sqlglot import exp
4
- from .types import ResultType, AtomicType, DataType
4
+ from .types import ResultType, AtomicType
5
+ from sqlglot.expressions import DataType
5
6
  from .util import is_number, error_message
6
7
 
7
8
  @get_type.register
@@ -1,7 +1,8 @@
1
1
  from .base import get_type
2
2
  from ...catalog import Catalog
3
3
  from sqlglot import exp
4
- from .types import ResultType, AtomicType, DataType
4
+ from .types import ResultType, AtomicType
5
+ from sqlglot.expressions import DataType
5
6
  from .util import is_string, to_number, to_date, error_message
6
7
 
7
8
  @get_type.register
@@ -1,7 +1,8 @@
1
1
  from .base import get_type
2
2
  from ...catalog import Catalog
3
3
  from sqlglot import exp
4
- from .types import ResultType, AtomicType, DataType, TupleType
4
+ from .types import ResultType, AtomicType, TupleType
5
+ from sqlglot.expressions import DataType
5
6
  from .util import is_number, is_date, to_number, to_date, error_message
6
7
  from ...util.ast.column import get_real_name, get_schema
7
8
 
@@ -1,7 +1,8 @@
1
1
  from .base import get_type
2
2
  from ...catalog import Catalog
3
3
  from sqlglot import exp
4
- from .types import ResultType, AtomicType, DataType
4
+ from .types import ResultType, AtomicType
5
+ from sqlglot.expressions import DataType
5
6
  from .util import is_number, error_message
6
7
 
7
8
  @get_type.register
@@ -1,4 +1,5 @@
1
- from .types import ResultType, DataType
1
+ from .types import ResultType
2
+ from sqlglot.expressions import DataType
2
3
  from dateutil.parser import parse
3
4
  from sqlglot import exp
4
5
 
@@ -1,7 +1,7 @@
1
- from enum import Enum
1
+ from enum import IntEnum
2
2
 
3
3
 
4
- class SqlErrors(Enum):
4
+ class SqlErrors(IntEnum):
5
5
  '''Enumeration of SQL error types with unique identifiers.'''
6
6
  SYN_1_OMITTING_CORRELATION_NAMES = 1
7
7
  SYN_2_AMBIGUOUS_COLUMN = 2
@@ -1,9 +1,6 @@
1
1
  '''Utility functions for processing SQL ASTs made with sqlglot.'''
2
2
 
3
- from .column import *
4
- from .function import *
5
- from .subquery import *
6
- from .table import *
3
+ from . import column, function, subquery, table
7
4
 
8
5
  import sqlglot.optimizer.normalize
9
6
  from sqlglot import exp
@@ -1,60 +0,0 @@
1
- def UNIQUE_COLUMNS(schema_name: str = '%') -> str:
2
- return f'''
3
- SELECT
4
- kcu.table_schema AS schema_name,
5
- kcu.table_name,
6
- tc.constraint_type,
7
- array_agg(kcu.column_name ORDER BY kcu.ordinal_position) AS columns
8
- FROM information_schema.table_constraints tc
9
- JOIN information_schema.key_column_usage kcu
10
- ON tc.constraint_name = kcu.constraint_name
11
- AND tc.constraint_schema = kcu.constraint_schema
12
- WHERE tc.constraint_type IN ('UNIQUE', 'PRIMARY KEY')
13
- AND kcu.table_schema LIKE '{schema_name}'
14
- GROUP BY
15
- kcu.table_schema,
16
- kcu.table_name,
17
- kcu.constraint_name,
18
- tc.constraint_type;
19
- '''
20
-
21
- def COLUMNS(schema_name: str = '%') -> str:
22
- return f'''
23
- SELECT
24
- cols.table_schema AS schema_name,
25
- cols.table_name,
26
- cols.column_name,
27
- cols.data_type AS column_type,
28
- cols.numeric_precision,
29
- cols.numeric_scale,
30
- (cols.is_nullable = 'YES') AS is_nullable,
31
- fk.foreign_table_schema AS foreign_key_schema,
32
- fk.foreign_table_name AS foreign_key_table,
33
- fk.foreign_column_name AS foreign_key_column
34
- FROM information_schema.columns AS cols
35
-
36
- -- Foreign Key
37
- LEFT JOIN (
38
- SELECT
39
- kcu.table_schema,
40
- kcu.table_name,
41
- kcu.column_name,
42
- ccu.table_schema AS foreign_table_schema,
43
- ccu.table_name AS foreign_table_name,
44
- ccu.column_name AS foreign_column_name
45
- FROM information_schema.table_constraints AS tc
46
- JOIN information_schema.key_column_usage AS kcu
47
- ON tc.constraint_name = kcu.constraint_name
48
- AND tc.constraint_schema = kcu.constraint_schema
49
- AND tc.table_schema = kcu.table_schema
50
- AND tc.table_name = kcu.table_name
51
- JOIN information_schema.constraint_column_usage AS ccu
52
- ON tc.constraint_name = ccu.constraint_name
53
- AND tc.constraint_schema = ccu.constraint_schema
54
- WHERE tc.constraint_type = 'FOREIGN KEY'
55
- ) fk ON fk.table_schema = cols.table_schema
56
- AND fk.table_name = cols.table_name
57
- AND fk.column_name = cols.column_name
58
-
59
- WHERE cols.table_schema LIKE '{schema_name}'
60
- '''