sql-error-categorizer 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/PKG-INFO +3 -3
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/README.md +1 -1
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/docs/conf.py +2 -2
- sql_error_categorizer-0.1.2/docs/index.rst +38 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/pyproject.toml +2 -2
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/__init__.py +2 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/catalog/__init__.py +1 -1
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/catalog/catalog.py +26 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/__init__.py +5 -1
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/base.py +6 -1
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/complications.py +4 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/logical.py +3 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/semantic.py +4 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/detectors/syntax.py +6 -3
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/__init__.py +2 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/set_operations/__init__.py +2 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/set_operations/select.py +1 -1
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/smt.py +2 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/tokenized_sql.py +2 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/util.py +6 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/sql_errors.py +1 -0
- sql_error_categorizer-0.1.0/docs/index.rst +0 -175
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/.gitignore +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/.readthedocs.yaml +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/LICENSE +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/Makefile +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/docs/Makefile +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/docs/make.bat +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/docs/requirements.txt +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/q_cte.sql +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/q_q.sql +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/q_s.sql +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/requirements.txt +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/catalog/queries.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/extractors.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/query.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/set_operations/binary_set_operation.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/set_operations/set_operation.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/typechecking.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/util.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/test_detector.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/__init__.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/datasets/cat_miedema.json +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_02_ambiguous_column.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_04_undefined_column.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_05_undefined_function.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_06_undefined_functions.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_07_undefined_tables.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_08_invalid_schema_names.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_09_misspellings.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_14_aggregate_function_outside_select_or_having.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_15_nested_aggregate_functions.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_16_extraneous_omitted_grouping_column.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_17_having_without_group_by.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_19_using_where_twice.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_20_missing_from.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_21_comparison_with_null.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_22_38_additional_omitted_semicolons.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_24_duplicate_clause.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_26_too_many_columns_in_subquery.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_30_keywords_order.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_34_curly_square_or_unmatched_brackets.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/1_syn/test_37_nonstandard_operators.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/2_sem/test_40_tautological_inconsistent_expressions.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/2_sem/test_41_distinct_sum_avg.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/2_sem/test_43_wildcards_without_like.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/2_sem/test_44_incorrect_wildcards.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/detectors/4_com/test_88_like_no_wildcards.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/query/conftest.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/query/test_extractors.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/query/test_query.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/query/test_typechecking.py +0 -0
- {sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/test_query.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_error_categorizer
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: This project analyses SQL statements and labels possible errors or complications.
|
|
5
5
|
Project-URL: Repository, https://github.com/DavidePonzini/sql_error_categorizer
|
|
6
|
-
Project-URL: Documentation, https://
|
|
6
|
+
Project-URL: Documentation, https://sql-error-categorizer.readthedocs.io/en/latest/index.html
|
|
7
7
|
Project-URL: Bug Tracker, https://github.com/DavidePonzini/sql_error_categorizer/issues
|
|
8
8
|
Author-email: Davide Ponzini <davide.ponzini95@gmail.com>
|
|
9
9
|
License-File: LICENSE
|
|
@@ -21,7 +21,7 @@ Description-Content-Type: text/markdown
|
|
|
21
21
|
This project analyses SQL statements and labels possible errors or complications.
|
|
22
22
|
|
|
23
23
|
# Credits
|
|
24
|
-
Special thanks to Davide Miggiano for
|
|
24
|
+
Special thanks to Davide Miggiano and Flavio Venturini for their valuable contributions to the development of this project.
|
|
25
25
|
|
|
26
26
|
# Limitations
|
|
27
27
|
- Fully identified schema names are not supported when specifying column names (e.g. `SELECT schema.table.column [...]`)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
This project analyses SQL statements and labels possible errors or complications.
|
|
3
3
|
|
|
4
4
|
# Credits
|
|
5
|
-
Special thanks to Davide Miggiano for
|
|
5
|
+
Special thanks to Davide Miggiano and Flavio Venturini for their valuable contributions to the development of this project.
|
|
6
6
|
|
|
7
7
|
# Limitations
|
|
8
8
|
- Fully identified schema names are not supported when specifying column names (e.g. `SELECT schema.table.column [...]`)
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
|
8
8
|
|
|
9
9
|
project = 'sql_error_categorizer'
|
|
10
|
-
copyright = '2025, Davide Ponzini
|
|
11
|
-
author = 'Davide Ponzini
|
|
10
|
+
copyright = '2025, Davide Ponzini'
|
|
11
|
+
author = 'Davide Ponzini'
|
|
12
12
|
|
|
13
13
|
# -- General configuration ---------------------------------------------------
|
|
14
14
|
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
.. dav-tools documentation master file, created by
|
|
2
|
+
sphinx-quickstart on Sun Jul 16 15:00:51 2023.
|
|
3
|
+
You can adapt this file completely to your liking, but it should at least
|
|
4
|
+
contain the root `toctree` directive.
|
|
5
|
+
|
|
6
|
+
Welcome to sql_error_categorizer's documentation!
|
|
7
|
+
=================================================
|
|
8
|
+
This project analyses SQL statements to highlight possible **errors**.
|
|
9
|
+
The detection engine tokenises the input query and applies a set of rules.
|
|
10
|
+
Additional rules are applied on the AST (Abstract Syntax Tree) generated from
|
|
11
|
+
the query.
|
|
12
|
+
When a rule matches, it reports the type of
|
|
13
|
+
error together with the relevant context.
|
|
14
|
+
|
|
15
|
+
The logic is implemented in `sql_query_analyzer/detectors/` and the available
|
|
16
|
+
error identifiers are listed in `sql_query_analyzer/sql_errors.py`.
|
|
17
|
+
|
|
18
|
+
Below you will find a short explanation that anyone can follow, followed by a
|
|
19
|
+
section with technical details for developers.
|
|
20
|
+
|
|
21
|
+
Contents
|
|
22
|
+
========
|
|
23
|
+
|
|
24
|
+
.. toctree::
|
|
25
|
+
:maxdepth: 4
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
Indices and tables
|
|
29
|
+
==================
|
|
30
|
+
|
|
31
|
+
* :ref:`genindex`
|
|
32
|
+
* :ref:`modindex`
|
|
33
|
+
* :ref:`search`
|
|
34
|
+
|
|
35
|
+
Installation
|
|
36
|
+
============
|
|
37
|
+
``$ pip install sql_error_categorizer``
|
|
38
|
+
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sql_error_categorizer"
|
|
7
|
-
version = "0.1.
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
|
|
10
10
|
]
|
|
@@ -25,5 +25,5 @@ dependencies = [
|
|
|
25
25
|
|
|
26
26
|
[project.urls]
|
|
27
27
|
"Repository" = "https://github.com/DavidePonzini/sql_error_categorizer"
|
|
28
|
-
"Documentation" = "https://
|
|
28
|
+
"Documentation" = "https://sql-error-categorizer.readthedocs.io/en/latest/index.html"
|
|
29
29
|
"Bug Tracker" = "https://github.com/DavidePonzini/sql_error_categorizer/issues"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
'''
|
|
1
|
+
'''Represents a catalog of database schemas, tables, and columns.'''
|
|
2
2
|
|
|
3
3
|
from .catalog import Catalog, Schema, Table, Column, UniqueConstraintType
|
|
4
4
|
import psycopg2
|
|
@@ -6,10 +6,12 @@ from copy import deepcopy
|
|
|
6
6
|
|
|
7
7
|
# region UniqueConstraint
|
|
8
8
|
class UniqueConstraintType(Enum):
|
|
9
|
+
'''Enumeration of unique constraint types.'''
|
|
9
10
|
PRIMARY_KEY = 'PRIMARY KEY'
|
|
10
11
|
UNIQUE = 'UNIQUE'
|
|
11
12
|
|
|
12
13
|
class UniqueConstraint:
|
|
14
|
+
'''A unique constraint on a set of columns in a table.'''
|
|
13
15
|
def __init__(self, columns: set[str], constraint_type: UniqueConstraintType) -> None:
|
|
14
16
|
self.columns = columns
|
|
15
17
|
self.constraint_type = constraint_type
|
|
@@ -19,6 +21,7 @@ class UniqueConstraint:
|
|
|
19
21
|
return f'{indent}UniqueConstraint({self.constraint_type.value}: {self.columns})'
|
|
20
22
|
|
|
21
23
|
def to_dict(self) -> dict:
|
|
24
|
+
'''Converts the UniqueConstraint to a dictionary.'''
|
|
22
25
|
return {
|
|
23
26
|
'columns': list(self.columns), # JSON-friendly (list)
|
|
24
27
|
'constraint_type': self.constraint_type.value,
|
|
@@ -26,6 +29,7 @@ class UniqueConstraint:
|
|
|
26
29
|
|
|
27
30
|
@classmethod
|
|
28
31
|
def from_dict(cls, data: dict) -> 'UniqueConstraint':
|
|
32
|
+
'''Creates a UniqueConstraint from a dictionary.'''
|
|
29
33
|
return cls(columns=set(c.lower() for c in data['columns']),
|
|
30
34
|
constraint_type=UniqueConstraintType(data['constraint_type']))
|
|
31
35
|
# endregion
|
|
@@ -33,6 +37,8 @@ class UniqueConstraint:
|
|
|
33
37
|
# region Column
|
|
34
38
|
@dataclass
|
|
35
39
|
class Column:
|
|
40
|
+
'''A database table column, with type and constraints.'''
|
|
41
|
+
|
|
36
42
|
name: str
|
|
37
43
|
column_type: str = 'UNKNOWN'
|
|
38
44
|
numeric_precision: int | None = None
|
|
@@ -45,6 +51,7 @@ class Column:
|
|
|
45
51
|
|
|
46
52
|
@property
|
|
47
53
|
def is_fk(self) -> bool:
|
|
54
|
+
'''Returns True if the column is a foreign key.'''
|
|
48
55
|
return all([self.fk_schema, self.fk_table, self.fk_column])
|
|
49
56
|
|
|
50
57
|
def __repr__(self, level: int = 0) -> str:
|
|
@@ -52,6 +59,7 @@ class Column:
|
|
|
52
59
|
return f'{indent}Column(name=\'{self.name}\', type=\'{self.column_type}\', is_fk={self.is_fk}, is_nullable={self.is_nullable}, is_constant={self.is_constant})'
|
|
53
60
|
|
|
54
61
|
def to_dict(self) -> dict:
|
|
62
|
+
'''Converts the Column to a dictionary.'''
|
|
55
63
|
return {
|
|
56
64
|
'name': self.name,
|
|
57
65
|
'column_type': self.column_type,
|
|
@@ -65,6 +73,7 @@ class Column:
|
|
|
65
73
|
|
|
66
74
|
@classmethod
|
|
67
75
|
def from_dict(cls, data: dict) -> 'Column':
|
|
76
|
+
'''Creates a Column from a dictionary.'''
|
|
68
77
|
return cls(
|
|
69
78
|
name=data['name'],
|
|
70
79
|
column_type=data['column_type'],
|
|
@@ -81,11 +90,13 @@ class Column:
|
|
|
81
90
|
@dataclass
|
|
82
91
|
class Table:
|
|
83
92
|
'''A database table, with columns and unique constraints. Supports multiple columns with the same name (e.g. from joins).'''
|
|
93
|
+
|
|
84
94
|
name: str
|
|
85
95
|
unique_constraints: list[UniqueConstraint] = field(default_factory=list)
|
|
86
96
|
columns: list[Column] = field(default_factory=list)
|
|
87
97
|
|
|
88
98
|
def add_unique_constraint(self, columns: set[str], constraint_type: UniqueConstraintType) -> None:
|
|
99
|
+
'''Adds a unique constraint to the table.'''
|
|
89
100
|
self.unique_constraints.append(UniqueConstraint(columns, constraint_type))
|
|
90
101
|
|
|
91
102
|
def add_column(self,
|
|
@@ -98,6 +109,7 @@ class Table:
|
|
|
98
109
|
fk_schema: str | None = None,
|
|
99
110
|
fk_table: str | None = None,
|
|
100
111
|
fk_column: str | None = None) -> Column:
|
|
112
|
+
'''Adds a column to the table and returns it.'''
|
|
101
113
|
column = Column(name=name,
|
|
102
114
|
column_type=column_type,
|
|
103
115
|
numeric_precision=numeric_precision,
|
|
@@ -139,6 +151,7 @@ class Table:
|
|
|
139
151
|
return f'{indent}Table(name=\'{self.name}\', columns=[{columns}], unique_constraints=[{unique_constraints_str}])'
|
|
140
152
|
|
|
141
153
|
def to_dict(self) -> dict:
|
|
154
|
+
'''Converts the Table to a dictionary.'''
|
|
142
155
|
return {
|
|
143
156
|
'name': self.name,
|
|
144
157
|
'unique_constraints': [uc.to_dict() for uc in self.unique_constraints],
|
|
@@ -147,6 +160,7 @@ class Table:
|
|
|
147
160
|
|
|
148
161
|
@classmethod
|
|
149
162
|
def from_dict(cls, data: dict) -> 'Table':
|
|
163
|
+
'''Creates a Table from a dictionary.'''
|
|
150
164
|
table = cls(name=data['name'])
|
|
151
165
|
# Unique constraints first (so Column.is_pk works immediately on repr, etc.)
|
|
152
166
|
for uc_data in data.get('unique_constraints', []):
|
|
@@ -163,6 +177,8 @@ class Table:
|
|
|
163
177
|
# region Schema
|
|
164
178
|
@dataclass
|
|
165
179
|
class Schema:
|
|
180
|
+
'''A database schema, with tables and functions.'''
|
|
181
|
+
|
|
166
182
|
name: str
|
|
167
183
|
_tables: dict[str, Table] = field(default_factory=dict)
|
|
168
184
|
functions: set[str] = field(default_factory=set)
|
|
@@ -198,6 +214,7 @@ class Schema:
|
|
|
198
214
|
return f'{indent}Schema(name=\'{self.name}\', tables=[\n{tables}\n{indent}])'
|
|
199
215
|
|
|
200
216
|
def to_dict(self) -> dict:
|
|
217
|
+
'''Converts the Schema to a dictionary.'''
|
|
201
218
|
return {
|
|
202
219
|
'name': self.name,
|
|
203
220
|
'tables': {name: tbl.to_dict() for name, tbl in self._tables.items()},
|
|
@@ -205,6 +222,7 @@ class Schema:
|
|
|
205
222
|
|
|
206
223
|
@classmethod
|
|
207
224
|
def from_dict(cls, data: dict) -> 'Schema':
|
|
225
|
+
'''Creates a Schema from a dictionary.'''
|
|
208
226
|
schema = cls(name=data['name'])
|
|
209
227
|
for _, tbl_data in (data.get('tables') or {}).items():
|
|
210
228
|
tbl = Table.from_dict(tbl_data)
|
|
@@ -215,6 +233,8 @@ class Schema:
|
|
|
215
233
|
# region Catalog
|
|
216
234
|
@dataclass
|
|
217
235
|
class Catalog:
|
|
236
|
+
'''A database catalog, with schemas, tables, and columns.'''
|
|
237
|
+
|
|
218
238
|
_schemas: dict[str, Schema] = field(default_factory=dict)
|
|
219
239
|
|
|
220
240
|
def __getitem__(self, schema_name: str) -> Schema:
|
|
@@ -295,6 +315,7 @@ class Catalog:
|
|
|
295
315
|
|
|
296
316
|
|
|
297
317
|
def to_dict(self) -> dict:
|
|
318
|
+
'''Converts the Catalog to a dictionary.'''
|
|
298
319
|
return {
|
|
299
320
|
'version': 1,
|
|
300
321
|
'schemas': {name: sch.to_dict() for name, sch in self._schemas.items()},
|
|
@@ -302,6 +323,7 @@ class Catalog:
|
|
|
302
323
|
|
|
303
324
|
@classmethod
|
|
304
325
|
def from_dict(cls, data: dict) -> 'Catalog':
|
|
326
|
+
'''Creates a Catalog from a dictionary.'''
|
|
305
327
|
cat = cls()
|
|
306
328
|
for _, sch_data in (data.get('schemas') or {}).items():
|
|
307
329
|
sch = Schema.from_dict(sch_data)
|
|
@@ -310,19 +332,23 @@ class Catalog:
|
|
|
310
332
|
|
|
311
333
|
# String-based JSON (handy for DB/blob storage)
|
|
312
334
|
def to_json(self, *, indent: int | None = 2) -> str:
|
|
335
|
+
'''Converts the Catalog to a JSON string.'''
|
|
313
336
|
return json.dumps(self.to_dict(), indent=indent)
|
|
314
337
|
|
|
315
338
|
@classmethod
|
|
316
339
|
def from_json(cls, s: str) -> 'Catalog':
|
|
340
|
+
'''Creates a Catalog from a JSON string.'''
|
|
317
341
|
return cls.from_dict(json.loads(s))
|
|
318
342
|
|
|
319
343
|
# Convenience file helpers
|
|
320
344
|
def save_json(self, path: str, *, indent: int | None = 2) -> None:
|
|
345
|
+
'''Saves the Catalog to a JSON file.'''
|
|
321
346
|
with open(path, 'w', encoding='utf-8') as f:
|
|
322
347
|
json.dump(self.to_dict(), f, indent=indent)
|
|
323
348
|
|
|
324
349
|
@classmethod
|
|
325
350
|
def load_json(cls, path: str) -> 'Catalog':
|
|
351
|
+
'''Loads a Catalog from a JSON file.'''
|
|
326
352
|
with open(path, 'r', encoding='utf-8') as f:
|
|
327
353
|
data = json.load(f)
|
|
328
354
|
return cls.from_dict(data)
|
|
@@ -1,13 +1,17 @@
|
|
|
1
|
+
'''SQL error detectors.'''
|
|
2
|
+
|
|
1
3
|
from .. import catalog
|
|
2
4
|
from ..query import Query
|
|
3
|
-
from ..sql_errors import SqlErrors
|
|
4
5
|
from .base import BaseDetector, DetectedError
|
|
6
|
+
|
|
7
|
+
# exported detectors
|
|
5
8
|
from .syntax import SyntaxErrorDetector
|
|
6
9
|
from .semantic import SemanticErrorDetector
|
|
7
10
|
from .logical import LogicalErrorDetector
|
|
8
11
|
from .complications import ComplicationDetector
|
|
9
12
|
|
|
10
13
|
class Detector:
|
|
14
|
+
'''Manages and runs SQL error detectors on a query.'''
|
|
11
15
|
def __init__(self,
|
|
12
16
|
query: str,
|
|
13
17
|
*,
|
|
@@ -1,13 +1,16 @@
|
|
|
1
|
+
'''Base classes for SQL error detectors.'''
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from dataclasses import dataclass, field
|
|
3
5
|
from typing import Any, Callable
|
|
4
6
|
|
|
5
7
|
from ..sql_errors import SqlErrors
|
|
6
8
|
from ..query import Query
|
|
7
|
-
from ..catalog import Catalog
|
|
8
9
|
|
|
9
10
|
@dataclass(repr=False)
|
|
10
11
|
class DetectedError:
|
|
12
|
+
'''Represents a detected SQL error with its type and associated data.'''
|
|
13
|
+
|
|
11
14
|
error: SqlErrors
|
|
12
15
|
data: tuple[Any, ...] = field(default_factory=tuple)
|
|
13
16
|
|
|
@@ -23,6 +26,8 @@ class DetectedError:
|
|
|
23
26
|
return hash((self.error, self.data))
|
|
24
27
|
|
|
25
28
|
class BaseDetector(ABC):
|
|
29
|
+
'''Abstract base class for SQL error detectors.'''
|
|
30
|
+
|
|
26
31
|
def __init__(self, *,
|
|
27
32
|
query: Query,
|
|
28
33
|
solutions: list[Query] = [],
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
'''Detector for complications in SQL queries.'''
|
|
2
|
+
|
|
1
3
|
import difflib
|
|
2
4
|
import re
|
|
3
5
|
import sqlparse
|
|
@@ -12,6 +14,8 @@ from ..catalog import Catalog
|
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class ComplicationDetector(BaseDetector):
|
|
17
|
+
'''Detector for complications in SQL queries.'''
|
|
18
|
+
|
|
15
19
|
def __init__(self,
|
|
16
20
|
*,
|
|
17
21
|
query: Query,
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
'''Detector for logical errors in SQL queries.'''
|
|
2
|
+
|
|
1
3
|
import difflib
|
|
2
4
|
import re
|
|
3
5
|
import sqlparse
|
|
@@ -11,6 +13,7 @@ from ..catalog import Catalog
|
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class LogicalErrorDetector(BaseDetector):
|
|
16
|
+
'''Detector for logical errors in SQL queries.'''
|
|
14
17
|
def __init__(self,
|
|
15
18
|
*,
|
|
16
19
|
query: Query,
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
'''Detector for semantic errors in SQL queries.'''
|
|
2
|
+
|
|
1
3
|
import difflib
|
|
2
4
|
import re
|
|
3
5
|
import sqlparse
|
|
@@ -14,6 +16,8 @@ from ..sql_errors import SqlErrors
|
|
|
14
16
|
from ..catalog import Catalog
|
|
15
17
|
|
|
16
18
|
class SemanticErrorDetector(BaseDetector):
|
|
19
|
+
'''Detector for semantic errors in SQL queries.'''
|
|
20
|
+
|
|
17
21
|
def __init__(self,
|
|
18
22
|
*,
|
|
19
23
|
query: Query,
|
|
@@ -1,18 +1,21 @@
|
|
|
1
|
+
'''Detector for syntax errors in SQL queries.'''
|
|
2
|
+
|
|
1
3
|
import difflib
|
|
2
4
|
import re
|
|
3
5
|
import sqlparse
|
|
4
6
|
from sqlglot import exp
|
|
5
|
-
from typing import
|
|
7
|
+
from typing import Callable
|
|
6
8
|
from copy import deepcopy
|
|
7
9
|
|
|
8
10
|
from .base import BaseDetector, DetectedError
|
|
9
|
-
from ..query import Query
|
|
11
|
+
from ..query import Query
|
|
10
12
|
from ..sql_errors import SqlErrors
|
|
11
|
-
from ..catalog import Catalog
|
|
12
13
|
from .. import util
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class SyntaxErrorDetector(BaseDetector):
|
|
17
|
+
'''Detector for syntax errors in SQL queries.'''
|
|
18
|
+
|
|
16
19
|
def __init__(self,
|
|
17
20
|
*,
|
|
18
21
|
query: Query,
|
{sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/util.py
RENAMED
|
@@ -1,18 +1,23 @@
|
|
|
1
|
+
'''Utility functions for SQL query processing.'''
|
|
2
|
+
|
|
1
3
|
import sqlparse
|
|
2
4
|
from sqlparse.tokens import Whitespace, Newline
|
|
3
5
|
from sqlglot.optimizer.normalize import normalize
|
|
4
6
|
from sqlglot import exp
|
|
5
7
|
|
|
6
8
|
def strip_ws(tokens: list[sqlparse.sql.Token]) -> list[sqlparse.sql.Token]:
|
|
9
|
+
'''Remove whitespace and newline tokens from a list of tokens.'''
|
|
7
10
|
return [t for t in tokens if t.ttype not in (Whitespace, Newline)]
|
|
8
11
|
|
|
9
12
|
def remove_parentheses(sql: str) -> str:
|
|
13
|
+
'''Remove outer parentheses from a SQL string.'''
|
|
10
14
|
sql = sql.strip()
|
|
11
15
|
while sql.startswith('(') and sql.endswith(')'):
|
|
12
16
|
sql = sql[1:-1].strip()
|
|
13
17
|
return sql
|
|
14
18
|
|
|
15
19
|
def extract_DNF(expr) -> list[exp.Expression]:
|
|
20
|
+
'''Given a boolean expression, extract its Disjunctive Normal Form (DNF)'''
|
|
16
21
|
dnf_expr = normalize(expr, dnf=True)
|
|
17
22
|
|
|
18
23
|
if not isinstance(dnf_expr, exp.Or):
|
|
@@ -22,6 +27,7 @@ def extract_DNF(expr) -> list[exp.Expression]:
|
|
|
22
27
|
return list(disjuncts)
|
|
23
28
|
|
|
24
29
|
def extract_function_name(func_expr: exp.Func) -> str:
|
|
30
|
+
'''Extract the function name from a function expression.'''
|
|
25
31
|
if isinstance(func_expr, exp.Anonymous):
|
|
26
32
|
return func_expr.name.upper()
|
|
27
33
|
return func_expr.__class__.__name__.lower()
|
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
.. dav-tools documentation master file, created by
|
|
2
|
-
sphinx-quickstart on Sun Jul 16 15:00:51 2023.
|
|
3
|
-
You can adapt this file completely to your liking, but it should at least
|
|
4
|
-
contain the root `toctree` directive.
|
|
5
|
-
|
|
6
|
-
Welcome to sql_error_categorizer's documentation!
|
|
7
|
-
=====================================
|
|
8
|
-
This project analyses SQL statements to highlight possible **misconceptions**
|
|
9
|
-
(common mistakes or misunderstandings). The detection engine tokenises the input
|
|
10
|
-
query and applies a set of rules. When a rule matches, it reports the type of
|
|
11
|
-
misconception together with the token or fragment that triggered it.
|
|
12
|
-
|
|
13
|
-
The logic is implemented in
|
|
14
|
-
`sql_query_analyzer/utils/misconception_detector.py` and the available
|
|
15
|
-
misconception identifiers are listed in
|
|
16
|
-
`sql_query_analyzer/utils/misconceptions.py`.
|
|
17
|
-
|
|
18
|
-
Below you will find a short explanation that anyone can follow, followed by a
|
|
19
|
-
section with technical details for developers.
|
|
20
|
-
|
|
21
|
-
Contents
|
|
22
|
-
========
|
|
23
|
-
|
|
24
|
-
.. toctree::
|
|
25
|
-
:maxdepth: 4
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
Indices and tables
|
|
29
|
-
==================
|
|
30
|
-
|
|
31
|
-
* :ref:`genindex`
|
|
32
|
-
* :ref:`modindex`
|
|
33
|
-
* :ref:`search`
|
|
34
|
-
|
|
35
|
-
Installation
|
|
36
|
-
============
|
|
37
|
-
``$ pip install sql_error_categorizer``
|
|
38
|
-
|
|
39
|
-
What is checked right now?
|
|
40
|
-
==========================
|
|
41
|
-
|
|
42
|
-
At the moment the tool recognises the following issues:
|
|
43
|
-
|
|
44
|
-
- Missing `FROM` clause in a `SELECT` statement.
|
|
45
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_OMITTING_THE_FROM_CLAUSE`
|
|
46
|
-
- Comparing a value with `NULL` using operators like `=` or `!=`.
|
|
47
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_COMPARISON_WITH_NULL`
|
|
48
|
-
- Omitting the final semicolon or adding extra semicolons.
|
|
49
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_OMITTING_THE_SEMICOLON`
|
|
50
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_ADDITIONAL_SEMICOLON`
|
|
51
|
-
- Ambiguous column references when multiple tables share the same column name.
|
|
52
|
-
- `SYN_1_AMBIGUOUS_DATABASE_OBJECT_OMITTING_CORRELATION_NAMES`
|
|
53
|
-
- Duplicate `WHERE` clauses or repeated `FROM`, `GROUP`, `HAVING` or `ORDER` clauses.
|
|
54
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_USING_WHERE_TWICE`
|
|
55
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_DUPLICATE_CLAUSE`
|
|
56
|
-
- Using a column alias that has not been defined.
|
|
57
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_USING_AN_UNDEFINED_CORRELATION_NAME`
|
|
58
|
-
- Leaving out commas between expressions in the `SELECT` list.
|
|
59
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_OMITTING_COMMAS`
|
|
60
|
-
- Mismatched parentheses, brackets or braces.
|
|
61
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_CURLY_SQUARE_OR_UNMATCHED_BRACKETS`
|
|
62
|
-
- Non-standard operators such as `==`, `&&` or `||`.
|
|
63
|
-
- `SYN_6_COMMON_SYNTAX_ERROR_NONSTANDARD_OPERATORS`
|
|
64
|
-
- Unqualified column names that exist in more than one table.
|
|
65
|
-
- `SYN_1_AMBIGUOUS_DATABASE_OBJECT_AMBIGUOUS_COLUMN`
|
|
66
|
-
- References to tables or columns that do not exist in the provided catalogue.
|
|
67
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_COLUMN`
|
|
68
|
-
- Invalid schema names.
|
|
69
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_INVALID_SCHEMA_NAME`
|
|
70
|
-
- Text values without single quotes or with double quotes instead.
|
|
71
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_OMITTING_QUOTES_AROUND_CHARACTER_DATA`
|
|
72
|
-
- Unknown functions, parameters or general identifiers.
|
|
73
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_FUNCTION`
|
|
74
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_PARAMETER`
|
|
75
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_OBJECT`
|
|
76
|
-
- Misspellings or use of synonyms for table/column names.
|
|
77
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_MISSPELLINGS`
|
|
78
|
-
- `SYN_2_UNDEFINED_DATABASE_OBJECT_SYNONYMS`
|
|
79
|
-
|
|
80
|
-
Other misconception categories defined in `misconceptions.py` are placeholders
|
|
81
|
-
for future development.
|
|
82
|
-
|
|
83
|
-
Technical details
|
|
84
|
-
=================
|
|
85
|
-
|
|
86
|
-
Each detection rule scans the tokenised query and returns a list of
|
|
87
|
-
`(Misconceptions, token)` tuples. The list below summarises how each rule works
|
|
88
|
-
and where it is implemented.
|
|
89
|
-
|
|
90
|
-
- SYN_6_COMMON_SYNTAX_ERROR_OMITTING_THE_FROM_CLAUSE
|
|
91
|
-
Lines [81-84] of `misconception_detector.py` check if a SELECT statement lacks a
|
|
92
|
-
FROM clause. If so, the token `'FROM'` is reported.
|
|
93
|
-
|
|
94
|
-
- SYN_6_COMMON_SYNTAX_ERROR_COMPARISON_WITH_NULL
|
|
95
|
-
Lines [85-89] scan tokens for comparison operators followed by `NULL`. If found,
|
|
96
|
-
the token `'NULL'` is returned.
|
|
97
|
-
|
|
98
|
-
- SYN_6_COMMON_SYNTAX_ERROR_OMITTING_THE_SEMICOLON
|
|
99
|
-
Lines [91-93] verify that the original query text ends with a semicolon. Missing
|
|
100
|
-
termination results in this misconception.
|
|
101
|
-
|
|
102
|
-
- SYN_6_COMMON_SYNTAX_ERROR_ADDITIONAL_SEMICOLON
|
|
103
|
-
Lines [95-97] detect more than one semicolon in the query, flagging an
|
|
104
|
-
additional semicolon error.
|
|
105
|
-
|
|
106
|
-
- SYN_1_AMBIGUOUS_DATABASE_OBJECT_OMITTING_CORRELATION_NAMES
|
|
107
|
-
Lines [99-109] evaluate column names appearing in multiple tables without a
|
|
108
|
-
correlation alias. The offending column name is reported when ambiguity arises.
|
|
109
|
-
|
|
110
|
-
- SYN_6_COMMON_SYNTAX_ERROR_USING_WHERE_TWICE
|
|
111
|
-
Lines [112-130] count top-level WHERE clauses. If more than one occurs, the
|
|
112
|
-
misconception is raised with the token `'WHERE'`.
|
|
113
|
-
|
|
114
|
-
- SYN_6_COMMON_SYNTAX_ERROR_DUPLICATE_CLAUSE
|
|
115
|
-
The same loop also counts FROM, GROUP, HAVING, and ORDER clauses. When a clause
|
|
116
|
-
appears twice, its name is used as the token (lines 112-130).
|
|
117
|
-
|
|
118
|
-
- SYN_6_COMMON_SYNTAX_ERROR_USING_AN_UNDEFINED_CORRELATION_NAME
|
|
119
|
-
Lines [132-136] check for `alias.column` references where the alias was never
|
|
120
|
-
introduced. The unknown alias is returned.
|
|
121
|
-
|
|
122
|
-
- SYN_6_COMMON_SYNTAX_ERROR_OMITTING_COMMAS
|
|
123
|
-
Lines [138-155] track the SELECT list, counting identifiers and commas. If more
|
|
124
|
-
than one column appears without a comma, the comma token is reported.
|
|
125
|
-
|
|
126
|
-
- SYN_6_COMMON_SYNTAX_ERROR_CURLY_SQUARE_OR_UNMATCHED_BRACKETS
|
|
127
|
-
Lines [157-172] parse the raw query string for mismatched parentheses or square/
|
|
128
|
-
curly brackets. The unmatched character is returned.
|
|
129
|
-
|
|
130
|
-
- SYN_6_COMMON_SYNTAX_ERROR_NONSTANDARD_OPERATORS
|
|
131
|
-
Lines [174-178] search for operators `==`, `&&` or `||` in the tokenised query.
|
|
132
|
-
The offending operator is returned when found.
|
|
133
|
-
|
|
134
|
-
- SYN_1_AMBIGUOUS_DATABASE_OBJECT_AMBIGUOUS_COLUMN
|
|
135
|
-
Lines [181-195] look for duplicate column names in the SELECT clause where no
|
|
136
|
-
qualifier follows. The repeated column name triggers this misconception.
|
|
137
|
-
|
|
138
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_COLUMN
|
|
139
|
-
Lines [198-211] validate CTE and subquery aliases against cataloged columns.
|
|
140
|
-
Unknown columns referenced through an alias produce this error. The same rule
|
|
141
|
-
is reused later for general identifiers (lines 237-248).
|
|
142
|
-
|
|
143
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_INVALID_SCHEMA_NAME
|
|
144
|
-
Lines [213-219] identify identifiers containing a schema prefix. If the schema
|
|
145
|
-
is not listed in the catalog, it is reported as invalid.
|
|
146
|
-
|
|
147
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_OMITTING_QUOTES_AROUND_CHARACTER_DATA
|
|
148
|
-
Lines [230-235] look for double-quoted identifiers that are not table or column
|
|
149
|
-
names. Additionally, lines 249-251 flag bare alphabetic tokens treated as string
|
|
150
|
-
literals without quotes.
|
|
151
|
-
|
|
152
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_FUNCTION
|
|
153
|
-
Lines [237-242] detect alphabetic tokens followed by `(` that are not known
|
|
154
|
-
objects. These are assumed to be undefined functions.
|
|
155
|
-
|
|
156
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_PARAMETER
|
|
157
|
-
Lines [243-244] catch identifiers starting with `:`, `@`, or `?`, reporting them as
|
|
158
|
-
undefined parameters.
|
|
159
|
-
|
|
160
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_UNDEFINED_OBJECT
|
|
161
|
-
Lines [249-252] handle remaining unknown identifiers that do not match previous
|
|
162
|
-
cases, labelling them as undefined objects.
|
|
163
|
-
|
|
164
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_MISSPELLINGS
|
|
165
|
-
Lines [254-264] compare every alphabetic token not found in the catalog against
|
|
166
|
-
known names using `difflib.get_close_matches`. If a near match exists, the token
|
|
167
|
-
is flagged as a misspelling.
|
|
168
|
-
|
|
169
|
-
- SYN_2_UNDEFINED_DATABASE_OBJECT_SYNONYMS
|
|
170
|
-
Lines [266-269] check tokens against a `synonyms` list in the catalog and report
|
|
171
|
-
usage when present.
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
Other misconception types listed in `misconceptions.py` have no detection logic yet and are reserved for future work.
|
|
175
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/query/query.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/src/sql_error_categorizer/util.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sql_error_categorizer-0.1.0 → sql_error_categorizer-0.1.2}/tests/query/test_typechecking.py
RENAMED
|
File without changes
|
|
File without changes
|