sql-metadata 2.19.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,20 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_metadata
3
- Version: 2.19.0
4
- Summary: Uses tokenized query returned by python-sqlparse and generates query metadata
3
+ Version: 3.0.0
4
+ Summary: Uses sqlglot to parse SQL queries and extract metadata
5
5
  License: MIT
6
6
  License-File: LICENSE
7
7
  Author: Maciej Brencz
8
8
  Author-email: maciej.brencz@gmail.com
9
- Requires-Python: >=3.9,<4.0
9
+ Requires-Python: >=3.10,<4.0
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.9
13
12
  Classifier: Programming Language :: Python :: 3.10
14
13
  Classifier: Programming Language :: Python :: 3.11
15
14
  Classifier: Programming Language :: Python :: 3.12
16
15
  Classifier: Programming Language :: Python :: 3.13
17
16
  Classifier: Programming Language :: Python :: 3.14
18
- Requires-Dist: sqlparse (>=0.4.1,<0.6.0)
17
+ Requires-Dist: sqlglot (>=30.0.3,<31.0.0)
19
18
  Project-URL: Homepage, https://github.com/macbre/sql-metadata
20
19
  Project-URL: Repository, https://github.com/macbre/sql-metadata
21
20
  Description-Content-Type: text/markdown
@@ -25,11 +24,11 @@ Description-Content-Type: text/markdown
25
24
  [![PyPI](https://img.shields.io/pypi/v/sql_metadata.svg)](https://pypi.python.org/pypi/sql_metadata)
26
25
  [![Tests](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml/badge.svg)](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml)
27
26
  [![Coverage Status](https://coveralls.io/repos/github/macbre/sql-metadata/badge.svg?branch=master&1)](https://coveralls.io/github/macbre/sql-metadata?branch=master)
28
- <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
27
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
29
28
  [![Maintenance](https://img.shields.io/badge/maintained%3F-yes-green.svg)](https://github.com/macbre/sql-metadata/graphs/commit-activity)
30
29
  [![Downloads](https://pepy.tech/badge/sql-metadata/month)](https://pepy.tech/project/sql-metadata)
31
30
 
32
- Uses tokenized query returned by [`python-sqlparse`](https://github.com/andialbrecht/sqlparse) and generates query metadata.
31
+ Uses [`sqlglot`](https://github.com/tobymao/sqlglot) to parse SQL queries and extract metadata.
33
32
 
34
33
  **Extracts column names and tables** used by the query.
35
34
  Automatically conduct **column alias resolution**, **sub queries aliases resolution** as well as **tables aliases resolving**.
@@ -80,7 +79,7 @@ parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b
80
79
 
81
80
  # note that aliases are auto-resolved
82
81
  parser.columns
83
- # ['product_a.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
82
+ # ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
84
83
 
85
84
  # note that you can also extract columns with their place in the query
86
85
  # which will return dict with lists divided into select, where, order_by, group_by, join, insert and update
@@ -116,6 +115,71 @@ assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
116
115
  'select': ['a', 'b', 'c', 'u', 'd']}
117
116
  ```
118
117
 
118
+ ### Extracting output column names
119
+
120
+ ```python
121
+ from sql_metadata import Parser
122
+
123
+ # output_columns returns the ordered list of names that the SELECT would produce,
124
+ # preserving aliases (unlike `columns`, which resolves aliases back to real columns)
125
+ Parser("SELECT a, b AS c FROM t").output_columns
126
+ # ['a', 'c']
127
+
128
+ # works with function calls, window functions, computed aliases
129
+ Parser("""SELECT
130
+ id,
131
+ UPPER(email) AS email_upper,
132
+ ROW_NUMBER() OVER (PARTITION BY country ORDER BY created_at) AS rn
133
+ FROM users""").output_columns
134
+ # ['id', 'email_upper', 'rn']
135
+
136
+ # SELECT * stays as '*'
137
+ Parser("SELECT * FROM t").output_columns
138
+ # ['*']
139
+
140
+ # non-SELECT queries return an empty list
141
+ Parser("CREATE TABLE t (id INT)").output_columns
142
+ # []
143
+ ```
144
+
145
+ ### Detecting query type
146
+
147
+ ```python
148
+ from sql_metadata import Parser, QueryType
149
+
150
+ Parser("SELECT * FROM foo").query_type
151
+ # <QueryType.SELECT: 'SELECT'>
152
+
153
+ # QueryType is a str-enum, so it compares equal to both strings and enum values
154
+ Parser("INSERT INTO foo VALUES (1)").query_type == QueryType.INSERT # True
155
+ Parser("INSERT INTO foo VALUES (1)").query_type == "INSERT" # True
156
+
157
+ # REPLACE INTO is reported distinctly from INSERT
158
+ Parser("REPLACE INTO foo VALUES (1)").query_type
159
+ # <QueryType.REPLACE: 'REPLACE'>
160
+
161
+ # Supported types: SELECT, INSERT, REPLACE, UPDATE, DELETE,
162
+ # CREATE, ALTER, DROP, TRUNCATE, MERGE
163
+ ```
164
+
165
+ ### Handling invalid queries
166
+
167
+ ```python
168
+ from sql_metadata import Parser, InvalidQueryDefinition
169
+
170
+ # structurally invalid SQL raises `InvalidQueryDefinition` (a subclass of
171
+ # `ValueError`, so existing `except ValueError` handlers keep working)
172
+ try:
173
+ Parser("").query_type
174
+ except InvalidQueryDefinition as exc:
175
+ print(exc) # "Empty queries are not supported!"
176
+
177
+ try:
178
+ Parser("THIS IS NOT SQL").query_type
179
+ except InvalidQueryDefinition as exc:
180
+ print(exc) # "Not supported query type!"
181
+ ```
182
+
119
183
  ### Extracting tables from query
120
184
 
121
185
  ```python
@@ -203,9 +267,10 @@ parser.with_names
203
267
  # ["database1.tableFromWith", "test"]
204
268
 
205
269
  # get definition of with queries
270
+ # (sqlglot normalises keyword casing and spacing when rendering the body SQL)
206
271
  parser.with_queries
207
- # {"database1.tableFromWith": "SELECT aa.* FROM table3 as aa left join table4 on aa.col1=table4.col2"
208
- # "test": "SELECT * from table3"}
272
+ # {"database1.tableFromWith": "SELECT aa.* FROM table3 AS aa LEFT JOIN table4 ON aa.col1 = table4.col2",
273
+ # "test": "SELECT * FROM table3"}
209
274
 
210
275
  # note that names of with statements do not appear in tables
211
276
  parser.tables
@@ -227,9 +292,10 @@ ON a.task_id = b.task_id;
227
292
  )
228
293
 
229
294
  # get sub-queries dictionary
295
+ # (sqlglot normalises keyword casing — implicit table aliases become explicit `AS`)
230
296
  parser.subqueries
231
- # {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
232
- # "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}
297
+ # {"a": "SELECT std.task_id FROM some_task_detail AS std WHERE std.STATUS = 1",
298
+ # "b": "SELECT st.task_id FROM some_task AS st WHERE task_type_id = 80"}
233
299
 
234
300
 
235
301
  # get names/ aliases of sub-queries / derived tables
@@ -272,30 +338,23 @@ parser.comments
272
338
 
273
339
  See `test/test_normalization.py` file for more examples of a bit more complex queries.
274
340
 
275
- ## Migrating from `sql_metadata` 1.x
276
-
277
- `sql_metadata.compat` module has been implemented to make the introduction of sql-metadata v2.0 smoother.
341
+ ## Migrating from `sql_metadata` 1.x / 2.x
278
342
 
279
- You can use it by simply changing the imports in your code from:
343
+ The `sql_metadata.compat` module (previously provided for v1 → v2 migration) has been **removed in v3**. Port your code to the class-based `Parser` API shown in the examples above:
280
344
 
281
- ```python
282
- from sql_metadata import get_query_columns, get_query_tables
283
- ```
284
-
285
- into:
286
-
287
- ```python
288
- from sql_metadata.compat import get_query_columns, get_query_tables
289
- ```
345
+ | Old v1 helper | v3 replacement |
346
+ |-------------------------------|-------------------------------------------|
347
+ | `generalize_sql(sql)` | `Parser(sql).generalize` |
348
+ | `get_query_columns(sql)` | `Parser(sql).columns` |
349
+ | `get_query_tables(sql)` | `Parser(sql).tables` |
350
+ | `get_query_limit_and_offset(sql)` | `Parser(sql).limit_and_offset` |
351
+ | `get_query_tokens(sql)` | `Parser(sql).tokens` |
352
+ | `preprocess_query(sql)` | `Parser(sql).query` |
290
353
 
291
- The following functions from the old API are available in the `sql_metadata.compat` module:
354
+ For v2 → v3 users, the public `Parser` API is unchanged except:
292
355
 
293
- * `generalize_sql`
294
- * `get_query_columns` (since #131 columns aliases ARE NOT returned by this function)
295
- * `get_query_limit_and_offset`
296
- * `get_query_tables`
297
- * `get_query_tokens`
298
- * `preprocess_query`
356
+ * The parsing engine is now [sqlglot](https://github.com/tobymao/sqlglot), which may normalise the *casing* and *spacing* of rendered CTE/subquery bodies (see the `with_queries` / `subqueries` examples above).
357
+ * Malformed SQL now raises `InvalidQueryDefinition` (a `ValueError` subclass) instead of a plain `ValueError` — existing `except ValueError:` handlers continue to work.
299
358
 
300
359
  ## Authors and contributors
301
360
 
@@ -3,11 +3,11 @@
3
3
  [![PyPI](https://img.shields.io/pypi/v/sql_metadata.svg)](https://pypi.python.org/pypi/sql_metadata)
4
4
  [![Tests](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml/badge.svg)](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml)
5
5
  [![Coverage Status](https://coveralls.io/repos/github/macbre/sql-metadata/badge.svg?branch=master&1)](https://coveralls.io/github/macbre/sql-metadata?branch=master)
6
- <a href="https://github.com/psf/black"><img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
6
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
7
7
  [![Maintenance](https://img.shields.io/badge/maintained%3F-yes-green.svg)](https://github.com/macbre/sql-metadata/graphs/commit-activity)
8
8
  [![Downloads](https://pepy.tech/badge/sql-metadata/month)](https://pepy.tech/project/sql-metadata)
9
9
 
10
- Uses tokenized query returned by [`python-sqlparse`](https://github.com/andialbrecht/sqlparse) and generates query metadata.
10
+ Uses [`sqlglot`](https://github.com/tobymao/sqlglot) to parse SQL queries and extract metadata.
11
11
 
12
12
  **Extracts column names and tables** used by the query.
13
13
  Automatically conduct **column alias resolution**, **sub queries aliases resolution** as well as **tables aliases resolving**.
@@ -58,7 +58,7 @@ parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b
58
58
 
59
59
  # note that aliases are auto-resolved
60
60
  parser.columns
61
- # ['product_a.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
61
+ # ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
62
62
 
63
63
  # note that you can also extract columns with their place in the query
64
64
  # which will return dict with lists divided into select, where, order_by, group_by, join, insert and update
@@ -94,6 +94,71 @@ assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
94
94
  'select': ['a', 'b', 'c', 'u', 'd']}
95
95
  ```
96
96
 
97
+ ### Extracting output column names
98
+
99
+ ```python
100
+ from sql_metadata import Parser
101
+
102
+ # output_columns returns the ordered list of names that the SELECT would produce,
103
+ # preserving aliases (unlike `columns`, which resolves aliases back to real columns)
104
+ Parser("SELECT a, b AS c FROM t").output_columns
105
+ # ['a', 'c']
106
+
107
+ # works with function calls, window functions, computed aliases
108
+ Parser("""SELECT
109
+ id,
110
+ UPPER(email) AS email_upper,
111
+ ROW_NUMBER() OVER (PARTITION BY country ORDER BY created_at) AS rn
112
+ FROM users""").output_columns
113
+ # ['id', 'email_upper', 'rn']
114
+
115
+ # SELECT * stays as '*'
116
+ Parser("SELECT * FROM t").output_columns
117
+ # ['*']
118
+
119
+ # non-SELECT queries return an empty list
120
+ Parser("CREATE TABLE t (id INT)").output_columns
121
+ # []
122
+ ```
123
+
124
+ ### Detecting query type
125
+
126
+ ```python
127
+ from sql_metadata import Parser, QueryType
128
+
129
+ Parser("SELECT * FROM foo").query_type
130
+ # <QueryType.SELECT: 'SELECT'>
131
+
132
+ # QueryType is a str-enum, so it compares equal to both strings and enum values
133
+ Parser("INSERT INTO foo VALUES (1)").query_type == QueryType.INSERT # True
134
+ Parser("INSERT INTO foo VALUES (1)").query_type == "INSERT" # True
135
+
136
+ # REPLACE INTO is reported distinctly from INSERT
137
+ Parser("REPLACE INTO foo VALUES (1)").query_type
138
+ # <QueryType.REPLACE: 'REPLACE'>
139
+
140
+ # Supported types: SELECT, INSERT, REPLACE, UPDATE, DELETE,
141
+ # CREATE, ALTER, DROP, TRUNCATE, MERGE
142
+ ```
143
+
144
+ ### Handling invalid queries
145
+
146
+ ```python
147
+ from sql_metadata import Parser, InvalidQueryDefinition
148
+
149
+ # structurally invalid SQL raises `InvalidQueryDefinition` (a subclass of
150
+ # `ValueError`, so existing `except ValueError` handlers keep working)
151
+ try:
152
+ Parser("").query_type
153
+ except InvalidQueryDefinition as exc:
154
+ print(exc) # "Empty queries are not supported!"
155
+
156
+ try:
157
+ Parser("THIS IS NOT SQL").query_type
158
+ except InvalidQueryDefinition as exc:
159
+ print(exc) # "Not supported query type!"
160
+ ```
161
+
97
162
  ### Extracting tables from query
98
163
 
99
164
  ```python
@@ -181,9 +246,10 @@ parser.with_names
181
246
  # ["database1.tableFromWith", "test"]
182
247
 
183
248
  # get definition of with queries
249
+ # (sqlglot normalises keyword casing and spacing when rendering the body SQL)
184
250
  parser.with_queries
185
- # {"database1.tableFromWith": "SELECT aa.* FROM table3 as aa left join table4 on aa.col1=table4.col2"
186
- # "test": "SELECT * from table3"}
251
+ # {"database1.tableFromWith": "SELECT aa.* FROM table3 AS aa LEFT JOIN table4 ON aa.col1 = table4.col2",
252
+ # "test": "SELECT * FROM table3"}
187
253
 
188
254
  # note that names of with statements do not appear in tables
189
255
  parser.tables
@@ -205,9 +271,10 @@ ON a.task_id = b.task_id;
205
271
  )
206
272
 
207
273
  # get sub-queries dictionary
274
+ # (sqlglot normalises keyword casing — implicit table aliases become explicit `AS`)
208
275
  parser.subqueries
209
- # {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
210
- # "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}
276
+ # {"a": "SELECT std.task_id FROM some_task_detail AS std WHERE std.STATUS = 1",
277
+ # "b": "SELECT st.task_id FROM some_task AS st WHERE task_type_id = 80"}
211
278
 
212
279
 
213
280
  # get names/ aliases of sub-queries / derived tables
@@ -250,30 +317,23 @@ parser.comments
250
317
 
251
318
  See `test/test_normalization.py` file for more examples of a bit more complex queries.
252
319
 
253
- ## Migrating from `sql_metadata` 1.x
254
-
255
- `sql_metadata.compat` module has been implemented to make the introduction of sql-metadata v2.0 smoother.
320
+ ## Migrating from `sql_metadata` 1.x / 2.x
256
321
 
257
- You can use it by simply changing the imports in your code from:
322
+ The `sql_metadata.compat` module (previously provided for v1 → v2 migration) has been **removed in v3**. Port your code to the class-based `Parser` API shown in the examples above:
258
323
 
259
- ```python
260
- from sql_metadata import get_query_columns, get_query_tables
261
- ```
262
-
263
- into:
264
-
265
- ```python
266
- from sql_metadata.compat import get_query_columns, get_query_tables
267
- ```
324
+ | Old v1 helper | v3 replacement |
325
+ |-------------------------------|-------------------------------------------|
326
+ | `generalize_sql(sql)` | `Parser(sql).generalize` |
327
+ | `get_query_columns(sql)` | `Parser(sql).columns` |
328
+ | `get_query_tables(sql)` | `Parser(sql).tables` |
329
+ | `get_query_limit_and_offset(sql)` | `Parser(sql).limit_and_offset` |
330
+ | `get_query_tokens(sql)` | `Parser(sql).tokens` |
331
+ | `preprocess_query(sql)` | `Parser(sql).query` |
268
332
 
269
- The following functions from the old API are available in the `sql_metadata.compat` module:
333
+ For v2 → v3 users, the public `Parser` API is unchanged except:
270
334
 
271
- * `generalize_sql`
272
- * `get_query_columns` (since #131 columns aliases ARE NOT returned by this function)
273
- * `get_query_limit_and_offset`
274
- * `get_query_tables`
275
- * `get_query_tokens`
276
- * `preprocess_query`
335
+ * The parsing engine is now [sqlglot](https://github.com/tobymao/sqlglot), which may normalise the *casing* and *spacing* of rendered CTE/subquery bodies (see the `with_queries` / `subqueries` examples above).
336
+ * Malformed SQL now raises `InvalidQueryDefinition` (a `ValueError` subclass) instead of a plain `ValueError` — existing `except ValueError:` handlers continue to work.
277
337
 
278
338
  ## Authors and contributors
279
339
 
@@ -0,0 +1,54 @@
1
+ [tool.poetry]
2
+ name = "sql_metadata"
3
+ version = "3.0.0"
4
+ license="MIT"
5
+ description = "Uses sqlglot to parse SQL queries and extract metadata"
6
+ authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
7
+ readme = "README.md"
8
+ homepage = "https://github.com/macbre/sql-metadata"
9
+ repository = "https://github.com/macbre/sql-metadata"
10
+
11
+ packages = [
12
+ { include="sql_metadata" }
13
+ ]
14
+
15
+ [tool.poetry.dependencies]
16
+ python = "^3.10"
17
+ sqlglot = "^30.0.3"
18
+
19
+ [tool.poetry.group.dev.dependencies]
20
+ coverage = {extras = ["toml"], version = "^7.13"}
21
+ pytest = "^9.0.3"
22
+ pytest-cov = "^7.1.0"
23
+ ruff = "^0.11"
24
+ mypy = "^1.19"
25
+
26
+ [build-system]
27
+ requires = ["poetry-core>=1.0.0"]
28
+ build-backend = "poetry.core.masonry.api"
29
+
30
+ [tool.ruff]
31
+ line-length = 88
32
+ target-version = "py310"
33
+
34
+ [tool.ruff.lint]
35
+ select = ["E", "F", "W", "C90", "I"]
36
+
37
+ [tool.ruff.lint.mccabe]
38
+ max-complexity = 8
39
+
40
+ [tool.mypy]
41
+ python_version = "3.10"
42
+ warn_return_any = true
43
+ warn_unused_configs = true
44
+ check_untyped_defs = true
45
+ disallow_untyped_defs = true
46
+ disallow_any_generics = true
47
+ ignore_missing_imports = true
48
+
49
+ [tool.coverage.run]
50
+ relative_files = true
51
+
52
+ [tool.coverage.report]
53
+ show_missing = true
54
+ fail_under = 100
@@ -0,0 +1,22 @@
1
+ """Parse SQL queries and extract structural metadata.
2
+
3
+ The ``sql-metadata`` package analyses SQL statements and returns the
4
+ tables, columns, aliases, CTE definitions, subqueries, values, comments,
5
+ and query type they contain. The primary entry point is :class:`Parser`::
6
+
7
+ from sql_metadata import Parser
8
+
9
+ parser = Parser("SELECT id, name FROM users WHERE active = 1")
10
+ print(parser.tables) # ['users']
11
+ print(parser.columns) # ['id', 'name', 'active']
12
+
13
+ Under the hood the library delegates to `sqlglot <https://github.com/tobymao/sqlglot>`_
14
+ for AST construction and tokenization, with custom dialect handling for
15
+ MSSQL, MySQL, Hive/Spark, and TSQL bracket notation.
16
+ """
17
+
18
+ from sql_metadata.exceptions import InvalidQueryDefinition
19
+ from sql_metadata.keywords_lists import QueryType
20
+ from sql_metadata.parser import Parser
21
+
22
+ __all__ = ["InvalidQueryDefinition", "Parser", "QueryType"]
@@ -0,0 +1,117 @@
1
+ """Wrap ``sqlglot.parse()`` to produce an AST from raw SQL strings.
2
+
3
+ Thin orchestrator that composes :class:`~sql_cleaner.SqlCleaner` (raw SQL
4
+ preprocessing) and :class:`~dialect_parser.DialectParser` (dialect
5
+ detection, parsing, quality validation) so that downstream extractors
6
+ always receive a clean ``sqlglot.exp.Expression`` tree (or ``None`` for
7
+ empty input; malformed SQL raises ``ValueError``).
8
+ """
9
+
10
+ from sqlglot import exp
11
+ from sqlglot.dialects.dialect import DialectType
12
+
13
+ from sql_metadata.dialect_parser import DialectParser
14
+ from sql_metadata.sql_cleaner import SqlCleaner
15
+
16
+
17
+ class ASTParser:
18
+ """Lazy wrapper around SQL parsing with dialect auto-detection.
19
+
20
+ Instantiated once per :class:`Parser` with the raw SQL string. The
21
+ actual parsing is deferred until :attr:`ast` is first accessed, at
22
+ which point the SQL is cleaned and parsed through one or more sqlglot
23
+ dialects until a satisfactory AST is obtained.
24
+
25
+ :param sql: Raw SQL query string.
26
+ :type sql: str
27
+ """
28
+
29
+ def __init__(self, sql: str) -> None:
30
+ self._raw_sql = sql
31
+ self._ast: exp.Expression | None = None
32
+ self._dialect: DialectType = None
33
+ self._parsed = False
34
+ self._is_replace = False
35
+ self._cte_name_map: dict[str, str] = {}
36
+
37
+ @property
38
+ def ast(self) -> exp.Expression | None:
39
+ """The sqlglot AST for the query, lazily parsed on first access.
40
+
41
+ :returns: Root AST node, or ``None`` for empty/comment-only queries.
42
+ :rtype: exp.Expression | None
43
+ :raises ValueError: If the SQL is malformed and cannot be parsed.
44
+ """
45
+ if self._parsed:
46
+ return self._ast
47
+ self._parsed = True
48
+ self._ast = self._parse(self._raw_sql)
49
+ return self._ast
50
+
51
+ def _ensure_parsed(self) -> None:
52
+ """Trigger lazy parsing so side-effect fields are populated."""
53
+ _ = self.ast
54
+
55
+ @property
56
+ def dialect(self) -> DialectType:
57
+ """The sqlglot dialect that produced the current AST.
58
+
59
+ Set as a side-effect of :attr:`ast` access. May be ``None``
60
+ (default dialect), a string like ``"mysql"``, or a custom
61
+ :class:`Dialect` subclass such as :class:`HashVarDialect`.
62
+
63
+ :rtype: DialectType
64
+ """
65
+ self._ensure_parsed()
66
+ return self._dialect
67
+
68
+ @property
69
+ def is_replace(self) -> bool:
70
+ """Whether the original query was a ``REPLACE INTO`` statement.
71
+
72
+ ``REPLACE INTO`` is rewritten to ``INSERT INTO`` before parsing
73
+ (sqlglot otherwise produces an opaque ``Command`` node). This
74
+ flag allows :attr:`Parser.query_type` to restore the correct
75
+ :class:`QueryType.REPLACE` value.
76
+
77
+ :rtype: bool
78
+ """
79
+ self._ensure_parsed()
80
+ return self._is_replace
81
+
82
+ @property
83
+ def cte_name_map(self) -> dict[str, str]:
84
+ """Map of placeholder CTE names back to their original qualified form.
85
+
86
+ Keys are underscore-separated placeholders (``db__DOT__name``),
87
+ values are the original dotted names (``db.name``).
88
+
89
+ :rtype: dict[str, str]
90
+ """
91
+ self._ensure_parsed()
92
+ return self._cte_name_map
93
+
94
+ def _parse(self, sql: str) -> exp.Expression | None:
95
+ """Parse *sql* into a sqlglot AST.
96
+
97
+ Delegates preprocessing to :class:`SqlCleaner` and dialect
98
+ detection / parsing to :class:`DialectParser`.
99
+
100
+ :param sql: Raw SQL string (may include comments).
101
+ :type sql: str
102
+ :returns: Root AST node, or ``None`` for empty input.
103
+ :rtype: exp.Expression | None
104
+ :raises ValueError: If the SQL is malformed.
105
+ """
106
+ if not sql or not sql.strip():
107
+ return None
108
+
109
+ result = SqlCleaner.clean(sql)
110
+ if result.sql is None:
111
+ return None
112
+
113
+ self._is_replace = result.is_replace
114
+ self._cte_name_map = result.cte_name_map
115
+
116
+ ast, self._dialect = DialectParser().parse(result.sql)
117
+ return ast