sql-metadata 3.0.0__tar.gz → 3.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/PKG-INFO +2 -2
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/pyproject.toml +6 -6
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/column_extractor.py +6 -2
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/comments.py +9 -2
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/nested_resolver.py +11 -4
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/parser.py +16 -14
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/table_extractor.py +1 -1
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/LICENSE +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/README.md +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/__init__.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/ast_parser.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/dialect_parser.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/exceptions.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/generalizator.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/keywords_lists.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/py.typed +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/query_type_extractor.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/sql_cleaner.py +0 -0
- {sql_metadata-3.0.0 → sql_metadata-3.0.1}/sql_metadata/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_metadata
|
|
3
|
-
Version: 3.0.0
|
|
3
|
+
Version: 3.0.1
|
|
4
4
|
Summary: Uses sqlglot to parse SQL queries and extract metadata
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
-
Requires-Dist: sqlglot (>=30.0
|
|
17
|
+
Requires-Dist: sqlglot (>=30.12.0,<31.0.0)
|
|
18
18
|
Project-URL: Homepage, https://github.com/macbre/sql-metadata
|
|
19
19
|
Project-URL: Repository, https://github.com/macbre/sql-metadata
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "sql_metadata"
|
|
3
|
-
version = "3.0.0"
|
|
3
|
+
version = "3.0.1"
|
|
4
4
|
license="MIT"
|
|
5
5
|
description = "Uses sqlglot to parse SQL queries and extract metadata"
|
|
6
6
|
authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
|
|
@@ -14,14 +14,14 @@ packages = [
|
|
|
14
14
|
|
|
15
15
|
[tool.poetry.dependencies]
|
|
16
16
|
python = "^3.10"
|
|
17
|
-
sqlglot = "^30.0
|
|
17
|
+
sqlglot = "^30.12.0"
|
|
18
18
|
|
|
19
19
|
[tool.poetry.group.dev.dependencies]
|
|
20
|
-
coverage = {extras = ["toml"], version = "^7.
|
|
21
|
-
pytest = "^9.
|
|
20
|
+
coverage = {extras = ["toml"], version = "^7.14"}
|
|
21
|
+
pytest = "^9.1.1"
|
|
22
22
|
pytest-cov = "^7.1.0"
|
|
23
|
-
ruff = "^0.
|
|
24
|
-
mypy = "^1
|
|
23
|
+
ruff = "^0.15"
|
|
24
|
+
mypy = "^2.1"
|
|
25
25
|
|
|
26
26
|
[build-system]
|
|
27
27
|
requires = ["poetry-core>=1.0.0"]
|
|
@@ -199,8 +199,12 @@ class _Collector:
|
|
|
199
199
|
self.alias_names.append(name)
|
|
200
200
|
if clause:
|
|
201
201
|
self.alias_dict.setdefault(clause, UniqueList()).append(name)
|
|
202
|
-
if target is
|
|
203
|
-
|
|
202
|
+
if target is None:
|
|
203
|
+
return
|
|
204
|
+
existing = self.alias_map.get(name, [])
|
|
205
|
+
merged = UniqueList(existing if isinstance(existing, list) else [existing])
|
|
206
|
+
merged.extend(target if isinstance(target, list) else [target])
|
|
207
|
+
self.alias_map[name] = merged if len(merged) > 1 else merged[0]
|
|
204
208
|
|
|
205
209
|
|
|
206
210
|
# ---------------------------------------------------------------------------
|
|
@@ -41,7 +41,11 @@ def _choose_tokenizer(sql: str) -> Tokenizer:
|
|
|
41
41
|
if "#" in sql and not _has_hash_variables(sql):
|
|
42
42
|
from sqlglot.dialects.mysql import MySQL
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
# Pass dialect="mysql" so the cached TokenizerCore (sqlglot >=30.7.0
|
|
45
|
+
# caches per-class) is built with MySQL identifier semantics — without
|
|
46
|
+
# it the cache is primed from the default dialect and later mysql
|
|
47
|
+
# parses misclassify e.g. ``0020_big_table`` as NUMBER + VAR.
|
|
48
|
+
return MySQL.Tokenizer(dialect="mysql")
|
|
45
49
|
return Tokenizer()
|
|
46
50
|
|
|
47
51
|
|
|
@@ -167,7 +171,10 @@ def strip_comments_for_parsing(sql: str) -> str:
|
|
|
167
171
|
else:
|
|
168
172
|
from sqlglot.dialects.mysql import MySQL
|
|
169
173
|
|
|
170
|
-
|
|
174
|
+
# See _choose_tokenizer — the explicit dialect prevents the
|
|
175
|
+
# sqlglot >=30.7.0 TokenizerCore cache from being primed with
|
|
176
|
+
# default-dialect semantics.
|
|
177
|
+
tokenizer = MySQL.Tokenizer(dialect="mysql")
|
|
171
178
|
try:
|
|
172
179
|
tokens = list(tokenizer.tokenize(sql))
|
|
173
180
|
except TokenError:
|
|
@@ -15,6 +15,7 @@ if TYPE_CHECKING:
|
|
|
15
15
|
from sql_metadata.parser import Parser
|
|
16
16
|
|
|
17
17
|
from sqlglot import exp
|
|
18
|
+
from sqlglot.errors import ErrorLevel
|
|
18
19
|
from sqlglot.generator import Generator
|
|
19
20
|
|
|
20
21
|
from sql_metadata.utils import (
|
|
@@ -206,7 +207,7 @@ class NestedResolver:
|
|
|
206
207
|
def extract_cte_names(
|
|
207
208
|
self,
|
|
208
209
|
cte_name_map: dict[str, str],
|
|
209
|
-
) ->
|
|
210
|
+
) -> UniqueList:
|
|
210
211
|
"""Extract CTE names from the AST.
|
|
211
212
|
|
|
212
213
|
Called by :attr:`Parser.with_names`.
|
|
@@ -246,7 +247,7 @@ class NestedResolver:
|
|
|
246
247
|
@staticmethod
|
|
247
248
|
def extract_subqueries(
|
|
248
249
|
ast: exp.Expression,
|
|
249
|
-
) -> tuple[
|
|
250
|
+
) -> tuple[UniqueList, dict[str, str]]:
|
|
250
251
|
"""Extract subquery names and bodies in a single post-order walk.
|
|
251
252
|
|
|
252
253
|
Aliased subqueries keep their alias as the name. Unaliased
|
|
@@ -261,7 +262,7 @@ class NestedResolver:
|
|
|
261
262
|
:returns: ``(names, bodies)`` where *names* is ordered innermost-first,
|
|
262
263
|
e.g. ``(["subquery_1", "sub"], {...})``.
|
|
263
264
|
"""
|
|
264
|
-
names
|
|
265
|
+
names = UniqueList()
|
|
265
266
|
bodies: dict[str, str] = {}
|
|
266
267
|
NestedResolver._walk_subqueries(ast, names, bodies, 0)
|
|
267
268
|
return names, bodies
|
|
@@ -678,7 +679,13 @@ class NestedResolver:
|
|
|
678
679
|
body = node.copy()
|
|
679
680
|
for ident in body.find_all(exp.Identifier):
|
|
680
681
|
ident.set("quoted", False)
|
|
681
|
-
|
|
682
|
+
# IGNORE unsupported-feature warnings: the rendered SQL is only fed
|
|
683
|
+
# back into a sub-Parser and never shown to the user, so warnings
|
|
684
|
+
# about constructs sqlglot can't faithfully re-emit (e.g. T-SQL
|
|
685
|
+
# FOR XML PATH) are noise.
|
|
686
|
+
return _PreservingGenerator(unsupported_level=ErrorLevel.IGNORE).generate(
|
|
687
|
+
body, copy=False
|
|
688
|
+
)
|
|
682
689
|
|
|
683
690
|
@staticmethod
|
|
684
691
|
def _walk_subqueries(
|
|
@@ -63,13 +63,13 @@ class Parser:
|
|
|
63
63
|
self._columns_aliases_dict: dict[str, UniqueList] = {}
|
|
64
64
|
self._output_columns: list[str] = []
|
|
65
65
|
|
|
66
|
-
self._tables:
|
|
66
|
+
self._tables: UniqueList | None = None
|
|
67
67
|
self._table_aliases: dict[str, str] | None = None
|
|
68
68
|
|
|
69
|
-
self._with_names:
|
|
69
|
+
self._with_names: UniqueList | None = None
|
|
70
70
|
self._with_queries: dict[str, str] | None = None
|
|
71
71
|
self._subqueries: dict[str, str] | None = None
|
|
72
|
-
self._subqueries_names:
|
|
72
|
+
self._subqueries_names: UniqueList | None = None
|
|
73
73
|
|
|
74
74
|
self._limit_and_offset: tuple[int, int] | None = None
|
|
75
75
|
|
|
@@ -168,7 +168,7 @@ class Parser:
|
|
|
168
168
|
return self._tokens
|
|
169
169
|
|
|
170
170
|
@property
|
|
171
|
-
def columns(self) ->
|
|
171
|
+
def columns(self) -> UniqueList:
|
|
172
172
|
"""Return the list of column names referenced in the query.
|
|
173
173
|
|
|
174
174
|
Walks the sqlglot AST via :class:`ColumnExtractor` in a single DFS
|
|
@@ -177,7 +177,7 @@ class Parser:
|
|
|
177
177
|
SQL), falls back to a regex extraction of ``INTO … (col1, col2)``
|
|
178
178
|
column lists.
|
|
179
179
|
|
|
180
|
-
:rtype:
|
|
180
|
+
:rtype: UniqueList
|
|
181
181
|
"""
|
|
182
182
|
if self._columns_extracted:
|
|
183
183
|
return self._columns
|
|
@@ -276,10 +276,10 @@ class Parser:
|
|
|
276
276
|
return self._columns_aliases_dict
|
|
277
277
|
|
|
278
278
|
@property
|
|
279
|
-
def columns_aliases_names(self) ->
|
|
279
|
+
def columns_aliases_names(self) -> UniqueList:
|
|
280
280
|
"""Return the names of all column aliases used in the query.
|
|
281
281
|
|
|
282
|
-
:rtype:
|
|
282
|
+
:rtype: UniqueList
|
|
283
283
|
"""
|
|
284
284
|
if not self._columns_extracted:
|
|
285
285
|
_ = self.columns
|
|
@@ -299,14 +299,14 @@ class Parser:
|
|
|
299
299
|
return self._output_columns
|
|
300
300
|
|
|
301
301
|
@property
|
|
302
|
-
def tables(self) ->
|
|
302
|
+
def tables(self) -> UniqueList:
|
|
303
303
|
"""Return the list of table names referenced in the query.
|
|
304
304
|
|
|
305
305
|
Tables are extracted from the AST by :class:`TableExtractor`,
|
|
306
306
|
sorted by their position in the SQL text, and filtered to exclude
|
|
307
307
|
CTE names (which appear in :attr:`with_names` instead).
|
|
308
308
|
|
|
309
|
-
:rtype:
|
|
309
|
+
:rtype: UniqueList
|
|
310
310
|
"""
|
|
311
311
|
if self._tables is not None:
|
|
312
312
|
return self._tables
|
|
@@ -339,10 +339,10 @@ class Parser:
|
|
|
339
339
|
return self._table_aliases
|
|
340
340
|
|
|
341
341
|
@property
|
|
342
|
-
def with_names(self) ->
|
|
342
|
+
def with_names(self) -> UniqueList:
|
|
343
343
|
"""Return the CTE (Common Table Expression) names from the query.
|
|
344
344
|
|
|
345
|
-
:rtype:
|
|
345
|
+
:rtype: UniqueList
|
|
346
346
|
"""
|
|
347
347
|
if self._with_names is not None:
|
|
348
348
|
return self._with_names
|
|
@@ -387,13 +387,13 @@ class Parser:
|
|
|
387
387
|
return self._subqueries
|
|
388
388
|
|
|
389
389
|
@property
|
|
390
|
-
def subqueries_names(self) ->
|
|
390
|
+
def subqueries_names(self) -> UniqueList:
|
|
391
391
|
"""Return the names of all subqueries (innermost first).
|
|
392
392
|
|
|
393
393
|
Aliased subqueries use their alias; unaliased ones get
|
|
394
394
|
auto-generated names (``subquery_1``, ``subquery_2``, …).
|
|
395
395
|
|
|
396
|
-
:rtype:
|
|
396
|
+
:rtype: UniqueList
|
|
397
397
|
"""
|
|
398
398
|
if self._subqueries_names is not None:
|
|
399
399
|
return self._subqueries_names
|
|
@@ -482,7 +482,9 @@ class Parser:
|
|
|
482
482
|
is_multi = values and isinstance(values[0], list)
|
|
483
483
|
first_row = values[0] if is_multi else values
|
|
484
484
|
if not columns:
|
|
485
|
-
columns =
|
|
485
|
+
columns = UniqueList(
|
|
486
|
+
f"column_{ind + 1}" for ind in range(len(first_row))
|
|
487
|
+
)
|
|
486
488
|
|
|
487
489
|
if is_multi:
|
|
488
490
|
self._values_dict = {
|
|
@@ -141,7 +141,7 @@ class TableExtractor:
|
|
|
141
141
|
# Public API
|
|
142
142
|
# -------------------------------------------------------------------
|
|
143
143
|
|
|
144
|
-
def extract(self) ->
|
|
144
|
+
def extract(self) -> UniqueList:
|
|
145
145
|
"""Extract table names, excluding CTE definitions.
|
|
146
146
|
|
|
147
147
|
For ``CREATE TABLE`` statements, the target table is always placed
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|