sql-metadata 2.19.0__tar.gz → 3.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_metadata-2.19.0 → sql_metadata-3.0.0}/PKG-INFO +91 -32
- {sql_metadata-2.19.0 → sql_metadata-3.0.0}/README.md +87 -27
- sql_metadata-3.0.0/pyproject.toml +54 -0
- sql_metadata-3.0.0/sql_metadata/__init__.py +22 -0
- sql_metadata-3.0.0/sql_metadata/ast_parser.py +117 -0
- sql_metadata-3.0.0/sql_metadata/column_extractor.py +1109 -0
- sql_metadata-3.0.0/sql_metadata/comments.py +200 -0
- sql_metadata-3.0.0/sql_metadata/dialect_parser.py +316 -0
- sql_metadata-3.0.0/sql_metadata/exceptions.py +5 -0
- sql_metadata-3.0.0/sql_metadata/generalizator.py +130 -0
- sql_metadata-3.0.0/sql_metadata/keywords_lists.py +27 -0
- sql_metadata-3.0.0/sql_metadata/nested_resolver.py +715 -0
- sql_metadata-3.0.0/sql_metadata/parser.py +634 -0
- sql_metadata-3.0.0/sql_metadata/py.typed +0 -0
- sql_metadata-3.0.0/sql_metadata/query_type_extractor.py +140 -0
- sql_metadata-3.0.0/sql_metadata/sql_cleaner.py +255 -0
- sql_metadata-3.0.0/sql_metadata/table_extractor.py +307 -0
- sql_metadata-3.0.0/sql_metadata/utils.py +86 -0
- sql_metadata-2.19.0/pyproject.toml +0 -36
- sql_metadata-2.19.0/sql_metadata/__init__.py +0 -10
- sql_metadata-2.19.0/sql_metadata/compat.py +0 -58
- sql_metadata-2.19.0/sql_metadata/generalizator.py +0 -94
- sql_metadata-2.19.0/sql_metadata/keywords_lists.py +0 -134
- sql_metadata-2.19.0/sql_metadata/parser.py +0 -1119
- sql_metadata-2.19.0/sql_metadata/token.py +0 -566
- sql_metadata-2.19.0/sql_metadata/utils.py +0 -35
- {sql_metadata-2.19.0 → sql_metadata-3.0.0}/LICENSE +0 -0
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_metadata
|
|
3
|
-
Version: 2.19.0
|
|
4
|
-
Summary: Uses
|
|
3
|
+
Version: 3.0.0
|
|
4
|
+
Summary: Uses sqlglot to parse SQL queries and extract metadata
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Author: Maciej Brencz
|
|
8
8
|
Author-email: maciej.brencz@gmail.com
|
|
9
|
-
Requires-Python: >=3.
|
|
9
|
+
Requires-Python: >=3.10,<4.0
|
|
10
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: sqlglot (>=30.0.3,<31.0.0)
|
|
19
18
|
Project-URL: Homepage, https://github.com/macbre/sql-metadata
|
|
20
19
|
Project-URL: Repository, https://github.com/macbre/sql-metadata
|
|
21
20
|
Description-Content-Type: text/markdown
|
|
@@ -25,11 +24,11 @@ Description-Content-Type: text/markdown
|
|
|
25
24
|
[](https://pypi.python.org/pypi/sql_metadata)
|
|
26
25
|
[](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml)
|
|
27
26
|
[](https://coveralls.io/github/macbre/sql-metadata?branch=master)
|
|
28
|
-
|
|
27
|
+
[](https://github.com/astral-sh/ruff)
|
|
29
28
|
[](https://github.com/macbre/sql-metadata/graphs/commit-activity)
|
|
30
29
|
[](https://pepy.tech/project/sql-metadata)
|
|
31
30
|
|
|
32
|
-
Uses
|
|
31
|
+
Uses [`sqlglot`](https://github.com/tobymao/sqlglot) to parse SQL queries and extract metadata.
|
|
33
32
|
|
|
34
33
|
**Extracts column names and tables** used by the query.
|
|
35
34
|
Automatically performs **column alias resolution**, **subquery alias resolution**, and **table alias resolution**.
|
|
@@ -80,7 +79,7 @@ parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b
|
|
|
80
79
|
|
|
81
80
|
# note that aliases are auto-resolved
|
|
82
81
|
parser.columns
|
|
83
|
-
# ['product_a.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
|
|
82
|
+
# ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
|
|
84
83
|
|
|
85
84
|
# note that you can also extract columns with their place in the query
|
|
86
85
|
# which will return a dict of lists grouped by select, where, order_by, group_by, join, insert and update
|
|
@@ -116,6 +115,71 @@ assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
|
|
|
116
115
|
'select': ['a', 'b', 'c', 'u', 'd']}
|
|
117
116
|
```
|
|
118
117
|
|
|
118
|
+
### Extracting output column names
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from sql_metadata import Parser
|
|
122
|
+
|
|
123
|
+
# output_columns returns the ordered list of names that the SELECT would produce,
|
|
124
|
+
# preserving aliases (unlike `columns`, which resolves aliases back to real columns)
|
|
125
|
+
Parser("SELECT a, b AS c FROM t").output_columns
|
|
126
|
+
# ['a', 'c']
|
|
127
|
+
|
|
128
|
+
# works with function calls, window functions, computed aliases
|
|
129
|
+
Parser("""SELECT
|
|
130
|
+
id,
|
|
131
|
+
UPPER(email) AS email_upper,
|
|
132
|
+
ROW_NUMBER() OVER (PARTITION BY country ORDER BY created_at) AS rn
|
|
133
|
+
FROM users""").output_columns
|
|
134
|
+
# ['id', 'email_upper', 'rn']
|
|
135
|
+
|
|
136
|
+
# SELECT * stays as '*'
|
|
137
|
+
Parser("SELECT * FROM t").output_columns
|
|
138
|
+
# ['*']
|
|
139
|
+
|
|
140
|
+
# non-SELECT queries return an empty list
|
|
141
|
+
Parser("CREATE TABLE t (id INT)").output_columns
|
|
142
|
+
# []
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Detecting query type
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from sql_metadata import Parser, QueryType
|
|
149
|
+
|
|
150
|
+
Parser("SELECT * FROM foo").query_type
|
|
151
|
+
# <QueryType.SELECT: 'SELECT'>
|
|
152
|
+
|
|
153
|
+
# QueryType is a str-enum, so it compares equal to both strings and enum values
|
|
154
|
+
Parser("INSERT INTO foo VALUES (1)").query_type == QueryType.INSERT # True
|
|
155
|
+
Parser("INSERT INTO foo VALUES (1)").query_type == "INSERT" # True
|
|
156
|
+
|
|
157
|
+
# REPLACE INTO is reported distinctly from INSERT
|
|
158
|
+
Parser("REPLACE INTO foo VALUES (1)").query_type
|
|
159
|
+
# <QueryType.REPLACE: 'REPLACE'>
|
|
160
|
+
|
|
161
|
+
# Supported types: SELECT, INSERT, REPLACE, UPDATE, DELETE,
|
|
162
|
+
# CREATE, ALTER, DROP, TRUNCATE, MERGE
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Handling invalid queries
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from sql_metadata import Parser, InvalidQueryDefinition
|
|
169
|
+
|
|
170
|
+
# structurally invalid SQL raises `InvalidQueryDefinition` (a subclass of
|
|
171
|
+
# `ValueError`, so existing `except ValueError` handlers keep working)
|
|
172
|
+
try:
|
|
173
|
+
Parser("").query_type
|
|
174
|
+
except InvalidQueryDefinition as exc:
|
|
175
|
+
print(exc) # "Empty queries are not supported!"
|
|
176
|
+
|
|
177
|
+
try:
|
|
178
|
+
Parser("THIS IS NOT SQL").query_type
|
|
179
|
+
except InvalidQueryDefinition as exc:
|
|
180
|
+
print(exc) # "Not supported query type!"
|
|
181
|
+
```
|
|
182
|
+
|
|
119
183
|
### Extracting tables from query
|
|
120
184
|
|
|
121
185
|
```python
|
|
@@ -203,9 +267,10 @@ parser.with_names
|
|
|
203
267
|
# ["database1.tableFromWith", "test"]
|
|
204
268
|
|
|
205
269
|
# get definition of with queries
|
|
270
|
+
# (sqlglot normalises keyword casing and spacing when rendering the body SQL)
|
|
206
271
|
parser.with_queries
|
|
207
|
-
# {"database1.tableFromWith": "SELECT aa.* FROM table3
|
|
208
|
-
# "test": "SELECT *
|
|
272
|
+
# {"database1.tableFromWith": "SELECT aa.* FROM table3 AS aa LEFT JOIN table4 ON aa.col1 = table4.col2",
|
|
273
|
+
# "test": "SELECT * FROM table3"}
|
|
209
274
|
|
|
210
275
|
# note that names of with statements do not appear in tables
|
|
211
276
|
parser.tables
|
|
@@ -227,9 +292,10 @@ ON a.task_id = b.task_id;
|
|
|
227
292
|
)
|
|
228
293
|
|
|
229
294
|
# get sub-queries dictionary
|
|
295
|
+
# (sqlglot normalises keyword casing — implicit table aliases become explicit `AS`)
|
|
230
296
|
parser.subqueries
|
|
231
|
-
# {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
|
|
232
|
-
# "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}
|
|
297
|
+
# {"a": "SELECT std.task_id FROM some_task_detail AS std WHERE std.STATUS = 1",
|
|
298
|
+
# "b": "SELECT st.task_id FROM some_task AS st WHERE task_type_id = 80"}
|
|
233
299
|
|
|
234
300
|
|
|
235
301
|
# get names/ aliases of sub-queries / derived tables
|
|
@@ -272,30 +338,23 @@ parser.comments
|
|
|
272
338
|
|
|
273
339
|
See the `test/test_normalization.py` file for examples of somewhat more complex queries.
|
|
274
340
|
|
|
275
|
-
## Migrating from `sql_metadata` 1.x
|
|
276
|
-
|
|
277
|
-
`sql_metadata.compat` module has been implemented to make the introduction of sql-metadata v2.0 smoother.
|
|
341
|
+
## Migrating from `sql_metadata` 1.x / 2.x
|
|
278
342
|
|
|
279
|
-
|
|
343
|
+
The `sql_metadata.compat` module (previously provided for v1 → v2 migration) has been **removed in v3**. Port your code to the class-based `Parser` API shown in the examples above:
|
|
280
344
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
```
|
|
345
|
+
| Old v1 helper | v3 replacement |
|
|
346
|
+
|-------------------------------|-------------------------------------------|
|
|
347
|
+
| `generalize_sql(sql)` | `Parser(sql).generalize` |
|
|
348
|
+
| `get_query_columns(sql)` | `Parser(sql).columns` |
|
|
349
|
+
| `get_query_tables(sql)` | `Parser(sql).tables` |
|
|
350
|
+
| `get_query_limit_and_offset(sql)` | `Parser(sql).limit_and_offset` |
|
|
351
|
+
| `get_query_tokens(sql)` | `Parser(sql).tokens` |
|
|
352
|
+
| `preprocess_query(sql)` | `Parser(sql).query` |
|
|
290
353
|
|
|
291
|
-
|
|
354
|
+
For v2 → v3 users, the public `Parser` API is unchanged except:
|
|
292
355
|
|
|
293
|
-
* `
|
|
294
|
-
* `
|
|
295
|
-
* `get_query_limit_and_offset`
|
|
296
|
-
* `get_query_tables`
|
|
297
|
-
* `get_query_tokens`
|
|
298
|
-
* `preprocess_query`
|
|
356
|
+
* The parsing engine is now [sqlglot](https://github.com/tobymao/sqlglot), which may normalise the *casing* and *spacing* of rendered CTE/subquery bodies (see the `with_queries` / `subqueries` examples above).
|
|
357
|
+
* Malformed SQL now raises `InvalidQueryDefinition` (a `ValueError` subclass) instead of a plain `ValueError` — existing `except ValueError:` handlers continue to work.
|
|
299
358
|
|
|
300
359
|
## Authors and contributors
|
|
301
360
|
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
[](https://pypi.python.org/pypi/sql_metadata)
|
|
4
4
|
[](https://github.com/macbre/sql-metadata/actions/workflows/python-ci.yml)
|
|
5
5
|
[](https://coveralls.io/github/macbre/sql-metadata?branch=master)
|
|
6
|
-
|
|
6
|
+
[](https://github.com/astral-sh/ruff)
|
|
7
7
|
[](https://github.com/macbre/sql-metadata/graphs/commit-activity)
|
|
8
8
|
[](https://pepy.tech/project/sql-metadata)
|
|
9
9
|
|
|
10
|
-
Uses
|
|
10
|
+
Uses [`sqlglot`](https://github.com/tobymao/sqlglot) to parse SQL queries and extract metadata.
|
|
11
11
|
|
|
12
12
|
**Extracts column names and tables** used by the query.
|
|
13
13
|
Automatically performs **column alias resolution**, **subquery alias resolution**, and **table alias resolution**.
|
|
@@ -58,7 +58,7 @@ parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b
|
|
|
58
58
|
|
|
59
59
|
# note that aliases are auto-resolved
|
|
60
60
|
parser.columns
|
|
61
|
-
# ['product_a.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
|
|
61
|
+
# ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
|
|
62
62
|
|
|
63
63
|
# note that you can also extract columns with their place in the query
|
|
64
64
|
# which will return a dict of lists grouped by select, where, order_by, group_by, join, insert and update
|
|
@@ -94,6 +94,71 @@ assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
|
|
|
94
94
|
'select': ['a', 'b', 'c', 'u', 'd']}
|
|
95
95
|
```
|
|
96
96
|
|
|
97
|
+
### Extracting output column names
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from sql_metadata import Parser
|
|
101
|
+
|
|
102
|
+
# output_columns returns the ordered list of names that the SELECT would produce,
|
|
103
|
+
# preserving aliases (unlike `columns`, which resolves aliases back to real columns)
|
|
104
|
+
Parser("SELECT a, b AS c FROM t").output_columns
|
|
105
|
+
# ['a', 'c']
|
|
106
|
+
|
|
107
|
+
# works with function calls, window functions, computed aliases
|
|
108
|
+
Parser("""SELECT
|
|
109
|
+
id,
|
|
110
|
+
UPPER(email) AS email_upper,
|
|
111
|
+
ROW_NUMBER() OVER (PARTITION BY country ORDER BY created_at) AS rn
|
|
112
|
+
FROM users""").output_columns
|
|
113
|
+
# ['id', 'email_upper', 'rn']
|
|
114
|
+
|
|
115
|
+
# SELECT * stays as '*'
|
|
116
|
+
Parser("SELECT * FROM t").output_columns
|
|
117
|
+
# ['*']
|
|
118
|
+
|
|
119
|
+
# non-SELECT queries return an empty list
|
|
120
|
+
Parser("CREATE TABLE t (id INT)").output_columns
|
|
121
|
+
# []
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Detecting query type
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from sql_metadata import Parser, QueryType
|
|
128
|
+
|
|
129
|
+
Parser("SELECT * FROM foo").query_type
|
|
130
|
+
# <QueryType.SELECT: 'SELECT'>
|
|
131
|
+
|
|
132
|
+
# QueryType is a str-enum, so it compares equal to both strings and enum values
|
|
133
|
+
Parser("INSERT INTO foo VALUES (1)").query_type == QueryType.INSERT # True
|
|
134
|
+
Parser("INSERT INTO foo VALUES (1)").query_type == "INSERT" # True
|
|
135
|
+
|
|
136
|
+
# REPLACE INTO is reported distinctly from INSERT
|
|
137
|
+
Parser("REPLACE INTO foo VALUES (1)").query_type
|
|
138
|
+
# <QueryType.REPLACE: 'REPLACE'>
|
|
139
|
+
|
|
140
|
+
# Supported types: SELECT, INSERT, REPLACE, UPDATE, DELETE,
|
|
141
|
+
# CREATE, ALTER, DROP, TRUNCATE, MERGE
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Handling invalid queries
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from sql_metadata import Parser, InvalidQueryDefinition
|
|
148
|
+
|
|
149
|
+
# structurally invalid SQL raises `InvalidQueryDefinition` (a subclass of
|
|
150
|
+
# `ValueError`, so existing `except ValueError` handlers keep working)
|
|
151
|
+
try:
|
|
152
|
+
Parser("").query_type
|
|
153
|
+
except InvalidQueryDefinition as exc:
|
|
154
|
+
print(exc) # "Empty queries are not supported!"
|
|
155
|
+
|
|
156
|
+
try:
|
|
157
|
+
Parser("THIS IS NOT SQL").query_type
|
|
158
|
+
except InvalidQueryDefinition as exc:
|
|
159
|
+
print(exc) # "Not supported query type!"
|
|
160
|
+
```
|
|
161
|
+
|
|
97
162
|
### Extracting tables from query
|
|
98
163
|
|
|
99
164
|
```python
|
|
@@ -181,9 +246,10 @@ parser.with_names
|
|
|
181
246
|
# ["database1.tableFromWith", "test"]
|
|
182
247
|
|
|
183
248
|
# get definition of with queries
|
|
249
|
+
# (sqlglot normalises keyword casing and spacing when rendering the body SQL)
|
|
184
250
|
parser.with_queries
|
|
185
|
-
# {"database1.tableFromWith": "SELECT aa.* FROM table3
|
|
186
|
-
# "test": "SELECT *
|
|
251
|
+
# {"database1.tableFromWith": "SELECT aa.* FROM table3 AS aa LEFT JOIN table4 ON aa.col1 = table4.col2",
|
|
252
|
+
# "test": "SELECT * FROM table3"}
|
|
187
253
|
|
|
188
254
|
# note that names of with statements do not appear in tables
|
|
189
255
|
parser.tables
|
|
@@ -205,9 +271,10 @@ ON a.task_id = b.task_id;
|
|
|
205
271
|
)
|
|
206
272
|
|
|
207
273
|
# get sub-queries dictionary
|
|
274
|
+
# (sqlglot normalises keyword casing — implicit table aliases become explicit `AS`)
|
|
208
275
|
parser.subqueries
|
|
209
|
-
# {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
|
|
210
|
-
# "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}
|
|
276
|
+
# {"a": "SELECT std.task_id FROM some_task_detail AS std WHERE std.STATUS = 1",
|
|
277
|
+
# "b": "SELECT st.task_id FROM some_task AS st WHERE task_type_id = 80"}
|
|
211
278
|
|
|
212
279
|
|
|
213
280
|
# get names/ aliases of sub-queries / derived tables
|
|
@@ -250,30 +317,23 @@ parser.comments
|
|
|
250
317
|
|
|
251
318
|
See the `test/test_normalization.py` file for examples of somewhat more complex queries.
|
|
252
319
|
|
|
253
|
-
## Migrating from `sql_metadata` 1.x
|
|
254
|
-
|
|
255
|
-
`sql_metadata.compat` module has been implemented to make the introduction of sql-metadata v2.0 smoother.
|
|
320
|
+
## Migrating from `sql_metadata` 1.x / 2.x
|
|
256
321
|
|
|
257
|
-
|
|
322
|
+
The `sql_metadata.compat` module (previously provided for v1 → v2 migration) has been **removed in v3**. Port your code to the class-based `Parser` API shown in the examples above:
|
|
258
323
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
```
|
|
324
|
+
| Old v1 helper | v3 replacement |
|
|
325
|
+
|-------------------------------|-------------------------------------------|
|
|
326
|
+
| `generalize_sql(sql)` | `Parser(sql).generalize` |
|
|
327
|
+
| `get_query_columns(sql)` | `Parser(sql).columns` |
|
|
328
|
+
| `get_query_tables(sql)` | `Parser(sql).tables` |
|
|
329
|
+
| `get_query_limit_and_offset(sql)` | `Parser(sql).limit_and_offset` |
|
|
330
|
+
| `get_query_tokens(sql)` | `Parser(sql).tokens` |
|
|
331
|
+
| `preprocess_query(sql)` | `Parser(sql).query` |
|
|
268
332
|
|
|
269
|
-
|
|
333
|
+
For v2 → v3 users, the public `Parser` API is unchanged except:
|
|
270
334
|
|
|
271
|
-
* `
|
|
272
|
-
* `
|
|
273
|
-
* `get_query_limit_and_offset`
|
|
274
|
-
* `get_query_tables`
|
|
275
|
-
* `get_query_tokens`
|
|
276
|
-
* `preprocess_query`
|
|
335
|
+
* The parsing engine is now [sqlglot](https://github.com/tobymao/sqlglot), which may normalise the *casing* and *spacing* of rendered CTE/subquery bodies (see the `with_queries` / `subqueries` examples above).
|
|
336
|
+
* Malformed SQL now raises `InvalidQueryDefinition` (a `ValueError` subclass) instead of a plain `ValueError` — existing `except ValueError:` handlers continue to work.
|
|
277
337
|
|
|
278
338
|
## Authors and contributors
|
|
279
339
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "sql_metadata"
|
|
3
|
+
version = "3.0.0"
|
|
4
|
+
license="MIT"
|
|
5
|
+
description = "Uses sqlglot to parse SQL queries and extract metadata"
|
|
6
|
+
authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
homepage = "https://github.com/macbre/sql-metadata"
|
|
9
|
+
repository = "https://github.com/macbre/sql-metadata"
|
|
10
|
+
|
|
11
|
+
packages = [
|
|
12
|
+
{ include="sql_metadata" }
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[tool.poetry.dependencies]
|
|
16
|
+
python = "^3.10"
|
|
17
|
+
sqlglot = "^30.0.3"
|
|
18
|
+
|
|
19
|
+
[tool.poetry.group.dev.dependencies]
|
|
20
|
+
coverage = {extras = ["toml"], version = "^7.13"}
|
|
21
|
+
pytest = "^9.0.3"
|
|
22
|
+
pytest-cov = "^7.1.0"
|
|
23
|
+
ruff = "^0.11"
|
|
24
|
+
mypy = "^1.19"
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["poetry-core>=1.0.0"]
|
|
28
|
+
build-backend = "poetry.core.masonry.api"
|
|
29
|
+
|
|
30
|
+
[tool.ruff]
|
|
31
|
+
line-length = 88
|
|
32
|
+
target-version = "py310"
|
|
33
|
+
|
|
34
|
+
[tool.ruff.lint]
|
|
35
|
+
select = ["E", "F", "W", "C90", "I"]
|
|
36
|
+
|
|
37
|
+
[tool.ruff.lint.mccabe]
|
|
38
|
+
max-complexity = 8
|
|
39
|
+
|
|
40
|
+
[tool.mypy]
|
|
41
|
+
python_version = "3.10"
|
|
42
|
+
warn_return_any = true
|
|
43
|
+
warn_unused_configs = true
|
|
44
|
+
check_untyped_defs = true
|
|
45
|
+
disallow_untyped_defs = true
|
|
46
|
+
disallow_any_generics = true
|
|
47
|
+
ignore_missing_imports = true
|
|
48
|
+
|
|
49
|
+
[tool.coverage.run]
|
|
50
|
+
relative_files = true
|
|
51
|
+
|
|
52
|
+
[tool.coverage.report]
|
|
53
|
+
show_missing = true
|
|
54
|
+
fail_under = 100
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Parse SQL queries and extract structural metadata.
|
|
2
|
+
|
|
3
|
+
The ``sql-metadata`` package analyses SQL statements and returns the
|
|
4
|
+
tables, columns, aliases, CTE definitions, subqueries, values, comments,
|
|
5
|
+
and query type they contain. The primary entry point is :class:`Parser`::
|
|
6
|
+
|
|
7
|
+
from sql_metadata import Parser
|
|
8
|
+
|
|
9
|
+
parser = Parser("SELECT id, name FROM users WHERE active = 1")
|
|
10
|
+
print(parser.tables) # ['users']
|
|
11
|
+
print(parser.columns) # ['id', 'name', 'active']
|
|
12
|
+
|
|
13
|
+
Under the hood the library delegates to `sqlglot <https://github.com/tobymao/sqlglot>`_
|
|
14
|
+
for AST construction and tokenization, with custom dialect handling for
|
|
15
|
+
MSSQL, MySQL, Hive/Spark, and TSQL bracket notation.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from sql_metadata.exceptions import InvalidQueryDefinition
|
|
19
|
+
from sql_metadata.keywords_lists import QueryType
|
|
20
|
+
from sql_metadata.parser import Parser
|
|
21
|
+
|
|
22
|
+
__all__ = ["InvalidQueryDefinition", "Parser", "QueryType"]
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Wrap ``sqlglot.parse()`` to produce an AST from raw SQL strings.
|
|
2
|
+
|
|
3
|
+
Thin orchestrator that composes :class:`~sql_cleaner.SqlCleaner` (raw SQL
|
|
4
|
+
preprocessing) and :class:`~dialect_parser.DialectParser` (dialect
|
|
5
|
+
detection, parsing, quality validation) so that downstream extractors
|
|
6
|
+
always receive a clean ``sqlglot.exp.Expression`` tree (or ``None`` for
|
|
7
|
+
empty input); malformed SQL raises ``ValueError``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from sqlglot import exp
|
|
11
|
+
from sqlglot.dialects.dialect import DialectType
|
|
12
|
+
|
|
13
|
+
from sql_metadata.dialect_parser import DialectParser
|
|
14
|
+
from sql_metadata.sql_cleaner import SqlCleaner
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ASTParser:
|
|
18
|
+
"""Lazy wrapper around SQL parsing with dialect auto-detection.
|
|
19
|
+
|
|
20
|
+
Instantiated once per :class:`Parser` with the raw SQL string. The
|
|
21
|
+
actual parsing is deferred until :attr:`ast` is first accessed, at
|
|
22
|
+
which point the SQL is cleaned and parsed through one or more sqlglot
|
|
23
|
+
dialects until a satisfactory AST is obtained.
|
|
24
|
+
|
|
25
|
+
:param sql: Raw SQL query string.
|
|
26
|
+
:type sql: str
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(self, sql: str) -> None:
|
|
30
|
+
self._raw_sql = sql
|
|
31
|
+
self._ast: exp.Expression | None = None
|
|
32
|
+
self._dialect: DialectType = None
|
|
33
|
+
self._parsed = False
|
|
34
|
+
self._is_replace = False
|
|
35
|
+
self._cte_name_map: dict[str, str] = {}
|
|
36
|
+
|
|
37
|
+
@property
|
|
38
|
+
def ast(self) -> exp.Expression | None:
|
|
39
|
+
"""The sqlglot AST for the query, lazily parsed on first access.
|
|
40
|
+
|
|
41
|
+
:returns: Root AST node, or ``None`` for empty/comment-only queries.
|
|
42
|
+
:rtype: exp.Expression | None
|
|
43
|
+
:raises ValueError: If the SQL is malformed and cannot be parsed.
|
|
44
|
+
"""
|
|
45
|
+
if self._parsed:
|
|
46
|
+
return self._ast
|
|
47
|
+
self._parsed = True
|
|
48
|
+
self._ast = self._parse(self._raw_sql)
|
|
49
|
+
return self._ast
|
|
50
|
+
|
|
51
|
+
def _ensure_parsed(self) -> None:
|
|
52
|
+
"""Trigger lazy parsing so side-effect fields are populated."""
|
|
53
|
+
_ = self.ast
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def dialect(self) -> DialectType:
|
|
57
|
+
"""The sqlglot dialect that produced the current AST.
|
|
58
|
+
|
|
59
|
+
Set as a side-effect of :attr:`ast` access. May be ``None``
|
|
60
|
+
(default dialect), a string like ``"mysql"``, or a custom
|
|
61
|
+
:class:`Dialect` subclass such as :class:`HashVarDialect`.
|
|
62
|
+
|
|
63
|
+
:rtype: DialectType
|
|
64
|
+
"""
|
|
65
|
+
self._ensure_parsed()
|
|
66
|
+
return self._dialect
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def is_replace(self) -> bool:
|
|
70
|
+
"""Whether the original query was a ``REPLACE INTO`` statement.
|
|
71
|
+
|
|
72
|
+
``REPLACE INTO`` is rewritten to ``INSERT INTO`` before parsing
|
|
73
|
+
(sqlglot otherwise produces an opaque ``Command`` node). This
|
|
74
|
+
flag allows :attr:`Parser.query_type` to restore the correct
|
|
75
|
+
:attr:`QueryType.REPLACE` value.
|
|
76
|
+
|
|
77
|
+
:rtype: bool
|
|
78
|
+
"""
|
|
79
|
+
self._ensure_parsed()
|
|
80
|
+
return self._is_replace
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def cte_name_map(self) -> dict[str, str]:
|
|
84
|
+
"""Map of placeholder CTE names back to their original qualified form.
|
|
85
|
+
|
|
86
|
+
Keys are underscore-separated placeholders (``db__DOT__name``),
|
|
87
|
+
values are the original dotted names (``db.name``).
|
|
88
|
+
|
|
89
|
+
:rtype: dict[str, str]
|
|
90
|
+
"""
|
|
91
|
+
self._ensure_parsed()
|
|
92
|
+
return self._cte_name_map
|
|
93
|
+
|
|
94
|
+
def _parse(self, sql: str) -> exp.Expression | None:
|
|
95
|
+
"""Parse *sql* into a sqlglot AST.
|
|
96
|
+
|
|
97
|
+
Delegates preprocessing to :class:`SqlCleaner` and dialect
|
|
98
|
+
detection / parsing to :class:`DialectParser`.
|
|
99
|
+
|
|
100
|
+
:param sql: Raw SQL string (may include comments).
|
|
101
|
+
:type sql: str
|
|
102
|
+
:returns: Root AST node, or ``None`` for empty input.
|
|
103
|
+
:rtype: exp.Expression | None
|
|
104
|
+
:raises ValueError: If the SQL is malformed.
|
|
105
|
+
"""
|
|
106
|
+
if not sql or not sql.strip():
|
|
107
|
+
return None
|
|
108
|
+
|
|
109
|
+
result = SqlCleaner.clean(sql)
|
|
110
|
+
if result.sql is None:
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
self._is_replace = result.is_replace
|
|
114
|
+
self._cte_name_map = result.cte_name_map
|
|
115
|
+
|
|
116
|
+
ast, self._dialect = DialectParser().parse(result.sql)
|
|
117
|
+
return ast
|