sqlglot 2.2.1__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlglot-2.2.1 → sqlglot-3.1.0}/LICENSE +0 -0
- {sqlglot-2.2.1/sqlglot.egg-info → sqlglot-3.1.0}/PKG-INFO +161 -91
- {sqlglot-2.2.1 → sqlglot-3.1.0}/README.md +160 -90
- {sqlglot-2.2.1 → sqlglot-3.1.0}/setup.cfg +0 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/setup.py +2 -2
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/__init__.py +8 -46
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/__main__.py +6 -1
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/constants.py +0 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/dialects.py +109 -59
- sqlglot-3.1.0/sqlglot/errors.py +29 -0
- sqlglot-3.1.0/sqlglot/executor/__init__.py +40 -0
- sqlglot-3.1.0/sqlglot/executor/context.py +69 -0
- sqlglot-3.1.0/sqlglot/executor/env.py +31 -0
- sqlglot-3.1.0/sqlglot/executor/python.py +334 -0
- sqlglot-3.1.0/sqlglot/executor/table.py +81 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/expressions.py +567 -179
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/generator.py +145 -100
- sqlglot-3.1.0/sqlglot/helper.py +144 -0
- sqlglot-3.1.0/sqlglot/optimizer/__init__.py +2 -0
- sqlglot-3.1.0/sqlglot/optimizer/decorrelate_subqueries.py +86 -0
- sqlglot-3.1.0/sqlglot/optimizer/eliminate_subqueries.py +48 -0
- sqlglot-3.1.0/sqlglot/optimizer/expand_multi_table_selects.py +16 -0
- sqlglot-3.1.0/sqlglot/optimizer/isolate_table_selects.py +31 -0
- sqlglot-3.1.0/sqlglot/optimizer/normalize.py +154 -0
- sqlglot-3.1.0/sqlglot/optimizer/optimize_joins.py +76 -0
- sqlglot-3.1.0/sqlglot/optimizer/optimizer.py +43 -0
- sqlglot-3.1.0/sqlglot/optimizer/pushdown_predicates.py +178 -0
- sqlglot-3.1.0/sqlglot/optimizer/pushdown_projections.py +76 -0
- sqlglot-3.1.0/sqlglot/optimizer/qualify_columns.py +212 -0
- sqlglot-3.1.0/sqlglot/optimizer/qualify_tables.py +54 -0
- sqlglot-3.1.0/sqlglot/optimizer/quote_identities.py +25 -0
- sqlglot-3.1.0/sqlglot/optimizer/schema.py +129 -0
- sqlglot-3.1.0/sqlglot/optimizer/scope.py +421 -0
- sqlglot-3.1.0/sqlglot/optimizer/simplify.py +347 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/parser.py +199 -103
- sqlglot-3.1.0/sqlglot/planner.py +338 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/time.py +0 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/tokens.py +35 -4
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/trie.py +0 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0/sqlglot.egg-info}/PKG-INFO +161 -91
- sqlglot-3.1.0/sqlglot.egg-info/SOURCES.txt +41 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot.egg-info/dependency_links.txt +0 -0
- {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot.egg-info/top_level.txt +0 -0
- sqlglot-2.2.1/sqlglot/errors.py +0 -21
- sqlglot-2.2.1/sqlglot/helper.py +0 -69
- sqlglot-2.2.1/sqlglot.egg-info/SOURCES.txt +0 -19
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sqlglot
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: An easily customizable SQL parser and transpiler
|
|
5
5
|
Home-page: https://github.com/tobymao/sqlglot
|
|
6
6
|
Author: Toby Mao
|
|
@@ -19,7 +19,7 @@ License-File: LICENSE
|
|
|
19
19
|
|
|
20
20
|
# SQLGlot
|
|
21
21
|
|
|
22
|
-
SQLGlot is a no dependency Python SQL parser and
|
|
22
|
+
SQLGlot is a no dependency Python SQL parser, transpiler, and optimizer. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntactically correct SQL in the targeted dialects.
|
|
23
23
|
|
|
24
24
|
It is currently the [fastest](#benchmarks) pure-Python SQL parser.
|
|
25
25
|
|
|
@@ -75,100 +75,45 @@ sqlglot.transpile(sql, write='spark', identify=True, pretty=True)[0]
|
|
|
75
75
|
```
|
|
76
76
|
|
|
77
77
|
```sql
|
|
78
|
-
WITH baz AS (
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
78
|
+
WITH `baz` AS (
|
|
79
|
+
SELECT
|
|
80
|
+
`a`,
|
|
81
|
+
`c`
|
|
82
|
+
FROM `foo`
|
|
83
|
+
WHERE
|
|
84
|
+
`a` = 1
|
|
85
85
|
)
|
|
86
86
|
SELECT
|
|
87
87
|
`f`.`a`,
|
|
88
88
|
`b`.`b`,
|
|
89
89
|
`baz`.`c`,
|
|
90
|
-
CAST(`b`.`a` AS FLOAT) AS d
|
|
91
|
-
FROM `foo` AS f
|
|
92
|
-
JOIN `bar` AS b
|
|
93
|
-
`f`.`a` = `b`.`a`
|
|
94
|
-
LEFT JOIN `baz`
|
|
95
|
-
`f`.`a` = `baz`.`a`
|
|
90
|
+
CAST(`b`.`a` AS FLOAT) AS `d`
|
|
91
|
+
FROM `foo` AS `f`
|
|
92
|
+
JOIN `bar` AS `b`
|
|
93
|
+
ON `f`.`a` = `b`.`a`
|
|
94
|
+
LEFT JOIN `baz`
|
|
95
|
+
ON `f`.`a` = `baz`.`a`
|
|
96
96
|
```
|
|
97
97
|
|
|
98
|
-
###
|
|
99
|
-
#### Custom Types
|
|
100
|
-
A simple transform on types can be accomplished by providing a corresponding mapping:
|
|
101
|
-
```python
|
|
102
|
-
|
|
103
|
-
from sqlglot import *
|
|
104
|
-
from sqlglot import expressions as exp
|
|
105
|
-
|
|
106
|
-
transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
```sql
|
|
110
|
-
SELECT CAST(a AS SPECIAL INT) FROM x
|
|
111
|
-
```
|
|
98
|
+
### Metadata
|
|
112
99
|
|
|
113
|
-
|
|
114
|
-
#### Custom Functions
|
|
115
|
-
In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
|
|
100
|
+
You can explore SQL with expression helpers to do things like find columns and tables.
|
|
116
101
|
|
|
117
102
|
```python
|
|
118
|
-
from sqlglot import
|
|
119
|
-
from sqlglot.expressions import Func
|
|
103
|
+
from sqlglot import parse_one, exp
|
|
120
104
|
|
|
121
|
-
|
|
122
|
-
|
|
105
|
+
# print all column references (a and b)
|
|
106
|
+
for column in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Column):
|
|
107
|
+
print(column.alias_or_name)
|
|
123
108
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
109
|
+
# find all projections in select statements (a and c)
|
|
110
|
+
for select in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Select):
|
|
111
|
+
for projection in select.args["expressions"]:
|
|
112
|
+
print(projection.alias_or_name)
|
|
127
113
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
<Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
|
|
132
|
-
<Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
|
|
133
|
-
<Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
|
|
134
|
-
<Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
|
|
135
|
-
<Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
|
|
136
|
-
<Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
|
|
137
|
-
<Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
|
|
138
|
-
<Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
|
|
139
|
-
]
|
|
140
|
-
|
|
141
|
-
```
|
|
142
|
-
```python
|
|
143
|
-
expression = Parser(functions={
|
|
144
|
-
**SpecialUdf.default_parser_mappings(),
|
|
145
|
-
}).parse(tokens)[0]
|
|
146
|
-
```
|
|
147
|
-
|
|
148
|
-
The expression tree produced by the parser:
|
|
149
|
-
|
|
150
|
-
```
|
|
151
|
-
(SELECT distinct: False, expressions:
|
|
152
|
-
(SPECIALUDF a:
|
|
153
|
-
(COLUMN this:
|
|
154
|
-
(IDENTIFIER this: a, quoted: False)), b:
|
|
155
|
-
(COLUMN this:
|
|
156
|
-
(IDENTIFIER this: b, quoted: False))), from:
|
|
157
|
-
(FROM expressions:
|
|
158
|
-
(TABLE this:
|
|
159
|
-
(IDENTIFIER this: x, quoted: False))))
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
Finally generating the new SQL:
|
|
163
|
-
|
|
164
|
-
```python
|
|
165
|
-
Generator(transforms={
|
|
166
|
-
SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
|
|
167
|
-
}).generate(expression)
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
```sql
|
|
171
|
-
SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
|
|
114
|
+
# find all tables (x, y, z)
|
|
115
|
+
for table in parse_one("SELECT * FROM x JOIN y JOIN z").find_all(exp.Table):
|
|
116
|
+
print(table.name)
|
|
172
117
|
```
|
|
173
118
|
|
|
174
119
|
### Parser Errors
|
|
@@ -177,8 +122,8 @@ A syntax error will result in a parser error.
|
|
|
177
122
|
transpile("SELECT foo( FROM bar")
|
|
178
123
|
```
|
|
179
124
|
```
|
|
180
|
-
sqlglot.errors.ParseError:
|
|
181
|
-
SELECT foo(
|
|
125
|
+
sqlglot.errors.ParseError: Expecting ). Line 1, Col: 13.
|
|
126
|
+
SELECT foo( FROM bar
|
|
182
127
|
```
|
|
183
128
|
### Unsupported Errors
|
|
184
129
|
Presto APPROX_DISTINCT supports the accuracy argument which is not supported in Spark.
|
|
@@ -226,14 +171,13 @@ SELECT x FROM y, z
|
|
|
226
171
|
There is also a way to recursively transform the parsed tree by applying a mapping function to each tree node:
|
|
227
172
|
|
|
228
173
|
```python
|
|
229
|
-
import
|
|
230
|
-
import sqlglot.expressions as exp
|
|
174
|
+
from sqlglot import exp, parse_one
|
|
231
175
|
|
|
232
|
-
expression_tree =
|
|
176
|
+
expression_tree = parse_one("SELECT a FROM x")
|
|
233
177
|
|
|
234
178
|
def transformer(node):
|
|
235
|
-
if isinstance(node, exp.Column) and node.
|
|
236
|
-
return
|
|
179
|
+
if isinstance(node, exp.Column) and node.name == "a":
|
|
180
|
+
return parse_one("FUN(a)")
|
|
237
181
|
return node
|
|
238
182
|
|
|
239
183
|
transformed_tree = expression_tree.transform(transformer)
|
|
@@ -244,6 +188,42 @@ Which outputs:
|
|
|
244
188
|
SELECT FUN(a) FROM x
|
|
245
189
|
```
|
|
246
190
|
|
|
191
|
+
### SQL Optimizer
|
|
192
|
+
|
|
193
|
+
SQLGlot can rewrite queries into an "optimized" form. It performs a variety of [techniques](sqlglot/optimizer/optimizer.py) to create a new canonical AST. This AST can be used to standardize queries or provide the foundations for implementing an actual engine.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
import sqlglot
|
|
197
|
+
from sqlglot.optimizer import optimize
|
|
198
|
+
|
|
199
|
+
>>>
|
|
200
|
+
optimize(
|
|
201
|
+
sqlglot.parse_one("""
|
|
202
|
+
SELECT A OR (B OR (C AND D))
|
|
203
|
+
FROM x
|
|
204
|
+
WHERE Z = date '2021-01-01' + INTERVAL '1' month OR 1 = 0
|
|
205
|
+
"""),
|
|
206
|
+
schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}}
|
|
207
|
+
).sql(pretty=True)
|
|
208
|
+
|
|
209
|
+
"""
|
|
210
|
+
SELECT
|
|
211
|
+
(
|
|
212
|
+
"x"."A"
|
|
213
|
+
OR "x"."B"
|
|
214
|
+
OR "x"."C"
|
|
215
|
+
)
|
|
216
|
+
AND (
|
|
217
|
+
"x"."A"
|
|
218
|
+
OR "x"."B"
|
|
219
|
+
OR "x"."D"
|
|
220
|
+
) AS "_col_0"
|
|
221
|
+
FROM "x" AS "x"
|
|
222
|
+
WHERE
|
|
223
|
+
"x"."Z" = CAST('2021-02-01' AS DATE)
|
|
224
|
+
"""
|
|
225
|
+
```
|
|
226
|
+
|
|
247
227
|
### SQL Annotations
|
|
248
228
|
|
|
249
229
|
SQLGlot supports annotations in the sql expression. This is an experimental feature that is not part of any of the SQL standards but it can be useful when needing to annotate what a selected field is supposed to be. Below is an example:
|
|
@@ -255,6 +235,83 @@ SELECT
|
|
|
255
235
|
FROM users
|
|
256
236
|
```
|
|
257
237
|
|
|
238
|
+
### Customization
|
|
239
|
+
#### Custom Types
|
|
240
|
+
A simple transform on types can be accomplished by providing a corresponding mapping:
|
|
241
|
+
```python
|
|
242
|
+
|
|
243
|
+
from sqlglot import *
|
|
244
|
+
|
|
245
|
+
transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
```sql
|
|
249
|
+
SELECT CAST(a AS SPECIAL INT) FROM x
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
|
|
253
|
+
#### Custom Functions
|
|
254
|
+
In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
from sqlglot import *
|
|
258
|
+
from sqlglot.expressions import Func
|
|
259
|
+
|
|
260
|
+
class SpecialUdf(Func):
|
|
261
|
+
arg_types = {'a': True, 'b': True}
|
|
262
|
+
|
|
263
|
+
tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
|
|
264
|
+
```
|
|
265
|
+
Here is the output of the tokenizer:
|
|
266
|
+
|
|
267
|
+
```
|
|
268
|
+
[
|
|
269
|
+
<Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
|
|
270
|
+
<Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
|
|
271
|
+
<Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
|
|
272
|
+
<Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
|
|
273
|
+
<Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
|
|
274
|
+
<Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
|
|
275
|
+
<Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
|
|
276
|
+
<Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
|
|
277
|
+
<Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
|
|
278
|
+
]
|
|
279
|
+
|
|
280
|
+
```
|
|
281
|
+
```python
|
|
282
|
+
expression = Parser(functions={
|
|
283
|
+
**SpecialUdf.default_parser_mappings(),
|
|
284
|
+
}).parse(tokens)[0]
|
|
285
|
+
|
|
286
|
+
repr(expression)
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
The expression tree produced by the parser:
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
(SELECT distinct: False, expressions:
|
|
293
|
+
(SPECIALUDF a:
|
|
294
|
+
(COLUMN this:
|
|
295
|
+
(IDENTIFIER this: a, quoted: False)), b:
|
|
296
|
+
(COLUMN this:
|
|
297
|
+
(IDENTIFIER this: b, quoted: False))), from:
|
|
298
|
+
(FROM expressions:
|
|
299
|
+
(TABLE this:
|
|
300
|
+
(IDENTIFIER this: x, quoted: False))))
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
Finally generating the new SQL:
|
|
304
|
+
|
|
305
|
+
```python
|
|
306
|
+
Generator(transforms={
|
|
307
|
+
SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
|
|
308
|
+
}).generate(expression)
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
```sql
|
|
312
|
+
SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
|
|
313
|
+
```
|
|
314
|
+
|
|
258
315
|
### Benchmarks
|
|
259
316
|
|
|
260
317
|
[Benchmarks](benchmarks) run on Python 3.9.6 in seconds.
|
|
@@ -267,6 +324,19 @@ FROM users
|
|
|
267
324
|
|
|
268
325
|
|
|
269
326
|
## Run Tests and Lint
|
|
270
|
-
```
|
|
327
|
+
```
|
|
328
|
+
pip install -r requirements.txt
|
|
329
|
+
./format_code.sh
|
|
330
|
+
./run_checks.sh
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
## Optional Dependencies
|
|
334
|
+
SQLGlot uses [dateutil](https://github.com/dateutil/dateutil) to simplify literal timedelta expressions. The optimizer will not simplify expressions like
|
|
335
|
+
|
|
336
|
+
```sql
|
|
337
|
+
x + interval '1' month
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
if the module cannot be found.
|
|
271
341
|
|
|
272
342
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# SQLGlot
|
|
2
2
|
|
|
3
|
-
SQLGlot is a no dependency Python SQL parser and
|
|
3
|
+
SQLGlot is a no dependency Python SQL parser, transpiler, and optimizer. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntactically correct SQL in the targeted dialects.
|
|
4
4
|
|
|
5
5
|
It is currently the [fastest](#benchmarks) pure-Python SQL parser.
|
|
6
6
|
|
|
@@ -56,100 +56,45 @@ sqlglot.transpile(sql, write='spark', identify=True, pretty=True)[0]
|
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
```sql
|
|
59
|
-
WITH baz AS (
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
59
|
+
WITH `baz` AS (
|
|
60
|
+
SELECT
|
|
61
|
+
`a`,
|
|
62
|
+
`c`
|
|
63
|
+
FROM `foo`
|
|
64
|
+
WHERE
|
|
65
|
+
`a` = 1
|
|
66
66
|
)
|
|
67
67
|
SELECT
|
|
68
68
|
`f`.`a`,
|
|
69
69
|
`b`.`b`,
|
|
70
70
|
`baz`.`c`,
|
|
71
|
-
CAST(`b`.`a` AS FLOAT) AS d
|
|
72
|
-
FROM `foo` AS f
|
|
73
|
-
JOIN `bar` AS b
|
|
74
|
-
`f`.`a` = `b`.`a`
|
|
75
|
-
LEFT JOIN `baz`
|
|
76
|
-
`f`.`a` = `baz`.`a`
|
|
71
|
+
CAST(`b`.`a` AS FLOAT) AS `d`
|
|
72
|
+
FROM `foo` AS `f`
|
|
73
|
+
JOIN `bar` AS `b`
|
|
74
|
+
ON `f`.`a` = `b`.`a`
|
|
75
|
+
LEFT JOIN `baz`
|
|
76
|
+
ON `f`.`a` = `baz`.`a`
|
|
77
77
|
```
|
|
78
78
|
|
|
79
|
-
###
|
|
80
|
-
#### Custom Types
|
|
81
|
-
A simple transform on types can be accomplished by providing a corresponding mapping:
|
|
82
|
-
```python
|
|
83
|
-
|
|
84
|
-
from sqlglot import *
|
|
85
|
-
from sqlglot import expressions as exp
|
|
86
|
-
|
|
87
|
-
transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
```sql
|
|
91
|
-
SELECT CAST(a AS SPECIAL INT) FROM x
|
|
92
|
-
```
|
|
79
|
+
### Metadata
|
|
93
80
|
|
|
94
|
-
|
|
95
|
-
#### Custom Functions
|
|
96
|
-
In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
|
|
81
|
+
You can explore SQL with expression helpers to do things like find columns and tables.
|
|
97
82
|
|
|
98
83
|
```python
|
|
99
|
-
from sqlglot import
|
|
100
|
-
from sqlglot.expressions import Func
|
|
84
|
+
from sqlglot import parse_one, exp
|
|
101
85
|
|
|
102
|
-
|
|
103
|
-
|
|
86
|
+
# print all column references (a and b)
|
|
87
|
+
for column in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Column):
|
|
88
|
+
print(column.alias_or_name)
|
|
104
89
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
90
|
+
# find all projections in select statements (a and c)
|
|
91
|
+
for select in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Select):
|
|
92
|
+
for projection in select.args["expressions"]:
|
|
93
|
+
print(projection.alias_or_name)
|
|
108
94
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
<Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
|
|
113
|
-
<Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
|
|
114
|
-
<Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
|
|
115
|
-
<Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
|
|
116
|
-
<Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
|
|
117
|
-
<Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
|
|
118
|
-
<Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
|
|
119
|
-
<Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
|
|
120
|
-
]
|
|
121
|
-
|
|
122
|
-
```
|
|
123
|
-
```python
|
|
124
|
-
expression = Parser(functions={
|
|
125
|
-
**SpecialUdf.default_parser_mappings(),
|
|
126
|
-
}).parse(tokens)[0]
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
The expression tree produced by the parser:
|
|
130
|
-
|
|
131
|
-
```
|
|
132
|
-
(SELECT distinct: False, expressions:
|
|
133
|
-
(SPECIALUDF a:
|
|
134
|
-
(COLUMN this:
|
|
135
|
-
(IDENTIFIER this: a, quoted: False)), b:
|
|
136
|
-
(COLUMN this:
|
|
137
|
-
(IDENTIFIER this: b, quoted: False))), from:
|
|
138
|
-
(FROM expressions:
|
|
139
|
-
(TABLE this:
|
|
140
|
-
(IDENTIFIER this: x, quoted: False))))
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
Finally generating the new SQL:
|
|
144
|
-
|
|
145
|
-
```python
|
|
146
|
-
Generator(transforms={
|
|
147
|
-
SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
|
|
148
|
-
}).generate(expression)
|
|
149
|
-
```
|
|
150
|
-
|
|
151
|
-
```sql
|
|
152
|
-
SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
|
|
95
|
+
# find all tables (x, y, z)
|
|
96
|
+
for table in parse_one("SELECT * FROM x JOIN y JOIN z").find_all(exp.Table):
|
|
97
|
+
print(table.name)
|
|
153
98
|
```
|
|
154
99
|
|
|
155
100
|
### Parser Errors
|
|
@@ -158,8 +103,8 @@ A syntax error will result in a parser error.
|
|
|
158
103
|
transpile("SELECT foo( FROM bar")
|
|
159
104
|
```
|
|
160
105
|
```
|
|
161
|
-
sqlglot.errors.ParseError:
|
|
162
|
-
SELECT foo(
|
|
106
|
+
sqlglot.errors.ParseError: Expecting ). Line 1, Col: 13.
|
|
107
|
+
SELECT foo( FROM bar
|
|
163
108
|
```
|
|
164
109
|
### Unsupported Errors
|
|
165
110
|
Presto APPROX_DISTINCT supports the accuracy argument which is not supported in Spark.
|
|
@@ -207,14 +152,13 @@ SELECT x FROM y, z
|
|
|
207
152
|
There is also a way to recursively transform the parsed tree by applying a mapping function to each tree node:
|
|
208
153
|
|
|
209
154
|
```python
|
|
210
|
-
import
|
|
211
|
-
import sqlglot.expressions as exp
|
|
155
|
+
from sqlglot import exp, parse_one
|
|
212
156
|
|
|
213
|
-
expression_tree =
|
|
157
|
+
expression_tree = parse_one("SELECT a FROM x")
|
|
214
158
|
|
|
215
159
|
def transformer(node):
|
|
216
|
-
if isinstance(node, exp.Column) and node.
|
|
217
|
-
return
|
|
160
|
+
if isinstance(node, exp.Column) and node.name == "a":
|
|
161
|
+
return parse_one("FUN(a)")
|
|
218
162
|
return node
|
|
219
163
|
|
|
220
164
|
transformed_tree = expression_tree.transform(transformer)
|
|
@@ -225,6 +169,42 @@ Which outputs:
|
|
|
225
169
|
SELECT FUN(a) FROM x
|
|
226
170
|
```
|
|
227
171
|
|
|
172
|
+
### SQL Optimizer
|
|
173
|
+
|
|
174
|
+
SQLGlot can rewrite queries into an "optimized" form. It performs a variety of [techniques](sqlglot/optimizer/optimizer.py) to create a new canonical AST. This AST can be used to standardize queries or provide the foundations for implementing an actual engine.
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
import sqlglot
|
|
178
|
+
from sqlglot.optimizer import optimize
|
|
179
|
+
|
|
180
|
+
>>>
|
|
181
|
+
optimize(
|
|
182
|
+
sqlglot.parse_one("""
|
|
183
|
+
SELECT A OR (B OR (C AND D))
|
|
184
|
+
FROM x
|
|
185
|
+
WHERE Z = date '2021-01-01' + INTERVAL '1' month OR 1 = 0
|
|
186
|
+
"""),
|
|
187
|
+
schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}}
|
|
188
|
+
).sql(pretty=True)
|
|
189
|
+
|
|
190
|
+
"""
|
|
191
|
+
SELECT
|
|
192
|
+
(
|
|
193
|
+
"x"."A"
|
|
194
|
+
OR "x"."B"
|
|
195
|
+
OR "x"."C"
|
|
196
|
+
)
|
|
197
|
+
AND (
|
|
198
|
+
"x"."A"
|
|
199
|
+
OR "x"."B"
|
|
200
|
+
OR "x"."D"
|
|
201
|
+
) AS "_col_0"
|
|
202
|
+
FROM "x" AS "x"
|
|
203
|
+
WHERE
|
|
204
|
+
"x"."Z" = CAST('2021-02-01' AS DATE)
|
|
205
|
+
"""
|
|
206
|
+
```
|
|
207
|
+
|
|
228
208
|
### SQL Annotations
|
|
229
209
|
|
|
230
210
|
SQLGlot supports annotations in the sql expression. This is an experimental feature that is not part of any of the SQL standards but it can be useful when needing to annotate what a selected field is supposed to be. Below is an example:
|
|
@@ -236,6 +216,83 @@ SELECT
|
|
|
236
216
|
FROM users
|
|
237
217
|
```
|
|
238
218
|
|
|
219
|
+
### Customization
|
|
220
|
+
#### Custom Types
|
|
221
|
+
A simple transform on types can be accomplished by providing a corresponding mapping:
|
|
222
|
+
```python
|
|
223
|
+
|
|
224
|
+
from sqlglot import *
|
|
225
|
+
|
|
226
|
+
transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
```sql
|
|
230
|
+
SELECT CAST(a AS SPECIAL INT) FROM x
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
|
|
234
|
+
#### Custom Functions
|
|
235
|
+
In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from sqlglot import *
|
|
239
|
+
from sqlglot.expressions import Func
|
|
240
|
+
|
|
241
|
+
class SpecialUdf(Func):
|
|
242
|
+
arg_types = {'a': True, 'b': True}
|
|
243
|
+
|
|
244
|
+
tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
|
|
245
|
+
```
|
|
246
|
+
Here is the output of the tokenizer:
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
[
|
|
250
|
+
<Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
|
|
251
|
+
<Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
|
|
252
|
+
<Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
|
|
253
|
+
<Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
|
|
254
|
+
<Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
|
|
255
|
+
<Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
|
|
256
|
+
<Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
|
|
257
|
+
<Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
|
|
258
|
+
<Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
|
|
259
|
+
]
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
```python
|
|
263
|
+
expression = Parser(functions={
|
|
264
|
+
**SpecialUdf.default_parser_mappings(),
|
|
265
|
+
}).parse(tokens)[0]
|
|
266
|
+
|
|
267
|
+
repr(expression)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
The expression tree produced by the parser:
|
|
271
|
+
|
|
272
|
+
```
|
|
273
|
+
(SELECT distinct: False, expressions:
|
|
274
|
+
(SPECIALUDF a:
|
|
275
|
+
(COLUMN this:
|
|
276
|
+
(IDENTIFIER this: a, quoted: False)), b:
|
|
277
|
+
(COLUMN this:
|
|
278
|
+
(IDENTIFIER this: b, quoted: False))), from:
|
|
279
|
+
(FROM expressions:
|
|
280
|
+
(TABLE this:
|
|
281
|
+
(IDENTIFIER this: x, quoted: False))))
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Finally generating the new SQL:
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
Generator(transforms={
|
|
288
|
+
SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
|
|
289
|
+
}).generate(expression)
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
```sql
|
|
293
|
+
SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
|
|
294
|
+
```
|
|
295
|
+
|
|
239
296
|
### Benchmarks
|
|
240
297
|
|
|
241
298
|
[Benchmarks](benchmarks) run on Python 3.9.6 in seconds.
|
|
@@ -248,4 +305,17 @@ FROM users
|
|
|
248
305
|
|
|
249
306
|
|
|
250
307
|
## Run Tests and Lint
|
|
251
|
-
```
|
|
308
|
+
```
|
|
309
|
+
pip install -r requirements.txt
|
|
310
|
+
./format_code.sh
|
|
311
|
+
./run_checks.sh
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
## Optional Dependencies
|
|
315
|
+
SQLGlot uses [dateutil](https://github.com/dateutil/dateutil) to simplify literal timedelta expressions. The optimizer will not simplify expressions like
|
|
316
|
+
|
|
317
|
+
```sql
|
|
318
|
+
x + interval '1' month
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
if the module cannot be found.
|
|
File without changes
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from setuptools import setup
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
2
|
|
|
3
3
|
version = (
|
|
4
4
|
open("sqlglot/__init__.py")
|
|
@@ -20,7 +20,7 @@ setup(
|
|
|
20
20
|
author="Toby Mao",
|
|
21
21
|
author_email="toby.mao@gmail.com",
|
|
22
22
|
license="MIT",
|
|
23
|
-
packages=["sqlglot"],
|
|
23
|
+
packages=find_packages(include=["sqlglot", "sqlglot.*"]),
|
|
24
24
|
classifiers=[
|
|
25
25
|
"Development Status :: 5 - Production/Stable",
|
|
26
26
|
"Intended Audience :: Developers",
|