sqlglot 2.2.1__tar.gz → 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {sqlglot-2.2.1 → sqlglot-3.1.0}/LICENSE +0 -0
  2. {sqlglot-2.2.1/sqlglot.egg-info → sqlglot-3.1.0}/PKG-INFO +161 -91
  3. {sqlglot-2.2.1 → sqlglot-3.1.0}/README.md +160 -90
  4. {sqlglot-2.2.1 → sqlglot-3.1.0}/setup.cfg +0 -0
  5. {sqlglot-2.2.1 → sqlglot-3.1.0}/setup.py +2 -2
  6. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/__init__.py +8 -46
  7. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/__main__.py +6 -1
  8. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/constants.py +0 -0
  9. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/dialects.py +109 -59
  10. sqlglot-3.1.0/sqlglot/errors.py +29 -0
  11. sqlglot-3.1.0/sqlglot/executor/__init__.py +40 -0
  12. sqlglot-3.1.0/sqlglot/executor/context.py +69 -0
  13. sqlglot-3.1.0/sqlglot/executor/env.py +31 -0
  14. sqlglot-3.1.0/sqlglot/executor/python.py +334 -0
  15. sqlglot-3.1.0/sqlglot/executor/table.py +81 -0
  16. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/expressions.py +567 -179
  17. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/generator.py +145 -100
  18. sqlglot-3.1.0/sqlglot/helper.py +144 -0
  19. sqlglot-3.1.0/sqlglot/optimizer/__init__.py +2 -0
  20. sqlglot-3.1.0/sqlglot/optimizer/decorrelate_subqueries.py +86 -0
  21. sqlglot-3.1.0/sqlglot/optimizer/eliminate_subqueries.py +48 -0
  22. sqlglot-3.1.0/sqlglot/optimizer/expand_multi_table_selects.py +16 -0
  23. sqlglot-3.1.0/sqlglot/optimizer/isolate_table_selects.py +31 -0
  24. sqlglot-3.1.0/sqlglot/optimizer/normalize.py +154 -0
  25. sqlglot-3.1.0/sqlglot/optimizer/optimize_joins.py +76 -0
  26. sqlglot-3.1.0/sqlglot/optimizer/optimizer.py +43 -0
  27. sqlglot-3.1.0/sqlglot/optimizer/pushdown_predicates.py +178 -0
  28. sqlglot-3.1.0/sqlglot/optimizer/pushdown_projections.py +76 -0
  29. sqlglot-3.1.0/sqlglot/optimizer/qualify_columns.py +212 -0
  30. sqlglot-3.1.0/sqlglot/optimizer/qualify_tables.py +54 -0
  31. sqlglot-3.1.0/sqlglot/optimizer/quote_identities.py +25 -0
  32. sqlglot-3.1.0/sqlglot/optimizer/schema.py +129 -0
  33. sqlglot-3.1.0/sqlglot/optimizer/scope.py +421 -0
  34. sqlglot-3.1.0/sqlglot/optimizer/simplify.py +347 -0
  35. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/parser.py +199 -103
  36. sqlglot-3.1.0/sqlglot/planner.py +338 -0
  37. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/time.py +0 -0
  38. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/tokens.py +35 -4
  39. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot/trie.py +0 -0
  40. {sqlglot-2.2.1 → sqlglot-3.1.0/sqlglot.egg-info}/PKG-INFO +161 -91
  41. sqlglot-3.1.0/sqlglot.egg-info/SOURCES.txt +41 -0
  42. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot.egg-info/dependency_links.txt +0 -0
  43. {sqlglot-2.2.1 → sqlglot-3.1.0}/sqlglot.egg-info/top_level.txt +0 -0
  44. sqlglot-2.2.1/sqlglot/errors.py +0 -21
  45. sqlglot-2.2.1/sqlglot/helper.py +0 -69
  46. sqlglot-2.2.1/sqlglot.egg-info/SOURCES.txt +0 -19
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlglot
3
- Version: 2.2.1
3
+ Version: 3.1.0
4
4
  Summary: An easily customizable SQL parser and transpiler
5
5
  Home-page: https://github.com/tobymao/sqlglot
6
6
  Author: Toby Mao
@@ -19,7 +19,7 @@ License-File: LICENSE
19
19
 
20
20
  # SQLGlot
21
21
 
22
- SQLGlot is a no dependency Python SQL parser and transpiler. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntatically correct SQL in the targeted dialects.
22
+ SQLGlot is a no dependency Python SQL parser, transpiler, and optimizer. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntactically correct SQL in the targeted dialects.
23
23
 
24
24
  It is currently the [fastest](#benchmarks) pure-Python SQL parser.
25
25
 
@@ -75,100 +75,45 @@ sqlglot.transpile(sql, write='spark', identify=True, pretty=True)[0]
75
75
  ```
76
76
 
77
77
  ```sql
78
- WITH baz AS (
79
- SELECT
80
- `a`,
81
- `c`
82
- FROM `foo`
83
- WHERE
84
- `a` = 1
78
+ WITH `baz` AS (
79
+ SELECT
80
+ `a`,
81
+ `c`
82
+ FROM `foo`
83
+ WHERE
84
+ `a` = 1
85
85
  )
86
86
  SELECT
87
87
  `f`.`a`,
88
88
  `b`.`b`,
89
89
  `baz`.`c`,
90
- CAST(`b`.`a` AS FLOAT) AS d
91
- FROM `foo` AS f
92
- JOIN `bar` AS b ON
93
- `f`.`a` = `b`.`a`
94
- LEFT JOIN `baz` ON
95
- `f`.`a` = `baz`.`a`
90
+ CAST(`b`.`a` AS FLOAT) AS `d`
91
+ FROM `foo` AS `f`
92
+ JOIN `bar` AS `b`
93
+ ON `f`.`a` = `b`.`a`
94
+ LEFT JOIN `baz`
95
+ ON `f`.`a` = `baz`.`a`
96
96
  ```
97
97
 
98
- ### Customization
99
- #### Custom Types
100
- A simple transform on types can be accomplished by providing a corresponding mapping:
101
- ```python
102
-
103
- from sqlglot import *
104
- from sqlglot import expressions as exp
105
-
106
- transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
107
- ```
108
-
109
- ```sql
110
- SELECT CAST(a AS SPECIAL INT) FROM x
111
- ```
98
+ ### Metadata
112
99
 
113
- More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
114
- #### Custom Functions
115
- In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
100
+ You can explore SQL with expression helpers to do things like find columns and tables.
116
101
 
117
102
  ```python
118
- from sqlglot import *
119
- from sqlglot.expressions import Func
103
+ from sqlglot import parse_one, exp
120
104
 
121
- class SpecialUdf(Func):
122
- arg_types = {'a': True, 'b': True}
105
+ # print all column references (a and b)
106
+ for column in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Column):
107
+ print(column.alias_or_name)
123
108
 
124
- tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
125
- ```
126
- Here is the output of the tokenizer:
109
+ # find all projections in select statements (a and c)
110
+ for select in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Select):
111
+ for projection in select.args["expressions"]:
112
+ print(projection.alias_or_name)
127
113
 
128
- ```
129
- [
130
- <Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
131
- <Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
132
- <Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
133
- <Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
134
- <Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
135
- <Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
136
- <Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
137
- <Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
138
- <Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
139
- ]
140
-
141
- ```
142
- ```python
143
- expression = Parser(functions={
144
- **SpecialUdf.default_parser_mappings(),
145
- }).parse(tokens)[0]
146
- ```
147
-
148
- The expression tree produced by the parser:
149
-
150
- ```
151
- (SELECT distinct: False, expressions:
152
- (SPECIALUDF a:
153
- (COLUMN this:
154
- (IDENTIFIER this: a, quoted: False)), b:
155
- (COLUMN this:
156
- (IDENTIFIER this: b, quoted: False))), from:
157
- (FROM expressions:
158
- (TABLE this:
159
- (IDENTIFIER this: x, quoted: False))))
160
- ```
161
-
162
- Finally generating the new SQL:
163
-
164
- ```python
165
- Generator(transforms={
166
- SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
167
- }).generate(expression)
168
- ```
169
-
170
- ```sql
171
- SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
114
+ # find all tables (x, y, z)
115
+ for table in parse_one("SELECT * FROM x JOIN y JOIN z").find_all(exp.Table):
116
+ print(table.name)
172
117
  ```
173
118
 
174
119
  ### Parser Errors
@@ -177,8 +122,8 @@ A syntax error will result in a parser error.
177
122
  transpile("SELECT foo( FROM bar")
178
123
  ```
179
124
  ```
180
- sqlglot.errors.ParseError: Expected )
181
- SELECT foo( __FROM__ bar
125
+ sqlglot.errors.ParseError: Expecting ). Line 1, Col: 13.
126
+ SELECT foo( FROM bar
182
127
  ```
183
128
  ### Unsupported Errors
184
129
  Presto APPROX_DISTINCT supports the accuracy argument which is not supported in Spark.
@@ -226,14 +171,13 @@ SELECT x FROM y, z
226
171
  There is also a way to recursively transform the parsed tree by applying a mapping function to each tree node:
227
172
 
228
173
  ```python
229
- import sqlglot
230
- import sqlglot.expressions as exp
174
+ from sqlglot import exp, parse_one
231
175
 
232
- expression_tree = sqlglot.parse_one("SELECT a FROM x")
176
+ expression_tree = parse_one("SELECT a FROM x")
233
177
 
234
178
  def transformer(node):
235
- if isinstance(node, exp.Column) and node.text("this") == "a":
236
- return sqlglot.parse_one("FUN(a)")
179
+ if isinstance(node, exp.Column) and node.name == "a":
180
+ return parse_one("FUN(a)")
237
181
  return node
238
182
 
239
183
  transformed_tree = expression_tree.transform(transformer)
@@ -244,6 +188,42 @@ Which outputs:
244
188
  SELECT FUN(a) FROM x
245
189
  ```
246
190
 
191
+ ### SQL Optimizer
192
+
193
+ SQLGlot can rewrite queries into an "optimized" form. It performs a variety of [techniques](sqlglot/optimizer/optimizer.py) to create a new canonical AST. This AST can be used to standardize queries or provide the foundations for implementing an actual engine.
194
+
195
+ ```python
196
+ import sqlglot
197
+ from sqlglot.optimizer import optimize
198
+
199
+ >>>
200
+ optimize(
201
+ sqlglot.parse_one("""
202
+ SELECT A OR (B OR (C AND D))
203
+ FROM x
204
+ WHERE Z = date '2021-01-01' + INTERVAL '1' month OR 1 = 0
205
+ """),
206
+ schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}}
207
+ ).sql(pretty=True)
208
+
209
+ """
210
+ SELECT
211
+ (
212
+ "x"."A"
213
+ OR "x"."B"
214
+ OR "x"."C"
215
+ )
216
+ AND (
217
+ "x"."A"
218
+ OR "x"."B"
219
+ OR "x"."D"
220
+ ) AS "_col_0"
221
+ FROM "x" AS "x"
222
+ WHERE
223
+ "x"."Z" = CAST('2021-02-01' AS DATE)
224
+ """
225
+ ```
226
+
247
227
  ### SQL Annotations
248
228
 
249
229
  SQLGlot supports annotations in the sql expression. This is an experimental feature that is not part of any of the SQL standards but it can be useful when needing to annotate what a selected field is supposed to be. Below is an example:
@@ -255,6 +235,83 @@ SELECT
255
235
  FROM users
256
236
  ```
257
237
 
238
+ ### Customization
239
+ #### Custom Types
240
+ A simple transform on types can be accomplished by providing a corresponding mapping:
241
+ ```python
242
+
243
+ from sqlglot import *
244
+
245
+ transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
246
+ ```
247
+
248
+ ```sql
249
+ SELECT CAST(a AS SPECIAL INT) FROM x
250
+ ```
251
+
252
+ More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
253
+ #### Custom Functions
254
+ In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
255
+
256
+ ```python
257
+ from sqlglot import *
258
+ from sqlglot.expressions import Func
259
+
260
+ class SpecialUdf(Func):
261
+ arg_types = {'a': True, 'b': True}
262
+
263
+ tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
264
+ ```
265
+ Here is the output of the tokenizer:
266
+
267
+ ```
268
+ [
269
+ <Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
270
+ <Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
271
+ <Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
272
+ <Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
273
+ <Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
274
+ <Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
275
+ <Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
276
+ <Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
277
+ <Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
278
+ ]
279
+
280
+ ```
281
+ ```python
282
+ expression = Parser(functions={
283
+ **SpecialUdf.default_parser_mappings(),
284
+ }).parse(tokens)[0]
285
+
286
+ repr(expression)
287
+ ```
288
+
289
+ The expression tree produced by the parser:
290
+
291
+ ```
292
+ (SELECT distinct: False, expressions:
293
+ (SPECIALUDF a:
294
+ (COLUMN this:
295
+ (IDENTIFIER this: a, quoted: False)), b:
296
+ (COLUMN this:
297
+ (IDENTIFIER this: b, quoted: False))), from:
298
+ (FROM expressions:
299
+ (TABLE this:
300
+ (IDENTIFIER this: x, quoted: False))))
301
+ ```
302
+
303
+ Finally generating the new SQL:
304
+
305
+ ```python
306
+ Generator(transforms={
307
+ SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
308
+ }).generate(expression)
309
+ ```
310
+
311
+ ```sql
312
+ SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
313
+ ```
314
+
258
315
  ### Benchmarks
259
316
 
260
317
  [Benchmarks](benchmarks) run on Python 3.9.6 in seconds.
@@ -267,6 +324,19 @@ FROM users
267
324
 
268
325
 
269
326
  ## Run Tests and Lint
270
- ```python -m unittest && python -m pylint sqlglot/ tests/```
327
+ ```
328
+ pip install -r requirements.txt
329
+ ./format_code.sh
330
+ ./run_checks.sh
331
+ ```
332
+
333
+ ## Optional Dependencies
334
+ SQLGlot uses [dateutil](https://github.com/dateutil/dateutil) to simplify literal timedelta expressions. The optimizer will not simplify expressions like
335
+
336
+ ```sql
337
+ x + interval '1' month
338
+ ```
339
+
340
+ if the module cannot be found.
271
341
 
272
342
 
@@ -1,6 +1,6 @@
1
1
  # SQLGlot
2
2
 
3
- SQLGlot is a no dependency Python SQL parser and transpiler. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntatically correct SQL in the targeted dialects.
3
+ SQLGlot is a no dependency Python SQL parser, transpiler, and optimizer. It can be used to format SQL or translate between different dialects like Presto, Spark, and Hive. It aims to read a wide variety of SQL inputs and output syntactically correct SQL in the targeted dialects.
4
4
 
5
5
  It is currently the [fastest](#benchmarks) pure-Python SQL parser.
6
6
 
@@ -56,100 +56,45 @@ sqlglot.transpile(sql, write='spark', identify=True, pretty=True)[0]
56
56
  ```
57
57
 
58
58
  ```sql
59
- WITH baz AS (
60
- SELECT
61
- `a`,
62
- `c`
63
- FROM `foo`
64
- WHERE
65
- `a` = 1
59
+ WITH `baz` AS (
60
+ SELECT
61
+ `a`,
62
+ `c`
63
+ FROM `foo`
64
+ WHERE
65
+ `a` = 1
66
66
  )
67
67
  SELECT
68
68
  `f`.`a`,
69
69
  `b`.`b`,
70
70
  `baz`.`c`,
71
- CAST(`b`.`a` AS FLOAT) AS d
72
- FROM `foo` AS f
73
- JOIN `bar` AS b ON
74
- `f`.`a` = `b`.`a`
75
- LEFT JOIN `baz` ON
76
- `f`.`a` = `baz`.`a`
71
+ CAST(`b`.`a` AS FLOAT) AS `d`
72
+ FROM `foo` AS `f`
73
+ JOIN `bar` AS `b`
74
+ ON `f`.`a` = `b`.`a`
75
+ LEFT JOIN `baz`
76
+ ON `f`.`a` = `baz`.`a`
77
77
  ```
78
78
 
79
- ### Customization
80
- #### Custom Types
81
- A simple transform on types can be accomplished by providing a corresponding mapping:
82
- ```python
83
-
84
- from sqlglot import *
85
- from sqlglot import expressions as exp
86
-
87
- transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
88
- ```
89
-
90
- ```sql
91
- SELECT CAST(a AS SPECIAL INT) FROM x
92
- ```
79
+ ### Metadata
93
80
 
94
- More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
95
- #### Custom Functions
96
- In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
81
+ You can explore SQL with expression helpers to do things like find columns and tables.
97
82
 
98
83
  ```python
99
- from sqlglot import *
100
- from sqlglot.expressions import Func
84
+ from sqlglot import parse_one, exp
101
85
 
102
- class SpecialUdf(Func):
103
- arg_types = {'a': True, 'b': True}
86
+ # print all column references (a and b)
87
+ for column in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Column):
88
+ print(column.alias_or_name)
104
89
 
105
- tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
106
- ```
107
- Here is the output of the tokenizer:
90
+ # find all projections in select statements (a and c)
91
+ for select in parse_one("SELECT a, b + 1 AS c FROM d").find_all(exp.Select):
92
+ for projection in select.args["expressions"]:
93
+ print(projection.alias_or_name)
108
94
 
109
- ```
110
- [
111
- <Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
112
- <Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
113
- <Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
114
- <Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
115
- <Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
116
- <Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
117
- <Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
118
- <Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
119
- <Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
120
- ]
121
-
122
- ```
123
- ```python
124
- expression = Parser(functions={
125
- **SpecialUdf.default_parser_mappings(),
126
- }).parse(tokens)[0]
127
- ```
128
-
129
- The expression tree produced by the parser:
130
-
131
- ```
132
- (SELECT distinct: False, expressions:
133
- (SPECIALUDF a:
134
- (COLUMN this:
135
- (IDENTIFIER this: a, quoted: False)), b:
136
- (COLUMN this:
137
- (IDENTIFIER this: b, quoted: False))), from:
138
- (FROM expressions:
139
- (TABLE this:
140
- (IDENTIFIER this: x, quoted: False))))
141
- ```
142
-
143
- Finally generating the new SQL:
144
-
145
- ```python
146
- Generator(transforms={
147
- SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
148
- }).generate(expression)
149
- ```
150
-
151
- ```sql
152
- SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
95
+ # find all tables (x, y, z)
96
+ for table in parse_one("SELECT * FROM x JOIN y JOIN z").find_all(exp.Table):
97
+ print(table.name)
153
98
  ```
154
99
 
155
100
  ### Parser Errors
@@ -158,8 +103,8 @@ A syntax error will result in a parser error.
158
103
  transpile("SELECT foo( FROM bar")
159
104
  ```
160
105
  ```
161
- sqlglot.errors.ParseError: Expected )
162
- SELECT foo( __FROM__ bar
106
+ sqlglot.errors.ParseError: Expecting ). Line 1, Col: 13.
107
+ SELECT foo( FROM bar
163
108
  ```
164
109
  ### Unsupported Errors
165
110
  Presto APPROX_DISTINCT supports the accuracy argument which is not supported in Spark.
@@ -207,14 +152,13 @@ SELECT x FROM y, z
207
152
  There is also a way to recursively transform the parsed tree by applying a mapping function to each tree node:
208
153
 
209
154
  ```python
210
- import sqlglot
211
- import sqlglot.expressions as exp
155
+ from sqlglot import exp, parse_one
212
156
 
213
- expression_tree = sqlglot.parse_one("SELECT a FROM x")
157
+ expression_tree = parse_one("SELECT a FROM x")
214
158
 
215
159
  def transformer(node):
216
- if isinstance(node, exp.Column) and node.text("this") == "a":
217
- return sqlglot.parse_one("FUN(a)")
160
+ if isinstance(node, exp.Column) and node.name == "a":
161
+ return parse_one("FUN(a)")
218
162
  return node
219
163
 
220
164
  transformed_tree = expression_tree.transform(transformer)
@@ -225,6 +169,42 @@ Which outputs:
225
169
  SELECT FUN(a) FROM x
226
170
  ```
227
171
 
172
+ ### SQL Optimizer
173
+
174
+ SQLGlot can rewrite queries into an "optimized" form. It performs a variety of [techniques](sqlglot/optimizer/optimizer.py) to create a new canonical AST. This AST can be used to standardize queries or provide the foundations for implementing an actual engine.
175
+
176
+ ```python
177
+ import sqlglot
178
+ from sqlglot.optimizer import optimize
179
+
180
+ >>>
181
+ optimize(
182
+ sqlglot.parse_one("""
183
+ SELECT A OR (B OR (C AND D))
184
+ FROM x
185
+ WHERE Z = date '2021-01-01' + INTERVAL '1' month OR 1 = 0
186
+ """),
187
+ schema={"x": {"A": "INT", "B": "INT", "C": "INT", "D": "INT", "Z": "STRING"}}
188
+ ).sql(pretty=True)
189
+
190
+ """
191
+ SELECT
192
+ (
193
+ "x"."A"
194
+ OR "x"."B"
195
+ OR "x"."C"
196
+ )
197
+ AND (
198
+ "x"."A"
199
+ OR "x"."B"
200
+ OR "x"."D"
201
+ ) AS "_col_0"
202
+ FROM "x" AS "x"
203
+ WHERE
204
+ "x"."Z" = CAST('2021-02-01' AS DATE)
205
+ """
206
+ ```
207
+
228
208
  ### SQL Annotations
229
209
 
230
210
  SQLGlot supports annotations in the sql expression. This is an experimental feature that is not part of any of the SQL standards but it can be useful when needing to annotate what a selected field is supposed to be. Below is an example:
@@ -236,6 +216,83 @@ SELECT
236
216
  FROM users
237
217
  ```
238
218
 
219
+ ### Customization
220
+ #### Custom Types
221
+ A simple transform on types can be accomplished by providing a corresponding mapping:
222
+ ```python
223
+
224
+ from sqlglot import *
225
+
226
+ transpile("SELECT CAST(a AS INT) FROM x", type_mapping={exp.DataType.Type.INT: "SPECIAL INT"})[0]
227
+ ```
228
+
229
+ ```sql
230
+ SELECT CAST(a AS SPECIAL INT) FROM x
231
+ ```
232
+
233
+ More complicated transforms can be accomplished by using the Tokenizer, Parser, and Generator directly.
234
+ #### Custom Functions
235
+ In this example, we want to parse a UDF SPECIAL_UDF and then output another version called SPECIAL_UDF_INVERSE with the arguments switched.
236
+
237
+ ```python
238
+ from sqlglot import *
239
+ from sqlglot.expressions import Func
240
+
241
+ class SpecialUdf(Func):
242
+ arg_types = {'a': True, 'b': True}
243
+
244
+ tokens = Tokenizer().tokenize("SELECT SPECIAL_UDF(a, b) FROM x")
245
+ ```
246
+ Here is the output of the tokenizer:
247
+
248
+ ```
249
+ [
250
+ <Token token_type: TokenType.SELECT, text: SELECT, line: 0, col: 0>,
251
+ <Token token_type: TokenType.VAR, text: SPECIAL_UDF, line: 0, col: 7>,
252
+ <Token token_type: TokenType.L_PAREN, text: (, line: 0, col: 18>,
253
+ <Token token_type: TokenType.VAR, text: a, line: 0, col: 19>,
254
+ <Token token_type: TokenType.COMMA, text: ,, line: 0, col: 20>,
255
+ <Token token_type: TokenType.VAR, text: b, line: 0, col: 22>,
256
+ <Token token_type: TokenType.R_PAREN, text: ), line: 0, col: 23>,
257
+ <Token token_type: TokenType.FROM, text: FROM, line: 0, col: 25>,
258
+ <Token token_type: TokenType.VAR, text: x, line: 0, col: 30>,
259
+ ]
260
+
261
+ ```
262
+ ```python
263
+ expression = Parser(functions={
264
+ **SpecialUdf.default_parser_mappings(),
265
+ }).parse(tokens)[0]
266
+
267
+ repr(expression)
268
+ ```
269
+
270
+ The expression tree produced by the parser:
271
+
272
+ ```
273
+ (SELECT distinct: False, expressions:
274
+ (SPECIALUDF a:
275
+ (COLUMN this:
276
+ (IDENTIFIER this: a, quoted: False)), b:
277
+ (COLUMN this:
278
+ (IDENTIFIER this: b, quoted: False))), from:
279
+ (FROM expressions:
280
+ (TABLE this:
281
+ (IDENTIFIER this: x, quoted: False))))
282
+ ```
283
+
284
+ Finally generating the new SQL:
285
+
286
+ ```python
287
+ Generator(transforms={
288
+ SpecialUdf: lambda self, e: f"SPECIAL_UDF_INVERSE({self.sql(e, 'b')}, {self.sql(e, 'a')})"
289
+ }).generate(expression)
290
+ ```
291
+
292
+ ```sql
293
+ SELECT SPECIAL_UDF_INVERSE(b, a) FROM x
294
+ ```
295
+
239
296
  ### Benchmarks
240
297
 
241
298
  [Benchmarks](benchmarks) run on Python 3.9.6 in seconds.
@@ -248,4 +305,17 @@ FROM users
248
305
 
249
306
 
250
307
  ## Run Tests and Lint
251
- ```python -m unittest && python -m pylint sqlglot/ tests/```
308
+ ```
309
+ pip install -r requirements.txt
310
+ ./format_code.sh
311
+ ./run_checks.sh
312
+ ```
313
+
314
+ ## Optional Dependencies
315
+ SQLGlot uses [dateutil](https://github.com/dateutil/dateutil) to simplify literal timedelta expressions. The optimizer will not simplify expressions like
316
+
317
+ ```sql
318
+ x + interval '1' month
319
+ ```
320
+
321
+ if the module cannot be found.
File without changes
@@ -1,4 +1,4 @@
1
- from setuptools import setup
1
+ from setuptools import setup, find_packages
2
2
 
3
3
  version = (
4
4
  open("sqlglot/__init__.py")
@@ -20,7 +20,7 @@ setup(
20
20
  author="Toby Mao",
21
21
  author_email="toby.mao@gmail.com",
22
22
  license="MIT",
23
- packages=["sqlglot"],
23
+ packages=find_packages(include=["sqlglot", "sqlglot.*"]),
24
24
  classifiers=[
25
25
  "Development Status :: 5 - Production/Stable",
26
26
  "Intended Audience :: Developers",