sqlglot 26.31.0__py3-none-any.whl → 26.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +1 -0
- sqlglot/dialects/athena.py +237 -116
- sqlglot/dialects/bigquery.py +3 -0
- sqlglot/dialects/clickhouse.py +5 -0
- sqlglot/dialects/dialect.py +24 -10
- sqlglot/dialects/dremio.py +53 -0
- sqlglot/dialects/duckdb.py +45 -0
- sqlglot/dialects/exasol.py +45 -2
- sqlglot/dialects/presto.py +4 -0
- sqlglot/dialects/spark2.py +2 -0
- sqlglot/expressions.py +35 -0
- sqlglot/jsonpath.py +1 -1
- sqlglot/optimizer/annotate_types.py +13 -0
- sqlglot/optimizer/pushdown_predicates.py +2 -1
- sqlglot/parser.py +1 -1
- sqlglot/tokens.py +7 -1
- {sqlglot-26.31.0.dist-info → sqlglot-26.32.0.dist-info}/METADATA +1 -1
- {sqlglot-26.31.0.dist-info → sqlglot-26.32.0.dist-info}/RECORD +22 -21
- {sqlglot-26.31.0.dist-info → sqlglot-26.32.0.dist-info}/WHEEL +0 -0
- {sqlglot-26.31.0.dist-info → sqlglot-26.32.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-26.31.0.dist-info → sqlglot-26.32.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
sqlglot/dialects/__init__.py
CHANGED
sqlglot/dialects/athena.py
CHANGED
@@ -2,46 +2,218 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import typing as t
|
4
4
|
|
5
|
-
from sqlglot import exp
|
6
|
-
from sqlglot.dialects
|
7
|
-
from sqlglot.
|
8
|
-
|
5
|
+
from sqlglot import exp, generator, parser, tokens
|
6
|
+
from sqlglot.dialects import Dialect, Hive, Trino
|
7
|
+
from sqlglot.tokens import TokenType, Token
|
8
|
+
|
9
|
+
|
10
|
+
class Athena(Dialect):
|
11
|
+
"""
|
12
|
+
Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific
|
13
|
+
modifications and then built the Athena service around them.
|
14
|
+
|
15
|
+
Thus, Athena is not simply hosted Trino, it's more like a router that routes SQL queries to an
|
16
|
+
execution engine depending on the query type.
|
17
|
+
|
18
|
+
As at 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3",
|
19
|
+
the following engines exist:
|
20
|
+
|
21
|
+
Hive:
|
22
|
+
- Accepts mostly the same syntax as Hadoop / Hive
|
23
|
+
- Uses backticks to quote identifiers
|
24
|
+
- Has a distinctive DDL syntax (around things like setting table properties, storage locations etc)
|
25
|
+
that is different from Trino
|
26
|
+
- Used for *most* DDL, with some exceptions that get routed to the Trino engine instead:
|
27
|
+
- CREATE [EXTERNAL] TABLE (without AS SELECT)
|
28
|
+
- ALTER
|
29
|
+
- DROP
|
30
|
+
|
31
|
+
Trino:
|
32
|
+
- Uses double quotes to quote identifiers
|
33
|
+
- Used for DDL operations that involve SELECT queries, eg:
|
34
|
+
- CREATE VIEW / DROP VIEW
|
35
|
+
- CREATE TABLE... AS SELECT
|
36
|
+
- Used for DML operations
|
37
|
+
- SELECT, INSERT, UPDATE, DELETE, MERGE
|
38
|
+
|
39
|
+
The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the
|
40
|
+
tokenizer / parser / generator for that engine. This is unfortunately necessary, as there are certain
|
41
|
+
incompatibilities between the engines' dialects and thus can't be handled by a single, unifying dialect.
|
42
|
+
|
43
|
+
References:
|
44
|
+
- https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
|
45
|
+
- https://docs.aws.amazon.com/athena/latest/ug/dml-queries-functions-operators.html
|
46
|
+
"""
|
47
|
+
|
48
|
+
def __init__(self, **kwargs):
|
49
|
+
super().__init__(**kwargs)
|
50
|
+
|
51
|
+
self._hive = Hive(**kwargs)
|
52
|
+
self._trino = Trino(**kwargs)
|
53
|
+
|
54
|
+
def tokenize(self, sql: str, **opts) -> t.List[Token]:
|
55
|
+
opts["hive"] = self._hive
|
56
|
+
opts["trino"] = self._trino
|
57
|
+
return super().tokenize(sql, **opts)
|
58
|
+
|
59
|
+
def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
|
60
|
+
opts["hive"] = self._hive
|
61
|
+
opts["trino"] = self._trino
|
62
|
+
return super().parse(sql, **opts)
|
63
|
+
|
64
|
+
def parse_into(
|
65
|
+
self, expression_type: exp.IntoType, sql: str, **opts
|
66
|
+
) -> t.List[t.Optional[exp.Expression]]:
|
67
|
+
opts["hive"] = self._hive
|
68
|
+
opts["trino"] = self._trino
|
69
|
+
return super().parse_into(expression_type, sql, **opts)
|
70
|
+
|
71
|
+
def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
|
72
|
+
opts["hive"] = self._hive
|
73
|
+
opts["trino"] = self._trino
|
74
|
+
return super().generate(expression, copy=copy, **opts)
|
75
|
+
|
76
|
+
# This Tokenizer consumes a combination of HiveQL and Trino SQL and then processes the tokens
|
77
|
+
# to disambiguate which dialect needs to be actually used in order to tokenize correctly.
|
78
|
+
class Tokenizer(tokens.Tokenizer):
|
79
|
+
IDENTIFIERS = Trino.Tokenizer.IDENTIFIERS + Hive.Tokenizer.IDENTIFIERS
|
80
|
+
STRING_ESCAPES = Trino.Tokenizer.STRING_ESCAPES + Hive.Tokenizer.STRING_ESCAPES
|
81
|
+
HEX_STRINGS = Trino.Tokenizer.HEX_STRINGS + Hive.Tokenizer.HEX_STRINGS
|
82
|
+
UNICODE_STRINGS = Trino.Tokenizer.UNICODE_STRINGS + Hive.Tokenizer.UNICODE_STRINGS
|
83
|
+
|
84
|
+
NUMERIC_LITERALS = {
|
85
|
+
**Trino.Tokenizer.NUMERIC_LITERALS,
|
86
|
+
**Hive.Tokenizer.NUMERIC_LITERALS,
|
87
|
+
}
|
88
|
+
|
89
|
+
KEYWORDS = {
|
90
|
+
**Hive.Tokenizer.KEYWORDS,
|
91
|
+
**Trino.Tokenizer.KEYWORDS,
|
92
|
+
"UNLOAD": TokenType.COMMAND,
|
93
|
+
}
|
94
|
+
|
95
|
+
def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
|
96
|
+
hive = kwargs.pop("hive", None) or Hive()
|
97
|
+
trino = kwargs.pop("trino", None) or Trino()
|
98
|
+
|
99
|
+
super().__init__(*args, **kwargs)
|
100
|
+
|
101
|
+
self._hive_tokenizer = hive.tokenizer(*args, **{**kwargs, "dialect": hive})
|
102
|
+
self._trino_tokenizer = _TrinoTokenizer(*args, **{**kwargs, "dialect": trino})
|
103
|
+
|
104
|
+
def tokenize(self, sql: str) -> t.List[Token]:
|
105
|
+
tokens = super().tokenize(sql)
|
106
|
+
|
107
|
+
if _tokenize_as_hive(tokens):
|
108
|
+
return [Token(TokenType.HIVE_TOKEN_STREAM, "")] + self._hive_tokenizer.tokenize(sql)
|
109
|
+
|
110
|
+
return self._trino_tokenizer.tokenize(sql)
|
111
|
+
|
112
|
+
class Parser(parser.Parser):
|
113
|
+
def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
|
114
|
+
hive = kwargs.pop("hive", None) or Hive()
|
115
|
+
trino = kwargs.pop("trino", None) or Trino()
|
116
|
+
|
117
|
+
super().__init__(*args, **kwargs)
|
118
|
+
|
119
|
+
self._hive_parser = hive.parser(*args, **{**kwargs, "dialect": hive})
|
120
|
+
self._trino_parser = _TrinoParser(*args, **{**kwargs, "dialect": trino})
|
121
|
+
|
122
|
+
def parse(
|
123
|
+
self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
|
124
|
+
) -> t.List[t.Optional[exp.Expression]]:
|
125
|
+
if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
|
126
|
+
return self._hive_parser.parse(raw_tokens[1:], sql)
|
127
|
+
|
128
|
+
return self._trino_parser.parse(raw_tokens, sql)
|
129
|
+
|
130
|
+
def parse_into(
|
131
|
+
self,
|
132
|
+
expression_types: exp.IntoType,
|
133
|
+
raw_tokens: t.List[Token],
|
134
|
+
sql: t.Optional[str] = None,
|
135
|
+
) -> t.List[t.Optional[exp.Expression]]:
|
136
|
+
if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
|
137
|
+
return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
|
138
|
+
|
139
|
+
return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
|
140
|
+
|
141
|
+
class Generator(generator.Generator):
|
142
|
+
def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
|
143
|
+
hive = kwargs.pop("hive", None) or Hive()
|
144
|
+
trino = kwargs.pop("trino", None) or Trino()
|
145
|
+
|
146
|
+
super().__init__(*args, **kwargs)
|
147
|
+
|
148
|
+
self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
|
149
|
+
self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
|
150
|
+
|
151
|
+
def generate(self, expression: exp.Expression, copy: bool = True) -> str:
|
152
|
+
if _generate_as_hive(expression):
|
153
|
+
generator = self._hive_generator
|
154
|
+
else:
|
155
|
+
generator = self._trino_generator
|
156
|
+
|
157
|
+
return generator.generate(expression, copy=copy)
|
158
|
+
|
159
|
+
|
160
|
+
def _tokenize_as_hive(tokens: t.List[Token]) -> bool:
|
161
|
+
if len(tokens) < 2:
|
162
|
+
return False
|
163
|
+
|
164
|
+
first, second, *rest = tokens
|
165
|
+
|
166
|
+
first_type = first.token_type
|
167
|
+
first_text = first.text.upper()
|
168
|
+
second_type = second.token_type
|
169
|
+
second_text = second.text.upper()
|
170
|
+
|
171
|
+
if first_type in (TokenType.DESCRIBE, TokenType.SHOW) or first_text == "MSCK REPAIR":
|
172
|
+
return True
|
173
|
+
|
174
|
+
if first_type in (TokenType.ALTER, TokenType.CREATE, TokenType.DROP):
|
175
|
+
if second_text in ("DATABASE", "EXTERNAL", "SCHEMA"):
|
176
|
+
return True
|
177
|
+
if second_type == TokenType.VIEW:
|
178
|
+
return False
|
179
|
+
|
180
|
+
return all(t.token_type != TokenType.SELECT for t in rest)
|
181
|
+
|
182
|
+
return False
|
9
183
|
|
10
184
|
|
11
185
|
def _generate_as_hive(expression: exp.Expression) -> bool:
|
12
186
|
if isinstance(expression, exp.Create):
|
13
187
|
if expression.kind == "TABLE":
|
14
|
-
properties
|
188
|
+
properties = expression.args.get("properties")
|
189
|
+
|
190
|
+
# CREATE EXTERNAL TABLE is Hive
|
15
191
|
if properties and properties.find(exp.ExternalProperty):
|
16
|
-
return True
|
192
|
+
return True
|
17
193
|
|
194
|
+
# Any CREATE TABLE other than CREATE TABLE ... AS <query> is Hive
|
18
195
|
if not isinstance(expression.expression, exp.Query):
|
19
|
-
return True
|
196
|
+
return True
|
20
197
|
else:
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe)):
|
198
|
+
# CREATE VIEW is Trino, but CREATE SCHEMA, CREATE DATABASE, etc, is Hive
|
199
|
+
return expression.kind != "VIEW"
|
200
|
+
elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe, exp.Show)):
|
25
201
|
if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
|
26
|
-
# DROP VIEW is Trino
|
202
|
+
# DROP VIEW is Trino, because CREATE VIEW is as well
|
27
203
|
return False
|
28
204
|
|
29
|
-
# Everything else is Hive
|
205
|
+
# Everything else, e.g., ALTER statements, is Hive
|
30
206
|
return True
|
31
207
|
|
32
208
|
return False
|
33
209
|
|
34
210
|
|
35
211
|
def _is_iceberg_table(properties: exp.Properties) -> bool:
|
36
|
-
|
37
|
-
(
|
38
|
-
p
|
39
|
-
|
40
|
-
|
41
|
-
),
|
42
|
-
None,
|
43
|
-
)
|
44
|
-
return bool(table_type_property and table_type_property.text("value").lower() == "iceberg")
|
212
|
+
for p in properties.expressions:
|
213
|
+
if isinstance(p, exp.Property) and p.name == "table_type":
|
214
|
+
return p.text("value").lower() == "iceberg"
|
215
|
+
|
216
|
+
return False
|
45
217
|
|
46
218
|
|
47
219
|
def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
|
@@ -64,6 +236,7 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
|
|
64
236
|
# ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
|
65
237
|
|
66
238
|
prop_name = "partitioned_by"
|
239
|
+
|
67
240
|
if isinstance(e.parent, exp.Properties):
|
68
241
|
if _is_iceberg_table(e.parent):
|
69
242
|
prop_name = "partitioning"
|
@@ -71,97 +244,45 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
|
|
71
244
|
return f"{prop_name}={self.sql(e, 'this')}"
|
72
245
|
|
73
246
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
}
|
117
|
-
|
118
|
-
class Parser(Trino.Parser):
|
119
|
-
"""
|
120
|
-
Parse queries for the Athena Trino execution engine
|
121
|
-
"""
|
122
|
-
|
123
|
-
STATEMENT_PARSERS = {
|
124
|
-
**Trino.Parser.STATEMENT_PARSERS,
|
125
|
-
TokenType.USING: lambda self: self._parse_as_command(self._prev),
|
126
|
-
}
|
127
|
-
|
128
|
-
class _HiveGenerator(Hive.Generator):
|
129
|
-
def alter_sql(self, expression: exp.Alter) -> str:
|
130
|
-
# package any ALTER TABLE ADD actions into a Schema object
|
131
|
-
# so it gets generated as `ALTER TABLE .. ADD COLUMNS(...)`
|
132
|
-
# instead of `ALTER TABLE ... ADD COLUMN` which is invalid syntax on Athena
|
133
|
-
if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
|
134
|
-
if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
|
135
|
-
new_actions = exp.Schema(expressions=expression.actions)
|
136
|
-
expression.set("actions", [new_actions])
|
137
|
-
|
138
|
-
return super().alter_sql(expression)
|
139
|
-
|
140
|
-
class Generator(Trino.Generator):
|
141
|
-
"""
|
142
|
-
Generate queries for the Athena Trino execution engine
|
143
|
-
"""
|
144
|
-
|
145
|
-
PROPERTIES_LOCATION = {
|
146
|
-
**Trino.Generator.PROPERTIES_LOCATION,
|
147
|
-
exp.LocationProperty: exp.Properties.Location.POST_WITH,
|
148
|
-
}
|
149
|
-
|
150
|
-
TRANSFORMS = {
|
151
|
-
**Trino.Generator.TRANSFORMS,
|
152
|
-
exp.PartitionedByProperty: _partitioned_by_property_sql,
|
153
|
-
exp.LocationProperty: _location_property_sql,
|
154
|
-
}
|
155
|
-
|
156
|
-
def __init__(self, *args, **kwargs):
|
157
|
-
super().__init__(*args, **kwargs)
|
158
|
-
|
159
|
-
hive_kwargs = {**kwargs, "dialect": "hive"}
|
160
|
-
|
161
|
-
self._hive_generator = Athena._HiveGenerator(*args, **hive_kwargs)
|
162
|
-
|
163
|
-
def generate(self, expression: exp.Expression, copy: bool = True) -> str:
|
164
|
-
if _generate_as_hive(expression):
|
165
|
-
return self._hive_generator.generate(expression, copy)
|
166
|
-
|
167
|
-
return super().generate(expression, copy)
|
247
|
+
# Athena extensions to Hive's generator
|
248
|
+
class _HiveGenerator(Hive.Generator):
|
249
|
+
def alter_sql(self, expression: exp.Alter) -> str:
|
250
|
+
# Package any ALTER TABLE ADD actions into a Schema object, so it gets generated as
|
251
|
+
# `ALTER TABLE .. ADD COLUMNS(...)`, instead of `ALTER TABLE ... ADD COLUMN`, which
|
252
|
+
# is invalid syntax on Athena
|
253
|
+
if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
|
254
|
+
if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
|
255
|
+
new_actions = exp.Schema(expressions=expression.actions)
|
256
|
+
expression.set("actions", [new_actions])
|
257
|
+
|
258
|
+
return super().alter_sql(expression)
|
259
|
+
|
260
|
+
|
261
|
+
# Athena extensions to Trino's tokenizer
|
262
|
+
class _TrinoTokenizer(Trino.Tokenizer):
|
263
|
+
KEYWORDS = {
|
264
|
+
**Trino.Tokenizer.KEYWORDS,
|
265
|
+
"UNLOAD": TokenType.COMMAND,
|
266
|
+
}
|
267
|
+
|
268
|
+
|
269
|
+
# Athena extensions to Trino's parser
|
270
|
+
class _TrinoParser(Trino.Parser):
|
271
|
+
STATEMENT_PARSERS = {
|
272
|
+
**Trino.Parser.STATEMENT_PARSERS,
|
273
|
+
TokenType.USING: lambda self: self._parse_as_command(self._prev),
|
274
|
+
}
|
275
|
+
|
276
|
+
|
277
|
+
# Athena extensions to Trino's generator
|
278
|
+
class _TrinoGenerator(Trino.Generator):
|
279
|
+
PROPERTIES_LOCATION = {
|
280
|
+
**Trino.Generator.PROPERTIES_LOCATION,
|
281
|
+
exp.LocationProperty: exp.Properties.Location.POST_WITH,
|
282
|
+
}
|
283
|
+
|
284
|
+
TRANSFORMS = {
|
285
|
+
**Trino.Generator.TRANSFORMS,
|
286
|
+
exp.PartitionedByProperty: _partitioned_by_property_sql,
|
287
|
+
exp.LocationProperty: _location_property_sql,
|
288
|
+
}
|
sqlglot/dialects/bigquery.py
CHANGED
@@ -30,6 +30,7 @@ from sqlglot.dialects.dialect import (
|
|
30
30
|
unit_to_var,
|
31
31
|
strposition_sql,
|
32
32
|
groupconcat_sql,
|
33
|
+
space_sql,
|
33
34
|
)
|
34
35
|
from sqlglot.helper import seq_get, split_num_words
|
35
36
|
from sqlglot.tokens import TokenType
|
@@ -444,6 +445,7 @@ class BigQuery(Dialect):
|
|
444
445
|
exp.Substring,
|
445
446
|
)
|
446
447
|
},
|
448
|
+
exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
|
447
449
|
exp.Concat: _annotate_concat,
|
448
450
|
exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
|
449
451
|
exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
@@ -1011,6 +1013,7 @@ class BigQuery(Dialect):
|
|
1011
1013
|
),
|
1012
1014
|
exp.SHA: rename_func("SHA1"),
|
1013
1015
|
exp.SHA2: sha256_sql,
|
1016
|
+
exp.Space: space_sql,
|
1014
1017
|
exp.StabilityProperty: lambda self, e: (
|
1015
1018
|
"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
|
1016
1019
|
),
|
sqlglot/dialects/clickhouse.py
CHANGED
@@ -303,6 +303,8 @@ class ClickHouse(Dialect):
|
|
303
303
|
**parser.Parser.FUNCTIONS,
|
304
304
|
"ANY": exp.AnyValue.from_arg_list,
|
305
305
|
"ARRAYSUM": exp.ArraySum.from_arg_list,
|
306
|
+
"ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
|
307
|
+
"ARRAYSLICE": exp.ArraySlice.from_arg_list,
|
306
308
|
"COUNTIF": _build_count_if,
|
307
309
|
"DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
|
308
310
|
"DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
|
@@ -330,6 +332,7 @@ class ClickHouse(Dialect):
|
|
330
332
|
"MD5": exp.MD5Digest.from_arg_list,
|
331
333
|
"SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
|
332
334
|
"SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
|
335
|
+
"SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list, # alias for camel-case substringIndex
|
333
336
|
"EDITDISTANCE": exp.Levenshtein.from_arg_list,
|
334
337
|
"LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
|
335
338
|
}
|
@@ -1065,6 +1068,8 @@ class ClickHouse(Dialect):
|
|
1065
1068
|
exp.ArrayConcat: rename_func("arrayConcat"),
|
1066
1069
|
exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
|
1067
1070
|
exp.ArrayRemove: remove_from_array_using_filter,
|
1071
|
+
exp.ArrayReverse: rename_func("arrayReverse"),
|
1072
|
+
exp.ArraySlice: rename_func("arraySlice"),
|
1068
1073
|
exp.ArraySum: rename_func("arraySum"),
|
1069
1074
|
exp.ArgMax: arg_max_or_min_no_count("argMax"),
|
1070
1075
|
exp.ArgMin: arg_max_or_min_no_count("argMin"),
|
sqlglot/dialects/dialect.py
CHANGED
@@ -73,6 +73,7 @@ class Dialects(str, Enum):
|
|
73
73
|
CLICKHOUSE = "clickhouse"
|
74
74
|
DATABRICKS = "databricks"
|
75
75
|
DORIS = "doris"
|
76
|
+
DREMIO = "dremio"
|
76
77
|
DRILL = "drill"
|
77
78
|
DRUID = "druid"
|
78
79
|
DUCKDB = "duckdb"
|
@@ -759,6 +760,12 @@ class Dialect(metaclass=_Dialect):
|
|
759
760
|
exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
|
760
761
|
exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
761
762
|
exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
|
763
|
+
exp.ArrayConcatAgg: lambda self, e: self._annotate_by_args(e, "this"),
|
764
|
+
exp.ArrayToString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TEXT),
|
765
|
+
exp.ArrayFirst: lambda self, e: self._annotate_by_array_element(e),
|
766
|
+
exp.ArrayLast: lambda self, e: self._annotate_by_array_element(e),
|
767
|
+
exp.ArrayReverse: lambda self, e: self._annotate_by_args(e, "this"),
|
768
|
+
exp.ArraySlice: lambda self, e: self._annotate_by_args(e, "this"),
|
762
769
|
exp.Bracket: lambda self, e: self._annotate_bracket(e),
|
763
770
|
exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
|
764
771
|
exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
|
@@ -1028,22 +1035,20 @@ class Dialect(metaclass=_Dialect):
|
|
1028
1035
|
for expression in self.parse(sql)
|
1029
1036
|
]
|
1030
1037
|
|
1031
|
-
def tokenize(self, sql: str) -> t.List[Token]:
|
1032
|
-
return self.tokenizer.tokenize(sql)
|
1038
|
+
def tokenize(self, sql: str, **opts) -> t.List[Token]:
|
1039
|
+
return self.tokenizer(**opts).tokenize(sql)
|
1033
1040
|
|
1034
|
-
|
1035
|
-
|
1036
|
-
return self.tokenizer_class(dialect=self)
|
1041
|
+
def tokenizer(self, **opts) -> Tokenizer:
|
1042
|
+
return self.tokenizer_class(**{"dialect": self, **opts})
|
1037
1043
|
|
1038
|
-
|
1039
|
-
|
1040
|
-
return self.jsonpath_tokenizer_class(dialect=self)
|
1044
|
+
def jsonpath_tokenizer(self, **opts) -> JSONPathTokenizer:
|
1045
|
+
return self.jsonpath_tokenizer_class(**{"dialect": self, **opts})
|
1041
1046
|
|
1042
1047
|
def parser(self, **opts) -> Parser:
|
1043
|
-
return self.parser_class(dialect
|
1048
|
+
return self.parser_class(**{"dialect": self, **opts})
|
1044
1049
|
|
1045
1050
|
def generator(self, **opts) -> Generator:
|
1046
|
-
return self.generator_class(dialect
|
1051
|
+
return self.generator_class(**{"dialect": self, **opts})
|
1047
1052
|
|
1048
1053
|
def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
|
1049
1054
|
return [
|
@@ -1930,3 +1935,12 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
|
|
1930
1935
|
expression=seq_get(args, 1),
|
1931
1936
|
replacement=seq_get(args, 2) or exp.Literal.string(""),
|
1932
1937
|
)
|
1938
|
+
|
1939
|
+
|
1940
|
+
def space_sql(self: Generator, expression: exp.Space) -> str:
|
1941
|
+
return self.sql(
|
1942
|
+
exp.Repeat(
|
1943
|
+
this=exp.Literal.string(" "),
|
1944
|
+
times=expression.this,
|
1945
|
+
)
|
1946
|
+
)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from sqlglot import expressions as exp
|
2
|
+
from sqlglot import parser, generator, tokens
|
3
|
+
from sqlglot.dialects.dialect import Dialect
|
4
|
+
|
5
|
+
|
6
|
+
class Dremio(Dialect):
|
7
|
+
SUPPORTS_USER_DEFINED_TYPES = False
|
8
|
+
CONCAT_COALESCE = True
|
9
|
+
TYPED_DIVISION = True
|
10
|
+
SUPPORTS_SEMI_ANTI_JOIN = False
|
11
|
+
NULL_ORDERING = "nulls_are_last"
|
12
|
+
SUPPORTS_VALUES_DEFAULT = False
|
13
|
+
|
14
|
+
class Parser(parser.Parser):
|
15
|
+
LOG_DEFAULTS_TO_LN = True
|
16
|
+
|
17
|
+
class Generator(generator.Generator):
|
18
|
+
NVL2_SUPPORTED = False
|
19
|
+
SUPPORTS_CONVERT_TIMEZONE = True
|
20
|
+
INTERVAL_ALLOWS_PLURAL_FORM = False
|
21
|
+
JOIN_HINTS = False
|
22
|
+
LIMIT_ONLY_LITERALS = True
|
23
|
+
MULTI_ARG_DISTINCT = False
|
24
|
+
|
25
|
+
# https://docs.dremio.com/current/reference/sql/data-types/
|
26
|
+
TYPE_MAPPING = {
|
27
|
+
**generator.Generator.TYPE_MAPPING,
|
28
|
+
exp.DataType.Type.SMALLINT: "INT",
|
29
|
+
exp.DataType.Type.TINYINT: "INT",
|
30
|
+
exp.DataType.Type.BINARY: "VARBINARY",
|
31
|
+
exp.DataType.Type.TEXT: "VARCHAR",
|
32
|
+
exp.DataType.Type.NCHAR: "VARCHAR",
|
33
|
+
exp.DataType.Type.CHAR: "VARCHAR",
|
34
|
+
exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
|
35
|
+
exp.DataType.Type.DATETIME: "TIMESTAMP",
|
36
|
+
exp.DataType.Type.ARRAY: "LIST",
|
37
|
+
exp.DataType.Type.BIT: "BOOLEAN",
|
38
|
+
}
|
39
|
+
|
40
|
+
def datatype_sql(self, expression: exp.DataType) -> str:
|
41
|
+
"""
|
42
|
+
Reject time-zone–aware TIMESTAMPs, which Dremio does not accept
|
43
|
+
"""
|
44
|
+
if expression.is_type(
|
45
|
+
exp.DataType.Type.TIMESTAMPTZ,
|
46
|
+
exp.DataType.Type.TIMESTAMPLTZ,
|
47
|
+
):
|
48
|
+
self.unsupported("Dremio does not support time-zone-aware TIMESTAMP")
|
49
|
+
|
50
|
+
return super().datatype_sql(expression)
|
51
|
+
|
52
|
+
class Tokenizer(tokens.Tokenizer):
|
53
|
+
COMMENTS = ["--", "//", ("/*", "*/")]
|
sqlglot/dialects/duckdb.py
CHANGED
@@ -1165,3 +1165,48 @@ class DuckDB(Dialect):
|
|
1165
1165
|
def autoincrementcolumnconstraint_sql(self, _) -> str:
|
1166
1166
|
self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
|
1167
1167
|
return ""
|
1168
|
+
|
1169
|
+
def aliases_sql(self, expression: exp.Aliases) -> str:
|
1170
|
+
this = expression.this
|
1171
|
+
if isinstance(this, exp.Posexplode):
|
1172
|
+
return self.posexplode_sql(this)
|
1173
|
+
|
1174
|
+
return super().aliases_sql(expression)
|
1175
|
+
|
1176
|
+
def posexplode_sql(self, expression: exp.Posexplode) -> str:
|
1177
|
+
this = expression.this
|
1178
|
+
parent = expression.parent
|
1179
|
+
|
1180
|
+
# The default Spark aliases are "pos" and "col", unless specified otherwise
|
1181
|
+
pos, col = exp.to_identifier("pos"), exp.to_identifier("col")
|
1182
|
+
|
1183
|
+
if isinstance(parent, exp.Aliases):
|
1184
|
+
# Column case: SELECT POSEXPLODE(col) [AS (a, b)]
|
1185
|
+
pos, col = parent.expressions
|
1186
|
+
elif isinstance(parent, exp.Table):
|
1187
|
+
# Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
|
1188
|
+
alias = parent.args.get("alias")
|
1189
|
+
if alias:
|
1190
|
+
pos, col = alias.columns or [pos, col]
|
1191
|
+
alias.pop()
|
1192
|
+
|
1193
|
+
# Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
|
1194
|
+
# Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
|
1195
|
+
unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
|
1196
|
+
gen_subscripts = self.sql(
|
1197
|
+
exp.Alias(
|
1198
|
+
this=exp.Anonymous(
|
1199
|
+
this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
|
1200
|
+
)
|
1201
|
+
- exp.Literal.number(1),
|
1202
|
+
alias=pos,
|
1203
|
+
)
|
1204
|
+
)
|
1205
|
+
|
1206
|
+
posexplode_sql = self.format_args(gen_subscripts, unnest_sql)
|
1207
|
+
|
1208
|
+
if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
|
1209
|
+
# SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
|
1210
|
+
return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))
|
1211
|
+
|
1212
|
+
return posexplode_sql
|
sqlglot/dialects/exasol.py
CHANGED
@@ -1,9 +1,31 @@
|
|
1
1
|
from __future__ import annotations
|
2
|
-
from sqlglot import exp, generator
|
3
|
-
from sqlglot.dialects.dialect import Dialect, rename_func
|
2
|
+
from sqlglot import exp, generator, parser
|
3
|
+
from sqlglot.dialects.dialect import Dialect, rename_func, binary_from_function
|
4
|
+
from sqlglot.helper import seq_get
|
5
|
+
from sqlglot.generator import unsupported_args
|
4
6
|
|
5
7
|
|
6
8
|
class Exasol(Dialect):
|
9
|
+
class Parser(parser.Parser):
|
10
|
+
FUNCTIONS = {
|
11
|
+
**parser.Parser.FUNCTIONS,
|
12
|
+
"BIT_AND": binary_from_function(exp.BitwiseAnd),
|
13
|
+
"BIT_OR": binary_from_function(exp.BitwiseOr),
|
14
|
+
"BIT_XOR": binary_from_function(exp.BitwiseXor),
|
15
|
+
"BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
|
16
|
+
"BIT_LSHIFT": binary_from_function(exp.BitwiseLeftShift),
|
17
|
+
"BIT_RSHIFT": binary_from_function(exp.BitwiseRightShift),
|
18
|
+
"EVERY": lambda args: exp.All(this=seq_get(args, 0)),
|
19
|
+
"EDIT_DISTANCE": exp.Levenshtein.from_arg_list,
|
20
|
+
"REGEXP_REPLACE": lambda args: exp.RegexpReplace(
|
21
|
+
this=seq_get(args, 0),
|
22
|
+
expression=seq_get(args, 1),
|
23
|
+
replacement=seq_get(args, 2),
|
24
|
+
position=seq_get(args, 3),
|
25
|
+
occurrence=seq_get(args, 4),
|
26
|
+
),
|
27
|
+
}
|
28
|
+
|
7
29
|
class Generator(generator.Generator):
|
8
30
|
# https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType
|
9
31
|
STRING_TYPE_MAPPING = {
|
@@ -41,6 +63,27 @@ class Exasol(Dialect):
|
|
41
63
|
|
42
64
|
TRANSFORMS = {
|
43
65
|
**generator.Generator.TRANSFORMS,
|
66
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
|
67
|
+
exp.All: rename_func("EVERY"),
|
68
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_and.htm
|
69
|
+
exp.BitwiseAnd: rename_func("BIT_AND"),
|
70
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_or.htm
|
71
|
+
exp.BitwiseOr: rename_func("BIT_OR"),
|
72
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_not.htm
|
73
|
+
exp.BitwiseNot: rename_func("BIT_NOT"),
|
74
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_lshift.htm
|
75
|
+
exp.BitwiseLeftShift: rename_func("BIT_LSHIFT"),
|
76
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_rshift.htm
|
77
|
+
exp.BitwiseRightShift: rename_func("BIT_RSHIFT"),
|
78
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_xor.htm
|
79
|
+
exp.BitwiseXor: rename_func("BIT_XOR"),
|
80
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
|
81
|
+
exp.All: rename_func("EVERY"),
|
82
|
+
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/edit_distance.htm#EDIT_DISTANCE
|
83
|
+
exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
|
84
|
+
rename_func("EDIT_DISTANCE")
|
85
|
+
),
|
44
86
|
# https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm
|
45
87
|
exp.Mod: rename_func("MOD"),
|
88
|
+
exp.RegexpReplace: unsupported_args("modifiers")(rename_func("REGEXP_REPLACE")),
|
46
89
|
}
|
sqlglot/dialects/presto.py
CHANGED
@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
|
|
31
31
|
sequence_sql,
|
32
32
|
build_regexp_extract,
|
33
33
|
explode_to_unnest_sql,
|
34
|
+
space_sql,
|
34
35
|
)
|
35
36
|
from sqlglot.dialects.hive import Hive
|
36
37
|
from sqlglot.dialects.mysql import MySQL
|
@@ -369,6 +370,7 @@ class Presto(Dialect):
|
|
369
370
|
"STRPOS": lambda args: exp.StrPosition(
|
370
371
|
this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
|
371
372
|
),
|
373
|
+
"SLICE": exp.ArraySlice.from_arg_list,
|
372
374
|
"TO_CHAR": _build_to_char,
|
373
375
|
"TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
|
374
376
|
"TO_UTF8": lambda args: exp.Encode(
|
@@ -437,6 +439,7 @@ class Presto(Dialect):
|
|
437
439
|
exp.ArrayContains: rename_func("CONTAINS"),
|
438
440
|
exp.ArrayToString: rename_func("ARRAY_JOIN"),
|
439
441
|
exp.ArrayUniqueAgg: rename_func("SET_AGG"),
|
442
|
+
exp.ArraySlice: rename_func("SLICE"),
|
440
443
|
exp.AtTimeZone: rename_func("AT_TIMEZONE"),
|
441
444
|
exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
|
442
445
|
exp.BitwiseLeftShift: lambda self, e: self.func(
|
@@ -503,6 +506,7 @@ class Presto(Dialect):
|
|
503
506
|
amend_exploded_column_table,
|
504
507
|
]
|
505
508
|
),
|
509
|
+
exp.Space: space_sql,
|
506
510
|
exp.SortArray: _no_sort_array,
|
507
511
|
exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
|
508
512
|
exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
|
sqlglot/dialects/spark2.py
CHANGED
@@ -201,6 +201,7 @@ class Spark2(Hive):
|
|
201
201
|
"SHIFTLEFT": binary_from_function(exp.BitwiseLeftShift),
|
202
202
|
"SHIFTRIGHT": binary_from_function(exp.BitwiseRightShift),
|
203
203
|
"STRING": _build_as_cast("string"),
|
204
|
+
"SLICE": exp.ArraySlice.from_arg_list,
|
204
205
|
"TIMESTAMP": _build_as_cast("timestamp"),
|
205
206
|
"TO_TIMESTAMP": lambda args: (
|
206
207
|
_build_as_cast("timestamp")(args)
|
@@ -261,6 +262,7 @@ class Spark2(Hive):
|
|
261
262
|
exp.ArraySum: lambda self,
|
262
263
|
e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
|
263
264
|
exp.ArrayToString: rename_func("ARRAY_JOIN"),
|
265
|
+
exp.ArraySlice: rename_func("SLICE"),
|
264
266
|
exp.AtTimeZone: lambda self, e: self.func(
|
265
267
|
"FROM_UTC_TIMESTAMP", e.this, e.args.get("zone")
|
266
268
|
),
|
sqlglot/expressions.py
CHANGED
@@ -5569,6 +5569,22 @@ class ArrayFilter(Func):
|
|
5569
5569
|
_sql_names = ["FILTER", "ARRAY_FILTER"]
|
5570
5570
|
|
5571
5571
|
|
5572
|
+
class ArrayFirst(Func):
|
5573
|
+
pass
|
5574
|
+
|
5575
|
+
|
5576
|
+
class ArrayLast(Func):
|
5577
|
+
pass
|
5578
|
+
|
5579
|
+
|
5580
|
+
class ArrayReverse(Func):
|
5581
|
+
pass
|
5582
|
+
|
5583
|
+
|
5584
|
+
class ArraySlice(Func):
|
5585
|
+
arg_types = {"this": True, "start": True, "end": False, "step": False}
|
5586
|
+
|
5587
|
+
|
5572
5588
|
class ArrayToString(Func):
|
5573
5589
|
arg_types = {"this": True, "expression": True, "null": False}
|
5574
5590
|
_sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
|
@@ -6723,6 +6739,17 @@ class Substring(Func):
|
|
6723
6739
|
arg_types = {"this": True, "start": False, "length": False}
|
6724
6740
|
|
6725
6741
|
|
6742
|
+
class SubstringIndex(Func):
|
6743
|
+
"""
|
6744
|
+
SUBSTRING_INDEX(str, delim, count)
|
6745
|
+
|
6746
|
+
*count* > 0 → left slice before the *count*-th delimiter
|
6747
|
+
*count* < 0 → right slice after the |count|-th delimiter
|
6748
|
+
"""
|
6749
|
+
|
6750
|
+
arg_types = {"this": True, "delimiter": True, "count": True}
|
6751
|
+
|
6752
|
+
|
6726
6753
|
class StandardHash(Func):
|
6727
6754
|
arg_types = {"this": True, "expression": False}
|
6728
6755
|
|
@@ -6779,6 +6806,14 @@ class FromBase(Func):
|
|
6779
6806
|
arg_types = {"this": True, "expression": True}
|
6780
6807
|
|
6781
6808
|
|
6809
|
+
class Space(Func):
|
6810
|
+
"""
|
6811
|
+
SPACE(n) → string consisting of n blank characters
|
6812
|
+
"""
|
6813
|
+
|
6814
|
+
pass
|
6815
|
+
|
6816
|
+
|
6782
6817
|
class Struct(Func):
|
6783
6818
|
arg_types = {"expressions": False}
|
6784
6819
|
is_var_len_args = True
|
sqlglot/jsonpath.py
CHANGED
@@ -41,7 +41,7 @@ def parse(path: str, dialect: DialectType = None) -> exp.JSONPath:
|
|
41
41
|
"""Takes in a JSON path string and parses it into a JSONPath expression."""
|
42
42
|
from sqlglot.dialects import Dialect
|
43
43
|
|
44
|
-
jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer
|
44
|
+
jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer()
|
45
45
|
tokens = jsonpath_tokenizer.tokenize(path)
|
46
46
|
size = len(tokens)
|
47
47
|
|
@@ -329,6 +329,7 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
|
|
329
329
|
],
|
330
330
|
nested=True,
|
331
331
|
)
|
332
|
+
|
332
333
|
if not any(
|
333
334
|
cd.kind.is_type(exp.DataType.Type.UNKNOWN)
|
334
335
|
for cd in struct_type.expressions
|
@@ -630,3 +631,15 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
|
|
630
631
|
else:
|
631
632
|
self._set_type(expression, exp.DataType.Type.INT)
|
632
633
|
return expression
|
634
|
+
|
635
|
+
def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
|
636
|
+
self._annotate_args(expression)
|
637
|
+
|
638
|
+
array_arg = expression.this
|
639
|
+
if array_arg.type.is_type(exp.DataType.Type.ARRAY):
|
640
|
+
element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
|
641
|
+
self._set_type(expression, element_type)
|
642
|
+
else:
|
643
|
+
self._set_type(expression, exp.DataType.Type.UNKNOWN)
|
644
|
+
|
645
|
+
return expression
|
@@ -21,12 +21,13 @@ def pushdown_predicates(expression, dialect=None):
|
|
21
21
|
Returns:
|
22
22
|
sqlglot.Expression: optimized expression
|
23
23
|
"""
|
24
|
+
from sqlglot.dialects.athena import Athena
|
24
25
|
from sqlglot.dialects.presto import Presto
|
25
26
|
|
26
27
|
root = build_scope(expression)
|
27
28
|
|
28
29
|
dialect = Dialect.get_or_raise(dialect)
|
29
|
-
unnest_requires_cross_join = isinstance(dialect, Presto)
|
30
|
+
unnest_requires_cross_join = isinstance(dialect, (Athena, Presto))
|
30
31
|
|
31
32
|
if root:
|
32
33
|
scope_ref_count = root.ref_count()
|
sqlglot/parser.py
CHANGED
@@ -1895,7 +1895,7 @@ class Parser(metaclass=_Parser):
|
|
1895
1895
|
stmt.add_comments(comments, prepend=True)
|
1896
1896
|
return stmt
|
1897
1897
|
|
1898
|
-
if self._match_set(self.dialect.tokenizer.COMMANDS):
|
1898
|
+
if self._match_set(self.dialect.tokenizer_class.COMMANDS):
|
1899
1899
|
return self._parse_command()
|
1900
1900
|
|
1901
1901
|
expression = self._parse_expression()
|
sqlglot/tokens.py
CHANGED
@@ -427,6 +427,9 @@ class TokenType(AutoName):
|
|
427
427
|
NAMESPACE = auto()
|
428
428
|
EXPORT = auto()
|
429
429
|
|
430
|
+
# sentinel
|
431
|
+
HIVE_TOKEN_STREAM = auto()
|
432
|
+
|
430
433
|
|
431
434
|
_ALL_TOKEN_TYPES = list(TokenType)
|
432
435
|
_TOKEN_TYPE_TO_INDEX = {token_type: i for i, token_type in enumerate(_ALL_TOKEN_TYPES)}
|
@@ -1014,7 +1017,10 @@ class Tokenizer(metaclass=_Tokenizer):
|
|
1014
1017
|
)
|
1015
1018
|
|
1016
1019
|
def __init__(
|
1017
|
-
self,
|
1020
|
+
self,
|
1021
|
+
dialect: DialectType = None,
|
1022
|
+
use_rs_tokenizer: t.Optional[bool] = None,
|
1023
|
+
**opts: t.Any,
|
1018
1024
|
) -> None:
|
1019
1025
|
from sqlglot.dialects import Dialect
|
1020
1026
|
|
@@ -1,48 +1,49 @@
|
|
1
1
|
sqlglot/__init__.py,sha256=za08rtdPh2v7dOpGdNomttlIVGgTrKja7rPd6sQwaTg,5391
|
2
2
|
sqlglot/__main__.py,sha256=022c173KqxsiABWTEpUIq_tJUxuNiW7a7ABsxBXqvu8,2069
|
3
3
|
sqlglot/_typing.py,sha256=-1HPyr3w5COlSJWqlgt8jhFk2dyMvBuvVBqIX1wyVCM,642
|
4
|
-
sqlglot/_version.py,sha256=
|
4
|
+
sqlglot/_version.py,sha256=a_It9JwWrgyjKVWp4-1klpG0OQem5zJtgb83vsDFdkQ,515
|
5
5
|
sqlglot/diff.py,sha256=PtOllQMQa1Sw1-V2Y8eypmDqGujXYPaTOp_WLsWkAWk,17314
|
6
6
|
sqlglot/errors.py,sha256=QNKMr-pzLUDR-tuMmn_GK6iMHUIVdb_YSJ_BhGEvuso,2126
|
7
|
-
sqlglot/expressions.py,sha256=
|
7
|
+
sqlglot/expressions.py,sha256=4ucsMtJKzFpFm_SLb5YylQGsAjIbrwYprwk2QEKUKK4,244025
|
8
8
|
sqlglot/generator.py,sha256=Od0aBsKJph1wG_YhrknJAcAcVvuVIN823iyxA3KPi0Y,213383
|
9
9
|
sqlglot/helper.py,sha256=9nZjFVRBtMKFC3EdzpDQ6jkazFO19po6BF8xHiNGZIo,15111
|
10
|
-
sqlglot/jsonpath.py,sha256=
|
10
|
+
sqlglot/jsonpath.py,sha256=jneO-A57n4ojVT2drCn2HBlx_Ka8wLcGpemW1JgvbjA,7666
|
11
11
|
sqlglot/lineage.py,sha256=kXBDSErmZZluZx_kkrMj4MPEOAbkvcbX1tbOW7Bpl-U,15303
|
12
|
-
sqlglot/parser.py,sha256=
|
12
|
+
sqlglot/parser.py,sha256=uoqObZn91ixqajoTUvHYiZXylaMbsuL2c4bdSyOCSFU,324616
|
13
13
|
sqlglot/planner.py,sha256=ql7Li-bWJRcyXzNaZy_n6bQ6B2ZfunEIB8Ztv2xaxq4,14634
|
14
14
|
sqlglot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
15
|
sqlglot/schema.py,sha256=13H2qKQs27EKdTpDLOvcNnSTDAUbYNKjWtJs4aQCSOA,20509
|
16
16
|
sqlglot/serde.py,sha256=DQVJ95WrIvhYfe02Ytb4NQug2aMwDCEwpMBW1LKDqzE,2031
|
17
17
|
sqlglot/time.py,sha256=Q62gv6kL40OiRBF6BMESxKJcMVn7ZLNw7sv8H34z5FI,18400
|
18
|
-
sqlglot/tokens.py,sha256=
|
18
|
+
sqlglot/tokens.py,sha256=QciEA4QXiUmoVLWbd67Uf90BQvGaAvnYiZm9plqIpC8,48829
|
19
19
|
sqlglot/transforms.py,sha256=s96QMtR7rJbcLAU1I_IF1xLNxno6yvEbhERgbS5xmJ4,41164
|
20
20
|
sqlglot/trie.py,sha256=v27uXMrHfqrXlJ6GmeTSMovsB_3o0ctnlKhdNt7W6fI,2245
|
21
|
-
sqlglot/dialects/__init__.py,sha256=
|
22
|
-
sqlglot/dialects/athena.py,sha256=
|
23
|
-
sqlglot/dialects/bigquery.py,sha256=
|
24
|
-
sqlglot/dialects/clickhouse.py,sha256=
|
21
|
+
sqlglot/dialects/__init__.py,sha256=uxgw-b-QoqVFGbj1zW0p61f1mhV9oF0qscuBB0e7bkI,3541
|
22
|
+
sqlglot/dialects/athena.py,sha256=ofArmayYLev4qZQ15GM8mevG04qqR5WGFb2ZcuYm6x4,10966
|
23
|
+
sqlglot/dialects/bigquery.py,sha256=0HyMete6v-x1Thp9FVCppVjQoAcgenqg7Zl7wGPh3dA,53002
|
24
|
+
sqlglot/dialects/clickhouse.py,sha256=U_s6fRQbj3eFEaNw7N7UWC8EzYnFVYI-KKIsk-IU1I4,56936
|
25
25
|
sqlglot/dialects/databricks.py,sha256=mJN2lFpqgH95x3mtry3qWbuRf4q7NV5jbRAOspqclzY,4548
|
26
|
-
sqlglot/dialects/dialect.py,sha256=
|
26
|
+
sqlglot/dialects/dialect.py,sha256=ef933149Zk4tvoNmEAu7W6bteRbMkxnoBValfIVDOso,69667
|
27
27
|
sqlglot/dialects/doris.py,sha256=eC7Ct-iz7p4Usz659NkelUFhm-GmVolIZy5uaBvgjaA,14397
|
28
|
+
sqlglot/dialects/dremio.py,sha256=qTKZnVBGw6Tfco9MmsfIhKKYqJ4_TXaEhDCbEKNU9Uw,1866
|
28
29
|
sqlglot/dialects/drill.py,sha256=FOh7_KjPx_77pv0DiHKZog0CcmzqeF9_PEmGnJ1ESSM,5825
|
29
30
|
sqlglot/dialects/druid.py,sha256=kh3snZtneehNOWqs3XcPjsrhNaRbkCQ8E4hHbWJ1fHM,690
|
30
|
-
sqlglot/dialects/duckdb.py,sha256=
|
31
|
+
sqlglot/dialects/duckdb.py,sha256=L1pNYEQjiH2dYJurMXXYihgRalN9gZhmd5LoqCr_wvg,50015
|
31
32
|
sqlglot/dialects/dune.py,sha256=gALut-fFfN2qMsr8LvZ1NQK3F3W9z2f4PwMvTMXVVVg,375
|
32
|
-
sqlglot/dialects/exasol.py,sha256=
|
33
|
+
sqlglot/dialects/exasol.py,sha256=SbLg6l_BgI_hyZvLnnMh6m3WnBSqB3ZavLbLqE_vfjI,4726
|
33
34
|
sqlglot/dialects/fabric.py,sha256=IU7aMh2yEuG8eVBAYzXO5pObZBZ4rZSd5UgvkwbCI-E,5277
|
34
35
|
sqlglot/dialects/hive.py,sha256=yKCsVN4R8pIB2Lmx1YGiSR9b8Me3li6rsGuZrKjHTo4,31771
|
35
36
|
sqlglot/dialects/materialize.py,sha256=_DPLPt8YrdQIIXNrGJw1IMcGOoAEJ9NO9X9pDfy4hxs,3494
|
36
37
|
sqlglot/dialects/mysql.py,sha256=prZecn3zeoifZX7l54UuLG64ar7I-or_z9lF-rT8bds,49233
|
37
38
|
sqlglot/dialects/oracle.py,sha256=o6On1cYWFt6TpQYKuzo4kCz5vKb8jQr8WSwc619h3Lg,15967
|
38
39
|
sqlglot/dialects/postgres.py,sha256=KUyMoLkm1_sZKUbdjn6bjXx9xz7sbEMKa-fl5Mzfrsk,31025
|
39
|
-
sqlglot/dialects/presto.py,sha256=
|
40
|
+
sqlglot/dialects/presto.py,sha256=Tm3Bx9AJilT1xlgunTpF0wUhIZBOPS-rB5Iwitnygxc,33462
|
40
41
|
sqlglot/dialects/prql.py,sha256=fwN-SPEGx-drwf1K0U2MByN-PkW3C_rOgQ3xeJeychg,7908
|
41
42
|
sqlglot/dialects/redshift.py,sha256=sHhibn2g6_hVRd1XEe8HSQd_ofWkEpzld0odsNQ6X2g,15747
|
42
43
|
sqlglot/dialects/risingwave.py,sha256=hwEOPjMw0ZM_3fjQcBUE00oy6I8V6mzYOOYmcwwS8mw,2898
|
43
44
|
sqlglot/dialects/snowflake.py,sha256=68I7OjdWXSVnDxJ-ItmXnJd-A1nlND1T6aKNv0nkJlQ,63518
|
44
45
|
sqlglot/dialects/spark.py,sha256=bOUSXUoWtLfWaQ9fIjWaw4zLBJY6N7vxajdMbAxLdOk,8307
|
45
|
-
sqlglot/dialects/spark2.py,sha256=
|
46
|
+
sqlglot/dialects/spark2.py,sha256=EsuPGf0WQQzLy16D_uzVF0zKPyiiyxqfEQmsTegZ_IQ,14359
|
46
47
|
sqlglot/dialects/sqlite.py,sha256=fwqmopeuoupD_2dh2q6rT3UFxWtFHkskZ1OXAYnPT9Q,12483
|
47
48
|
sqlglot/dialects/starrocks.py,sha256=fHNgvq5Nz7dI4QUWCTOO5VDOYjasBxRRlcg9TbY0UZE,11235
|
48
49
|
sqlglot/dialects/tableau.py,sha256=oIawDzUITxGCWaEMB8OaNMPWhbC3U-2y09pYPm4eazc,2190
|
@@ -55,7 +56,7 @@ sqlglot/executor/env.py,sha256=tQhU5PpTBMcxgZIFddFqxWMNPtHN0vOOz72voncY3KY,8276
|
|
55
56
|
sqlglot/executor/python.py,sha256=09GYRzrPn3lZGfDJY9pbONOvmYxsRyeSWjUiqkSRHGo,16661
|
56
57
|
sqlglot/executor/table.py,sha256=xkuJlgLVNYUXsSUaX0zTcnFekldXLLU8LqDyjR5K9wY,4419
|
57
58
|
sqlglot/optimizer/__init__.py,sha256=FdAvVz6rQLLkiiH21-SD4RxB5zS3WDeU-s03PZkJ-F4,343
|
58
|
-
sqlglot/optimizer/annotate_types.py,sha256
|
59
|
+
sqlglot/optimizer/annotate_types.py,sha256=Ndfdvc0OO07DCCFuryK1tHkX9ydS-dMxKPgaZU0knWY,24619
|
59
60
|
sqlglot/optimizer/canonicalize.py,sha256=RJpUbWDudjknRMtO_Kf8MGZ5Hv1twpPWac2u5kpV4Vw,7719
|
60
61
|
sqlglot/optimizer/eliminate_ctes.py,sha256=fUBM0RUnPrm2sYptEWBux98B7fcx7W-BM1zVqfgDz9c,1448
|
61
62
|
sqlglot/optimizer/eliminate_joins.py,sha256=5Whliegc7U8BnS6tlrl9wkeAgyP1NpgCCAPxChHzFfw,5874
|
@@ -66,7 +67,7 @@ sqlglot/optimizer/normalize.py,sha256=wu3GeKY36PLyAb9f534jDDfzDwvZJpZ8g_H5QH6acZ
|
|
66
67
|
sqlglot/optimizer/normalize_identifiers.py,sha256=uD4xICJAgj0X7EFc2LYcDWxAW2aTHANO2wy7kfn9gfY,2098
|
67
68
|
sqlglot/optimizer/optimize_joins.py,sha256=LLBH6Zk0Uegsff48soJgJqhpGXkQx5VstGEt40vsjrg,2991
|
68
69
|
sqlglot/optimizer/optimizer.py,sha256=vXEXDWHvbO-vJmSI7UqJuydM2WrD1xko7rETq2EtVJo,3533
|
69
|
-
sqlglot/optimizer/pushdown_predicates.py,sha256=
|
70
|
+
sqlglot/optimizer/pushdown_predicates.py,sha256=HGjs3Z4V3-X2d1VTfWhyByY3aL5SmKnVvt3aDXiiBM0,8414
|
70
71
|
sqlglot/optimizer/pushdown_projections.py,sha256=7NoK5NAUVYVhs0YnYyo6WuXfaO-BShSwS6lA8Y-ATQ4,6668
|
71
72
|
sqlglot/optimizer/qualify.py,sha256=oAPfwub7dEkrlCrsptcJWpLya4BgKhN6M5SwIs_86LY,4002
|
72
73
|
sqlglot/optimizer/qualify_columns.py,sha256=77aScPakXYaiagnoCWk2qwMxlKuRGsFTAK9sOQuR2vY,40872
|
@@ -74,8 +75,8 @@ sqlglot/optimizer/qualify_tables.py,sha256=5f5enBAh-bpNB9ewF97W9fx9h1TGXj1Ih5fnc
|
|
74
75
|
sqlglot/optimizer/scope.py,sha256=HI3TZ4VWTgM6_x8k5ClA0lA0xidaKv4xgn8iGERJRjk,30824
|
75
76
|
sqlglot/optimizer/simplify.py,sha256=S0Blqg5Mq2KRRWhWz-Eivch9sBjBhg9fRJA6EdBzj2g,50704
|
76
77
|
sqlglot/optimizer/unnest_subqueries.py,sha256=kzWUVDlxs8z9nmRx-8U-pHXPtVZhEIwkKqmKhr2QLvc,10908
|
77
|
-
sqlglot-26.
|
78
|
-
sqlglot-26.
|
79
|
-
sqlglot-26.
|
80
|
-
sqlglot-26.
|
81
|
-
sqlglot-26.
|
78
|
+
sqlglot-26.32.0.dist-info/licenses/LICENSE,sha256=AI3__mHZfOtzY3EluR_pIYBm3_pE7TbVx7qaHxoZ114,1065
|
79
|
+
sqlglot-26.32.0.dist-info/METADATA,sha256=SeIGypJ6sJWF3j-g1zmBeptAwZOPUn61UyUJyp7qF5k,20732
|
80
|
+
sqlglot-26.32.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
81
|
+
sqlglot-26.32.0.dist-info/top_level.txt,sha256=5kRskCGA_gVADF9rSfSzPdLHXqvfMusDYeHePfNY2nQ,8
|
82
|
+
sqlglot-26.32.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|