sqlglot 26.30.0__py3-none-any.whl → 26.32.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +2 -0
- sqlglot/dialects/athena.py +237 -116
- sqlglot/dialects/bigquery.py +9 -4
- sqlglot/dialects/clickhouse.py +5 -0
- sqlglot/dialects/databricks.py +2 -0
- sqlglot/dialects/dialect.py +39 -19
- sqlglot/dialects/dremio.py +53 -0
- sqlglot/dialects/duckdb.py +45 -0
- sqlglot/dialects/exasol.py +89 -0
- sqlglot/dialects/fabric.py +60 -33
- sqlglot/dialects/presto.py +6 -0
- sqlglot/dialects/redshift.py +10 -2
- sqlglot/dialects/snowflake.py +3 -1
- sqlglot/dialects/spark2.py +2 -0
- sqlglot/dialects/tsql.py +7 -5
- sqlglot/expressions.py +44 -2
- sqlglot/generator.py +3 -3
- sqlglot/jsonpath.py +1 -1
- sqlglot/optimizer/annotate_types.py +13 -0
- sqlglot/optimizer/pushdown_predicates.py +2 -1
- sqlglot/optimizer/scope.py +13 -3
- sqlglot/parser.py +4 -3
- sqlglot/tokens.py +7 -1
- sqlglot/transforms.py +15 -1
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/METADATA +2 -2
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/RECORD +30 -28
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/WHEEL +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
sqlglot/dialects/__init__.py
CHANGED
@@ -70,6 +70,7 @@ DIALECTS = [
     "ClickHouse",
     "Databricks",
     "Doris",
+    "Dremio",
     "Drill",
     "Druid",
     "DuckDB",
@@ -93,6 +94,7 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
+    "Exasol",
 ]
 
 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
sqlglot/dialects/athena.py
CHANGED
@@ -2,46 +2,218 @@ from __future__ import annotations
 
 import typing as t
 
-from sqlglot import exp
-from sqlglot.dialects
-from sqlglot.
-
+from sqlglot import exp, generator, parser, tokens
+from sqlglot.dialects import Dialect, Hive, Trino
+from sqlglot.tokens import TokenType, Token
+
+
+class Athena(Dialect):
+    """
+    Over the years, it looks like AWS has taken various execution engines, bolted on AWS-specific
+    modifications and then built the Athena service around them.
+
+    Thus, Athena is not simply hosted Trino, it's more like a router that routes SQL queries to an
+    execution engine depending on the query type.
+
+    As at 2024-09-10, assuming your Athena workgroup is configured to use "Athena engine version 3",
+    the following engines exist:
+
+    Hive:
+     - Accepts mostly the same syntax as Hadoop / Hive
+     - Uses backticks to quote identifiers
+     - Has a distinctive DDL syntax (around things like setting table properties, storage locations etc)
+       that is different from Trino
+     - Used for *most* DDL, with some exceptions that get routed to the Trino engine instead:
+        - CREATE [EXTERNAL] TABLE (without AS SELECT)
+        - ALTER
+        - DROP
+
+    Trino:
+     - Uses double quotes to quote identifiers
+     - Used for DDL operations that involve SELECT queries, eg:
+        - CREATE VIEW / DROP VIEW
+        - CREATE TABLE... AS SELECT
+     - Used for DML operations
+        - SELECT, INSERT, UPDATE, DELETE, MERGE
+
+    The SQLGlot Athena dialect tries to identify which engine a query would be routed to and then uses the
+    tokenizer / parser / generator for that engine. This is unfortunately necessary, as there are certain
+    incompatibilities between the engines' dialects and thus can't be handled by a single, unifying dialect.
+
+    References:
+    - https://docs.aws.amazon.com/athena/latest/ug/ddl-reference.html
+    - https://docs.aws.amazon.com/athena/latest/ug/dml-queries-functions-operators.html
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self._hive = Hive(**kwargs)
+        self._trino = Trino(**kwargs)
+
+    def tokenize(self, sql: str, **opts) -> t.List[Token]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().tokenize(sql, **opts)
+
+    def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().parse(sql, **opts)
+
+    def parse_into(
+        self, expression_type: exp.IntoType, sql: str, **opts
+    ) -> t.List[t.Optional[exp.Expression]]:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().parse_into(expression_type, sql, **opts)
+
+    def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str:
+        opts["hive"] = self._hive
+        opts["trino"] = self._trino
+        return super().generate(expression, copy=copy, **opts)
+
+    # This Tokenizer consumes a combination of HiveQL and Trino SQL and then processes the tokens
+    # to disambiguate which dialect needs to be actually used in order to tokenize correctly.
+    class Tokenizer(tokens.Tokenizer):
+        IDENTIFIERS = Trino.Tokenizer.IDENTIFIERS + Hive.Tokenizer.IDENTIFIERS
+        STRING_ESCAPES = Trino.Tokenizer.STRING_ESCAPES + Hive.Tokenizer.STRING_ESCAPES
+        HEX_STRINGS = Trino.Tokenizer.HEX_STRINGS + Hive.Tokenizer.HEX_STRINGS
+        UNICODE_STRINGS = Trino.Tokenizer.UNICODE_STRINGS + Hive.Tokenizer.UNICODE_STRINGS
+
+        NUMERIC_LITERALS = {
+            **Trino.Tokenizer.NUMERIC_LITERALS,
+            **Hive.Tokenizer.NUMERIC_LITERALS,
+        }
+
+        KEYWORDS = {
+            **Hive.Tokenizer.KEYWORDS,
+            **Trino.Tokenizer.KEYWORDS,
+            "UNLOAD": TokenType.COMMAND,
+        }
+
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_tokenizer = hive.tokenizer(*args, **{**kwargs, "dialect": hive})
+            self._trino_tokenizer = _TrinoTokenizer(*args, **{**kwargs, "dialect": trino})
+
+        def tokenize(self, sql: str) -> t.List[Token]:
+            tokens = super().tokenize(sql)
+
+            if _tokenize_as_hive(tokens):
+                return [Token(TokenType.HIVE_TOKEN_STREAM, "")] + self._hive_tokenizer.tokenize(sql)
+
+            return self._trino_tokenizer.tokenize(sql)
+
+    class Parser(parser.Parser):
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_parser = hive.parser(*args, **{**kwargs, "dialect": hive})
+            self._trino_parser = _TrinoParser(*args, **{**kwargs, "dialect": trino})
+
+        def parse(
+            self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
+        ) -> t.List[t.Optional[exp.Expression]]:
+            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
+                return self._hive_parser.parse(raw_tokens[1:], sql)
+
+            return self._trino_parser.parse(raw_tokens, sql)
+
+        def parse_into(
+            self,
+            expression_types: exp.IntoType,
+            raw_tokens: t.List[Token],
+            sql: t.Optional[str] = None,
+        ) -> t.List[t.Optional[exp.Expression]]:
+            if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
+                return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
+
+            return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
+
+    class Generator(generator.Generator):
+        def __init__(self, *args: t.Any, **kwargs: t.Any) -> None:
+            hive = kwargs.pop("hive", None) or Hive()
+            trino = kwargs.pop("trino", None) or Trino()
+
+            super().__init__(*args, **kwargs)
+
+            self._hive_generator = _HiveGenerator(*args, **{**kwargs, "dialect": hive})
+            self._trino_generator = _TrinoGenerator(*args, **{**kwargs, "dialect": trino})
+
+        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
+            if _generate_as_hive(expression):
+                generator = self._hive_generator
+            else:
+                generator = self._trino_generator
+
+            return generator.generate(expression, copy=copy)
+
+
+def _tokenize_as_hive(tokens: t.List[Token]) -> bool:
+    if len(tokens) < 2:
+        return False
+
+    first, second, *rest = tokens
+
+    first_type = first.token_type
+    first_text = first.text.upper()
+    second_type = second.token_type
+    second_text = second.text.upper()
+
+    if first_type in (TokenType.DESCRIBE, TokenType.SHOW) or first_text == "MSCK REPAIR":
+        return True
+
+    if first_type in (TokenType.ALTER, TokenType.CREATE, TokenType.DROP):
+        if second_text in ("DATABASE", "EXTERNAL", "SCHEMA"):
+            return True
+        if second_type == TokenType.VIEW:
+            return False
+
+        return all(t.token_type != TokenType.SELECT for t in rest)
+
+    return False
 
 
 def _generate_as_hive(expression: exp.Expression) -> bool:
     if isinstance(expression, exp.Create):
         if expression.kind == "TABLE":
-            properties = expression.args.get("properties")
+            properties = expression.args.get("properties")
+
+            # CREATE EXTERNAL TABLE is Hive
             if properties and properties.find(exp.ExternalProperty):
-                return True
+                return True
 
+            # Any CREATE TABLE other than CREATE TABLE ... AS <query> is Hive
             if not isinstance(expression.expression, exp.Query):
-                return True
+                return True
         else:
-            return expression.kind != "VIEW"
-
-    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe)):
+            # CREATE VIEW is Trino, but CREATE SCHEMA, CREATE DATABASE, etc, is Hive
+            return expression.kind != "VIEW"
+    elif isinstance(expression, (exp.Alter, exp.Drop, exp.Describe, exp.Show)):
         if isinstance(expression, exp.Drop) and expression.kind == "VIEW":
-            # DROP VIEW is Trino
+            # DROP VIEW is Trino, because CREATE VIEW is as well
             return False
 
-        # Everything else is Hive
+        # Everything else, e.g., ALTER statements, is Hive
         return True
 
     return False
 
 
 def _is_iceberg_table(properties: exp.Properties) -> bool:
-    table_type_property = next(
-        (
-            p
-            for p in properties.expressions
-            if isinstance(p, exp.Property) and p.name == "table_type"
-        ),
-        None,
-    )
-    return bool(table_type_property and table_type_property.text("value").lower() == "iceberg")
+    for p in properties.expressions:
+        if isinstance(p, exp.Property) and p.name == "table_type":
+            return p.text("value").lower() == "iceberg"
+
+    return False
 
 
 def _location_property_sql(self: Athena.Generator, e: exp.LocationProperty):
@@ -64,6 +236,7 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
     # ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
 
     prop_name = "partitioned_by"
+
     if isinstance(e.parent, exp.Properties):
         if _is_iceberg_table(e.parent):
             prop_name = "partitioning"
@@ -71,97 +244,45 @@ def _partitioned_by_property_sql(self: Athena.Generator, e: exp.PartitionedByPro
     return f"{prop_name}={self.sql(e, 'this')}"
 
 
-    [old lines 74-115, the body of the previous Trino-based Athena class: content not captured in this diff view]
-        }
-
-    class Parser(Trino.Parser):
-        """
-        Parse queries for the Athena Trino execution engine
-        """
-
-        STATEMENT_PARSERS = {
-            **Trino.Parser.STATEMENT_PARSERS,
-            TokenType.USING: lambda self: self._parse_as_command(self._prev),
-        }
-
-    class _HiveGenerator(Hive.Generator):
-        def alter_sql(self, expression: exp.Alter) -> str:
-            # package any ALTER TABLE ADD actions into a Schema object
-            # so it gets generated as `ALTER TABLE .. ADD COLUMNS(...)`
-            # instead of `ALTER TABLE ... ADD COLUMN` which is invalid syntax on Athena
-            if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
-                if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
-                    new_actions = exp.Schema(expressions=expression.actions)
-                    expression.set("actions", [new_actions])
-
-            return super().alter_sql(expression)
-
-    class Generator(Trino.Generator):
-        """
-        Generate queries for the Athena Trino execution engine
-        """
-
-        PROPERTIES_LOCATION = {
-            **Trino.Generator.PROPERTIES_LOCATION,
-            exp.LocationProperty: exp.Properties.Location.POST_WITH,
-        }
-
-        TRANSFORMS = {
-            **Trino.Generator.TRANSFORMS,
-            exp.PartitionedByProperty: _partitioned_by_property_sql,
-            exp.LocationProperty: _location_property_sql,
-        }
-
-        def __init__(self, *args, **kwargs):
-            super().__init__(*args, **kwargs)
-
-            hive_kwargs = {**kwargs, "dialect": "hive"}
-
-            self._hive_generator = Athena._HiveGenerator(*args, **hive_kwargs)
-
-        def generate(self, expression: exp.Expression, copy: bool = True) -> str:
-            if _generate_as_hive(expression):
-                return self._hive_generator.generate(expression, copy)
-
-            return super().generate(expression, copy)
+# Athena extensions to Hive's generator
+class _HiveGenerator(Hive.Generator):
+    def alter_sql(self, expression: exp.Alter) -> str:
+        # Package any ALTER TABLE ADD actions into a Schema object, so it gets generated as
+        # `ALTER TABLE .. ADD COLUMNS(...)`, instead of `ALTER TABLE ... ADD COLUMN`, which
+        # is invalid syntax on Athena
        if isinstance(expression, exp.Alter) and expression.kind == "TABLE":
+            if expression.actions and isinstance(expression.actions[0], exp.ColumnDef):
+                new_actions = exp.Schema(expressions=expression.actions)
+                expression.set("actions", [new_actions])
+
+        return super().alter_sql(expression)
+
+
+# Athena extensions to Trino's tokenizer
+class _TrinoTokenizer(Trino.Tokenizer):
+    KEYWORDS = {
+        **Trino.Tokenizer.KEYWORDS,
+        "UNLOAD": TokenType.COMMAND,
+    }
+
+
+# Athena extensions to Trino's parser
+class _TrinoParser(Trino.Parser):
+    STATEMENT_PARSERS = {
+        **Trino.Parser.STATEMENT_PARSERS,
+        TokenType.USING: lambda self: self._parse_as_command(self._prev),
+    }
+
+
+# Athena extensions to Trino's generator
+class _TrinoGenerator(Trino.Generator):
+    PROPERTIES_LOCATION = {
+        **Trino.Generator.PROPERTIES_LOCATION,
+        exp.LocationProperty: exp.Properties.Location.POST_WITH,
+    }
+
+    TRANSFORMS = {
+        **Trino.Generator.TRANSFORMS,
+        exp.PartitionedByProperty: _partitioned_by_property_sql,
+        exp.LocationProperty: _location_property_sql,
+    }
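A quick way to see the engine routing from the public API. This is an illustrative sketch, not part of the diff, and the exact output formatting may vary between sqlglot versions:

import sqlglot

# CREATE EXTERNAL TABLE (without AS SELECT) is routed to the Hive engine,
# which quotes identifiers with backticks...
print(sqlglot.transpile("CREATE EXTERNAL TABLE t (c INT)", read="athena", write="athena", identify=True)[0])
# e.g. CREATE EXTERNAL TABLE `t` (`c` INT)

# ...while DML such as SELECT is routed to the Trino engine, which uses double quotes
print(sqlglot.transpile("SELECT c FROM t", read="athena", write="athena", identify=True)[0])
# e.g. SELECT "c" FROM "t"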
sqlglot/dialects/bigquery.py
CHANGED
@@ -30,6 +30,7 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
+    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -444,6 +445,7 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Concat: _annotate_concat,
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
@@ -543,7 +545,7 @@ class BigQuery(Dialect):
         "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
         "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
         "DATE_TRUNC": lambda args: exp.DateTrunc(
-            unit=
+            unit=seq_get(args, 1),
             this=seq_get(args, 0),
             zone=seq_get(args, 2),
         ),
@@ -963,9 +965,6 @@ class BigQuery(Dialect):
         exp.DateSub: date_add_interval_sql("DATE", "SUB"),
         exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
         exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
-        exp.DateTrunc: lambda self, e: self.func(
-            "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
-        ),
         exp.FromTimeZone: lambda self, e: self.func(
             "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
         ),
@@ -1014,6 +1013,7 @@ class BigQuery(Dialect):
         ),
         exp.SHA: rename_func("SHA1"),
         exp.SHA2: sha256_sql,
+        exp.Space: space_sql,
         exp.StabilityProperty: lambda self, e: (
             "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
         ),
@@ -1195,6 +1195,11 @@ class BigQuery(Dialect):
             "within",
         }
 
+        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
+            unit = expression.unit
+            unit_sql = unit.name if unit.is_string else self.sql(unit)
+            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))
+
         def mod_sql(self, expression: exp.Mod) -> str:
             this = expression.this
             expr = expression.expression
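The removed exp.DateTrunc lambda flattened the unit with e.text("unit"), which only works for simple keyword units; the new datetrunc_sql method emits non-literal units as expressions. An illustrative sketch, not part of the diff (output indicative):

import sqlglot

# BigQuery allows parameterized units such as WEEK(MONDAY); these are expressions,
# so they must go through self.sql(unit) rather than being flattened to text
print(sqlglot.transpile("SELECT DATE_TRUNC(d, WEEK(MONDAY)) FROM t", read="bigquery", write="bigquery")[0])
# e.g. SELECT DATE_TRUNC(d, WEEK(MONDAY)) FROM t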
sqlglot/dialects/clickhouse.py
CHANGED
@@ -303,6 +303,8 @@ class ClickHouse(Dialect):
         **parser.Parser.FUNCTIONS,
         "ANY": exp.AnyValue.from_arg_list,
         "ARRAYSUM": exp.ArraySum.from_arg_list,
+        "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
+        "ARRAYSLICE": exp.ArraySlice.from_arg_list,
         "COUNTIF": _build_count_if,
         "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
         "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
@@ -330,6 +332,7 @@ class ClickHouse(Dialect):
         "MD5": exp.MD5Digest.from_arg_list,
         "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
         "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
+        "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,  # alias for camel-case substringIndex
         "EDITDISTANCE": exp.Levenshtein.from_arg_list,
         "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
     }
@@ -1065,6 +1068,8 @@ class ClickHouse(Dialect):
         exp.ArrayConcat: rename_func("arrayConcat"),
         exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
         exp.ArrayRemove: remove_from_array_using_filter,
+        exp.ArrayReverse: rename_func("arrayReverse"),
+        exp.ArraySlice: rename_func("arraySlice"),
         exp.ArraySum: rename_func("arraySum"),
         exp.ArgMax: arg_max_or_min_no_count("argMax"),
         exp.ArgMin: arg_max_or_min_no_count("argMin"),
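With the paired FUNCTIONS and TRANSFORMS entries, arrayReverse and arraySlice now round-trip through dedicated expression types instead of parsing as anonymous functions. An illustrative sketch, not part of the diff:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT arraySlice(arrayReverse(xs), 1, 3) FROM t", read="clickhouse")

# Both calls now parse into typed nodes, which downstream passes (e.g. the new
# annotators in dialect.py below) can reason about
assert ast.find(exp.ArraySlice) and ast.find(exp.ArrayReverse)
print(ast.sql("clickhouse"))  # e.g. SELECT arraySlice(arrayReverse(xs), 1, 3) FROM t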
sqlglot/dialects/databricks.py
CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     build_date_delta,
     timestamptrunc_sql,
     build_formatted_time,
+    groupconcat_sql,
 )
 from sqlglot.dialects.spark import Spark
 from sqlglot.tokens import TokenType
@@ -87,6 +88,7 @@ class Databricks(Spark):
             e.this,
         ),
         exp.DatetimeTrunc: timestamptrunc_sql(),
+        exp.GroupConcat: groupconcat_sql,
         exp.Select: transforms.preprocess(
             [
                 transforms.eliminate_distinct_on,
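Mapping exp.GroupConcat onto the shared groupconcat_sql helper means string aggregation now survives transpilation into Databricks, for example from MySQL. An illustrative sketch, not part of the diff; the helper typically lowers to a LISTAGG form, though the exact output depends on the sqlglot version:

import sqlglot

print(sqlglot.transpile("SELECT GROUP_CONCAT(x SEPARATOR '; ') FROM t", read="mysql", write="databricks")[0])
# e.g. SELECT LISTAGG(x, '; ') FROM t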
sqlglot/dialects/dialect.py
CHANGED
@@ -73,6 +73,7 @@ class Dialects(str, Enum):
     CLICKHOUSE = "clickhouse"
     DATABRICKS = "databricks"
     DORIS = "doris"
+    DREMIO = "dremio"
     DRILL = "drill"
     DRUID = "druid"
     DUCKDB = "duckdb"
@@ -96,6 +97,7 @@ class Dialects(str, Enum):
     TERADATA = "teradata"
     TRINO = "trino"
     TSQL = "tsql"
+    EXASOL = "exasol"
 
 
 class NormalizationStrategy(str, AutoName):
@@ -700,6 +702,9 @@ class Dialect(metaclass=_Dialect):
             exp.TimeAdd,
             exp.TimeSub,
         },
+        exp.DataType.Type.TIMESTAMPTZ: {
+            exp.CurrentTimestampLTZ,
+        },
         exp.DataType.Type.TIMESTAMP: {
             exp.CurrentTimestamp,
             exp.StrToTime,
@@ -755,6 +760,12 @@ class Dialect(metaclass=_Dialect):
         exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
         exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
+        exp.ArrayConcatAgg: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArrayToString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TEXT),
+        exp.ArrayFirst: lambda self, e: self._annotate_by_array_element(e),
+        exp.ArrayLast: lambda self, e: self._annotate_by_array_element(e),
+        exp.ArrayReverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArraySlice: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Bracket: lambda self, e: self._annotate_bracket(e),
         exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
         exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
@@ -1024,22 +1035,20 @@ class Dialect(metaclass=_Dialect):
             for expression in self.parse(sql)
         ]
 
-    def tokenize(self, sql: str) -> t.List[Token]:
-        return self.tokenizer.tokenize(sql)
+    def tokenize(self, sql: str, **opts) -> t.List[Token]:
+        return self.tokenizer(**opts).tokenize(sql)
 
-    @property
-    def tokenizer(self) -> Tokenizer:
-        return self.tokenizer_class(dialect=self)
+    def tokenizer(self, **opts) -> Tokenizer:
+        return self.tokenizer_class(**{"dialect": self, **opts})
 
-    @property
-    def jsonpath_tokenizer(self) -> JSONPathTokenizer:
-        return self.jsonpath_tokenizer_class(dialect=self)
+    def jsonpath_tokenizer(self, **opts) -> JSONPathTokenizer:
+        return self.jsonpath_tokenizer_class(**{"dialect": self, **opts})
 
     def parser(self, **opts) -> Parser:
-        return self.parser_class(dialect=self, **opts)
+        return self.parser_class(**{"dialect": self, **opts})
 
     def generator(self, **opts) -> Generator:
-        return self.generator_class(dialect=self, **opts)
+        return self.generator_class(**{"dialect": self, **opts})
 
     def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
         return [
@@ -1906,21 +1915,32 @@ def groupconcat_sql(
 
 
 def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
-    this = seq_get(args, 0)
-    format = seq_get(args, 1)
-
-    if this:
+    if len(args) == 2:
+        this = args[0]
         if not this.type:
             from sqlglot.optimizer.annotate_types import annotate_types
 
             annotate_types(this, dialect=dialect)
 
-        from sqlglot.dialects.snowflake import Snowflake
-
-        if this.is_type(*exp.DataType.TEMPORAL_TYPES) or (
-            isinstance(format, exp.Literal) and format.name in Snowflake.TIME_MAPPING
-        ):
+        if this.is_type(*exp.DataType.TEMPORAL_TYPES):
             dialect_name = dialect.__class__.__name__.lower()
             return build_formatted_time(exp.TimeToStr, dialect_name, default=True)(args)
 
     return exp.ToChar.from_arg_list(args)
+
+
+def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
+    return exp.Replace(
+        this=seq_get(args, 0),
+        expression=seq_get(args, 1),
+        replacement=seq_get(args, 2) or exp.Literal.string(""),
+    )
+
+
+def space_sql(self: Generator, expression: exp.Space) -> str:
+    return self.sql(
+        exp.Repeat(
+            this=exp.Literal.string(" "),
+            times=expression.this,
+        )
+    )
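Two of these changes are easy to exercise directly: tokenizer() and jsonpath_tokenizer() are now factory methods rather than properties, so per-call options (like the hive/trino instances Athena threads through above) can be forwarded, and space_sql lowers exp.Space onto REPEAT for engines without a native SPACE function. An illustrative sketch, not part of the diff:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, space_sql

duckdb = Dialect.get_or_raise("duckdb")

# tokenizer() is now called, not accessed as a property
tokens = duckdb.tokenizer().tokenize("SELECT 1")

# space_sql rewrites SPACE(n) as REPEAT(' ', n)
print(space_sql(duckdb.generator(), exp.Space(this=exp.Literal.number(3))))
# e.g. REPEAT(' ', 3)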
sqlglot/dialects/dremio.py
ADDED
@@ -0,0 +1,53 @@
+from sqlglot import expressions as exp
+from sqlglot import parser, generator, tokens
+from sqlglot.dialects.dialect import Dialect
+
+
+class Dremio(Dialect):
+    SUPPORTS_USER_DEFINED_TYPES = False
+    CONCAT_COALESCE = True
+    TYPED_DIVISION = True
+    SUPPORTS_SEMI_ANTI_JOIN = False
+    NULL_ORDERING = "nulls_are_last"
+    SUPPORTS_VALUES_DEFAULT = False
+
+    class Parser(parser.Parser):
+        LOG_DEFAULTS_TO_LN = True
+
+    class Generator(generator.Generator):
+        NVL2_SUPPORTED = False
+        SUPPORTS_CONVERT_TIMEZONE = True
+        INTERVAL_ALLOWS_PLURAL_FORM = False
+        JOIN_HINTS = False
+        LIMIT_ONLY_LITERALS = True
+        MULTI_ARG_DISTINCT = False
+
+        # https://docs.dremio.com/current/reference/sql/data-types/
+        TYPE_MAPPING = {
+            **generator.Generator.TYPE_MAPPING,
+            exp.DataType.Type.SMALLINT: "INT",
+            exp.DataType.Type.TINYINT: "INT",
+            exp.DataType.Type.BINARY: "VARBINARY",
+            exp.DataType.Type.TEXT: "VARCHAR",
+            exp.DataType.Type.NCHAR: "VARCHAR",
+            exp.DataType.Type.CHAR: "VARCHAR",
+            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
+            exp.DataType.Type.DATETIME: "TIMESTAMP",
+            exp.DataType.Type.ARRAY: "LIST",
+            exp.DataType.Type.BIT: "BOOLEAN",
+        }
+
+        def datatype_sql(self, expression: exp.DataType) -> str:
+            """
+            Reject time-zone–aware TIMESTAMPs, which Dremio does not accept
+            """
+            if expression.is_type(
+                exp.DataType.Type.TIMESTAMPTZ,
+                exp.DataType.Type.TIMESTAMPLTZ,
+            ):
+                self.unsupported("Dremio does not support time-zone-aware TIMESTAMP")
+
+            return super().datatype_sql(expression)
+
+    class Tokenizer(tokens.Tokenizer):
+        COMMENTS = ["--", "//", ("/*", "*/")]
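Since the new dialect is registered (see the DIALECTS and Dialects changes above), it is usable by name right away. An illustrative sketch, not part of the diff; per the TYPE_MAPPING, narrow integer types widen to INT and TEXT becomes VARCHAR:

import sqlglot

print(sqlglot.transpile("SELECT CAST(a AS TINYINT), CAST(b AS TEXT) FROM t", write="dremio")[0])
# e.g. SELECT CAST(a AS INT), CAST(b AS VARCHAR) FROM t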