sqlglot 26.30.0__py3-none-any.whl → 26.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +2 -0
- sqlglot/dialects/athena.py +237 -116
- sqlglot/dialects/bigquery.py +9 -4
- sqlglot/dialects/clickhouse.py +5 -0
- sqlglot/dialects/databricks.py +2 -0
- sqlglot/dialects/dialect.py +39 -19
- sqlglot/dialects/dremio.py +53 -0
- sqlglot/dialects/duckdb.py +45 -0
- sqlglot/dialects/exasol.py +89 -0
- sqlglot/dialects/fabric.py +60 -33
- sqlglot/dialects/presto.py +6 -0
- sqlglot/dialects/redshift.py +10 -2
- sqlglot/dialects/snowflake.py +3 -1
- sqlglot/dialects/spark2.py +2 -0
- sqlglot/dialects/tsql.py +7 -5
- sqlglot/expressions.py +44 -2
- sqlglot/generator.py +3 -3
- sqlglot/jsonpath.py +1 -1
- sqlglot/optimizer/annotate_types.py +13 -0
- sqlglot/optimizer/pushdown_predicates.py +2 -1
- sqlglot/optimizer/scope.py +13 -3
- sqlglot/parser.py +4 -3
- sqlglot/tokens.py +7 -1
- sqlglot/transforms.py +15 -1
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/METADATA +2 -2
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/RECORD +30 -28
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/WHEEL +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/duckdb.py
CHANGED
@@ -1165,3 +1165,48 @@ class DuckDB(Dialect):
         def autoincrementcolumnconstraint_sql(self, _) -> str:
             self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
             return ""
+
+        def aliases_sql(self, expression: exp.Aliases) -> str:
+            this = expression.this
+            if isinstance(this, exp.Posexplode):
+                return self.posexplode_sql(this)
+
+            return super().aliases_sql(expression)
+
+        def posexplode_sql(self, expression: exp.Posexplode) -> str:
+            this = expression.this
+            parent = expression.parent
+
+            # The default Spark aliases are "pos" and "col", unless specified otherwise
+            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")
+
+            if isinstance(parent, exp.Aliases):
+                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
+                pos, col = parent.expressions
+            elif isinstance(parent, exp.Table):
+                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
+                alias = parent.args.get("alias")
+                if alias:
+                    pos, col = alias.columns or [pos, col]
+                    alias.pop()
+
+            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
+            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
+            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
+            gen_subscripts = self.sql(
+                exp.Alias(
+                    this=exp.Anonymous(
+                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
+                    )
+                    - exp.Literal.number(1),
+                    alias=pos,
+                )
+            )
+
+            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)
+
+            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
+                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
+                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))
+
+            return posexplode_sql
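A minimal sketch to exercise the new POSEXPLODE handling; the printed SQL shows the expected shape only and is not copied from this release:

    import sqlglot

    # Spark's POSEXPLODE has no direct DuckDB equivalent; the generator above
    # rewrites it to GENERATE_SUBSCRIPTS(a, 1) - 1 (keeping Spark's 0-based
    # positions) paired with UNNEST(a).
    print(sqlglot.transpile("SELECT POSEXPLODE(a) FROM t", read="spark", write="duckdb")[0])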
sqlglot/dialects/exasol.py
ADDED
@@ -0,0 +1,89 @@
+from __future__ import annotations
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect, rename_func, binary_from_function
+from sqlglot.helper import seq_get
+from sqlglot.generator import unsupported_args
+
+
+class Exasol(Dialect):
+    class Parser(parser.Parser):
+        FUNCTIONS = {
+            **parser.Parser.FUNCTIONS,
+            "BIT_AND": binary_from_function(exp.BitwiseAnd),
+            "BIT_OR": binary_from_function(exp.BitwiseOr),
+            "BIT_XOR": binary_from_function(exp.BitwiseXor),
+            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
+            "BIT_LSHIFT": binary_from_function(exp.BitwiseLeftShift),
+            "BIT_RSHIFT": binary_from_function(exp.BitwiseRightShift),
+            "EVERY": lambda args: exp.All(this=seq_get(args, 0)),
+            "EDIT_DISTANCE": exp.Levenshtein.from_arg_list,
+            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
+                this=seq_get(args, 0),
+                expression=seq_get(args, 1),
+                replacement=seq_get(args, 2),
+                position=seq_get(args, 3),
+                occurrence=seq_get(args, 4),
+            ),
+        }
+
+    class Generator(generator.Generator):
+        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType
+        STRING_TYPE_MAPPING = {
+            exp.DataType.Type.BLOB: "VARCHAR",
+            exp.DataType.Type.LONGBLOB: "VARCHAR",
+            exp.DataType.Type.LONGTEXT: "VARCHAR",
+            exp.DataType.Type.MEDIUMBLOB: "VARCHAR",
+            exp.DataType.Type.MEDIUMTEXT: "VARCHAR",
+            exp.DataType.Type.TINYBLOB: "VARCHAR",
+            exp.DataType.Type.TINYTEXT: "VARCHAR",
+            exp.DataType.Type.TEXT: "VARCHAR",
+            exp.DataType.Type.VARBINARY: "VARCHAR",
+        }
+
+        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm
+        TYPE_MAPPING = {
+            **generator.Generator.TYPE_MAPPING,
+            **STRING_TYPE_MAPPING,
+            exp.DataType.Type.TINYINT: "SMALLINT",
+            exp.DataType.Type.MEDIUMINT: "INT",
+            exp.DataType.Type.DECIMAL32: "DECIMAL",
+            exp.DataType.Type.DECIMAL64: "DECIMAL",
+            exp.DataType.Type.DECIMAL128: "DECIMAL",
+            exp.DataType.Type.DECIMAL256: "DECIMAL",
+            exp.DataType.Type.DATETIME: "TIMESTAMP",
+        }
+
+        def datatype_sql(self, expression: exp.DataType) -> str:
+            # Exasol supports a fixed default precision of 3 for TIMESTAMP WITH LOCAL TIME ZONE
+            # and does not allow specifying a different custom precision
+            if expression.is_type(exp.DataType.Type.TIMESTAMPLTZ):
+                return "TIMESTAMP WITH LOCAL TIME ZONE"
+
+            return super().datatype_sql(expression)
+
+        TRANSFORMS = {
+            **generator.Generator.TRANSFORMS,
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
+            exp.All: rename_func("EVERY"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_and.htm
+            exp.BitwiseAnd: rename_func("BIT_AND"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_or.htm
+            exp.BitwiseOr: rename_func("BIT_OR"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_not.htm
+            exp.BitwiseNot: rename_func("BIT_NOT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_lshift.htm
+            exp.BitwiseLeftShift: rename_func("BIT_LSHIFT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_rshift.htm
+            exp.BitwiseRightShift: rename_func("BIT_RSHIFT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_xor.htm
+            exp.BitwiseXor: rename_func("BIT_XOR"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
+            exp.All: rename_func("EVERY"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/edit_distance.htm#EDIT_DISTANCE
+            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
+                rename_func("EDIT_DISTANCE")
+            ),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm
+            exp.Mod: rename_func("MOD"),
+            exp.RegexpReplace: unsupported_args("modifiers")(rename_func("REGEXP_REPLACE")),
+        }
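A short usage sketch for the new Exasol dialect; the rendered outputs are indicative, not verified against this exact release:

    import sqlglot

    # EDIT_DISTANCE parses to exp.Levenshtein, so it can be respelled for other engines.
    print(sqlglot.transpile("SELECT EDIT_DISTANCE(a, b)", read="exasol", write="duckdb")[0])

    # Text/blob-ish types collapse to VARCHAR when generating Exasol DDL.
    print(sqlglot.transpile("CREATE TABLE t (x TEXT)", read="mysql", write="exasol")[0])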
sqlglot/dialects/fabric.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from sqlglot import exp
 from sqlglot.dialects.dialect import NormalizationStrategy
 from sqlglot.dialects.tsql import TSQL
+from sqlglot.tokens import TokenType
 
 
 class Fabric(TSQL):
@@ -28,61 +29,87 @@ class Fabric(TSQL):
     # Fabric is case-sensitive unlike T-SQL which is case-insensitive
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
 
+    class Tokenizer(TSQL.Tokenizer):
+        # Override T-SQL tokenizer to handle TIMESTAMP differently
+        # In T-SQL, TIMESTAMP is a synonym for ROWVERSION, but in Fabric we want it to be a datetime type
+        # Also add UTINYINT keyword mapping since T-SQL doesn't have it
+        KEYWORDS = {
+            **TSQL.Tokenizer.KEYWORDS,
+            "TIMESTAMP": TokenType.TIMESTAMP,
+            "UTINYINT": TokenType.UTINYINT,
+        }
+
     class Generator(TSQL.Generator):
         # Fabric-specific type mappings - override T-SQL types that aren't supported
         # Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
         TYPE_MAPPING = {
             **TSQL.Generator.TYPE_MAPPING,
-
+            exp.DataType.Type.DATETIME: "DATETIME2",
+            exp.DataType.Type.DECIMAL: "DECIMAL",
+            exp.DataType.Type.IMAGE: "VARBINARY",
+            exp.DataType.Type.INT: "INT",
+            exp.DataType.Type.JSON: "VARCHAR",
             exp.DataType.Type.MONEY: "DECIMAL",
-            exp.DataType.Type.SMALLMONEY: "DECIMAL",
-            exp.DataType.Type.DATETIME: "DATETIME2(6)",
-            exp.DataType.Type.SMALLDATETIME: "DATETIME2(6)",
             exp.DataType.Type.NCHAR: "CHAR",
             exp.DataType.Type.NVARCHAR: "VARCHAR",
-            exp.DataType.Type.
-            exp.DataType.Type.
+            exp.DataType.Type.ROWVERSION: "ROWVERSION",
+            exp.DataType.Type.SMALLDATETIME: "DATETIME2",
+            exp.DataType.Type.SMALLMONEY: "DECIMAL",
+            exp.DataType.Type.TIMESTAMP: "DATETIME2",
+            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME2",
+            exp.DataType.Type.TIMESTAMPTZ: "DATETIMEOFFSET",
             exp.DataType.Type.TINYINT: "SMALLINT",
-            exp.DataType.Type.UTINYINT: "SMALLINT",
-            exp.DataType.Type.
+            exp.DataType.Type.UTINYINT: "SMALLINT",
+            exp.DataType.Type.UUID: "VARBINARY(MAX)",
             exp.DataType.Type.XML: "VARCHAR",
-            exp.DataType.Type.UUID: "VARBINARY(MAX)", # UNIQUEIDENTIFIER has limitations in Fabric
-            # Override T-SQL mappings that use different names in Fabric
-            exp.DataType.Type.DECIMAL: "DECIMAL", # T-SQL uses NUMERIC
-            exp.DataType.Type.DOUBLE: "FLOAT",
-            exp.DataType.Type.INT: "INT", # T-SQL uses INTEGER
         }
 
         def datatype_sql(self, expression: exp.DataType) -> str:
-
-
-
-
-
-            """
-            if expression.is_type(
-                exp.DataType.Type.TIME,
-                exp.DataType.Type.DATETIME2,
-                exp.DataType.Type.TIMESTAMPTZ, # DATETIMEOFFSET in Fabric
+            # Check if this is a temporal type that needs precision handling. Fabric limits temporal
+            # types to max 6 digits precision. When no precision is specified, we default to 6 digits.
+            if (
+                expression.is_type(*exp.DataType.TEMPORAL_TYPES)
+                and expression.this != exp.DataType.Type.DATE
             ):
                 # Get the current precision (first expression if it exists)
-
+                precision_param = expression.find(exp.DataTypeParam)
+                target_precision = 6
 
-
-                if precision is None:
-                    # No precision specified, default to 6
-                    target_precision = 6
-                elif precision.this.is_int:
+                if precision_param and precision_param.this.is_int:
                     # Cap precision at 6
-                    current_precision =
+                    current_precision = precision_param.this.to_py()
                     target_precision = min(current_precision, 6)
+                else:
+                    # If precision exists but is not an integer, default to 6
+                    target_precision = 6
 
                 # Create a new expression with the target precision
-
+                expression = exp.DataType(
                     this=expression.this,
                     expressions=[exp.DataTypeParam(this=exp.Literal.number(target_precision))],
                 )
 
-            return super().datatype_sql(new_expression)
-
             return super().datatype_sql(expression)
+
+        def unixtotime_sql(self, expression: exp.UnixToTime) -> str:
+            scale = expression.args.get("scale")
+            timestamp = expression.this
+
+            if scale not in (None, exp.UnixToTime.SECONDS):
+                self.unsupported(f"UnixToTime scale {scale} is not supported by Fabric")
+                return ""
+
+            # Convert unix timestamp (seconds) to microseconds and round to avoid decimals
+            microseconds = timestamp * exp.Literal.number("1e6")
+            rounded = exp.func("round", microseconds, 0)
+            rounded_ms_as_bigint = exp.cast(rounded, exp.DataType.Type.BIGINT)
+
+            # Create the base datetime as '1970-01-01' cast to DATETIME2(6)
+            epoch_start = exp.cast("'1970-01-01'", "datetime2(6)", dialect="fabric")
+
+            dateadd = exp.DateAdd(
+                this=epoch_start,
+                expression=rounded_ms_as_bigint,
+                unit=exp.Literal.string("MICROSECONDS"),
+            )
+            return self.sql(dateadd)
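A sketch of the precision capping above; per the new datatype_sql logic, out-of-range precision should be capped at 6 and a missing precision filled in (outputs indicative):

    import sqlglot

    print(sqlglot.transpile("CAST(x AS DATETIME2(7))", read="tsql", write="fabric")[0])
    # expect DATETIME2(6)
    print(sqlglot.transpile("CAST(x AS DATETIME)", read="tsql", write="fabric")[0])
    # expect DATETIME2(6) as well, since no precision was specified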
sqlglot/dialects/presto.py
CHANGED
@@ -8,6 +8,7 @@ from sqlglot.dialects.dialect import (
     NormalizationStrategy,
     binary_from_function,
     bool_xor_sql,
+    build_replace_with_optional_replacement,
     date_trunc_to_time,
     datestrtodate_sql,
     encode_decode_sql,
@@ -30,6 +31,7 @@ from sqlglot.dialects.dialect import (
     sequence_sql,
     build_regexp_extract,
     explode_to_unnest_sql,
+    space_sql,
 )
 from sqlglot.dialects.hive import Hive
 from sqlglot.dialects.mysql import MySQL
@@ -360,6 +362,7 @@ class Presto(Dialect):
                 expression=seq_get(args, 1),
                 replacement=seq_get(args, 2) or exp.Literal.string(""),
             ),
+            "REPLACE": build_replace_with_optional_replacement,
             "ROW": exp.Struct.from_arg_list,
             "SEQUENCE": exp.GenerateSeries.from_arg_list,
             "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
@@ -367,6 +370,7 @@ class Presto(Dialect):
             "STRPOS": lambda args: exp.StrPosition(
                 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
             ),
+            "SLICE": exp.ArraySlice.from_arg_list,
             "TO_CHAR": _build_to_char,
             "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
             "TO_UTF8": lambda args: exp.Encode(
@@ -435,6 +439,7 @@ class Presto(Dialect):
             exp.ArrayContains: rename_func("CONTAINS"),
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
             exp.ArrayUniqueAgg: rename_func("SET_AGG"),
+            exp.ArraySlice: rename_func("SLICE"),
             exp.AtTimeZone: rename_func("AT_TIMEZONE"),
             exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
             exp.BitwiseLeftShift: lambda self, e: self.func(
@@ -501,6 +506,7 @@ class Presto(Dialect):
                     amend_exploded_column_table,
                 ]
             ),
+            exp.Space: space_sql,
             exp.SortArray: _no_sort_array,
             exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
             exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
sqlglot/dialects/redshift.py
CHANGED
@@ -213,8 +213,7 @@ class Redshift(Postgres):
         exp.TableSample: no_tablesample_sql,
         exp.TsOrDsAdd: date_delta_sql("DATEADD"),
         exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
-        exp.UnixToTime: lambda self,
-        e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
+        exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
     }
 
     # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -447,3 +446,12 @@ class Redshift(Postgres):
     def explode_sql(self, expression: exp.Explode) -> str:
         self.unsupported("Unsupported EXPLODE() function")
         return ""
+
+    def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
+        scale = expression.args.get("scale")
+        this = self.sql(expression.this)
+
+        if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
+            this = f"({this} / POWER(10, {scale.to_py()}))"
+
+        return f"(TIMESTAMP 'epoch' + {this} * INTERVAL '1 SECOND')"
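Building an exp.UnixToTime node directly shows the new scale handling; the output shape follows from the helper above:

    from sqlglot import exp

    # A millisecond-scale epoch is divided down before the interval arithmetic.
    node = exp.UnixToTime(this=exp.column("ts"), scale=exp.Literal.number(3))
    print(node.sql(dialect="redshift"))
    # (TIMESTAMP 'epoch' + (ts / POWER(10, 3)) * INTERVAL '1 SECOND')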
sqlglot/dialects/snowflake.py
CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     build_timetostr_or_tochar,
     binary_from_function,
     build_default_decimal_type,
+    build_replace_with_optional_replacement,
     build_timestamp_from_parts,
     date_delta_sql,
     date_trunc_to_time,
@@ -484,6 +485,7 @@ class Snowflake(Dialect):
             "REGEXP_REPLACE": _build_regexp_replace,
             "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
             "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
+            "REPLACE": build_replace_with_optional_replacement,
             "RLIKE": exp.RegexpLike.from_arg_list,
             "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
             "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
@@ -1416,7 +1418,7 @@ class Snowflake(Dialect):
 
     def timetostr_sql(self, expression: exp.TimeToStr) -> str:
         this = expression.this
-        if
+        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
 
        return self.func("TO_CHAR", this, self.format_time(expression))
sqlglot/dialects/spark2.py
CHANGED
@@ -201,6 +201,7 @@ class Spark2(Hive):
             "SHIFTLEFT": binary_from_function(exp.BitwiseLeftShift),
             "SHIFTRIGHT": binary_from_function(exp.BitwiseRightShift),
             "STRING": _build_as_cast("string"),
+            "SLICE": exp.ArraySlice.from_arg_list,
             "TIMESTAMP": _build_as_cast("timestamp"),
             "TO_TIMESTAMP": lambda args: (
                 _build_as_cast("timestamp")(args)
@@ -261,6 +262,7 @@ class Spark2(Hive):
             exp.ArraySum: lambda self,
             e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
+            exp.ArraySlice: rename_func("SLICE"),
             exp.AtTimeZone: lambda self, e: self.func(
                 "FROM_UTC_TIMESTAMP", e.this, e.args.get("zone")
             ),
sqlglot/dialects/tsql.py
CHANGED
@@ -612,6 +612,7 @@ class TSQL(Dialect):
             "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
             "SUSER_NAME": exp.CurrentUser.from_arg_list,
             "SUSER_SNAME": exp.CurrentUser.from_arg_list,
+            "SYSDATETIMEOFFSET": exp.CurrentTimestampLTZ.from_arg_list,
             "SYSTEM_USER": exp.CurrentUser.from_arg_list,
             "TIMEFROMPARTS": _build_timefromparts,
             "DATETRUNC": _build_datetrunc,
@@ -1020,6 +1021,7 @@ class TSQL(Dialect):
             exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
             exp.CurrentDate: rename_func("GETDATE"),
             exp.CurrentTimestamp: rename_func("GETDATE"),
+            exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),
             exp.DateStrToDate: datestrtodate_sql,
             exp.Extract: rename_func("DATEPART"),
             exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
@@ -1249,15 +1251,15 @@ class TSQL(Dialect):
         sql_with_ctes = self.prepend_ctes(expression, sql)
         sql_literal = self.sql(exp.Literal.string(sql_with_ctes))
         if kind == "SCHEMA":
-            return f"""IF NOT EXISTS (SELECT * FROM
+            return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = {identifier}) EXEC({sql_literal})"""
         elif kind == "TABLE":
             assert table
             where = exp.and_(
-                exp.column("
-                exp.column("
-                exp.column("
+                exp.column("TABLE_NAME").eq(table.name),
+                exp.column("TABLE_SCHEMA").eq(table.db) if table.db else None,
+                exp.column("TABLE_CATALOG").eq(table.catalog) if table.catalog else None,
             )
-            return f"""IF NOT EXISTS (SELECT * FROM
+            return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE {where}) EXEC({sql_literal})"""
         elif kind == "INDEX":
             index = self.sql(exp.Literal.string(expression.this.text("this")))
             return f"""IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id({identifier}) AND name = {index}) EXEC({sql_literal})"""
sqlglot/expressions.py
CHANGED
@@ -5569,6 +5569,22 @@ class ArrayFilter(Func):
     _sql_names = ["FILTER", "ARRAY_FILTER"]
 
 
+class ArrayFirst(Func):
+    pass
+
+
+class ArrayLast(Func):
+    pass
+
+
+class ArrayReverse(Func):
+    pass
+
+
+class ArraySlice(Func):
+    arg_types = {"this": True, "start": True, "end": False, "step": False}
+
+
 class ArrayToString(Func):
     arg_types = {"this": True, "expression": True, "null": False}
     _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
@@ -5806,6 +5822,10 @@ class CurrentTimestamp(Func):
     arg_types = {"this": False, "sysdate": False}
 
 
+class CurrentTimestampLTZ(Func):
+    arg_types = {}
+
+
 class CurrentSchema(Func):
     arg_types = {"this": False}
 
@@ -5846,8 +5866,6 @@ class DateTrunc(Func):
             unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name]
 
             args["unit"] = Literal.string(unit_name)
-        elif isinstance(unit, Week):
-            unit.set("this", Literal.string(unit.this.name.upper()))
 
         super().__init__(**args)
 
@@ -6669,6 +6687,11 @@ class Repeat(Func):
     arg_types = {"this": True, "times": True}
 
 
+# Some dialects like Snowflake support two argument replace
+class Replace(Func):
+    arg_types = {"this": True, "expression": True, "replacement": False}
+
+
 # https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
 # tsql third argument function == trunctaion if not 0
 class Round(Func):
@@ -6716,6 +6739,17 @@ class Substring(Func):
     arg_types = {"this": True, "start": False, "length": False}
 
 
+class SubstringIndex(Func):
+    """
+    SUBSTRING_INDEX(str, delim, count)
+
+    *count* > 0 → left slice before the *count*-th delimiter
+    *count* < 0 → right slice after the |count|-th delimiter
+    """
+
+    arg_types = {"this": True, "delimiter": True, "count": True}
+
+
 class StandardHash(Func):
     arg_types = {"this": True, "expression": False}
 
@@ -6772,6 +6806,14 @@ class FromBase(Func):
     arg_types = {"this": True, "expression": True}
 
 
+class Space(Func):
+    """
+    SPACE(n) → string consisting of n blank characters
+    """
+
+    pass
+
+
 class Struct(Func):
     arg_types = {"expressions": False}
     is_var_len_args = True
sqlglot/generator.py
CHANGED
@@ -3480,7 +3480,7 @@ class Generator(metaclass=_Generator):
 
             actions_list.append(action_sql)
 
-        actions_sql = self.format_args(*actions_list)
+        actions_sql = self.format_args(*actions_list).lstrip("\n")
 
         exists = " IF EXISTS" if expression.args.get("exists") else ""
         on_cluster = self.sql(expression, "cluster")
@@ -3491,7 +3491,7 @@ class Generator(metaclass=_Generator):
         kind = self.sql(expression, "kind")
         not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
 
-        return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}
+        return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}{self.sep()}{actions_sql}{not_valid}{options}"
 
     def add_column_sql(self, expression: exp.Expression) -> str:
         sql = self.sql(expression)
@@ -3510,7 +3510,7 @@ class Generator(metaclass=_Generator):
         return f"DROP{exists}{expressions}"
 
     def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
-        return f"ADD {self.expressions(expression)}"
+        return f"ADD {self.expressions(expression, indent=False)}"
 
     def addpartition_sql(self, expression: exp.AddPartition) -> str:
         exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
sqlglot/jsonpath.py
CHANGED
@@ -41,7 +41,7 @@ def parse(path: str, dialect: DialectType = None) -> exp.JSONPath:
     """Takes in a JSON path string and parses it into a JSONPath expression."""
     from sqlglot.dialects import Dialect
 
-    jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer
+    jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer()
     tokens = jsonpath_tokenizer.tokenize(path)
     size = len(tokens)
 
sqlglot/optimizer/annotate_types.py
CHANGED
@@ -329,6 +329,7 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
             ],
             nested=True,
         )
+
         if not any(
             cd.kind.is_type(exp.DataType.Type.UNKNOWN)
             for cd in struct_type.expressions
@@ -630,3 +631,15 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
         else:
             self._set_type(expression, exp.DataType.Type.INT)
         return expression
+
+    def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
+        self._annotate_args(expression)
+
+        array_arg = expression.this
+        if array_arg.type.is_type(exp.DataType.Type.ARRAY):
+            element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
+            self._set_type(expression, element_type)
+        else:
+            self._set_type(expression, exp.DataType.Type.UNKNOWN)
+
+        return expression
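A sketch of the element-based annotation, assuming functions such as the new ARRAY_FIRST are wired to _annotate_by_array_element in this release (the wiring itself is not shown in this hunk):

    import sqlglot
    from sqlglot.optimizer.annotate_types import annotate_types

    ast = sqlglot.parse_one("SELECT ARRAY_FIRST([1, 2, 3])", read="duckdb")
    print(annotate_types(ast).selects[0].type)  # expect an INT-like type, else UNKNOWN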
sqlglot/optimizer/pushdown_predicates.py
CHANGED
@@ -21,12 +21,13 @@ def pushdown_predicates(expression, dialect=None):
     Returns:
         sqlglot.Expression: optimized expression
     """
+    from sqlglot.dialects.athena import Athena
     from sqlglot.dialects.presto import Presto
 
     root = build_scope(expression)
 
     dialect = Dialect.get_or_raise(dialect)
-    unnest_requires_cross_join = isinstance(dialect, Presto)
+    unnest_requires_cross_join = isinstance(dialect, (Athena, Presto))
 
     if root:
         scope_ref_count = root.ref_count()
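Athena now takes the same conservative path as Presto, so predicates are not pushed into sources that would require an UNNEST cross join; a minimal sketch:

    import sqlglot
    from sqlglot.optimizer.pushdown_predicates import pushdown_predicates

    sql = "SELECT * FROM (SELECT * FROM t CROSS JOIN UNNEST(xs) AS _u(x)) AS s WHERE s.x > 1"
    ast = pushdown_predicates(sqlglot.parse_one(sql, read="athena"), dialect="athena")
    print(ast.sql(dialect="athena"))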
sqlglot/optimizer/scope.py
CHANGED
@@ -358,7 +358,7 @@ class Scope:
         for expression in itertools.chain(self.derived_tables, self.udtfs):
             self._references.append(
                 (
-                    expression
+                    _get_source_alias(expression),
                     expression if expression.args.get("pivots") else expression.unnest(),
                 )
             )
@@ -785,7 +785,7 @@ def _traverse_tables(scope):
         # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
         # Until then, this means that only a single, unaliased derived table is allowed (rather,
         # the latest one wins.
-        sources[expression
+        sources[_get_source_alias(expression)] = child_scope
 
     # append the final child_scope yielded
     if child_scope:
@@ -825,7 +825,7 @@ def _traverse_udtfs(scope):
         ):
             yield child_scope
             top = child_scope
-        sources[expression
+        sources[_get_source_alias(expression)] = child_scope
 
     scope.subquery_scopes.append(top)
 
@@ -915,3 +915,13 @@ def find_in_scope(expression, expression_types, bfs=True):
         the criteria was found.
     """
     return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
+
+
+def _get_source_alias(expression):
+    alias_arg = expression.args.get("alias")
+    alias_name = expression.alias
+
+    if not alias_name and isinstance(alias_arg, exp.TableAlias) and len(alias_arg.columns) == 1:
+        alias_name = alias_arg.columns[0].name
+
+    return alias_name
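Source maps are now keyed through the helper above; for a plain aliased UDTF the key is still the table alias, while aliases that carry only a single column name fall back to that column. A sketch:

    import sqlglot
    from sqlglot.optimizer.scope import build_scope

    ast = sqlglot.parse_one("SELECT t.x FROM UNNEST([1, 2, 3]) AS t(x)", read="duckdb")
    root = build_scope(ast)
    print(list(root.sources))  # expect ['t']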
sqlglot/parser.py
CHANGED
@@ -1895,7 +1895,7 @@ class Parser(metaclass=_Parser):
             stmt.add_comments(comments, prepend=True)
             return stmt
 
-        if self._match_set(self.dialect.
+        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
             return self._parse_command()
 
         expression = self._parse_expression()
@@ -7362,8 +7362,9 @@ class Parser(metaclass=_Parser):
 
             return None
 
-        if not self.
-
+        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
+            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
+            or self._match_text_seq("COLUMNS")
         ):
             schema = self._parse_schema()
 
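The reworked check targets Spark/Databricks-style ALTER TABLE ... ADD COLUMNS, where ADD is not repeated per column; a sketch under the assumption that the Spark reader accepts this form:

    import sqlglot

    print(sqlglot.transpile("ALTER TABLE t ADD COLUMNS (x INT, y STRING)", read="spark")[0])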