sqlglot 28.4.1__py3-none-any.whl → 28.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
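Most of the churn in this release is in the dialect layer; the DuckDB dialect alone grows by roughly 1,700 lines (see the `duckdb.py` diff below). As a minimal sketch of how such dialect changes surface through sqlglot's stable public API — the helper names in the comments are the private generator functions added in 28.8.0, and the exact output SQL is an assumption inferred from the diff, not a verified run:

```python
# Sketch: exercising the new Snowflake -> DuckDB lowerings via transpile().
# The commented helper names refer to private generators in the diff below;
# the resulting SQL strings are assumptions, not verified 28.8.0 output.
import sqlglot

queries = [
    "SELECT TRY_TO_BOOLEAN(col) FROM t",    # _to_boolean_sql, safe=True path
    "SELECT LAST_DAY(d, 'YEAR') FROM t",    # _last_day_sql, non-MONTH unit
    "SELECT NEXT_DAY(d, 'Friday') FROM t",  # _day_navigation_sql
]
for sql in queries:
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
```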
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +20 -23
- sqlglot/dialects/clickhouse.py +2 -0
- sqlglot/dialects/dialect.py +355 -18
- sqlglot/dialects/doris.py +38 -90
- sqlglot/dialects/druid.py +1 -0
- sqlglot/dialects/duckdb.py +1739 -163
- sqlglot/dialects/exasol.py +17 -1
- sqlglot/dialects/hive.py +27 -2
- sqlglot/dialects/mysql.py +103 -11
- sqlglot/dialects/oracle.py +38 -1
- sqlglot/dialects/postgres.py +142 -33
- sqlglot/dialects/presto.py +6 -2
- sqlglot/dialects/redshift.py +7 -1
- sqlglot/dialects/singlestore.py +13 -3
- sqlglot/dialects/snowflake.py +271 -21
- sqlglot/dialects/spark.py +25 -0
- sqlglot/dialects/spark2.py +4 -3
- sqlglot/dialects/starrocks.py +152 -17
- sqlglot/dialects/trino.py +1 -0
- sqlglot/dialects/tsql.py +5 -0
- sqlglot/diff.py +1 -1
- sqlglot/expressions.py +239 -47
- sqlglot/generator.py +173 -44
- sqlglot/optimizer/annotate_types.py +129 -60
- sqlglot/optimizer/merge_subqueries.py +13 -2
- sqlglot/optimizer/qualify_columns.py +7 -0
- sqlglot/optimizer/resolver.py +19 -0
- sqlglot/optimizer/scope.py +12 -0
- sqlglot/optimizer/unnest_subqueries.py +7 -0
- sqlglot/parser.py +251 -58
- sqlglot/schema.py +186 -14
- sqlglot/tokens.py +36 -6
- sqlglot/transforms.py +6 -5
- sqlglot/typing/__init__.py +29 -10
- sqlglot/typing/bigquery.py +5 -10
- sqlglot/typing/duckdb.py +39 -0
- sqlglot/typing/hive.py +50 -1
- sqlglot/typing/mysql.py +32 -0
- sqlglot/typing/presto.py +0 -1
- sqlglot/typing/snowflake.py +80 -17
- sqlglot/typing/spark.py +29 -0
- sqlglot/typing/spark2.py +9 -1
- sqlglot/typing/tsql.py +21 -0
- {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/METADATA +47 -2
- sqlglot-28.8.0.dist-info/RECORD +95 -0
- {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/WHEEL +1 -1
- sqlglot-28.4.1.dist-info/RECORD +0 -92
- {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-28.4.1.dist-info → sqlglot-28.8.0.dist-info}/top_level.txt +0 -0
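Two of the rewrites in the DuckDB diff below rest on small arithmetic identities that are easy to check outside SQL: the `_day_navigation_sql` day-offset formulas and the `_scale_rounding_sql` FLOOR/CEIL scale rewrite. A standalone sanity check — plain Python with hypothetical helper names, not sqlglot code:

```python
import datetime
import math

# _day_navigation_sql offsets, using ISO day-of-week numbers (Monday=1..Sunday=7),
# i.e. what DuckDB's ISODOW returns. next_day/previous_day are hypothetical names.
def next_day(d: datetime.date, target_dow: int) -> datetime.date:
    # (target_dow - current_dow + 6) % 7 + 1 is always in 1..7, so the result
    # is strictly after d, matching Snowflake's NEXT_DAY semantics.
    return d + datetime.timedelta(days=(target_dow - d.isoweekday() + 6) % 7 + 1)

def previous_day(d: datetime.date, target_dow: int) -> datetime.date:
    return d - datetime.timedelta(days=(d.isoweekday() - target_dow + 6) % 7 + 1)

# The docstring examples from the diff: 2024-01-01 and 2024-01-15 are Mondays.
assert next_day(datetime.date(2024, 1, 1), 1) == datetime.date(2024, 1, 8)
assert previous_day(datetime.date(2024, 1, 15), 5) == datetime.date(2024, 1, 12)

# _scale_rounding_sql identity: FLOOR(x, n) == ROUND(FLOOR(x * 10^n) / 10^n, n).
def floor_scaled(x: float, n: int) -> float:
    p = 10 ** n
    return round(math.floor(x * p) / p, n)

assert floor_scaled(3.14159, 2) == 3.14
assert floor_scaled(-2.71828, 3) == -2.719  # FLOOR rounds toward -inf
```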
sqlglot/dialects/duckdb.py
CHANGED
|
@@ -8,44 +8,50 @@ import typing as t
|
|
|
8
8
|
from sqlglot import exp, generator, parser, tokens, transforms
|
|
9
9
|
|
|
10
10
|
from sqlglot.dialects.dialect import (
|
|
11
|
+
DATETIME_DELTA,
|
|
11
12
|
Dialect,
|
|
12
13
|
JSON_EXTRACT_TYPE,
|
|
13
14
|
NormalizationStrategy,
|
|
14
15
|
approx_count_distinct_sql,
|
|
16
|
+
array_append_sql,
|
|
17
|
+
array_compact_sql,
|
|
18
|
+
array_concat_sql,
|
|
15
19
|
arrow_json_extract_sql,
|
|
16
20
|
binary_from_function,
|
|
17
|
-
bool_xor_sql,
|
|
18
21
|
build_default_decimal_type,
|
|
22
|
+
build_formatted_time,
|
|
23
|
+
build_regexp_extract,
|
|
19
24
|
count_if_to_sum,
|
|
20
25
|
date_delta_to_binary_interval_op,
|
|
21
26
|
date_trunc_to_time,
|
|
22
27
|
datestrtodate_sql,
|
|
23
|
-
no_datetime_sql,
|
|
24
28
|
encode_decode_sql,
|
|
25
|
-
|
|
29
|
+
explode_to_unnest_sql,
|
|
30
|
+
getbit_sql,
|
|
31
|
+
groupconcat_sql,
|
|
32
|
+
inline_array_unless_query,
|
|
33
|
+
months_between_sql,
|
|
34
|
+
no_datetime_sql,
|
|
26
35
|
no_comment_column_constraint_sql,
|
|
36
|
+
no_make_interval_sql,
|
|
27
37
|
no_time_sql,
|
|
28
38
|
no_timestamp_sql,
|
|
29
39
|
pivot_column_names,
|
|
40
|
+
regexp_replace_global_modifier,
|
|
30
41
|
rename_func,
|
|
31
42
|
remove_from_array_using_filter,
|
|
43
|
+
sha2_digest_sql,
|
|
44
|
+
sha256_sql,
|
|
32
45
|
strposition_sql,
|
|
33
46
|
str_to_time_sql,
|
|
34
47
|
timestrtotime_sql,
|
|
35
48
|
unit_to_str,
|
|
36
|
-
sha256_sql,
|
|
37
|
-
build_regexp_extract,
|
|
38
|
-
explode_to_unnest_sql,
|
|
39
|
-
no_make_interval_sql,
|
|
40
|
-
groupconcat_sql,
|
|
41
|
-
inline_array_unless_query,
|
|
42
|
-
regexp_replace_global_modifier,
|
|
43
|
-
sha2_digest_sql,
|
|
44
49
|
)
|
|
45
50
|
from sqlglot.generator import unsupported_args
|
|
46
51
|
from sqlglot.helper import is_date_unit, seq_get
|
|
47
52
|
from sqlglot.tokens import TokenType
|
|
48
53
|
from sqlglot.parser import binary_range_parser
|
|
54
|
+
from sqlglot.typing.duckdb import EXPRESSION_METADATA
|
|
49
55
|
|
|
50
56
|
# Regex to detect time zones in timestamps of the form [+|-]TT[:tt]
|
|
51
57
|
# The pattern matches timezone offsets that appear after the time portion
|
|
@@ -87,68 +93,212 @@ WEEK_START_DAY_TO_DOW = {
|
|
|
87
93
|
|
|
88
94
|
MAX_BIT_POSITION = exp.Literal.number(32768)
|
|
89
95
|
|
|
96
|
+
# SEQ function constants
|
|
97
|
+
_SEQ_BASE = "(ROW_NUMBER() OVER (ORDER BY 1) - 1)"
|
|
98
|
+
_SEQ_RESTRICTED = (exp.Where, exp.Having, exp.AggFunc, exp.Order, exp.Select)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _apply_base64_alphabet_replacements(
|
|
102
|
+
result: exp.Expression,
|
|
103
|
+
alphabet: t.Optional[exp.Expression],
|
|
104
|
+
reverse: bool = False,
|
|
105
|
+
) -> exp.Expression:
|
|
106
|
+
"""
|
|
107
|
+
Apply base64 alphabet character replacements.
|
|
108
|
+
|
|
109
|
+
Base64 alphabet can be 1-3 chars: 1st = index 62 ('+'), 2nd = index 63 ('/'), 3rd = padding ('=').
|
|
110
|
+
zip truncates to the shorter string, so 1-char alphabet only replaces '+', 2-char replaces '+/', etc.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
result: The expression to apply replacements to
|
|
114
|
+
alphabet: Custom alphabet literal (expected chars for +/=)
|
|
115
|
+
reverse: If False, replace default with custom (encode)
|
|
116
|
+
If True, replace custom with default (decode)
|
|
117
|
+
"""
|
|
118
|
+
if isinstance(alphabet, exp.Literal) and alphabet.is_string:
|
|
119
|
+
for default_char, new_char in zip("+/=", alphabet.this):
|
|
120
|
+
if new_char != default_char:
|
|
121
|
+
find, replace = (new_char, default_char) if reverse else (default_char, new_char)
|
|
122
|
+
result = exp.Replace(
|
|
123
|
+
this=result,
|
|
124
|
+
expression=exp.Literal.string(find),
|
|
125
|
+
replacement=exp.Literal.string(replace),
|
|
126
|
+
)
|
|
127
|
+
return result
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _base64_decode_sql(self: DuckDB.Generator, expression: exp.Expression, to_string: bool) -> str:
|
|
131
|
+
"""
|
|
132
|
+
Transpile Snowflake BASE64_DECODE_STRING/BINARY to DuckDB.
|
|
133
|
+
|
|
134
|
+
DuckDB uses FROM_BASE64() which returns BLOB. For string output, wrap with DECODE().
|
|
135
|
+
Custom alphabets require REPLACE() calls to convert to standard base64.
|
|
136
|
+
"""
|
|
137
|
+
input_expr = expression.this
|
|
138
|
+
alphabet = expression.args.get("alphabet")
|
|
139
|
+
|
|
140
|
+
# Handle custom alphabet by replacing non-standard chars with standard ones
|
|
141
|
+
input_expr = _apply_base64_alphabet_replacements(input_expr, alphabet, reverse=True)
|
|
142
|
+
|
|
143
|
+
# FROM_BASE64 returns BLOB
|
|
144
|
+
input_expr = exp.FromBase64(this=input_expr)
|
|
145
|
+
|
|
146
|
+
if to_string:
|
|
147
|
+
input_expr = exp.Decode(this=input_expr)
|
|
148
|
+
|
|
149
|
+
return self.sql(input_expr)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _last_day_sql(self: DuckDB.Generator, expression: exp.LastDay) -> str:
|
|
153
|
+
"""
|
|
154
|
+
DuckDB's LAST_DAY only supports finding the last day of a month.
|
|
155
|
+
For other date parts (year, quarter, week), we need to implement equivalent logic.
|
|
156
|
+
"""
|
|
157
|
+
date_expr = expression.this
|
|
158
|
+
unit = expression.text("unit")
|
|
159
|
+
|
|
160
|
+
if not unit or unit.upper() == "MONTH":
|
|
161
|
+
# Default behavior - use DuckDB's native LAST_DAY
|
|
162
|
+
return self.func("LAST_DAY", date_expr)
|
|
163
|
+
|
|
164
|
+
if unit.upper() == "YEAR":
|
|
165
|
+
# Last day of year: December 31st of the same year
|
|
166
|
+
year_expr = exp.func("EXTRACT", "YEAR", date_expr)
|
|
167
|
+
make_date_expr = exp.func(
|
|
168
|
+
"MAKE_DATE", year_expr, exp.Literal.number(12), exp.Literal.number(31)
|
|
169
|
+
)
|
|
170
|
+
return self.sql(make_date_expr)
|
|
171
|
+
|
|
172
|
+
if unit.upper() == "QUARTER":
|
|
173
|
+
# Last day of quarter
|
|
174
|
+
year_expr = exp.func("EXTRACT", "YEAR", date_expr)
|
|
175
|
+
quarter_expr = exp.func("EXTRACT", "QUARTER", date_expr)
|
|
176
|
+
|
|
177
|
+
# Calculate last month of quarter: quarter * 3. Quarter can be 1 to 4
|
|
178
|
+
last_month_expr = exp.Mul(this=quarter_expr, expression=exp.Literal.number(3))
|
|
179
|
+
first_day_last_month_expr = exp.func(
|
|
180
|
+
"MAKE_DATE", year_expr, last_month_expr, exp.Literal.number(1)
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Last day of the last month of the quarter
|
|
184
|
+
last_day_expr = exp.func("LAST_DAY", first_day_last_month_expr)
|
|
185
|
+
return self.sql(last_day_expr)
|
|
186
|
+
|
|
187
|
+
if unit.upper() == "WEEK":
|
|
188
|
+
# DuckDB DAYOFWEEK: Sunday=0, Monday=1, ..., Saturday=6
|
|
189
|
+
dow = exp.func("EXTRACT", "DAYOFWEEK", date_expr)
|
|
190
|
+
# Days to the last day of week: (7 - dayofweek) % 7, assuming the last day of week is Sunday (Snowflake)
|
|
191
|
+
# Wrap in parentheses to ensure correct precedence
|
|
192
|
+
days_to_sunday_expr = exp.Mod(
|
|
193
|
+
this=exp.Paren(this=exp.Sub(this=exp.Literal.number(7), expression=dow)),
|
|
194
|
+
expression=exp.Literal.number(7),
|
|
195
|
+
)
|
|
196
|
+
interval_expr = exp.Interval(this=days_to_sunday_expr, unit=exp.var("DAY"))
|
|
197
|
+
add_expr = exp.Add(this=date_expr, expression=interval_expr)
|
|
198
|
+
cast_expr = exp.cast(add_expr, exp.DataType.Type.DATE)
|
|
199
|
+
return self.sql(cast_expr)
|
|
200
|
+
|
|
201
|
+
self.unsupported(f"Unsupported date part '{unit}' in LAST_DAY function")
|
|
202
|
+
return self.function_fallback_sql(expression)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _is_nanosecond_unit(unit: t.Optional[exp.Expression]) -> bool:
|
|
206
|
+
return isinstance(unit, (exp.Var, exp.Literal)) and unit.name.upper() == "NANOSECOND"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _handle_nanosecond_diff(
|
|
210
|
+
self: DuckDB.Generator,
|
|
211
|
+
end_time: exp.Expression,
|
|
212
|
+
start_time: exp.Expression,
|
|
213
|
+
) -> str:
|
|
214
|
+
"""Generate NANOSECOND diff using EPOCH_NS since DATE_DIFF doesn't support it."""
|
|
215
|
+
end_ns = exp.cast(end_time, exp.DataType.Type.TIMESTAMP_NS)
|
|
216
|
+
start_ns = exp.cast(start_time, exp.DataType.Type.TIMESTAMP_NS)
|
|
217
|
+
|
|
218
|
+
# Build expression tree: EPOCH_NS(end) - EPOCH_NS(start)
|
|
219
|
+
return self.sql(
|
|
220
|
+
exp.Sub(this=exp.func("EPOCH_NS", end_ns), expression=exp.func("EPOCH_NS", start_ns))
|
|
221
|
+
)
|
|
222
|
+
|
|
90
223
|
|
|
91
224
|
def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
|
|
92
225
|
"""
|
|
93
|
-
Transpile TO_BOOLEAN
|
|
226
|
+
Transpile TO_BOOLEAN and TRY_TO_BOOLEAN functions from Snowflake to DuckDB equivalent.
|
|
94
227
|
|
|
95
228
|
DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
|
|
96
|
-
We need to handle the 'on'/'off' cases explicitly
|
|
229
|
+
We need to handle the 'on'/'off' cases explicitly.
|
|
97
230
|
|
|
98
|
-
|
|
231
|
+
For TO_BOOLEAN (safe=False): NaN and INF values cause errors. We use DuckDB's native ERROR()
|
|
99
232
|
function to replicate this behavior with a clear error message.
|
|
233
|
+
|
|
234
|
+
For TRY_TO_BOOLEAN (safe=True): Use DuckDB's TRY_CAST for conversion, which returns NULL
|
|
235
|
+
for invalid inputs instead of throwing errors.
|
|
100
236
|
"""
|
|
101
237
|
arg = expression.this
|
|
238
|
+
is_safe = expression.args.get("safe", False)
|
|
102
239
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# Check for NaN and INF values
|
|
106
|
-
nan_inf_check = exp.Or(
|
|
107
|
-
this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
|
|
108
|
-
)
|
|
109
|
-
|
|
110
|
-
case_expr = (
|
|
240
|
+
base_case_expr = (
|
|
111
241
|
exp.case()
|
|
112
242
|
.when(
|
|
113
|
-
|
|
114
|
-
exp.func(
|
|
115
|
-
"ERROR",
|
|
116
|
-
exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
|
|
117
|
-
),
|
|
118
|
-
)
|
|
119
|
-
# Handle 'on' -> TRUE (case insensitive) - only for string literals
|
|
120
|
-
.when(
|
|
243
|
+
# Handle 'on' -> TRUE (case insensitive)
|
|
121
244
|
exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("ON")),
|
|
122
245
|
exp.true(),
|
|
123
246
|
)
|
|
124
|
-
# Handle 'off' -> FALSE (case insensitive) - only for string literals
|
|
125
247
|
.when(
|
|
248
|
+
# Handle 'off' -> FALSE (case insensitive)
|
|
126
249
|
exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("OFF")),
|
|
127
250
|
exp.false(),
|
|
128
251
|
)
|
|
129
|
-
.else_(exp.cast(arg, exp.DataType.Type.BOOLEAN))
|
|
130
252
|
)
|
|
131
253
|
|
|
254
|
+
if is_safe:
|
|
255
|
+
# TRY_TO_BOOLEAN: handle 'on'/'off' and use TRY_CAST for everything else
|
|
256
|
+
case_expr = base_case_expr.else_(exp.func("TRY_CAST", arg, exp.DataType.build("BOOLEAN")))
|
|
257
|
+
else:
|
|
258
|
+
# TO_BOOLEAN: handle NaN/INF errors, 'on'/'off', and use regular CAST
|
|
259
|
+
cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build("REAL"))
|
|
260
|
+
|
|
261
|
+
# Check for NaN and INF values
|
|
262
|
+
nan_inf_check = exp.Or(
|
|
263
|
+
this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
case_expr = base_case_expr.when(
|
|
267
|
+
nan_inf_check,
|
|
268
|
+
exp.func(
|
|
269
|
+
"ERROR",
|
|
270
|
+
exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
|
|
271
|
+
),
|
|
272
|
+
).else_(exp.cast(arg, exp.DataType.Type.BOOLEAN))
|
|
273
|
+
|
|
132
274
|
return self.sql(case_expr)
|
|
133
275
|
|
|
134
276
|
|
|
135
277
|
# BigQuery -> DuckDB conversion for the DATE function
|
|
136
278
|
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
|
|
137
|
-
|
|
279
|
+
this = expression.this
|
|
138
280
|
zone = self.sql(expression, "zone")
|
|
139
281
|
|
|
140
282
|
if zone:
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
#
|
|
145
|
-
|
|
283
|
+
# BigQuery considers "this" at UTC, converts it to the specified
|
|
284
|
+
# time zone and then keeps only the DATE part
|
|
285
|
+
# To micmic that, we:
|
|
286
|
+
# (1) Cast to TIMESTAMP to remove DuckDB's local tz
|
|
287
|
+
# (2) Apply consecutive AtTimeZone calls for UTC -> zone conversion
|
|
288
|
+
this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
|
|
289
|
+
at_utc = exp.AtTimeZone(this=this, zone=exp.Literal.string("UTC"))
|
|
290
|
+
this = exp.AtTimeZone(this=at_utc, zone=zone)
|
|
146
291
|
|
|
147
|
-
return
|
|
292
|
+
return self.sql(exp.cast(expression=this, to=exp.DataType.Type.DATE))
|
|
148
293
|
|
|
149
294
|
|
|
150
295
|
# BigQuery -> DuckDB conversion for the TIME_DIFF function
|
|
151
296
|
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
|
|
297
|
+
unit = expression.unit
|
|
298
|
+
|
|
299
|
+
if _is_nanosecond_unit(unit):
|
|
300
|
+
return _handle_nanosecond_diff(self, expression.expression, expression.this)
|
|
301
|
+
|
|
152
302
|
this = exp.cast(expression.this, exp.DataType.Type.TIME)
|
|
153
303
|
expr = exp.cast(expression.expression, exp.DataType.Type.TIME)
|
|
154
304
|
|
|
@@ -157,6 +307,140 @@ def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
|
|
|
157
307
|
return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
|
|
158
308
|
|
|
159
309
|
|
|
310
|
+
def _date_delta_to_binary_interval_op(
|
|
311
|
+
cast: bool = True,
|
|
312
|
+
) -> t.Callable[[DuckDB.Generator, DATETIME_DELTA], str]:
|
|
313
|
+
"""
|
|
314
|
+
DuckDB override to handle:
|
|
315
|
+
1. NANOSECOND operations (DuckDB doesn't support INTERVAL ... NANOSECOND)
|
|
316
|
+
2. Float/decimal interval values (DuckDB INTERVAL requires integers)
|
|
317
|
+
"""
|
|
318
|
+
base_impl = date_delta_to_binary_interval_op(cast=cast)
|
|
319
|
+
|
|
320
|
+
def _duckdb_date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
|
|
321
|
+
unit = expression.unit
|
|
322
|
+
interval_value = expression.expression
|
|
323
|
+
|
|
324
|
+
# Handle NANOSECOND unit (DuckDB doesn't support INTERVAL ... NANOSECOND)
|
|
325
|
+
if _is_nanosecond_unit(unit):
|
|
326
|
+
if isinstance(interval_value, exp.Interval):
|
|
327
|
+
interval_value = interval_value.this
|
|
328
|
+
|
|
329
|
+
timestamp_ns = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP_NS)
|
|
330
|
+
|
|
331
|
+
return self.sql(
|
|
332
|
+
exp.func(
|
|
333
|
+
"MAKE_TIMESTAMP_NS",
|
|
334
|
+
exp.Add(this=exp.func("EPOCH_NS", timestamp_ns), expression=interval_value),
|
|
335
|
+
)
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
# Handle float/decimal interval values as duckDB INTERVAL requires integer expressions
|
|
339
|
+
if not interval_value or isinstance(interval_value, exp.Interval):
|
|
340
|
+
return base_impl(self, expression)
|
|
341
|
+
|
|
342
|
+
if interval_value.is_type(*exp.DataType.REAL_TYPES):
|
|
343
|
+
expression.set("expression", exp.cast(exp.func("ROUND", interval_value), "INT"))
|
|
344
|
+
|
|
345
|
+
return base_impl(self, expression)
|
|
346
|
+
|
|
347
|
+
return _duckdb_date_delta_sql
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _array_insert_sql(self: DuckDB.Generator, expression: exp.ArrayInsert) -> str:
|
|
351
|
+
"""
|
|
352
|
+
Transpile ARRAY_INSERT to DuckDB using LIST_CONCAT and slicing.
|
|
353
|
+
|
|
354
|
+
Handles:
|
|
355
|
+
- 0-based and 1-based indexing (normalizes to 0-based for calculations)
|
|
356
|
+
- Negative position conversion (requires array length)
|
|
357
|
+
- NULL propagation (source dialects return NULL, DuckDB creates single-element array)
|
|
358
|
+
- Assumes position is within bounds per user constraint
|
|
359
|
+
|
|
360
|
+
Note: All dialects that support ARRAY_INSERT (Snowflake, Spark, Databricks) have
|
|
361
|
+
ARRAY_FUNCS_PROPAGATES_NULLS=True, so we always assume source propagates NULLs.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
expression: The ArrayInsert expression to transpile.
|
|
365
|
+
|
|
366
|
+
Returns:
|
|
367
|
+
SQL string implementing ARRAY_INSERT behavior.
|
|
368
|
+
"""
|
|
369
|
+
this = expression.this
|
|
370
|
+
position = expression.args.get("position")
|
|
371
|
+
element = expression.expression
|
|
372
|
+
element_array = exp.Array(expressions=[element])
|
|
373
|
+
index_offset = expression.args.get("offset", 0)
|
|
374
|
+
|
|
375
|
+
if not position or not position.is_int:
|
|
376
|
+
self.unsupported("ARRAY_INSERT can only be transpiled with a literal position")
|
|
377
|
+
return self.func("ARRAY_INSERT", this, position, element)
|
|
378
|
+
|
|
379
|
+
pos_value = position.to_py()
|
|
380
|
+
|
|
381
|
+
# Normalize one-based indexing to zero-based for slice calculations
|
|
382
|
+
# Spark (1-based) → Snowflake (0-based):
|
|
383
|
+
# Positive: pos=1 → pos=0 (subtract 1)
|
|
384
|
+
# Negative: pos=-2 → pos=-1 (add 1)
|
|
385
|
+
# Example: Spark array_insert([a,b,c], -2, d) → [a,b,d,c] is same as Snowflake pos=-1
|
|
386
|
+
if pos_value > 0:
|
|
387
|
+
pos_value = pos_value - index_offset
|
|
388
|
+
elif pos_value < 0:
|
|
389
|
+
pos_value = pos_value + index_offset
|
|
390
|
+
|
|
391
|
+
# Build the appropriate list_concat expression based on position
|
|
392
|
+
if pos_value == 0:
|
|
393
|
+
# insert at beginning
|
|
394
|
+
concat_exprs = [element_array, this]
|
|
395
|
+
elif pos_value > 0:
|
|
396
|
+
# Positive position: LIST_CONCAT(arr[1:pos], [elem], arr[pos+1:])
|
|
397
|
+
# 0-based -> DuckDB 1-based slicing
|
|
398
|
+
|
|
399
|
+
# left slice: arr[1:pos]
|
|
400
|
+
slice_start = exp.Bracket(
|
|
401
|
+
this=this,
|
|
402
|
+
expressions=[
|
|
403
|
+
exp.Slice(this=exp.Literal.number(1), expression=exp.Literal.number(pos_value))
|
|
404
|
+
],
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
# right slice: arr[pos+1:]
|
|
408
|
+
slice_end = exp.Bracket(
|
|
409
|
+
this=this, expressions=[exp.Slice(this=exp.Literal.number(pos_value + 1))]
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
concat_exprs = [slice_start, element_array, slice_end]
|
|
413
|
+
else:
|
|
414
|
+
# Negative position: arr[1:LEN(arr)+pos], [elem], arr[LEN(arr)+pos+1:]
|
|
415
|
+
# pos=-1 means insert before last element
|
|
416
|
+
arr_len = exp.Length(this=this)
|
|
417
|
+
|
|
418
|
+
# Calculate slice position: LEN(arr) + pos (e.g., LEN(arr) + (-1) = LEN(arr) - 1)
|
|
419
|
+
slice_end_pos = arr_len + exp.Literal.number(pos_value)
|
|
420
|
+
slice_start_pos = slice_end_pos + exp.Literal.number(1)
|
|
421
|
+
|
|
422
|
+
# left slice: arr[1:LEN(arr)+pos]
|
|
423
|
+
slice_start = exp.Bracket(
|
|
424
|
+
this=this,
|
|
425
|
+
expressions=[exp.Slice(this=exp.Literal.number(1), expression=slice_end_pos)],
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
# right slice: arr[LEN(arr)+pos+1:]
|
|
429
|
+
slice_end = exp.Bracket(this=this, expressions=[exp.Slice(this=slice_start_pos)])
|
|
430
|
+
|
|
431
|
+
concat_exprs = [slice_start, element_array, slice_end]
|
|
432
|
+
|
|
433
|
+
# All dialects that support ARRAY_INSERT propagate NULLs (Snowflake/Spark/Databricks)
|
|
434
|
+
# Wrap in CASE WHEN array IS NULL THEN NULL ELSE func_expr END
|
|
435
|
+
return self.sql(
|
|
436
|
+
exp.If(
|
|
437
|
+
this=exp.Is(this=this, expression=exp.Null()),
|
|
438
|
+
true=exp.Null(),
|
|
439
|
+
false=self.func("LIST_CONCAT", *concat_exprs),
|
|
440
|
+
)
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
|
|
160
444
|
@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
|
|
161
445
|
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
|
|
162
446
|
return self.func("ARRAY_SORT", expression.this)
|
|
@@ -171,6 +455,10 @@ def _build_sort_array_desc(args: t.List) -> exp.Expression:
|
|
|
171
455
|
return exp.SortArray(this=seq_get(args, 0), asc=exp.false())
|
|
172
456
|
|
|
173
457
|
|
|
458
|
+
def _build_array_prepend(args: t.List) -> exp.Expression:
|
|
459
|
+
return exp.ArrayPrepend(this=seq_get(args, 1), expression=seq_get(args, 0))
|
|
460
|
+
|
|
461
|
+
|
|
174
462
|
def _build_date_diff(args: t.List) -> exp.Expression:
|
|
175
463
|
return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))
|
|
176
464
|
|
|
@@ -212,6 +500,14 @@ def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], e
|
|
|
212
500
|
|
|
213
501
|
|
|
214
502
|
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
|
|
503
|
+
ancestor_cast = expression.find_ancestor(exp.Cast, exp.Select)
|
|
504
|
+
ancestor_cast = None if isinstance(ancestor_cast, exp.Select) else ancestor_cast
|
|
505
|
+
|
|
506
|
+
# Empty struct cast works with MAP() since DuckDB can't parse {}
|
|
507
|
+
if not expression.expressions:
|
|
508
|
+
if isinstance(ancestor_cast, exp.Cast) and ancestor_cast.to.is_type(exp.DataType.Type.MAP):
|
|
509
|
+
return "MAP()"
|
|
510
|
+
|
|
215
511
|
args: t.List[str] = []
|
|
216
512
|
|
|
217
513
|
# BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
|
|
@@ -219,7 +515,6 @@ def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
|
|
|
219
515
|
# The transformation to ROW will take place if:
|
|
220
516
|
# 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
|
|
221
517
|
# 2. A cast to STRUCT / ARRAY of STRUCTs is found
|
|
222
|
-
ancestor_cast = expression.find_ancestor(exp.Cast)
|
|
223
518
|
is_bq_inline_struct = (
|
|
224
519
|
(expression.find(exp.PropertyEQ) is None)
|
|
225
520
|
and ancestor_cast
|
|
@@ -231,16 +526,16 @@ def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
|
|
|
231
526
|
|
|
232
527
|
for i, expr in enumerate(expression.expressions):
|
|
233
528
|
is_property_eq = isinstance(expr, exp.PropertyEQ)
|
|
529
|
+
this = expr.this
|
|
234
530
|
value = expr.expression if is_property_eq else expr
|
|
235
531
|
|
|
236
532
|
if is_bq_inline_struct:
|
|
237
533
|
args.append(self.sql(value))
|
|
238
534
|
else:
|
|
239
|
-
if
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
key = self.sql(expr.this)
|
|
535
|
+
if isinstance(this, exp.Identifier):
|
|
536
|
+
key = self.sql(exp.Literal.string(expr.name))
|
|
537
|
+
elif is_property_eq:
|
|
538
|
+
key = self.sql(this)
|
|
244
539
|
else:
|
|
245
540
|
key = self.sql(exp.Literal.string(f"_{i}"))
|
|
246
541
|
|
|
@@ -269,21 +564,77 @@ def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
|
|
|
269
564
|
return f"CAST({sql} AS TEXT)"
|
|
270
565
|
|
|
271
566
|
|
|
567
|
+
def _seq_sql(self: DuckDB.Generator, expression: exp.Func, byte_width: int) -> str:
|
|
568
|
+
"""
|
|
569
|
+
Transpile Snowflake SEQ1/SEQ2/SEQ4/SEQ8 to DuckDB.
|
|
570
|
+
|
|
571
|
+
Generates monotonically increasing integers starting from 0.
|
|
572
|
+
The signed parameter (0 or 1) affects wrap-around behavior:
|
|
573
|
+
- Unsigned (0): wraps at 2^(bits) - 1
|
|
574
|
+
- Signed (1): wraps at 2^(bits-1) - 1, then goes negative
|
|
575
|
+
|
|
576
|
+
Note: SEQ in WHERE, HAVING, aggregates, or window ORDER BY is not supported
|
|
577
|
+
because these contexts don't allow window functions. Users should rewrite
|
|
578
|
+
using CTEs or subqueries.
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
expression: The SEQ function expression (may have 'this' arg for signed param)
|
|
582
|
+
byte_width: 1, 2, 4, or 8 bytes
|
|
583
|
+
|
|
584
|
+
Returns:
|
|
585
|
+
SQL string using ROW_NUMBER() with modulo for wrap-around
|
|
586
|
+
"""
|
|
587
|
+
# Warn if SEQ is in a restricted context (Select stops search at current scope)
|
|
588
|
+
ancestor = expression.find_ancestor(*_SEQ_RESTRICTED)
|
|
589
|
+
if ancestor and (
|
|
590
|
+
(not isinstance(ancestor, (exp.Order, exp.Select)))
|
|
591
|
+
or (isinstance(ancestor, exp.Order) and isinstance(ancestor.parent, exp.Window))
|
|
592
|
+
):
|
|
593
|
+
self.unsupported("SEQ in restricted context is not supported - use CTE or subquery")
|
|
594
|
+
|
|
595
|
+
bits = byte_width * 8
|
|
596
|
+
max_val = exp.Literal.number(2**bits)
|
|
597
|
+
|
|
598
|
+
if expression.name == "1":
|
|
599
|
+
half = exp.Literal.number(2 ** (bits - 1))
|
|
600
|
+
result = exp.replace_placeholders(self.SEQ_SIGNED.copy(), max_val=max_val, half=half)
|
|
601
|
+
else:
|
|
602
|
+
result = exp.replace_placeholders(self.SEQ_UNSIGNED.copy(), max_val=max_val)
|
|
603
|
+
|
|
604
|
+
return self.sql(result)
|
|
605
|
+
|
|
606
|
+
|
|
272
607
|
def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
|
|
273
608
|
scale = expression.args.get("scale")
|
|
274
609
|
timestamp = expression.this
|
|
610
|
+
target_type = expression.args.get("target_type")
|
|
611
|
+
|
|
612
|
+
# Check if we need NTZ (naive timestamp in UTC)
|
|
613
|
+
is_ntz = target_type and target_type.this in (
|
|
614
|
+
exp.DataType.Type.TIMESTAMP,
|
|
615
|
+
exp.DataType.Type.TIMESTAMPNTZ,
|
|
616
|
+
)
|
|
275
617
|
|
|
276
|
-
if scale in (None, exp.UnixToTime.SECONDS):
|
|
277
|
-
return self.func("TO_TIMESTAMP", timestamp)
|
|
278
618
|
if scale == exp.UnixToTime.MILLIS:
|
|
619
|
+
# EPOCH_MS already returns TIMESTAMP (naive, UTC)
|
|
279
620
|
return self.func("EPOCH_MS", timestamp)
|
|
280
621
|
if scale == exp.UnixToTime.MICROS:
|
|
622
|
+
# MAKE_TIMESTAMP already returns TIMESTAMP (naive, UTC)
|
|
281
623
|
return self.func("MAKE_TIMESTAMP", timestamp)
|
|
282
624
|
|
|
283
|
-
|
|
625
|
+
# Other scales: divide and use TO_TIMESTAMP
|
|
626
|
+
if scale not in (None, exp.UnixToTime.SECONDS):
|
|
627
|
+
timestamp = exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))
|
|
628
|
+
|
|
629
|
+
to_timestamp: exp.Expression = exp.Anonymous(this="TO_TIMESTAMP", expressions=[timestamp])
|
|
630
|
+
|
|
631
|
+
if is_ntz:
|
|
632
|
+
to_timestamp = exp.AtTimeZone(this=to_timestamp, zone=exp.Literal.string("UTC"))
|
|
633
|
+
|
|
634
|
+
return self.sql(to_timestamp)
|
|
284
635
|
|
|
285
636
|
|
|
286
|
-
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)
|
|
637
|
+
WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In, exp.Not)
|
|
287
638
|
|
|
288
639
|
|
|
289
640
|
def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
|
|
@@ -373,9 +724,13 @@ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> e
|
|
|
373
724
|
|
|
374
725
|
|
|
375
726
|
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
|
|
727
|
+
unit = expression.unit
|
|
728
|
+
|
|
729
|
+
if _is_nanosecond_unit(unit):
|
|
730
|
+
return _handle_nanosecond_diff(self, expression.this, expression.expression)
|
|
731
|
+
|
|
376
732
|
this = _implicit_datetime_cast(expression.this)
|
|
377
733
|
expr = _implicit_datetime_cast(expression.expression)
|
|
378
|
-
unit = expression.args.get("unit")
|
|
379
734
|
|
|
380
735
|
# DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
|
|
381
736
|
# SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
|
|
@@ -427,11 +782,17 @@ def _json_extract_value_array_sql(
|
|
|
427
782
|
|
|
428
783
|
|
|
429
784
|
def _cast_to_varchar(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
430
|
-
if arg and arg.type and not arg.is_type(exp.DataType.
|
|
785
|
+
if arg and arg.type and not arg.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
|
|
431
786
|
return exp.cast(arg, exp.DataType.Type.VARCHAR)
|
|
432
787
|
return arg
|
|
433
788
|
|
|
434
789
|
|
|
790
|
+
def _cast_to_boolean(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
791
|
+
if arg and not arg.is_type(exp.DataType.Type.BOOLEAN):
|
|
792
|
+
return exp.cast(arg, exp.DataType.Type.BOOLEAN)
|
|
793
|
+
return arg
|
|
794
|
+
|
|
795
|
+
|
|
435
796
|
def _is_binary(arg: exp.Expression) -> bool:
|
|
436
797
|
return arg.is_type(
|
|
437
798
|
exp.DataType.Type.BINARY,
|
|
@@ -466,6 +827,76 @@ def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
|
|
|
466
827
|
expression.set("expression", _cast_to_bit(expression.expression))
|
|
467
828
|
|
|
468
829
|
|
|
830
|
+
def _day_navigation_sql(
|
|
831
|
+
self: DuckDB.Generator, expression: t.Union[exp.NextDay, exp.PreviousDay]
|
|
832
|
+
) -> str:
|
|
833
|
+
"""
|
|
834
|
+
Transpile Snowflake's NEXT_DAY / PREVIOUS_DAY to DuckDB using date arithmetic.
|
|
835
|
+
|
|
836
|
+
Returns the DATE of the next/previous occurrence of the specified weekday.
|
|
837
|
+
|
|
838
|
+
Formulas:
|
|
839
|
+
- NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
|
|
840
|
+
- PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1
|
|
841
|
+
|
|
842
|
+
Supports both literal and non-literal day names:
|
|
843
|
+
- Literal: Direct lookup (e.g., 'Monday' → 1)
|
|
844
|
+
- Non-literal: CASE statement for runtime evaluation
|
|
845
|
+
|
|
846
|
+
Examples:
|
|
847
|
+
NEXT_DAY('2024-01-01' (Monday), 'Monday')
|
|
848
|
+
→ (1 - 1 + 6) % 7 + 1 = 6 % 7 + 1 = 7 days → 2024-01-08
|
|
849
|
+
|
|
850
|
+
PREVIOUS_DAY('2024-01-15' (Monday), 'Friday')
|
|
851
|
+
→ (1 - 5 + 6) % 7 + 1 = 2 % 7 + 1 = 3 days → 2024-01-12
|
|
852
|
+
"""
|
|
853
|
+
date_expr = expression.this
|
|
854
|
+
day_name_expr = expression.expression
|
|
855
|
+
|
|
856
|
+
# Build ISODOW call for current day of week
|
|
857
|
+
isodow_call = exp.func("ISODOW", date_expr)
|
|
858
|
+
|
|
859
|
+
# Determine target day of week
|
|
860
|
+
if isinstance(day_name_expr, exp.Literal):
|
|
861
|
+
# Literal day name: lookup target_dow directly
|
|
862
|
+
day_name_str = day_name_expr.name.upper()
|
|
863
|
+
matching_day = next(
|
|
864
|
+
(day for day in WEEK_START_DAY_TO_DOW if day.startswith(day_name_str)), None
|
|
865
|
+
)
|
|
866
|
+
if matching_day:
|
|
867
|
+
target_dow: exp.Expression = exp.Literal.number(WEEK_START_DAY_TO_DOW[matching_day])
|
|
868
|
+
else:
|
|
869
|
+
# Unrecognized day name, use fallback
|
|
870
|
+
return self.function_fallback_sql(expression)
|
|
871
|
+
else:
|
|
872
|
+
# Non-literal day name: build CASE statement for runtime mapping
|
|
873
|
+
upper_day_name = exp.Upper(this=day_name_expr)
|
|
874
|
+
target_dow = exp.Case(
|
|
875
|
+
ifs=[
|
|
876
|
+
exp.If(
|
|
877
|
+
this=exp.func(
|
|
878
|
+
"STARTS_WITH", upper_day_name.copy(), exp.Literal.string(day[:2])
|
|
879
|
+
),
|
|
880
|
+
true=exp.Literal.number(dow_num),
|
|
881
|
+
)
|
|
882
|
+
for day, dow_num in WEEK_START_DAY_TO_DOW.items()
|
|
883
|
+
]
|
|
884
|
+
)
|
|
885
|
+
|
|
886
|
+
# Calculate days offset and apply interval based on direction
|
|
887
|
+
if isinstance(expression, exp.NextDay):
|
|
888
|
+
# NEXT_DAY: (target_dow - current_dow + 6) % 7 + 1
|
|
889
|
+
days_offset = exp.paren(target_dow - isodow_call + 6, copy=False) % 7 + 1
|
|
890
|
+
date_with_offset = date_expr + exp.Interval(this=days_offset, unit=exp.var("DAY"))
|
|
891
|
+
else: # exp.PreviousDay
|
|
892
|
+
# PREVIOUS_DAY: (current_dow - target_dow + 6) % 7 + 1
|
|
893
|
+
days_offset = exp.paren(isodow_call - target_dow + 6, copy=False) % 7 + 1
|
|
894
|
+
date_with_offset = date_expr - exp.Interval(this=days_offset, unit=exp.var("DAY"))
|
|
895
|
+
|
|
896
|
+
# Build final: CAST(date_with_offset AS DATE)
|
|
897
|
+
return self.sql(exp.cast(date_with_offset, exp.DataType.Type.DATE))
|
|
898
|
+
|
|
899
|
+
|
|
469
900
|
def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
|
|
470
901
|
# Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL
|
|
471
902
|
having = expression.this
|
|
@@ -475,6 +906,39 @@ def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
|
|
|
475
906
|
return self.function_fallback_sql(expression)
|
|
476
907
|
|
|
477
908
|
|
|
909
|
+
def _bitwise_agg_sql(
|
|
910
|
+
self: DuckDB.Generator,
|
|
911
|
+
expression: t.Union[exp.BitwiseOrAgg, exp.BitwiseAndAgg, exp.BitwiseXorAgg],
|
|
912
|
+
) -> str:
|
|
913
|
+
"""
|
|
914
|
+
DuckDB's bitwise aggregate functions only accept integer types. For other types:
|
|
915
|
+
- DECIMAL/STRING: Use CAST(arg AS INT) to convert directly, will round to nearest int
|
|
916
|
+
- FLOAT/DOUBLE: Use ROUND(arg)::INT to round to nearest integer, required due to float precision loss
|
|
917
|
+
"""
|
|
918
|
+
if isinstance(expression, exp.BitwiseOrAgg):
|
|
919
|
+
func_name = "BIT_OR"
|
|
920
|
+
elif isinstance(expression, exp.BitwiseAndAgg):
|
|
921
|
+
func_name = "BIT_AND"
|
|
922
|
+
else: # exp.BitwiseXorAgg
|
|
923
|
+
func_name = "BIT_XOR"
|
|
924
|
+
|
|
925
|
+
arg = expression.this
|
|
926
|
+
|
|
927
|
+
if not arg.type:
|
|
928
|
+
from sqlglot.optimizer.annotate_types import annotate_types
|
|
929
|
+
|
|
930
|
+
arg = annotate_types(arg, dialect=self.dialect)
|
|
931
|
+
|
|
932
|
+
if arg.is_type(*exp.DataType.REAL_TYPES, *exp.DataType.TEXT_TYPES):
|
|
933
|
+
if arg.is_type(*exp.DataType.FLOAT_TYPES):
|
|
934
|
+
# float types need to be rounded first due to precision loss
|
|
935
|
+
arg = exp.func("ROUND", arg)
|
|
936
|
+
|
|
937
|
+
arg = exp.cast(arg, exp.DataType.Type.INT)
|
|
938
|
+
|
|
939
|
+
return self.func(func_name, arg)
|
|
940
|
+
|
|
941
|
+
|
|
478
942
|
def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str:
|
|
479
943
|
# DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly
|
|
480
944
|
if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
|
|
@@ -571,26 +1035,102 @@ def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
|
|
|
571
1035
|
return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
|
|
572
1036
|
|
|
573
1037
|
|
|
574
|
-
def
|
|
575
|
-
|
|
1038
|
+
def _boolxor_agg_sql(self: DuckDB.Generator, expression: exp.BoolxorAgg) -> str:
|
|
1039
|
+
"""
|
|
1040
|
+
Snowflake's `BOOLXOR_AGG(col)` returns TRUE if exactly one input in `col` is TRUE, FALSE otherwise;
|
|
1041
|
+
Since DuckDB does not have a mapping function, we mimic the behavior by generating `COUNT_IF(col) = 1`.
|
|
1042
|
+
|
|
1043
|
+
DuckDB's COUNT_IF strictly requires boolean inputs, so cast if not already boolean.
|
|
1044
|
+
"""
|
|
1045
|
+
return self.sql(
|
|
1046
|
+
exp.EQ(
|
|
1047
|
+
this=exp.CountIf(this=_cast_to_boolean(expression.this)),
|
|
1048
|
+
expression=exp.Literal.number(1),
|
|
1049
|
+
)
|
|
1050
|
+
)
|
|
576
1051
|
|
|
577
|
-
if decimals is not None and expression.args.get("to") is None:
|
|
578
|
-
this = expression.this
|
|
579
|
-
if isinstance(this, exp.Binary):
|
|
580
|
-
this = exp.Paren(this=this)
|
|
581
1052
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
1053
|
+
def _bitshift_sql(
|
|
1054
|
+
self: DuckDB.Generator, expression: exp.BitwiseLeftShift | exp.BitwiseRightShift
|
|
1055
|
+
) -> str:
|
|
1056
|
+
"""
|
|
1057
|
+
Transform bitshift expressions for DuckDB by injecting BIT/INT128 casts.
|
|
585
1058
|
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
result = exp.Div(this=floored, expression=pow_.copy())
|
|
1059
|
+
DuckDB's bitwise shift operators don't work with BLOB/BINARY types, so we cast
|
|
1060
|
+
them to BIT for the operation, then cast the result back to the original type.
|
|
589
1061
|
|
|
590
|
-
|
|
591
|
-
|
|
1062
|
+
Note: Assumes type annotation has been applied with the source dialect.
|
|
1063
|
+
"""
|
|
1064
|
+
operator = "<<" if isinstance(expression, exp.BitwiseLeftShift) else ">>"
|
|
1065
|
+
result_is_blob = False
|
|
1066
|
+
this = expression.this
|
|
1067
|
+
|
|
1068
|
+
if _is_binary(this):
|
|
1069
|
+
result_is_blob = True
|
|
1070
|
+
expression.set("this", exp.cast(this, exp.DataType.Type.BIT))
|
|
1071
|
+
elif expression.args.get("requires_int128"):
|
|
1072
|
+
this.replace(exp.cast(this, exp.DataType.Type.INT128))
|
|
1073
|
+
|
|
1074
|
+
result_sql = self.binary(expression, operator)
|
|
1075
|
+
|
|
1076
|
+
# Wrap in parentheses if parent is a bitwise operator to "fix" DuckDB precedence issue
|
|
1077
|
+
# DuckDB parses: a << b | c << d as (a << b | c) << d
|
|
1078
|
+
if isinstance(expression.parent, exp.Binary):
|
|
1079
|
+
result_sql = self.sql(exp.Paren(this=result_sql))
|
|
1080
|
+
|
|
1081
|
+
if result_is_blob:
|
|
1082
|
+
result_sql = self.sql(
|
|
1083
|
+
exp.Cast(this=result_sql, to=exp.DataType.build("BLOB", dialect="duckdb"))
|
|
592
1084
|
)
|
|
593
1085
|
|
|
1086
|
+
return result_sql
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def _scale_rounding_sql(
|
|
1090
|
+
self: DuckDB.Generator,
|
|
1091
|
+
expression: exp.Expression,
|
|
1092
|
+
rounding_func: type[exp.Expression],
|
|
1093
|
+
) -> str | None:
|
|
1094
|
+
"""
|
|
1095
|
+
Handle scale parameter transformation for rounding functions.
|
|
1096
|
+
|
|
1097
|
+
DuckDB doesn't support the scale parameter for certain functions (e.g., FLOOR, CEIL),
|
|
1098
|
+
so we transform: FUNC(x, n) to ROUND(FUNC(x * 10^n) / 10^n, n)
|
|
1099
|
+
|
|
1100
|
+
Args:
|
|
1101
|
+
self: The DuckDB generator instance
|
|
1102
|
+
expression: The expression to transform (must have 'this', 'decimals', and 'to' args)
|
|
1103
|
+
rounding_func: The rounding function class to use in the transformation
|
|
1104
|
+
|
|
1105
|
+
Returns:
|
|
1106
|
+
The transformed SQL string if decimals parameter exists, None otherwise
|
|
1107
|
+
"""
|
|
1108
|
+
decimals = expression.args.get("decimals")
|
|
1109
|
+
|
|
1110
|
+
if decimals is None or expression.args.get("to") is not None:
|
|
1111
|
+
return None
|
|
1112
|
+
|
|
1113
|
+
this = expression.this
|
|
1114
|
+
if isinstance(this, exp.Binary):
|
|
1115
|
+
this = exp.Paren(this=this)
|
|
1116
|
+
|
|
1117
|
+
n_int = decimals
|
|
1118
|
+
if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
|
|
1119
|
+
n_int = exp.cast(decimals, exp.DataType.Type.INT)
|
|
1120
|
+
|
|
1121
|
+
pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int)
|
|
1122
|
+
rounded = rounding_func(this=exp.Mul(this=this, expression=pow_))
|
|
1123
|
+
result = exp.Div(this=rounded, expression=pow_.copy())
|
|
1124
|
+
|
|
1125
|
+
return self.round_sql(
|
|
1126
|
+
exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True)
|
|
1127
|
+
)
|
|
1128
|
+
|
|
1129
|
+
|
|
1130
|
+
def _ceil_floor(self: DuckDB.Generator, expression: exp.Floor | exp.Ceil) -> str:
|
|
1131
|
+
scaled_sql = _scale_rounding_sql(self, expression, type(expression))
|
|
1132
|
+
if scaled_sql is not None:
|
|
1133
|
+
return scaled_sql
|
|
594
1134
|
return self.ceil_floor(expression)
|
|
595
1135
|
|
|
596
1136
|
|
|
@@ -648,6 +1188,89 @@ def _regr_val_sql(
|
|
|
648
1188
|
)
|
|
649
1189
|
|
|
650
1190
|
|
|
1191
|
+
def _maybe_corr_null_to_false(
|
|
1192
|
+
expression: t.Union[exp.Filter, exp.Window, exp.Corr],
|
|
1193
|
+
) -> t.Optional[t.Union[exp.Filter, exp.Window, exp.Corr]]:
|
|
1194
|
+
corr = expression
|
|
1195
|
+
while isinstance(corr, (exp.Window, exp.Filter)):
|
|
1196
|
+
corr = corr.this
|
|
1197
|
+
|
|
1198
|
+
if not isinstance(corr, exp.Corr) or not corr.args.get("null_on_zero_variance"):
|
|
1199
|
+
return None
|
|
1200
|
+
|
|
1201
|
+
corr.set("null_on_zero_variance", False)
|
|
1202
|
+
return expression
|
|
1203
|
+
|
|
1204
|
+
|
|
1205
|
+
def _date_from_parts_sql(self, expression: exp.DateFromParts) -> str:
|
|
1206
|
+
"""
|
|
1207
|
+
Snowflake's DATE_FROM_PARTS allows out-of-range values for the month and day input.
|
|
1208
|
+
E.g., larger values (month=13, day=100), zero-values (month=0, day=0), negative values (month=-13, day=-100).
|
|
1209
|
+
|
|
1210
|
+
DuckDB's MAKE_DATE does not support out-of-range values, but DuckDB's INTERVAL type does.
|
|
1211
|
+
|
|
1212
|
+
We convert to date arithmetic:
|
|
1213
|
+
DATE_FROM_PARTS(year, month, day)
|
|
1214
|
+
- MAKE_DATE(year, 1, 1) + INTERVAL (month-1) MONTH + INTERVAL (day-1) DAY
|
|
1215
|
+
"""
|
|
1216
|
+
year_expr = expression.args.get("year")
|
|
1217
|
+
month_expr = expression.args.get("month")
|
|
1218
|
+
day_expr = expression.args.get("day")
|
|
1219
|
+
|
|
1220
|
+
if expression.args.get("allow_overflow"):
|
|
1221
|
+
base_date: exp.Expression = exp.func(
|
|
1222
|
+
"MAKE_DATE", year_expr, exp.Literal.number(1), exp.Literal.number(1)
|
|
1223
|
+
)
|
|
1224
|
+
|
|
1225
|
+
if month_expr:
|
|
1226
|
+
base_date = base_date + exp.Interval(this=month_expr - 1, unit=exp.var("MONTH"))
|
|
1227
|
+
|
|
1228
|
+
if day_expr:
|
|
1229
|
+
base_date = base_date + exp.Interval(this=day_expr - 1, unit=exp.var("DAY"))
|
|
1230
|
+
|
|
1231
|
+
return self.sql(exp.cast(expression=base_date, to=exp.DataType.Type.DATE))
|
|
1232
|
+
|
|
1233
|
+
return self.func("MAKE_DATE", year_expr, month_expr, day_expr)
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
def _round_arg(arg: exp.Expression, round_input: t.Optional[bool] = None) -> exp.Expression:
|
|
1237
|
+
if round_input:
|
|
1238
|
+
return exp.func("ROUND", arg, exp.Literal.number(0))
|
|
1239
|
+
return arg
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
def _boolnot_sql(self: DuckDB.Generator, expression: exp.Boolnot) -> str:
|
|
1243
|
+
arg = _round_arg(expression.this, expression.args.get("round_input"))
|
|
1244
|
+
return self.sql(exp.not_(exp.paren(arg)))
|
|
1245
|
+
|
|
1246
|
+
|
|
1247
|
+
def _booland_sql(self: DuckDB.Generator, expression: exp.Booland) -> str:
|
|
1248
|
+
round_input = expression.args.get("round_input")
|
|
1249
|
+
left = _round_arg(expression.this, round_input)
|
|
1250
|
+
right = _round_arg(expression.expression, round_input)
|
|
1251
|
+
return self.sql(exp.paren(exp.and_(exp.paren(left), exp.paren(right), wrap=False)))
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def _boolor_sql(self: DuckDB.Generator, expression: exp.Boolor) -> str:
|
|
1255
|
+
round_input = expression.args.get("round_input")
|
|
1256
|
+
left = _round_arg(expression.this, round_input)
|
|
1257
|
+
right = _round_arg(expression.expression, round_input)
|
|
1258
|
+
return self.sql(exp.paren(exp.or_(exp.paren(left), exp.paren(right), wrap=False)))
|
|
1259
|
+
|
|
1260
|
+
|
|
1261
|
+
def _xor_sql(self: DuckDB.Generator, expression: exp.Xor) -> str:
|
|
1262
|
+
round_input = expression.args.get("round_input")
|
|
1263
|
+
left = _round_arg(expression.this, round_input)
|
|
1264
|
+
right = _round_arg(expression.expression, round_input)
|
|
1265
|
+
return self.sql(
|
|
1266
|
+
exp.or_(
|
|
1267
|
+
exp.paren(exp.and_(left.copy(), exp.paren(right.not_()), wrap=False)),
|
|
1268
|
+
exp.paren(exp.and_(exp.paren(left.not_()), right.copy(), wrap=False)),
|
|
1269
|
+
wrap=False,
|
|
1270
|
+
)
|
|
1271
|
+
)
|
|
1272
|
+
|
|
1273
|
+
|
|
651
1274
|
class DuckDB(Dialect):
|
|
652
1275
|
NULL_ORDERING = "nulls_are_last"
|
|
653
1276
|
SUPPORTS_USER_DEFINED_TYPES = True
|
|
@@ -667,10 +1290,23 @@ class DuckDB(Dialect):
|
|
|
667
1290
|
"DAYOFWEEKISO": "ISODOW",
|
|
668
1291
|
}
|
|
669
1292
|
|
|
1293
|
+
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
|
|
1294
|
+
|
|
670
1295
|
DATE_PART_MAPPING.pop("WEEKDAY")
|
|
671
1296
|
|
|
672
1297
|
INVERSE_TIME_MAPPING = {
|
|
673
1298
|
"%e": "%-d", # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
|
|
1299
|
+
"%:z": "%z", # In DuckDB %z can represent ±HH:MM, ±HHMM, or ±HH.
|
|
1300
|
+
"%-z": "%z",
|
|
1301
|
+
"%f_zero": "%n",
|
|
1302
|
+
"%f_one": "%n",
|
|
1303
|
+
"%f_two": "%n",
|
|
1304
|
+
"%f_three": "%g",
|
|
1305
|
+
"%f_four": "%n",
|
|
1306
|
+
"%f_five": "%n",
|
|
1307
|
+
"%f_seven": "%n",
|
|
1308
|
+
"%f_eight": "%n",
|
|
1309
|
+
"%f_nine": "%n",
|
|
674
1310
|
}
|
|
675
1311
|
|
|
676
1312
|
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
@@ -687,6 +1323,7 @@ class DuckDB(Dialect):
|
|
|
687
1323
|
|
|
688
1324
|
class Tokenizer(tokens.Tokenizer):
|
|
689
1325
|
BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
|
|
1326
|
+
BYTE_STRING_ESCAPES = ["'", "\\"]
|
|
690
1327
|
HEREDOC_STRINGS = ["$"]
|
|
691
1328
|
|
|
692
1329
|
HEREDOC_TAG_IS_IDENTIFIER = True
|
|
@@ -749,7 +1386,7 @@ class DuckDB(Dialect):
|
|
|
749
1386
|
**parser.Parser.RANGE_PARSERS,
|
|
750
1387
|
TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
|
|
751
1388
|
TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
|
|
752
|
-
TokenType.
|
|
1389
|
+
TokenType.TILDE: binary_range_parser(exp.RegexpFullMatch),
|
|
753
1390
|
}
|
|
754
1391
|
|
|
755
1392
|
EXPONENT = {
|
|
@@ -768,6 +1405,7 @@ class DuckDB(Dialect):
|
|
|
768
1405
|
FUNCTIONS = {
|
|
769
1406
|
**parser.Parser.FUNCTIONS,
|
|
770
1407
|
"ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)),
|
|
1408
|
+
"ARRAY_PREPEND": _build_array_prepend,
|
|
771
1409
|
"ARRAY_REVERSE_SORT": _build_sort_array_desc,
|
|
772
1410
|
"ARRAY_SORT": exp.SortArray.from_arg_list,
|
|
773
1411
|
"BIT_AND": exp.BitwiseAndAgg.from_arg_list,
|
|
@@ -789,15 +1427,21 @@ class DuckDB(Dialect):
|
|
|
789
1427
|
this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
|
|
790
1428
|
),
|
|
791
1429
|
"GENERATE_SERIES": _build_generate_series(),
|
|
1430
|
+
"GET_BIT": lambda args: exp.Getbit(
|
|
1431
|
+
this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
|
|
1432
|
+
),
|
|
792
1433
|
"JSON": exp.ParseJSON.from_arg_list,
|
|
793
1434
|
"JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
|
|
794
1435
|
"JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
|
|
1436
|
+
"LIST_APPEND": exp.ArrayAppend.from_arg_list,
|
|
1437
|
+
"LIST_CONCAT": parser.build_array_concat,
|
|
795
1438
|
"LIST_CONTAINS": exp.ArrayContains.from_arg_list,
|
|
796
1439
|
"LIST_COSINE_DISTANCE": exp.CosineDistance.from_arg_list,
|
|
797
1440
|
"LIST_DISTANCE": exp.EuclideanDistance.from_arg_list,
|
|
798
1441
|
"LIST_FILTER": exp.ArrayFilter.from_arg_list,
|
|
799
1442
|
"LIST_HAS": exp.ArrayContains.from_arg_list,
|
|
800
1443
|
"LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
|
|
1444
|
+
"LIST_PREPEND": _build_array_prepend,
|
|
801
1445
|
"LIST_REVERSE_SORT": _build_sort_array_desc,
|
|
802
1446
|
"LIST_SORT": exp.SortArray.from_arg_list,
|
|
803
1447
|
"LIST_TRANSFORM": exp.Transform.from_arg_list,
|
|
@@ -1061,7 +1705,6 @@ class DuckDB(Dialect):
|
|
|
1061
1705
|
COPY_HAS_INTO_KEYWORD = False
|
|
1062
1706
|
STAR_EXCEPT = "EXCLUDE"
|
|
1063
1707
|
PAD_FILL_PATTERN_IS_REQUIRED = True
|
|
1064
|
-
ARRAY_CONCAT_IS_VAR_LEN = False
|
|
1065
1708
|
ARRAY_SIZE_DIM_REQUIRED = False
|
|
1066
1709
|
NORMALIZE_EXTRACT_DATE_PARTS = True
|
|
1067
1710
|
SUPPORTS_LIKE_QUANTIFIERS = False
|
|
@@ -1071,80 +1714,124 @@ class DuckDB(Dialect):
|
|
|
1071
1714
|
**generator.Generator.TRANSFORMS,
|
|
1072
1715
|
exp.AnyValue: _anyvalue_sql,
|
|
1073
1716
|
exp.ApproxDistinct: approx_count_distinct_sql,
|
|
1717
|
+
exp.Boolnot: _boolnot_sql,
|
|
1718
|
+
exp.Booland: _booland_sql,
|
|
1719
|
+
exp.Boolor: _boolor_sql,
|
|
1074
1720
|
exp.Array: transforms.preprocess(
|
|
1075
1721
|
[transforms.inherit_struct_field_names],
|
|
1076
1722
|
generator=inline_array_unless_query,
|
|
1077
1723
|
),
|
|
1724
|
+
exp.ArrayAppend: array_append_sql("LIST_APPEND"),
|
|
1725
|
+
exp.ArrayCompact: array_compact_sql,
|
|
1726
|
+
exp.ArrayConstructCompact: lambda self, e: self.sql(
|
|
1727
|
+
exp.ArrayCompact(this=exp.Array(expressions=e.expressions))
|
|
1728
|
+
),
|
|
1729
|
+
exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
|
|
1078
1730
|
exp.ArrayFilter: rename_func("LIST_FILTER"),
|
|
1731
|
+
exp.ArrayInsert: _array_insert_sql,
|
|
1079
1732
|
exp.ArrayRemove: remove_from_array_using_filter,
|
|
1080
1733
|
exp.ArraySort: _array_sort_sql,
|
|
1734
|
+
exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
|
|
1081
1735
|
exp.ArraySum: rename_func("LIST_SUM"),
|
|
1082
1736
|
exp.ArrayUniqueAgg: lambda self, e: self.func(
|
|
1083
1737
|
"LIST", exp.Distinct(expressions=[e.this])
|
|
1084
1738
|
),
|
|
1739
|
+
exp.Base64DecodeBinary: lambda self, e: _base64_decode_sql(self, e, to_string=False),
|
|
1740
|
+
exp.Base64DecodeString: lambda self, e: _base64_decode_sql(self, e, to_string=True),
|
|
1085
1741
|
exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
|
|
1086
|
-
exp.BitwiseAndAgg:
|
|
1742
|
+
exp.BitwiseAndAgg: _bitwise_agg_sql,
|
|
1743
|
+
exp.BitwiseLeftShift: _bitshift_sql,
|
|
1087
1744
|
exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
|
|
1088
|
-
exp.BitwiseOrAgg:
|
|
1089
|
-
exp.
|
|
1745
|
+
exp.BitwiseOrAgg: _bitwise_agg_sql,
|
|
1746
|
+
exp.BitwiseRightShift: _bitshift_sql,
|
|
1747
|
+
exp.BitwiseXorAgg: _bitwise_agg_sql,
|
|
1090
1748
|
exp.CommentColumnConstraint: no_comment_column_constraint_sql,
|
|
1749
|
+
exp.Corr: lambda self, e: self._corr_sql(e),
|
|
1091
1750
|
exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
|
|
1092
1751
|
exp.CurrentTime: lambda *_: "CURRENT_TIME",
|
|
1093
|
-
exp.CurrentTimestamp: lambda
|
|
1752
|
+
exp.CurrentTimestamp: lambda self, e: self.sql(
|
|
1753
|
+
exp.AtTimeZone(this=exp.var("CURRENT_TIMESTAMP"), zone=exp.Literal.string("UTC"))
|
|
1754
|
+
)
|
|
1755
|
+
if e.args.get("sysdate")
|
|
1756
|
+
else "CURRENT_TIMESTAMP",
|
|
1757
|
+
exp.Localtime: unsupported_args("this")(lambda *_: "LOCALTIME"),
|
|
1094
1758
|
exp.DayOfMonth: rename_func("DAYOFMONTH"),
|
|
1095
1759
|
exp.DayOfWeek: rename_func("DAYOFWEEK"),
|
|
1096
1760
|
exp.DayOfWeekIso: rename_func("ISODOW"),
|
|
1097
1761
|
exp.DayOfYear: rename_func("DAYOFYEAR"),
|
|
1762
|
+
exp.Dayname: lambda self, e: (
|
|
1763
|
+
self.func("STRFTIME", e.this, exp.Literal.string("%a"))
|
|
1764
|
+
if e.args.get("abbreviated")
|
|
1765
|
+
else self.func("DAYNAME", e.this)
|
|
1766
|
+
),
|
|
1767
|
+
exp.Monthname: lambda self, e: (
|
|
1768
|
+
self.func("STRFTIME", e.this, exp.Literal.string("%b"))
|
|
1769
|
+
if e.args.get("abbreviated")
|
|
1770
|
+
else self.func("MONTHNAME", e.this)
|
|
1771
|
+
),
|
|
1098
1772
|
exp.DataType: _datatype_sql,
|
|
1099
1773
|
exp.Date: _date_sql,
|
|
1100
|
-
exp.DateAdd:
|
|
1101
|
-
exp.DateFromParts:
|
|
1102
|
-
exp.DateSub:
|
|
1774
|
+
exp.DateAdd: _date_delta_to_binary_interval_op(),
|
|
1775
|
+
exp.DateFromParts: _date_from_parts_sql,
|
|
1776
|
+
exp.DateSub: _date_delta_to_binary_interval_op(),
|
|
1103
1777
|
exp.DateDiff: _date_diff_sql,
|
|
1104
1778
|
exp.DateStrToDate: datestrtodate_sql,
|
|
1105
1779
|
exp.Datetime: no_datetime_sql,
|
|
1106
1780
|
exp.DatetimeDiff: _date_diff_sql,
|
|
1107
|
-
exp.DatetimeSub:
|
|
1108
|
-
exp.DatetimeAdd:
|
|
1781
|
+
exp.DatetimeSub: _date_delta_to_binary_interval_op(),
|
|
1782
|
+
exp.DatetimeAdd: _date_delta_to_binary_interval_op(),
|
|
1109
1783
|
exp.DateToDi: lambda self,
|
|
1110
1784
|
e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
|
|
1111
1785
|
exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
|
|
1112
1786
|
exp.DiToDate: lambda self,
|
|
1113
1787
|
e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
|
|
1114
1788
|
exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
|
|
1789
|
+
exp.EqualNull: lambda self, e: self.sql(
|
|
1790
|
+
exp.NullSafeEQ(this=e.this, expression=e.expression)
|
|
1791
|
+
),
|
|
1115
1792
|
exp.EuclideanDistance: rename_func("LIST_DISTANCE"),
|
|
1116
1793
|
exp.GenerateDateArray: _generate_datetime_array_sql,
|
|
1117
1794
|
exp.GenerateTimestampArray: _generate_datetime_array_sql,
|
|
1795
|
+
exp.Getbit: getbit_sql,
|
|
1118
1796
|
exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
|
|
1119
1797
|
exp.Explode: rename_func("UNNEST"),
|
|
1120
1798
|
exp.IntDiv: lambda self, e: self.binary(e, "//"),
|
|
1121
1799
|
exp.IsInf: rename_func("ISINF"),
|
|
1122
1800
|
exp.IsNan: rename_func("ISNAN"),
|
|
1123
|
-
exp.
|
|
1801
|
+
exp.IsNullValue: lambda self, e: self.sql(
|
|
1802
|
+
exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("NULL"))
|
|
1803
|
+
),
|
|
1804
|
+
exp.IsArray: lambda self, e: self.sql(
|
|
1805
|
+
exp.func("JSON_TYPE", e.this).eq(exp.Literal.string("ARRAY"))
|
|
1806
|
+
),
|
|
1807
|
+
exp.Ceil: _ceil_floor,
|
|
1808
|
+
exp.Floor: _ceil_floor,
|
|
1124
1809
|
exp.JSONBExists: rename_func("JSON_EXISTS"),
|
|
1125
1810
|
exp.JSONExtract: _arrow_json_extract_sql,
|
|
1126
1811
|
exp.JSONExtractArray: _json_extract_value_array_sql,
|
|
1127
1812
|
exp.JSONFormat: _json_format_sql,
|
|
1128
1813
|
exp.JSONValueArray: _json_extract_value_array_sql,
|
|
1129
1814
|
exp.Lateral: explode_to_unnest_sql,
|
|
1130
|
-
exp.LogicalOr:
|
|
1131
|
-
exp.LogicalAnd:
|
|
1815
|
+
exp.LogicalOr: lambda self, e: self.func("BOOL_OR", _cast_to_boolean(e.this)),
|
|
1816
|
+
exp.LogicalAnd: lambda self, e: self.func("BOOL_AND", _cast_to_boolean(e.this)),
|
|
1817
|
+
exp.Seq1: lambda self, e: _seq_sql(self, e, 1),
|
|
1818
|
+
exp.Seq2: lambda self, e: _seq_sql(self, e, 2),
|
|
1819
|
+
exp.Seq4: lambda self, e: _seq_sql(self, e, 4),
|
|
1820
|
+
exp.Seq8: lambda self, e: _seq_sql(self, e, 8),
|
|
1821
|
+
exp.BoolxorAgg: _boolxor_agg_sql,
|
|
1132
1822
|
exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
|
|
1133
1823
|
exp.Initcap: _initcap_sql,
|
|
1134
1824
|
exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
|
|
1135
1825
|
exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)),
|
|
1136
1826
|
exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)),
|
|
1137
|
-
exp.MonthsBetween:
|
|
1138
|
-
|
|
1139
|
-
"'month'",
|
|
1140
|
-
exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
|
|
1141
|
-
exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
|
|
1142
|
-
),
|
|
1827
|
+
exp.MonthsBetween: months_between_sql,
|
|
1828
|
+
exp.NextDay: _day_navigation_sql,
|
|
1143
1829
|
exp.PercentileCont: rename_func("QUANTILE_CONT"),
|
|
1144
1830
|
exp.PercentileDisc: rename_func("QUANTILE_DISC"),
|
|
1145
1831
|
# DuckDB doesn't allow qualified columns inside of PIVOT expressions.
|
|
1146
1832
|
# See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
|
|
1147
1833
|
exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
|
|
1834
|
+
exp.PreviousDay: _day_navigation_sql,
|
|
1148
1835
|
exp.RegexpReplace: lambda self, e: self.func(
|
|
1149
1836
|
"REGEXP_REPLACE",
|
|
1150
1837
|
e.this,
|
|
@@ -1172,16 +1859,16 @@ class DuckDB(Dialect):
             ),
             exp.Struct: _struct_sql,
             exp.Transform: rename_func("LIST_TRANSFORM"),
-            exp.TimeAdd:
-            exp.TimeSub:
+            exp.TimeAdd: _date_delta_to_binary_interval_op(),
+            exp.TimeSub: _date_delta_to_binary_interval_op(),
             exp.Time: no_time_sql,
             exp.TimeDiff: _timediff_sql,
             exp.Timestamp: no_timestamp_sql,
-            exp.TimestampAdd:
+            exp.TimestampAdd: _date_delta_to_binary_interval_op(),
             exp.TimestampDiff: lambda self, e: self.func(
                 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
             ),
-            exp.TimestampSub:
+            exp.TimestampSub: _date_delta_to_binary_interval_op(),
             exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
             exp.TimeStrToTime: timestrtotime_sql,
             exp.TimeStrToUnix: lambda self, e: self.func(
@@ -1192,7 +1879,7 @@ class DuckDB(Dialect):
             exp.TimeToUnix: rename_func("EPOCH"),
             exp.TsOrDiToDi: lambda self,
             e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
-            exp.TsOrDsAdd:
+            exp.TsOrDsAdd: _date_delta_to_binary_interval_op(),
             exp.TsOrDsDiff: lambda self, e: self.func(
                 "DATE_DIFF",
                 f"'{e.args.get('unit') or 'DAY'}'",
@@ -1216,13 +1903,23 @@ class DuckDB(Dialect):
             exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
             exp.VariancePop: rename_func("VAR_POP"),
             exp.WeekOfYear: rename_func("WEEKOFYEAR"),
-            exp.
-
-
+            exp.YearOfWeek: lambda self, e: self.sql(
+                exp.Extract(
+                    this=exp.Var(this="ISOYEAR"),
+                    expression=e.this,
+                )
             ),
+            exp.YearOfWeekIso: lambda self, e: self.sql(
+                exp.Extract(
+                    this=exp.Var(this="ISOYEAR"),
+                    expression=e.this,
+                )
+            ),
+            exp.Xor: _xor_sql,
             exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"),
             exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"),
             exp.DateBin: rename_func("TIME_BUCKET"),
+            exp.LastDay: _last_day_sql,
         }

         SUPPORTED_JSON_PATH_PARTS = {
@@ -1247,6 +1944,7 @@ class DuckDB(Dialect):
             exp.DataType.Type.VARBINARY: "BLOB",
             exp.DataType.Type.ROWVERSION: "BLOB",
             exp.DataType.Type.VARCHAR: "TEXT",
+            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMPTZ",
             exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
             exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
             exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
@@ -1359,6 +2057,277 @@ class DuckDB(Dialect):
             exp.NthValue,
         )

+        # Template for ZIPF transpilation - placeholders get replaced with actual parameters
+        ZIPF_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            WITH rand AS (SELECT :random_expr AS r),
+            weights AS (
+                SELECT i, 1.0 / POWER(i, :s) AS w
+                FROM RANGE(1, :n + 1) AS t(i)
+            ),
+            cdf AS (
+                SELECT i, SUM(w) OVER (ORDER BY i) / SUM(w) OVER () AS p
+                FROM weights
+            )
+            SELECT MIN(i)
+            FROM cdf
+            WHERE p >= (SELECT r FROM rand)
+            """
+        )
+
+        # Template for NORMAL transpilation using Box-Muller transform
+        # mean + (stddev * sqrt(-2 * ln(u1)) * cos(2 * pi * u2))
+        NORMAL_TEMPLATE: exp.Expression = exp.maybe_parse(
+            ":mean + (:stddev * SQRT(-2 * LN(GREATEST(:u1, 1e-10))) * COS(2 * PI() * :u2))"
+        )
+
+        # Template for generating a seeded pseudo-random value in [0, 1) from a hash
+        SEEDED_RANDOM_TEMPLATE: exp.Expression = exp.maybe_parse(
+            "(ABS(HASH(:seed)) % 1000000) / 1000000.0"
+        )
+
+        # Template for generating signed and unsigned SEQ values within a specified range
+        SEQ_UNSIGNED: exp.Expression = exp.maybe_parse(f"{_SEQ_BASE} % :max_val")
+        SEQ_SIGNED: exp.Expression = exp.maybe_parse(
+            f"(CASE WHEN {_SEQ_BASE} % :max_val >= :half "
+            f"THEN {_SEQ_BASE} % :max_val - :max_val "
+            f"ELSE {_SEQ_BASE} % :max_val END)"
+        )
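The Box-Muller comment above compresses the whole technique into one line. As a sanity check, here is the same transform in plain Python (an illustrative sketch, not part of the package), which also shows why the template clamps with GREATEST(:u1, 1e-10): LN must be kept away from zero.

    import math, random

    def box_muller(mean: float, stddev: float, u1: float, u2: float) -> float:
        # Clamp u1 away from 0, mirroring GREATEST(:u1, 1e-10) in the template,
        # since ln(0) is undefined.
        u1 = max(u1, 1e-10)
        return mean + stddev * math.sqrt(-2 * math.log(u1)) * math.cos(2 * math.pi * u2)

    samples = [box_muller(0.0, 1.0, random.random(), random.random()) for _ in range(100_000)]
    print(sum(samples) / len(samples))  # close to 0.0 for a standard normal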
+
+        # Template for MAP_CAT transpilation - Snowflake semantics:
+        # 1. Returns NULL if either input is NULL
+        # 2. For duplicate keys, prefers non-NULL value (COALESCE(m2[k], m1[k]))
+        # 3. Filters out entries with NULL values from the result
+        MAPCAT_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            CASE
+                WHEN :map1 IS NULL OR :map2 IS NULL THEN NULL
+                ELSE MAP_FROM_ENTRIES(LIST_FILTER(LIST_TRANSFORM(
+                    LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(:map1), MAP_KEYS(:map2))),
+                    __k -> STRUCT_PACK(key := __k, value := COALESCE(:map2[__k], :map1[__k]))
+                ), __x -> __x.value IS NOT NULL))
+            END
+            """
+        )
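The three MAP_CAT rules in the comment are easiest to see outside SQL. A minimal Python sketch of the same semantics, with None standing in for SQL NULL:

    def map_cat(m1, m2):
        # Rule 1: NULL propagates.
        if m1 is None or m2 is None:
            return None
        # LIST_DISTINCT(LIST_CONCAT(MAP_KEYS(m1), MAP_KEYS(m2)))
        keys = list(dict.fromkeys([*m1, *m2]))
        # Rule 2: COALESCE(m2[k], m1[k]) per key.
        merged = {k: m2.get(k) if m2.get(k) is not None else m1.get(k) for k in keys}
        # Rule 3: drop entries whose value is NULL.
        return {k: v for k, v in merged.items() if v is not None}

    print(map_cat({"a": 1, "b": None}, {"b": 2, "c": None}))  # {'a': 1, 'b': 2}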
+
+        # Mappings for EXTRACT/DATE_PART transpilation
+        # Maps Snowflake specifiers unsupported in DuckDB to strftime format codes
+        EXTRACT_STRFTIME_MAPPINGS: t.Dict[str, t.Tuple[str, str]] = {
+            "WEEKISO": ("%V", "INTEGER"),
+            "YEAROFWEEK": ("%G", "INTEGER"),
+            "YEAROFWEEKISO": ("%G", "INTEGER"),
+            "NANOSECOND": ("%n", "BIGINT"),
+        }
+
+        # Maps epoch-based specifiers to DuckDB epoch functions
+        EXTRACT_EPOCH_MAPPINGS: t.Dict[str, str] = {
+            "EPOCH_SECOND": "EPOCH",
+            "EPOCH_MILLISECOND": "EPOCH_MS",
+            "EPOCH_MICROSECOND": "EPOCH_US",
+            "EPOCH_NANOSECOND": "EPOCH_NS",
+        }
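The %G and %V codes are the standard ISO year/week strftime specifiers, which is what makes this mapping portable. A quick illustration with Python's datetime (the 2020/2021 boundary is the classic case where YEAR and YEAROFWEEKISO diverge):

    from datetime import date

    # 2021-01-01 falls in ISO week 53 of ISO year 2020.
    d = date(2021, 1, 1)
    print(d.year)            # 2021
    print(d.strftime("%G"))  # 2020 (ISO year -> YEAROFWEEK / YEAROFWEEKISO)
    print(d.strftime("%V"))  # 53   (ISO week -> WEEKISO)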
+
+        # Template for BITMAP_CONSTRUCT_AGG transpilation
+        #
+        # BACKGROUND:
+        # Snowflake's BITMAP_CONSTRUCT_AGG aggregates integers into a compact binary bitmap.
+        # Supports values in range 0-32767; this version returns NULL if any value is out of range.
+        # See: https://docs.snowflake.com/en/sql-reference/functions/bitmap_construct_agg
+        # See: https://docs.snowflake.com/en/user-guide/querying-bitmaps-for-distinct-counts
+        #
+        # Snowflake uses two different formats based on the number of unique values:
+        #
+        # Format 1 - Small bitmap (< 5 unique values): Length of 10 bytes
+        #   Bytes 0-1: Count of values as 2-byte big-endian integer (e.g., 3 values = 0x0003)
+        #   Bytes 2-9: Up to 4 values, each as 2-byte little-endian integers, zero-padded to 8 bytes
+        #   Example: Values [1, 2, 3] -> 0x0003 0100 0200 0300 0000 (hex)
+        #                                  count v1   v2   v3   pad
+        #
+        # Format 2 - Large bitmap (>= 5 unique values): Length of 10 + (2 * count) bytes
+        #   Bytes 0-9: Fixed header 0x08 followed by 9 zero bytes
+        #   Bytes 10+: Each value as 2-byte little-endian integer (no padding)
+        #   Example: Values [1,2,3,4,5] -> 0x08 00000000 00000000 00 0100 0200 0300 0400 0500
+        #                                  hdr  ----9 zero bytes---- v1   v2   v3   v4   v5
+        #
+        # TEMPLATE STRUCTURE
+        #
+        # Phase 1 - Innermost subquery: Data preparation
+        #   SELECT LIST_SORT(...) AS l
+        #   - Aggregates all input values into a list, removes NULLs and duplicates, and sorts
+        #   Result: Clean, sorted list of unique non-null integers stored as 'l'
+        #
+        # Phase 2 - Middle subquery: Hex string construction
+        #   LIST_TRANSFORM(...)
+        #   - Converts each integer to 2-byte little-endian hex representation
+        #   - & 255 extracts low byte, >> 8 extracts high byte
+        #   - LIST_REDUCE: Concatenates all hex pairs into single string 'h'
+        #   Result: Hex string of all values
+        #
+        # Phase 3 - Outer SELECT: Final bitmap assembly
+        #   LENGTH(l) < 5:
+        #   - Small format: 2-byte count (big-endian via %04X) + values + zero padding
+        #   LENGTH(l) >= 5:
+        #   - Large format: Fixed 10-byte header + values (no padding needed)
+        #   Result: Complete binary bitmap as BLOB
+        #
+        BITMAP_CONSTRUCT_AGG_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            SELECT CASE
+                WHEN l IS NULL OR LENGTH(l) = 0 THEN NULL
+                WHEN LENGTH(l) != LENGTH(LIST_FILTER(l, __v -> __v BETWEEN 0 AND 32767)) THEN NULL
+                WHEN LENGTH(l) < 5 THEN UNHEX(PRINTF('%04X', LENGTH(l)) || h || REPEAT('00', GREATEST(0, 4 - LENGTH(l)) * 2))
+                ELSE UNHEX('08000000000000000000' || h)
+            END
+            FROM (
+                SELECT l, COALESCE(LIST_REDUCE(
+                    LIST_TRANSFORM(l, __x -> PRINTF('%02X%02X', CAST(__x AS INT) & 255, (CAST(__x AS INT) >> 8) & 255)),
+                    (__a, __b) -> __a || __b, ''
+                ), '') AS h
+                FROM (SELECT LIST_SORT(LIST_DISTINCT(LIST(:arg) FILTER(NOT :arg IS NULL))) AS l)
+            )
+            """
+        )
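The small-format layout is easy to verify by hand. A short Python sketch (a hypothetical helper mirroring the PRINTF('%04X', ...) count and low-byte/high-byte logic above) reproduces the worked example from the comment:

    def small_bitmap_hex(values: list[int]) -> str:
        # 2-byte big-endian count, then each value as 2-byte little-endian hex,
        # zero-padded so the value section is always 8 bytes (4 slots).
        assert 0 < len(values) < 5 and all(0 <= v <= 32767 for v in values)
        count = f"{len(values):04X}"
        body = "".join(f"{v & 255:02X}{(v >> 8) & 255:02X}" for v in values)
        pad = "00" * (2 * (4 - len(values)))
        return count + body + pad

    print(small_bitmap_hex([1, 2, 3]))  # "00030100020003000000" = 0x0003 0100 0200 0300 0000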
+
+        # Template for RANDSTR transpilation - placeholders get replaced with actual parameters
+        RANDSTR_TEMPLATE: exp.Expression = exp.maybe_parse(
+            f"""
+            SELECT LISTAGG(
+                SUBSTRING(
+                    '{RANDSTR_CHAR_POOL}',
+                    1 + CAST(FLOOR(random_value * 62) AS INT),
+                    1
+                ),
+                ''
+            )
+            FROM (
+                SELECT (ABS(HASH(i + :seed)) % 1000) / 1000.0 AS random_value
+                FROM RANGE(:length) AS t(i)
+            )
+            """,
+        )
+
+        # Template for MINHASH transpilation
+        # Computes k minimum hash values across aggregated data using DuckDB list functions
+        # Returns JSON matching Snowflake format: {"state": [...], "type": "minhash", "version": 1}
+        MINHASH_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            SELECT JSON_OBJECT('state', LIST(min_h ORDER BY seed), 'type', 'minhash', 'version', 1)
+            FROM (
+                SELECT seed, LIST_MIN(LIST_TRANSFORM(vals, __v -> HASH(CAST(__v AS VARCHAR) || CAST(seed AS VARCHAR)))) AS min_h
+                FROM (SELECT LIST(:expr) AS vals), RANGE(0, :k) AS t(seed)
+            )
+            """,
+        )
+
+        # Template for MINHASH_COMBINE transpilation
+        # Combines multiple minhash signatures by taking element-wise minimum
+        MINHASH_COMBINE_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            SELECT JSON_OBJECT('state', LIST(min_h ORDER BY idx), 'type', 'minhash', 'version', 1)
+            FROM (
+                SELECT
+                    pos AS idx,
+                    MIN(val) AS min_h
+                FROM
+                    UNNEST(LIST(:expr)) AS _(sig),
+                    UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS t(val, pos)
+                GROUP BY pos
+            )
+            """,
+        )
+
+        # Template for APPROXIMATE_SIMILARITY transpilation
+        # Computes multi-way Jaccard similarity: fraction of positions where ALL signatures agree
+        APPROXIMATE_SIMILARITY_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            SELECT CAST(SUM(CASE WHEN num_distinct = 1 THEN 1 ELSE 0 END) AS DOUBLE) / COUNT(*)
+            FROM (
+                SELECT pos, COUNT(DISTINCT h) AS num_distinct
+                FROM (
+                    SELECT h, pos
+                    FROM UNNEST(LIST(:expr)) AS _(sig),
+                    UNNEST(CAST(sig -> 'state' AS UBIGINT[])) WITH ORDINALITY AS s(h, pos)
+                )
+                GROUP BY pos
+            )
+            """,
+        )
+
+        # Template for ARRAYS_ZIP transpilation
+        # Snowflake pads to longest array; DuckDB LIST_ZIP truncates to shortest
+        # Uses RANGE + indexing to match Snowflake behavior
+        ARRAYS_ZIP_TEMPLATE: exp.Expression = exp.maybe_parse(
+            """
+            CASE WHEN :null_check THEN NULL
+                 WHEN :all_empty_check THEN [:empty_struct]
+                 ELSE LIST_TRANSFORM(RANGE(0, :max_len), __i -> :transform_struct)
+            END
+            """,
+        )
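The padding difference called out in the ARRAYS_ZIP comment, in plain Python terms: zip behaves like DuckDB's LIST_ZIP, while zip_longest behaves like Snowflake's ARRAYS_ZIP.

    from itertools import zip_longest

    a, b = [1, 2, 3], ["x"]

    # DuckDB LIST_ZIP-style: truncate to the shortest input
    print(list(zip(a, b)))                          # [(1, 'x')]

    # Snowflake ARRAYS_ZIP-style: pad to the longest input with NULL (None)
    print(list(zip_longest(a, b, fillvalue=None)))  # [(1, 'x'), (2, None), (3, None)]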
+
+        def timeslice_sql(self: DuckDB.Generator, expression: exp.TimeSlice) -> str:
+            """
+            Transform Snowflake's TIME_SLICE to DuckDB's time_bucket.
+
+            Snowflake: TIME_SLICE(date_expr, slice_length, 'UNIT' [, 'START'|'END'])
+            DuckDB: time_bucket(INTERVAL 'slice_length' UNIT, date_expr)
+
+            For 'END' kind, add the interval to get the end of the slice.
+            For DATE type with 'END', cast result back to DATE to preserve type.
+            """
+            date_expr = expression.this
+            slice_length = expression.expression
+            unit = expression.unit
+            kind = expression.text("kind").upper()
+
+            # Create INTERVAL expression: INTERVAL 'N' UNIT
+            interval_expr = exp.Interval(this=slice_length, unit=unit)
+
+            # Create base time_bucket expression
+            time_bucket_expr = exp.func("time_bucket", interval_expr, date_expr)
+
+            # Check if we need the end of the slice (default is start)
+            if not kind == "END":
+                # For 'START', return time_bucket directly
+                return self.sql(time_bucket_expr)
+
+            # For 'END', add the interval to get end of slice
+            add_expr = exp.Add(this=time_bucket_expr, expression=interval_expr.copy())
+
+            # If input is DATE type, cast result back to DATE to preserve type
+            # DuckDB converts DATE to TIMESTAMP when adding intervals
+            if date_expr.is_type(exp.DataType.Type.DATE):
+                return self.sql(exp.cast(add_expr, exp.DataType.Type.DATE))
+
+            return self.sql(add_expr)
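Given that the Snowflake dialect parses TIME_SLICE into exp.TimeSlice (as this diff suggests), the mapping above is observable through the public transpile API. A hedged end-to-end sketch; the exact rendering may differ between sqlglot versions:

    import sqlglot

    sql = "SELECT TIME_SLICE(created_at, 15, 'MINUTE', 'END')"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Expected shape, per the docstring above:
    # SELECT TIME_BUCKET(INTERVAL '15' MINUTE, created_at) + INTERVAL '15' MINUTE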
+
+        def bitmapbucketnumber_sql(
+            self: DuckDB.Generator, expression: exp.BitmapBucketNumber
+        ) -> str:
+            """
+            Transpile BITMAP_BUCKET_NUMBER function from Snowflake to DuckDB equivalent.
+
+            Snowflake's BITMAP_BUCKET_NUMBER returns a 1-based bucket identifier where:
+            - Each bucket covers 32,768 values
+            - Bucket numbering starts at 1
+            - Formula: ((value - 1) // 32768) + 1 for positive values
+
+            For non-positive values (0 and negative), we use value // 32768 to avoid
+            producing bucket 0 or positive bucket IDs for negative inputs.
+            """
+            value = expression.this
+
+            positive_formula = ((value - 1) // 32768) + 1
+            non_positive_formula = value // 32768
+
+            # CASE WHEN value > 0 THEN ((value - 1) // 32768) + 1 ELSE value // 32768 END
+            case_expr = (
+                exp.case()
+                .when(exp.GT(this=value, expression=exp.Literal.number(0)), positive_formula)
+                .else_(non_positive_formula)
+            )
+            return self.sql(case_expr)
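The bucket formula from the docstring, checked directly in plain Python. Note the boundary at 32768, the last value of the first bucket:

    def bucket_number(value: int) -> int:
        # 1-based buckets of 32,768 values for positive inputs,
        # plain floor division for zero and negatives.
        return (value - 1) // 32768 + 1 if value > 0 else value // 32768

    print(bucket_number(1))      # 1
    print(bucket_number(32768))  # 1
    print(bucket_number(32769))  # 2
    print(bucket_number(0))      # 0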
+
         def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
             """
             Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
@@ -1382,9 +2351,24 @@ class DuckDB(Dialect):
                 )
             )

+        def bitmapconstructagg_sql(
+            self: DuckDB.Generator, expression: exp.BitmapConstructAgg
+        ) -> str:
+            """
+            Transpile Snowflake's BITMAP_CONSTRUCT_AGG to DuckDB equivalent.
+            Uses a pre-parsed template with placeholders replaced by expression nodes.
+
+            Snowflake bitmap format:
+            - Small (< 5 unique values): 2-byte count (big-endian) + values (little-endian) + padding to 10 bytes
+            - Large (>= 5 unique values): 10-byte header (0x08 + 9 zeros) + values (little-endian)
+            """
+            arg = expression.this
+            return f"({self.sql(exp.replace_placeholders(self.BITMAP_CONSTRUCT_AGG_TEMPLATE, arg=arg))})"
+
         def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
             """
             Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
+            Uses a pre-parsed template with placeholders replaced by expression nodes.

             RANDSTR(length, generator) generates a random string of specified length.
             - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
@@ -1405,40 +2389,49 @@ class DuckDB(Dialect):
             # No generator specified, use default seed (arbitrary but deterministic)
             seed_value = exp.Literal.number(RANDSTR_SEED)

-
-
+            replacements = {"seed": seed_value, "length": length}
+            return f"({self.sql(exp.replace_placeholders(self.RANDSTR_TEMPLATE, **replacements))})"
+
+        def zipf_sql(self: DuckDB.Generator, expression: exp.Zipf) -> str:
+            """
+            Transpile Snowflake's ZIPF to DuckDB using CDF-based inverse sampling.
+            Uses a pre-parsed template with placeholders replaced by expression nodes.
+            """
+            s = expression.this
+            n = expression.args["elementcount"]
+            gen = expression.args["gen"]

-
-
-
-
-
-
-
+            if not isinstance(gen, exp.Rand):
+                # (ABS(HASH(seed)) % 1000000) / 1000000.0
+                random_expr: exp.Expression = exp.Div(
+                    this=exp.Paren(
+                        this=exp.Mod(
+                            this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen.copy()])),
+                            expression=exp.Literal.number(1000000),
+                        )
                     ),
-
+                    expression=exp.Literal.number(1000000.0),
                 )
-
-
-
-
-
-            )
-        return f"({self.sql(query)})"
+            else:
+                # Use RANDOM() for non-deterministic output
+                random_expr = exp.Rand()
+
+            replacements = {"s": s, "n": n, "random_expr": random_expr}
+            return f"({self.sql(exp.replace_placeholders(self.ZIPF_TEMPLATE, **replacements))})"

         def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
             """
-            TO_BINARY
+            TO_BINARY and TRY_TO_BINARY transpilation:
             - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
             - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
             - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

-
-
+            For TRY_TO_BINARY (safe=True), wrap with TRY():
+            - 'HEX': TRY_TO_BINARY('invalid', 'HEX') → TRY(UNHEX('invalid'))
             """
             value = expression.this
             format_arg = expression.args.get("format")
+            is_safe = expression.args.get("safe")

             fmt = "HEX"
             if format_arg:
@@ -1446,12 +2439,23 @@ class DuckDB(Dialect):

             if expression.is_type(exp.DataType.Type.BINARY):
                 if fmt == "UTF-8":
-
-
-
+                    result = self.func("ENCODE", value)
+                elif fmt == "BASE64":
+                    result = self.func("FROM_BASE64", value)
+                elif fmt == "HEX":
+                    result = self.func("UNHEX", value)
+                else:
+                    if is_safe:
+                        return self.sql(exp.null())
+                    else:
+                        self.unsupported(f"format {fmt} is not supported")
+                    result = self.func("TO_BINARY", value)

-                #
-
+                # Wrap with TRY() for TRY_TO_BINARY
+                if is_safe:
+                    result = self.func("TRY", result)
+
+                return result

             # Fallback, which needs to be updated if we want to support transpilation from other dialects than Snowflake
             return self.func("TO_BINARY", value)
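The docstring's HEX/UTF-8/BASE64 mappings can be exercised through the public transpile API. A sketch, with the expected shapes noted as comments since exact output can vary by version:

    import sqlglot

    for fn in ("TO_BINARY('48454C50', 'HEX')", "TRY_TO_BINARY('invalid', 'HEX')"):
        print(sqlglot.transpile(f"SELECT {fn}", read="snowflake", write="duckdb")[0])
    # Roughly: SELECT UNHEX('48454C50')
    #          SELECT TRY(UNHEX('invalid'))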
@@ -1462,25 +2466,39 @@ class DuckDB(Dialect):
             """
             Handle GREATEST/LEAST functions with dialect-aware NULL behavior.

-            - If
-            - If
+            - If ignore_nulls=False (BigQuery-style): return NULL if any argument is NULL
+            - If ignore_nulls=True (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
             """
             # Get all arguments
             all_args = [expression.this, *expression.expressions]
             fallback_sql = self.function_fallback_sql(expression)

-            if expression.args.get("
-                #
-
-                    exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
-                    exp.null(),
-                    copy=False,
-                )
-                case_expr.set("default", fallback_sql)
-                return self.sql(case_expr)
+            if expression.args.get("ignore_nulls"):
+                # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
+                return self.sql(fallback_sql)

-            #
-
+            # return NULL if any argument is NULL
+            case_expr = exp.case().when(
+                exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
+                exp.null(),
+                copy=False,
+            )
+            case_expr.set("default", fallback_sql)
+            return self.sql(case_expr)
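The ignore_nulls split above is what lets NULL-sensitive dialects round-trip correctly: BigQuery's GREATEST returns NULL when any argument is NULL, while DuckDB's ignores NULLs. A hedged sketch of the expected rewrite (output shape may vary by version):

    import sqlglot

    print(sqlglot.transpile("SELECT GREATEST(a, b)", read="bigquery", write="duckdb")[0])
    # Roughly:
    # SELECT CASE WHEN a IS NULL OR b IS NULL THEN NULL ELSE GREATEST(a, b) END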
+
+        def generator_sql(self, expression: exp.Generator) -> str:
+            # Transpile Snowflake GENERATOR to DuckDB range()
+            rowcount = expression.args.get("rowcount")
+            time_limit = expression.args.get("time_limit")
+
+            if time_limit:
+                self.unsupported("GENERATOR TIMELIMIT parameter is not supported in DuckDB")
+
+            if not rowcount:
+                self.unsupported("GENERATOR without ROWCOUNT is not supported in DuckDB")
+                return self.func("range", exp.Literal.number(0))
+
+            return self.func("range", rowcount)

         def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
             return self._greatest_least_sql(expression)
@@ -1521,16 +2539,58 @@ class DuckDB(Dialect):
             return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

         def strtotime_sql(self, expression: exp.StrToTime) -> str:
+            # Check if target_type requires TIMESTAMPTZ (for LTZ/TZ variants)
+            target_type = expression.args.get("target_type")
+            needs_tz = target_type and target_type.this in (
+                exp.DataType.Type.TIMESTAMPLTZ,
+                exp.DataType.Type.TIMESTAMPTZ,
+            )
+
             if expression.args.get("safe"):
                 formatted_time = self.format_time(expression)
-
-
+                cast_type = (
+                    exp.DataType.Type.TIMESTAMPTZ if needs_tz else exp.DataType.Type.TIMESTAMP
+                )
+                return self.sql(
+                    exp.cast(self.func("TRY_STRPTIME", expression.this, formatted_time), cast_type)
+                )
+
+            base_sql = str_to_time_sql(self, expression)
+            if needs_tz:
+                return self.sql(
+                    exp.cast(
+                        base_sql,
+                        exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ),
+                    )
+                )
+            return base_sql

         def strtodate_sql(self, expression: exp.StrToDate) -> str:
-
-
-
-
+            formatted_time = self.format_time(expression)
+            function_name = "STRPTIME" if not expression.args.get("safe") else "TRY_STRPTIME"
+            return self.sql(
+                exp.cast(
+                    self.func(function_name, expression.this, formatted_time),
+                    exp.DataType(this=exp.DataType.Type.DATE),
+                )
+            )
+
+        def tsordstotime_sql(self, expression: exp.TsOrDsToTime) -> str:
+            this = expression.this
+            time_format = self.format_time(expression)
+            safe = expression.args.get("safe")
+            time_type = exp.DataType.build("TIME", dialect="duckdb")
+            cast_expr = exp.TryCast if safe else exp.Cast
+
+            if time_format:
+                func_name = "TRY_STRPTIME" if safe else "STRPTIME"
+                strptime = exp.Anonymous(this=func_name, expressions=[this, time_format])
+                return self.sql(cast_expr(this=strptime, to=time_type))
+
+            if isinstance(this, exp.TsOrDsToTime) or this.is_type(exp.DataType.Type.TIME):
+                return self.sql(this)
+
+            return self.sql(cast_expr(this=this, to=time_type))

         def currentdate_sql(self, expression: exp.CurrentDate) -> str:
             if not expression.this:
@@ -1548,17 +2608,210 @@ class DuckDB(Dialect):
                 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
             return self.func("JSON", arg)

+        def normal_sql(self, expression: exp.Normal) -> str:
+            """
+            Transpile Snowflake's NORMAL(mean, stddev, gen) to DuckDB.
+
+            Uses the Box-Muller transform via NORMAL_TEMPLATE.
+            """
+            mean = expression.this
+            stddev = expression.args["stddev"]
+            gen: exp.Expression = expression.args["gen"]
+
+            # Build two uniform random values [0, 1) for Box-Muller transform
+            if isinstance(gen, exp.Rand) and gen.this is None:
+                u1: exp.Expression = exp.Rand()
+                u2: exp.Expression = exp.Rand()
+            else:
+                # Seeded: derive two values using HASH with different inputs
+                seed = gen.this if isinstance(gen, exp.Rand) else gen
+                u1 = exp.replace_placeholders(self.SEEDED_RANDOM_TEMPLATE, seed=seed)
+                u2 = exp.replace_placeholders(
+                    self.SEEDED_RANDOM_TEMPLATE,
+                    seed=exp.Add(this=seed.copy(), expression=exp.Literal.number(1)),
+                )
+
+            replacements = {"mean": mean, "stddev": stddev, "u1": u1, "u2": u2}
+            return self.sql(exp.replace_placeholders(self.NORMAL_TEMPLATE, **replacements))
+
+        def uniform_sql(self, expression: exp.Uniform) -> str:
+            """
+            Transpile Snowflake's UNIFORM(min, max, gen) to DuckDB.
+
+            UNIFORM returns a random value in [min, max]:
+            - Integer result if both min and max are integers
+            - Float result if either min or max is a float
+            """
+            min_val = expression.this
+            max_val = expression.expression
+            gen = expression.args.get("gen")
+
+            # Determine if result should be integer (both bounds are integers).
+            # We do this to emulate Snowflake's behavior, INT -> INT, FLOAT -> FLOAT
+            is_int_result = min_val.is_int and max_val.is_int
+
+            # Build the random value expression [0, 1)
+            if not isinstance(gen, exp.Rand):
+                # Seed value: (ABS(HASH(seed)) % 1000000) / 1000000.0
+                random_expr: exp.Expression = exp.Div(
+                    this=exp.Paren(
+                        this=exp.Mod(
+                            this=exp.Abs(this=exp.Anonymous(this="HASH", expressions=[gen])),
+                            expression=exp.Literal.number(1000000),
+                        )
+                    ),
+                    expression=exp.Literal.number(1000000.0),
+                )
+            else:
+                random_expr = exp.Rand()
+
+            # Build: min + random * (max - min [+ 1 for int])
+            range_expr: exp.Expression = exp.Sub(this=max_val, expression=min_val)
+            if is_int_result:
+                range_expr = exp.Add(this=range_expr, expression=exp.Literal.number(1))
+
+            result: exp.Expression = exp.Add(
+                this=min_val,
+                expression=exp.Mul(this=random_expr, expression=exp.Paren(this=range_expr)),
+            )
+
+            if is_int_result:
+                result = exp.Cast(
+                    this=exp.Floor(this=result),
+                    to=exp.DataType.build("BIGINT"),
+                )
+
+            return self.sql(result)
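The min + rand * (max - min + 1) construction gives an inclusive integer range precisely because rand lies in [0, 1). A quick empirical check of the formula in plain Python (a sketch of the arithmetic, not the generated SQL):

    import random

    def uniform_int(lo: int, hi: int) -> int:
        # The "+ 1" makes the upper bound inclusive, since random() < 1.
        return int(lo + random.random() * (hi - lo + 1))

    counts = {v: 0 for v in range(3, 6)}
    for _ in range(30_000):
        counts[uniform_int(3, 5)] += 1
    print(counts)  # each of 3, 4, 5 drawn roughly 10,000 times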
+
         def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
             nano = expression.args.get("nano")
-
+            overflow = expression.args.get("overflow")
+
+            # Snowflake's TIME_FROM_PARTS supports overflow
+            if overflow:
+                hour = expression.args["hour"]
+                minute = expression.args["min"]
+                sec = expression.args["sec"]
+
+                # Check if values are within normal ranges - use MAKE_TIME for efficiency
+                if not nano and all(arg.is_int for arg in [hour, minute, sec]):
+                    try:
+                        h_val = hour.to_py()
+                        m_val = minute.to_py()
+                        s_val = sec.to_py()
+                        if 0 <= h_val <= 23 and 0 <= m_val <= 59 and 0 <= s_val <= 59:
+                            return rename_func("MAKE_TIME")(self, expression)
+                    except ValueError:
+                        pass
+
+                # Overflow or nanoseconds detected - use INTERVAL arithmetic
+                if nano:
+                    sec = sec + nano.pop() / exp.Literal.number(1000000000.0)
+
+                total_seconds = (
+                    hour * exp.Literal.number(3600) + minute * exp.Literal.number(60) + sec
+                )
+
+                return self.sql(
+                    exp.Add(
+                        this=exp.Cast(
+                            this=exp.Literal.string("00:00:00"), to=exp.DataType.build("TIME")
+                        ),
+                        expression=exp.Interval(this=total_seconds, unit=exp.var("SECOND")),
+                    )
+                )
+
+            # Default: MAKE_TIME
+            if nano:
                 expression.set(
                     "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                 )

             return rename_func("MAKE_TIME")(self, expression)

+        def extract_sql(self, expression: exp.Extract) -> str:
+            """
+            Transpile EXTRACT/DATE_PART for DuckDB, handling specifiers not natively supported.
+
+            DuckDB doesn't support: WEEKISO, YEAROFWEEK, YEAROFWEEKISO, NANOSECOND,
+            EPOCH_SECOND (as integer), EPOCH_MILLISECOND, EPOCH_MICROSECOND, EPOCH_NANOSECOND
+            """
+            this = expression.this
+            datetime_expr = expression.expression
+
+            # TIMESTAMPTZ extractions may produce different results between Snowflake and DuckDB
+            # because Snowflake applies server timezone while DuckDB uses local timezone
+            if datetime_expr.is_type(exp.DataType.Type.TIMESTAMPTZ, exp.DataType.Type.TIMESTAMPLTZ):
+                self.unsupported(
+                    "EXTRACT from TIMESTAMPTZ / TIMESTAMPLTZ may produce different results due to timezone handling differences"
+                )
+
+            part_name = this.name.upper()
+
+            if part_name in self.EXTRACT_STRFTIME_MAPPINGS:
+                fmt, cast_type = self.EXTRACT_STRFTIME_MAPPINGS[part_name]
+
+                # Problem: strftime doesn't accept TIME and there's no NANOSECOND function
+                # So, for NANOSECOND with TIME, fall back to MICROSECOND * 1000
+                is_nano_time = part_name == "NANOSECOND" and datetime_expr.is_type(
+                    exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ
+                )
+
+                if is_nano_time:
+                    self.unsupported(
+                        "Parameter NANOSECOND is not supported with TIME type in DuckDB"
+                    )
+                    return self.sql(
+                        exp.cast(
+                            exp.Mul(
+                                this=exp.Extract(
+                                    this=exp.var("MICROSECOND"), expression=datetime_expr
+                                ),
+                                expression=exp.Literal.number(1000),
+                            ),
+                            exp.DataType.build(cast_type, dialect="duckdb"),
+                        )
+                    )
+
+                # For NANOSECOND, cast to TIMESTAMP_NS to preserve nanosecond precision
+                strftime_input = datetime_expr
+                if part_name == "NANOSECOND":
+                    strftime_input = exp.cast(datetime_expr, exp.DataType.Type.TIMESTAMP_NS)
+
+                return self.sql(
+                    exp.cast(
+                        exp.Anonymous(
+                            this="STRFTIME",
+                            expressions=[strftime_input, exp.Literal.string(fmt)],
+                        ),
+                        exp.DataType.build(cast_type, dialect="duckdb"),
+                    )
+                )
+
+            if part_name in self.EXTRACT_EPOCH_MAPPINGS:
+                func_name = self.EXTRACT_EPOCH_MAPPINGS[part_name]
+                result: exp.Expression = exp.Anonymous(this=func_name, expressions=[datetime_expr])
+                # EPOCH returns float, cast to BIGINT for integer result
+                if part_name == "EPOCH_SECOND":
+                    result = exp.cast(result, exp.DataType.build("BIGINT", dialect="duckdb"))
+                return self.sql(result)
+
+            return super().extract_sql(expression)
+
         def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
-
+            # Check if this is the date/time expression form: TIMESTAMP_FROM_PARTS(date_expr, time_expr)
+            date_expr = expression.this
+            time_expr = expression.expression
+
+            if date_expr is not None and time_expr is not None:
+                # In DuckDB, DATE + TIME produces TIMESTAMP
+                return self.sql(exp.Add(this=date_expr, expression=time_expr))
+
+            # Component-based form: TIMESTAMP_FROM_PARTS(year, month, day, hour, minute, second, ...)
+            sec = expression.args.get("sec")
+            if sec is None:
+                # This shouldn't happen with valid input, but handle gracefully
+                return rename_func("MAKE_TIMESTAMP")(self, expression)

             milli = expression.args.get("milli")
             if milli is not None:
@@ -1573,6 +2826,34 @@ class DuckDB(Dialect):

             return rename_func("MAKE_TIMESTAMP")(self, expression)

+        @unsupported_args("nano")
+        def timestampltzfromparts_sql(self, expression: exp.TimestampLtzFromParts) -> str:
+            # Pop nano so rename_func only passes args that MAKE_TIMESTAMP accepts
+            if nano := expression.args.get("nano"):
+                nano.pop()
+
+            timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
+            return f"CAST({timestamp} AS TIMESTAMPTZ)"
+
+        @unsupported_args("nano")
+        def timestamptzfromparts_sql(self, expression: exp.TimestampTzFromParts) -> str:
+            # Extract zone before popping
+            zone = expression.args.get("zone")
+            # Pop zone and nano so rename_func only passes args that MAKE_TIMESTAMP accepts
+            if zone:
+                zone = zone.pop()
+
+            if nano := expression.args.get("nano"):
+                nano.pop()
+
+            timestamp = rename_func("MAKE_TIMESTAMP")(self, expression)
+
+            if zone:
+                # Use AT TIME ZONE to apply the explicit timezone
+                return f"{timestamp} AT TIME ZONE {self.sql(zone)}"
+
+            return timestamp
+
         def tablesample_sql(
             self,
             expression: exp.TableSample,
@@ -1652,9 +2933,35 @@ class DuckDB(Dialect):
             return bracket

         def withingroup_sql(self, expression: exp.WithinGroup) -> str:
+            func = expression.this
+
+            # For ARRAY_AGG, DuckDB requires ORDER BY inside the function, not in WITHIN GROUP
+            # Transform: ARRAY_AGG(x) WITHIN GROUP (ORDER BY y) -> ARRAY_AGG(x ORDER BY y)
+            if isinstance(func, exp.ArrayAgg):
+                if not isinstance(order := expression.expression, exp.Order):
+                    return self.sql(func)
+
+                # Save the original column for FILTER clause (before wrapping with Order)
+                original_this = func.this
+
+                # Move ORDER BY inside ARRAY_AGG by wrapping its argument with Order
+                # ArrayAgg.this should become Order(this=ArrayAgg.this, expressions=order.expressions)
+                func.set(
+                    "this",
+                    exp.Order(
+                        this=func.this.copy(),
+                        expressions=order.expressions,
+                    ),
+                )
+
+                # Generate the ARRAY_AGG function with ORDER BY and add FILTER clause if needed
+                # Use original_this (not the Order-wrapped version) for the FILTER condition
+                array_agg_sql = self.function_fallback_sql(func)
+                return self._add_arrayagg_null_filter(array_agg_sql, func, original_this)
+
+            # For other functions (like PERCENTILES), use existing logic
             expression_sql = self.sql(expression, "expression")

-            func = expression.this
             if isinstance(func, exp.PERCENTILES):
                 # Make the order key the first arg and slide the fraction to the right
                 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
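The ORDER BY relocation described in the comments is observable through the public API. A hedged sketch; exact output may differ across versions:

    import sqlglot

    sql = "SELECT ARRAY_AGG(name) WITHIN GROUP (ORDER BY id) FROM t"
    print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
    # Roughly: SELECT ARRAY_AGG(name ORDER BY id) FROM t
    # (possibly with a FILTER(... IS NOT NULL) clause, per the null-filter helper above)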
@@ -1697,6 +3004,98 @@ class DuckDB(Dialect):

             return self.sql(case)

+        @unsupported_args("ins_cost", "del_cost", "sub_cost")
+        def levenshtein_sql(self, expression: exp.Levenshtein) -> str:
+            this = expression.this
+            expr = expression.expression
+            max_dist = expression.args.get("max_dist")
+
+            if max_dist is None:
+                return self.func("LEVENSHTEIN", this, expr)
+
+            # Emulate Snowflake semantics: if distance > max_dist, return max_dist
+            levenshtein = exp.Levenshtein(this=this, expression=expr)
+            return self.sql(exp.Least(this=levenshtein, expressions=[max_dist]))
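Capping with LEAST reproduces Snowflake's max_dist contract: any distance at or beyond the cap reports the cap itself. In plain Python terms:

    def capped_distance(distance: int, max_dist: int) -> int:
        # LEAST(LEVENSHTEIN(a, b), max_dist)
        return min(distance, max_dist)

    print(capped_distance(2, 5))  # 2
    print(capped_distance(9, 5))  # 5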
+
+        def minhash_sql(self, expression: exp.Minhash) -> str:
+            k = expression.this
+            exprs = expression.expressions
+
+            if len(exprs) != 1 or isinstance(exprs[0], exp.Star):
+                self.unsupported(
+                    "MINHASH with multiple expressions or * requires manual query restructuring"
+                )
+                return self.func("MINHASH", k, *exprs)
+
+            expr = exprs[0]
+            result = exp.replace_placeholders(self.MINHASH_TEMPLATE.copy(), expr=expr, k=k)
+            return f"({self.sql(result)})"
+
+        def minhashcombine_sql(self, expression: exp.MinhashCombine) -> str:
+            expr = expression.this
+            result = exp.replace_placeholders(self.MINHASH_COMBINE_TEMPLATE.copy(), expr=expr)
+            return f"({self.sql(result)})"
+
+        def approximatesimilarity_sql(self, expression: exp.ApproximateSimilarity) -> str:
+            expr = expression.this
+            result = exp.replace_placeholders(
+                self.APPROXIMATE_SIMILARITY_TEMPLATE.copy(), expr=expr
+            )
+            return f"({self.sql(result)})"
+
+        def arrayszip_sql(self, expression: exp.ArraysZip) -> str:
+            args = expression.expressions
+
+            if not args:
+                # Return [{}] - using MAP([], []) since DuckDB can't represent empty structs
+                return self.sql(exp.array(exp.Map(keys=exp.array(), values=exp.array())))
+
+            # Build placeholder values for template
+            lengths = [exp.Length(this=arg) for arg in args]
+            max_len = (
+                lengths[0]
+                if len(lengths) == 1
+                else exp.Greatest(this=lengths[0], expressions=lengths[1:])
+            )
+
+            # Empty struct with same schema: {'$1': NULL, '$2': NULL, ...}
+            empty_struct = exp.func(
+                "STRUCT",
+                *[
+                    exp.PropertyEQ(this=exp.Literal.string(f"${i + 1}"), expression=exp.Null())
+                    for i in range(len(args))
+                ],
+            )
+
+            # Struct for transform: {'$1': COALESCE(arr1, [])[__i + 1], ...}
+            # COALESCE wrapping handles NULL arrays - prevents invalid NULL[i] syntax
+            index = exp.column("__i") + 1
+            transform_struct = exp.func(
+                "STRUCT",
+                *[
+                    exp.PropertyEQ(
+                        this=exp.Literal.string(f"${i + 1}"),
+                        expression=exp.func("COALESCE", arg, exp.array())[index],
+                    )
+                    for i, arg in enumerate(args)
+                ],
+            )
+
+            result = exp.replace_placeholders(
+                self.ARRAYS_ZIP_TEMPLATE.copy(),
+                null_check=exp.or_(*[arg.is_(exp.Null()) for arg in args]),
+                all_empty_check=exp.and_(
+                    *[
+                        exp.EQ(this=exp.Length(this=arg), expression=exp.Literal.number(0))
+                        for arg in args
+                    ]
+                ),
+                empty_struct=empty_struct,
+                max_len=max_len,
+                transform_struct=transform_struct,
+            )
+            return self.sql(result)
+
         def lower_sql(self, expression: exp.Lower) -> str:
             result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
             return _gen_with_cast_to_blob(self, expression, result_sql)
@@ -1705,6 +3104,50 @@ class DuckDB(Dialect):
             result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
             return _gen_with_cast_to_blob(self, expression, result_sql)

+        def reverse_sql(self, expression: exp.Reverse) -> str:
+            result_sql = self.func("REVERSE", _cast_to_varchar(expression.this))
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def base64encode_sql(self, expression: exp.Base64Encode) -> str:
+            # DuckDB TO_BASE64 requires BLOB input
+            # Snowflake BASE64_ENCODE accepts both VARCHAR and BINARY - for VARCHAR it implicitly
+            # encodes UTF-8 bytes. We add ENCODE unless the input is a binary type.
+            result = expression.this
+
+            # Check if input is a string type - ENCODE only accepts VARCHAR
+            if result.is_type(*exp.DataType.TEXT_TYPES):
+                result = exp.Encode(this=result)
+
+            result = exp.ToBase64(this=result)
+
+            max_line_length = expression.args.get("max_line_length")
+            alphabet = expression.args.get("alphabet")
+
+            # Handle custom alphabet by replacing standard chars with custom ones
+            result = _apply_base64_alphabet_replacements(result, alphabet)
+
+            # Handle max_line_length by inserting newlines every N characters
+            line_length = (
+                t.cast(int, max_line_length.to_py())
+                if isinstance(max_line_length, exp.Literal) and max_line_length.is_number
+                else 0
+            )
+            if line_length > 0:
+                newline = exp.Chr(expressions=[exp.Literal.number(10)])
+                result = exp.Trim(
+                    this=exp.RegexpReplace(
+                        this=result,
+                        expression=exp.Literal.string(f"(.{{{line_length}}})"),
+                        replacement=exp.Concat(
+                            expressions=[exp.Literal.string("\\1"), newline.copy()]
+                        ),
+                    ),
+                    expression=newline,
+                    position="TRAILING",
+                )
+
+            return self.sql(result)
+
         def replace_sql(self, expression: exp.Replace) -> str:
             result_sql = self.func(
                 "REPLACE",
@@ -1739,6 +3182,14 @@ class DuckDB(Dialect):

             return self.func("STRUCT_INSERT", this, kv_sql)

+        def mapcat_sql(self, expression: exp.MapCat) -> str:
+            result = exp.replace_placeholders(
+                self.MAPCAT_TEMPLATE.copy(),
+                map1=expression.this,
+                map2=expression.expression,
+            )
+            return self.sql(result)
+
         def startswith_sql(self, expression: exp.StartsWith) -> str:
             return self.func(
                 "STARTS_WITH",
@@ -1746,6 +3197,28 @@ class DuckDB(Dialect):
                 _cast_to_varchar(expression.expression),
             )

+        def space_sql(self, expression: exp.Space) -> str:
+            # DuckDB's REPEAT requires BIGINT for the count parameter
+            return self.sql(
+                exp.Repeat(
+                    this=exp.Literal.string(" "),
+                    times=exp.cast(expression.this, exp.DataType.Type.BIGINT),
+                )
+            )
+
+        def tablefromrows_sql(self, expression: exp.TableFromRows) -> str:
+            # For GENERATOR, unwrap TABLE() - just emit the Generator (becomes RANGE)
+            if isinstance(expression.this, exp.Generator):
+                # Preserve alias, joins, and other table-level args
+                table = exp.Table(
+                    this=expression.this,
+                    alias=expression.args.get("alias"),
+                    joins=expression.args.get("joins"),
+                )
+                return self.sql(table)
+
+            return super().tablefromrows_sql(expression)
+
         def unnest_sql(self, expression: exp.Unnest) -> str:
             explode_array = expression.args.get("explode_array")
             if explode_array:
@@ -1893,18 +3366,54 @@ class DuckDB(Dialect):
             return posexplode_sql

         def addmonths_sql(self, expression: exp.AddMonths) -> str:
-
+            """
+            Handles three key issues:
+            1. Float/decimal months: e.g., Snowflake rounds, whereas DuckDB INTERVAL requires integers
+            2. End-of-month preservation: If input is last day of month, result is last day of result month
+            3. Type preservation: Maintains DATE/TIMESTAMPTZ types (DuckDB defaults to TIMESTAMP)
+            """
+            from sqlglot.optimizer.annotate_types import annotate_types

+            this = expression.this
             if not this.type:
-                from sqlglot.optimizer.annotate_types import annotate_types
-
                 this = annotate_types(this, dialect=self.dialect)

             if this.is_type(*exp.DataType.TEXT_TYPES):
                 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP))

-
-
+            # Detect float/decimal months to apply rounding (Snowflake behavior)
+            # DuckDB INTERVAL syntax doesn't support non-integer expressions, so use TO_MONTHS
+            months_expr = expression.expression
+            if not months_expr.type:
+                months_expr = annotate_types(months_expr, dialect=self.dialect)
+
+            # Build interval or to_months expression based on type
+            # Float/decimal case: Round and use TO_MONTHS(CAST(ROUND(value) AS INT))
+            interval_or_to_months = (
+                exp.func("TO_MONTHS", exp.cast(exp.func("ROUND", months_expr), "INT"))
+                if months_expr.is_type(
+                    exp.DataType.Type.FLOAT,
+                    exp.DataType.Type.DOUBLE,
+                    exp.DataType.Type.DECIMAL,
+                )
+                # Integer case: standard INTERVAL N MONTH syntax
+                else exp.Interval(this=months_expr, unit=exp.var("MONTH"))
+            )
+
+            date_add_expr = exp.Add(this=this, expression=interval_or_to_months)
+
+            # Apply end-of-month preservation if Snowflake flag is set
+            # CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(result) ELSE result END
+            preserve_eom = expression.args.get("preserve_end_of_month")
+            result_expr = (
+                exp.case()
+                .when(
+                    exp.EQ(this=exp.func("LAST_DAY", this), expression=this),
+                    exp.func("LAST_DAY", date_add_expr),
+                )
+                .else_(date_add_expr)
+                if preserve_eom
+                else date_add_expr
             )

             # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE
@@ -1912,9 +3421,8 @@ class DuckDB(Dialect):
             # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ
             # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP
             if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ):
-                return self.sql(exp.Cast(this=
-
-            return self.sql(func)
+                return self.sql(exp.Cast(this=result_expr, to=this.type))
+            return self.sql(result_expr)

         def format_sql(self, expression: exp.Format) -> str:
             if expression.name.lower() == "%s" and len(expression.expressions) == 1:
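The end-of-month rule from the docstring ("last day in, last day out"), sketched in plain Python with a hypothetical add_months helper that mirrors the CASE/LAST_DAY rewrite above:

    import calendar
    from datetime import date

    def add_months(d: date, months: int, preserve_eom: bool = True) -> date:
        # Shift the month, clamping the day to the target month's length.
        y, m = divmod(d.month - 1 + months, 12)
        year, month = d.year + y, m + 1
        last = calendar.monthrange(year, month)[1]
        if preserve_eom and d.day == calendar.monthrange(d.year, d.month)[1]:
            return date(year, month, last)  # last day in -> last day out
        return date(year, month, min(d.day, last))

    print(add_months(date(2023, 1, 31), 1))  # 2023-02-28
    print(add_months(date(2023, 2, 28), 1))  # 2023-03-31 (end-of-month preserved)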
@@ -1925,23 +3433,30 @@ class DuckDB(Dialect):
         def hexstring_sql(
             self, expression: exp.HexString, binary_function_repr: t.Optional[str] = None
         ) -> str:
-
+            # UNHEX('FF') correctly produces blob \xFF in DuckDB
+            return super().hexstring_sql(expression, binary_function_repr="UNHEX")

-
-
+        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
+            unit = unit_to_str(expression)
+            date = expression.this
+            result = self.func("DATE_TRUNC", unit, date)

-
-
-
+            if (
+                expression.args.get("input_type_preserved")
+                and date.is_type(*exp.DataType.TEMPORAL_TYPES)
+                and not (is_date_unit(unit) and date.is_type(exp.DataType.Type.DATE))
+            ):
+                return self.sql(exp.Cast(this=result, to=date.type))

-            return
+            return result

         def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
             unit = unit_to_str(expression)
             zone = expression.args.get("zone")
             timestamp = expression.this
+            date_unit = is_date_unit(unit)

-            if
+            if date_unit and zone:
                 # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
                 # Double AT TIME ZONE needed for BigQuery compatibility:
                 # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
@@ -1950,14 +3465,32 @@ class DuckDB(Dialect):
                 result_sql = self.func("DATE_TRUNC", unit, timestamp)
                 return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))

-
+            result = self.func("DATE_TRUNC", unit, timestamp)
+            if expression.args.get("input_type_preserved"):
+                if timestamp.type and timestamp.is_type(
+                    exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ
+                ):
+                    dummy_date = exp.Cast(
+                        this=exp.Literal.string("1970-01-01"),
+                        to=exp.DataType(this=exp.DataType.Type.DATE),
+                    )
+                    date_time = exp.Add(this=dummy_date, expression=timestamp)
+                    result = self.func("DATE_TRUNC", unit, date_time)
+                return self.sql(exp.Cast(this=result, to=timestamp.type))
+
+            if timestamp.is_type(*exp.DataType.TEMPORAL_TYPES) and not (
+                date_unit and timestamp.is_type(exp.DataType.Type.DATE)
+            ):
+                return self.sql(exp.Cast(this=result, to=timestamp.type))
+
+            return result

         def trim_sql(self, expression: exp.Trim) -> str:
-
-
-            _cast_to_varchar(expression.
-
-        )
+            expression.this.replace(_cast_to_varchar(expression.this))
+            if expression.expression:
+                expression.expression.replace(_cast_to_varchar(expression.expression))
+
+            result_sql = super().trim_sql(expression)
             return _gen_with_cast_to_blob(self, expression, result_sql)

         def round_sql(self, expression: exp.Round) -> str:
@@ -1983,6 +3516,15 @@ class DuckDB(Dialect):

             return self.func(func, this, decimals, truncate)

+        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
+            result = self.func("APPROX_QUANTILE", expression.this, expression.args.get("quantile"))
+
+            # DuckDB returns integers for APPROX_QUANTILE, cast to DOUBLE if the expected type is a real type
+            if expression.is_type(*exp.DataType.REAL_TYPES):
+                result = f"CAST({result} AS DOUBLE)"
+
+            return result
+
         def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
             """
             BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
@@ -2043,3 +3585,37 @@ class DuckDB(Dialect):
             result_sql = f"~{self.sql(expression, 'this')}"

             return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def window_sql(self, expression: exp.Window) -> str:
+            this = expression.this
+            if isinstance(this, exp.Corr) or (
+                isinstance(this, exp.Filter) and isinstance(this.this, exp.Corr)
+            ):
+                return self._corr_sql(expression)
+
+            return super().window_sql(expression)
+
+        def filter_sql(self, expression: exp.Filter) -> str:
+            if isinstance(expression.this, exp.Corr):
+                return self._corr_sql(expression)
+
+            return super().filter_sql(expression)
+
+        def _corr_sql(
+            self,
+            expression: t.Union[exp.Filter, exp.Window, exp.Corr],
+        ) -> str:
+            if isinstance(expression, exp.Corr) and not expression.args.get(
+                "null_on_zero_variance"
+            ):
+                return self.func("CORR", expression.this, expression.expression)
+
+            corr_expr = _maybe_corr_null_to_false(expression)
+            if corr_expr is None:
+                if isinstance(expression, exp.Window):
+                    return super().window_sql(expression)
+                if isinstance(expression, exp.Filter):
+                    return super().filter_sql(expression)
+                corr_expr = expression  # make mypy happy
+
+            return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))