sqlglot 27.27.0__py3-none-any.whl → 28.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__init__.py +1 -0
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +118 -279
- sqlglot/dialects/clickhouse.py +73 -5
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +354 -275
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +754 -25
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +14 -4
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +60 -26
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +239 -218
- sqlglot/dialects/spark.py +15 -4
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +10 -0
- sqlglot/dialects/starrocks.py +3 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -22
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/executor/__init__.py +5 -10
- sqlglot/executor/python.py +1 -29
- sqlglot/expressions.py +637 -100
- sqlglot/generator.py +160 -43
- sqlglot/helper.py +2 -44
- sqlglot/lineage.py +10 -4
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +18 -10
- sqlglot/optimizer/qualify_columns.py +122 -275
- sqlglot/optimizer/qualify_tables.py +128 -76
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1075 -959
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +296 -170
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
- sqlglot-28.4.0.dist-info/RECORD +92 -0
- sqlglot-27.27.0.dist-info/RECORD +0 -84
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.27.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/duckdb.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from decimal import Decimal
|
|
4
|
+
from itertools import groupby
|
|
5
|
+
import re
|
|
3
6
|
import typing as t
|
|
4
7
|
|
|
5
8
|
from sqlglot import exp, generator, parser, tokens, transforms
|
|
@@ -8,7 +11,6 @@ from sqlglot.dialects.dialect import (
|
|
|
8
11
|
Dialect,
|
|
9
12
|
JSON_EXTRACT_TYPE,
|
|
10
13
|
NormalizationStrategy,
|
|
11
|
-
Version,
|
|
12
14
|
approx_count_distinct_sql,
|
|
13
15
|
arrow_json_extract_sql,
|
|
14
16
|
binary_from_function,
|
|
@@ -21,7 +23,6 @@ from sqlglot.dialects.dialect import (
|
|
|
21
23
|
no_datetime_sql,
|
|
22
24
|
encode_decode_sql,
|
|
23
25
|
build_formatted_time,
|
|
24
|
-
inline_array_unless_query,
|
|
25
26
|
no_comment_column_constraint_sql,
|
|
26
27
|
no_time_sql,
|
|
27
28
|
no_timestamp_sql,
|
|
@@ -30,7 +31,6 @@ from sqlglot.dialects.dialect import (
|
|
|
30
31
|
remove_from_array_using_filter,
|
|
31
32
|
strposition_sql,
|
|
32
33
|
str_to_time_sql,
|
|
33
|
-
timestamptrunc_sql,
|
|
34
34
|
timestrtotime_sql,
|
|
35
35
|
unit_to_str,
|
|
36
36
|
sha256_sql,
|
|
@@ -38,12 +38,99 @@ from sqlglot.dialects.dialect import (
|
|
|
38
38
|
explode_to_unnest_sql,
|
|
39
39
|
no_make_interval_sql,
|
|
40
40
|
groupconcat_sql,
|
|
41
|
+
inline_array_unless_query,
|
|
42
|
+
regexp_replace_global_modifier,
|
|
43
|
+
sha2_digest_sql,
|
|
41
44
|
)
|
|
42
45
|
from sqlglot.generator import unsupported_args
|
|
43
|
-
from sqlglot.helper import seq_get
|
|
46
|
+
from sqlglot.helper import is_date_unit, seq_get
|
|
44
47
|
from sqlglot.tokens import TokenType
|
|
45
48
|
from sqlglot.parser import binary_range_parser
|
|
46
49
|
|
|
50
|
+
# Regex to detect time zones in timestamps of the form [+|-]TT[:tt]
|
|
51
|
+
# The pattern matches timezone offsets that appear after the time portion
|
|
52
|
+
TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")
|
|
53
|
+
|
|
54
|
+
# Characters that must be escaped when building regex expressions in INITCAP
|
|
55
|
+
REGEX_ESCAPE_REPLACEMENTS = {
|
|
56
|
+
"\\": "\\\\",
|
|
57
|
+
"-": r"\-",
|
|
58
|
+
"^": r"\^",
|
|
59
|
+
"[": r"\[",
|
|
60
|
+
"]": r"\]",
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
# Used in RANDSTR transpilation
|
|
64
|
+
RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
|
65
|
+
RANDSTR_SEED = 123456
|
|
66
|
+
|
|
67
|
+
# Whitespace control characters that DuckDB must process with `CHR({val})` calls
|
|
68
|
+
WS_CONTROL_CHARS_TO_DUCK = {
|
|
69
|
+
"\u000b": 11,
|
|
70
|
+
"\u001c": 28,
|
|
71
|
+
"\u001d": 29,
|
|
72
|
+
"\u001e": 30,
|
|
73
|
+
"\u001f": 31,
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
# Days of week to ISO 8601 day-of-week numbers
|
|
77
|
+
# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
|
|
78
|
+
WEEK_START_DAY_TO_DOW = {
|
|
79
|
+
"MONDAY": 1,
|
|
80
|
+
"TUESDAY": 2,
|
|
81
|
+
"WEDNESDAY": 3,
|
|
82
|
+
"THURSDAY": 4,
|
|
83
|
+
"FRIDAY": 5,
|
|
84
|
+
"SATURDAY": 6,
|
|
85
|
+
"SUNDAY": 7,
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
MAX_BIT_POSITION = exp.Literal.number(32768)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
|
|
92
|
+
"""
|
|
93
|
+
Transpile TO_BOOLEAN function from Snowflake to DuckDB equivalent.
|
|
94
|
+
|
|
95
|
+
DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
|
|
96
|
+
We need to handle the 'on'/'off' cases explicitly, plus NaN/INF error cases.
|
|
97
|
+
|
|
98
|
+
In Snowflake, NaN and INF values cause errors. We use DuckDB's native ERROR()
|
|
99
|
+
function to replicate this behavior with a clear error message.
|
|
100
|
+
"""
|
|
101
|
+
arg = expression.this
|
|
102
|
+
|
|
103
|
+
cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build("REAL"))
|
|
104
|
+
|
|
105
|
+
# Check for NaN and INF values
|
|
106
|
+
nan_inf_check = exp.Or(
|
|
107
|
+
this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
case_expr = (
|
|
111
|
+
exp.case()
|
|
112
|
+
.when(
|
|
113
|
+
nan_inf_check,
|
|
114
|
+
exp.func(
|
|
115
|
+
"ERROR",
|
|
116
|
+
exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
|
|
117
|
+
),
|
|
118
|
+
)
|
|
119
|
+
# Handle 'on' -> TRUE (case insensitive) - only for string literals
|
|
120
|
+
.when(
|
|
121
|
+
exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("ON")),
|
|
122
|
+
exp.true(),
|
|
123
|
+
)
|
|
124
|
+
# Handle 'off' -> FALSE (case insensitive) - only for string literals
|
|
125
|
+
.when(
|
|
126
|
+
exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("OFF")),
|
|
127
|
+
exp.false(),
|
|
128
|
+
)
|
|
129
|
+
.else_(exp.cast(arg, exp.DataType.Type.BOOLEAN))
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
return self.sql(case_expr)
|
|
133
|
+
|
|
47
134
|
|
|
48
135
|
# BigQuery -> DuckDB conversion for the DATE function
|
|
49
136
|
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
|
|
@@ -211,12 +298,100 @@ def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYP
|
|
|
211
298
|
def _implicit_datetime_cast(
|
|
212
299
|
arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
|
|
213
300
|
) -> t.Optional[exp.Expression]:
|
|
214
|
-
|
|
301
|
+
if isinstance(arg, exp.Literal) and arg.is_string:
|
|
302
|
+
ts = arg.name
|
|
303
|
+
if type == exp.DataType.Type.DATE and ":" in ts:
|
|
304
|
+
type = (
|
|
305
|
+
exp.DataType.Type.TIMESTAMPTZ
|
|
306
|
+
if TIMEZONE_PATTERN.search(ts)
|
|
307
|
+
else exp.DataType.Type.TIMESTAMP
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
arg = exp.cast(arg, type)
|
|
311
|
+
|
|
312
|
+
return arg
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
|
|
316
|
+
"""
|
|
317
|
+
Map a week unit to the ISO 8601 day its weeks start on, to align DATE_DIFF('WEEK', ...) coming
|
|
318
|
+
from other dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK unit parts.
|
|
319
|
+
|
|
320
|
+
Args:
|
|
321
|
+
unit: The unit expression (Var for ISOWEEK or WeekStart)
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if not a week unit or if day is dynamic (not a constant).
|
|
325
|
+
|
|
326
|
+
Examples:
|
|
327
|
+
"WEEK(SUNDAY)" -> 7
|
|
328
|
+
"WEEK(MONDAY)" -> 1
|
|
329
|
+
"ISOWEEK" -> 1
|
|
330
|
+
"""
|
|
331
|
+
# Handle plain Var expressions for ISOWEEK only
|
|
332
|
+
if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK":
|
|
333
|
+
return 1
|
|
334
|
+
|
|
335
|
+
# Handle WeekStart expressions with explicit day
|
|
336
|
+
if isinstance(unit, exp.WeekStart):
|
|
337
|
+
return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
|
|
338
|
+
|
|
339
|
+
return None
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
|
|
343
|
+
"""
|
|
344
|
+
Build DATE_TRUNC expression for week boundaries with custom start day.
|
|
345
|
+
|
|
346
|
+
Args:
|
|
347
|
+
date_expr: The date expression to truncate
|
|
348
|
+
start_dow: ISO 8601 day-of-week number of the week's first day (Monday=1, ..., Sunday=7)
|
|
349
|
+
|
|
350
|
+
DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard).
|
|
351
|
+
To align to a different start day, we shift the date before truncating.
|
|
352
|
+
|
|
353
|
+
Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
|
|
354
|
+
Examples:
|
|
355
|
+
Monday (1): shift = 0 (no shift needed)
|
|
356
|
+
Tuesday (2): shift = -1 (shift back 1 day) ...
|
|
357
|
+
Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
|
|
358
|
+
"""
|
|
359
|
+
shift_days = 1 if start_dow == 7 else 1 - start_dow
|
|
360
|
+
|
|
361
|
+
# Shift date to align week boundaries with the desired start day
|
|
362
|
+
# No shift needed for Monday-based weeks (shift_days == 0)
|
|
363
|
+
shifted_date = (
|
|
364
|
+
exp.DateAdd(
|
|
365
|
+
this=date_expr,
|
|
366
|
+
expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
|
|
367
|
+
)
|
|
368
|
+
if shift_days != 0
|
|
369
|
+
else date_expr
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)
|
|
215
373
|
|
|
216
374
|
|
|
217
375
|
def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
|
|
218
376
|
this = _implicit_datetime_cast(expression.this)
|
|
219
377
|
expr = _implicit_datetime_cast(expression.expression)
|
|
378
|
+
unit = expression.args.get("unit")
|
|
379
|
+
|
|
380
|
+
# DuckDB's WEEK diff does not respect Monday crossings (week boundaries); it computes (end_day - start_day) / 7:
|
|
381
|
+
# SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
|
|
382
|
+
# SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
|
|
383
|
+
# Whereas for other units such as MONTH it does respect month boundaries:
|
|
384
|
+
# SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
|
|
385
|
+
date_part_boundary = expression.args.get("date_part_boundary")
|
|
386
|
+
|
|
387
|
+
# Extract week start day; returns None if day is dynamic (column/placeholder)
|
|
388
|
+
week_start = _week_unit_to_dow(unit)
|
|
389
|
+
if date_part_boundary and week_start and this and expr:
|
|
390
|
+
expression.set("unit", exp.Literal.string("WEEK"))
|
|
391
|
+
|
|
392
|
+
# Truncate both dates to week boundaries to respect input dialect semantics
|
|
393
|
+
this = _build_week_trunc_expression(this, week_start)
|
|
394
|
+
expr = _build_week_trunc_expression(expr, week_start)
|
|
220
395
|
|
|
221
396
|
return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
|
|
222
397
|
|
|
@@ -251,6 +426,228 @@ def _json_extract_value_array_sql(
|
|
|
251
426
|
return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
|
|
252
427
|
|
|
253
428
|
|
|
429
|
+
def _cast_to_varchar(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
430
|
+
if arg and arg.type and not arg.is_type(exp.DataType.Type.VARCHAR, exp.DataType.Type.UNKNOWN):
|
|
431
|
+
return exp.cast(arg, exp.DataType.Type.VARCHAR)
|
|
432
|
+
return arg
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _is_binary(arg: exp.Expression) -> bool:
|
|
436
|
+
return arg.is_type(
|
|
437
|
+
exp.DataType.Type.BINARY,
|
|
438
|
+
exp.DataType.Type.VARBINARY,
|
|
439
|
+
exp.DataType.Type.BLOB,
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def _gen_with_cast_to_blob(
|
|
444
|
+
self: DuckDB.Generator, expression: exp.Expression, result_sql: str
|
|
445
|
+
) -> str:
|
|
446
|
+
if _is_binary(expression):
|
|
447
|
+
blob = exp.DataType.build("BLOB", dialect="duckdb")
|
|
448
|
+
result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
|
|
449
|
+
return result_sql
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def _cast_to_bit(arg: exp.Expression) -> exp.Expression:
|
|
453
|
+
if not _is_binary(arg):
|
|
454
|
+
return arg
|
|
455
|
+
|
|
456
|
+
if isinstance(arg, exp.HexString):
|
|
457
|
+
arg = exp.Unhex(this=exp.Literal.string(arg.this))
|
|
458
|
+
|
|
459
|
+
return exp.cast(arg, exp.DataType.Type.BIT)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
|
|
463
|
+
if _is_binary(expression.this):
|
|
464
|
+
expression.set("this", _cast_to_bit(expression.this))
|
|
465
|
+
if _is_binary(expression.expression):
|
|
466
|
+
expression.set("expression", _cast_to_bit(expression.expression))
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
|
|
470
|
+
# Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL
|
|
471
|
+
having = expression.this
|
|
472
|
+
if isinstance(having, exp.HavingMax):
|
|
473
|
+
func_name = "ARG_MAX_NULL" if having.args.get("max") else "ARG_MIN_NULL"
|
|
474
|
+
return self.func(func_name, having.this, having.expression)
|
|
475
|
+
return self.function_fallback_sql(expression)
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str:
|
|
479
|
+
# DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly
|
|
480
|
+
if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
|
|
481
|
+
return self.sql(exp.Literal.string(literal))
|
|
482
|
+
|
|
483
|
+
sql_segments: t.List[str] = []
|
|
484
|
+
for is_ws_control, group in groupby(literal, key=lambda ch: ch in WS_CONTROL_CHARS_TO_DUCK):
|
|
485
|
+
if is_ws_control:
|
|
486
|
+
for ch in group:
|
|
487
|
+
duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK[ch]
|
|
488
|
+
sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code))))
|
|
489
|
+
else:
|
|
490
|
+
sql_segments.append(self.sql(exp.Literal.string("".join(group))))
|
|
491
|
+
|
|
492
|
+
sql = " || ".join(sql_segments)
|
|
493
|
+
return sql if len(sql_segments) == 1 else f"({sql})"
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _escape_regex_metachars(
|
|
497
|
+
self: DuckDB.Generator, delimiters: t.Optional[exp.Expression], delimiters_sql: str
|
|
498
|
+
) -> str:
|
|
499
|
+
r"""
|
|
500
|
+
Escapes the regex metacharacters \ - ^ [ ] for use inside regex character classes.
|
|
501
|
+
|
|
502
|
+
Literal strings are escaped at transpile time, expressions handled with REPLACE() calls.
|
|
503
|
+
"""
|
|
504
|
+
if not delimiters:
|
|
505
|
+
return delimiters_sql
|
|
506
|
+
|
|
507
|
+
if delimiters.is_string:
|
|
508
|
+
literal_value = delimiters.this
|
|
509
|
+
escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value)
|
|
510
|
+
return _literal_sql_with_ws_chr(self, escaped_literal)
|
|
511
|
+
|
|
512
|
+
escaped_sql = delimiters_sql
|
|
513
|
+
for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items():
|
|
514
|
+
escaped_sql = self.func(
|
|
515
|
+
"REPLACE",
|
|
516
|
+
escaped_sql,
|
|
517
|
+
self.sql(exp.Literal.string(raw)),
|
|
518
|
+
self.sql(exp.Literal.string(escaped)),
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
return escaped_sql
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _build_capitalization_sql(
|
|
525
|
+
self: DuckDB.Generator,
|
|
526
|
+
value_to_split: str,
|
|
527
|
+
delimiters_sql: str,
|
|
528
|
+
) -> str:
|
|
529
|
+
# empty string delimiter --> treat value as one word, no need to split
|
|
530
|
+
if delimiters_sql == "''":
|
|
531
|
+
return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"
|
|
532
|
+
|
|
533
|
+
delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
|
|
534
|
+
split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"
|
|
535
|
+
|
|
536
|
+
# REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
|
|
537
|
+
# We do not know whether the first segment is a delimiter or not, so we check the first character of the string
|
|
538
|
+
# with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
|
|
539
|
+
return self.func(
|
|
540
|
+
"ARRAY_TO_STRING",
|
|
541
|
+
exp.case()
|
|
542
|
+
.when(
|
|
543
|
+
f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})",
|
|
544
|
+
self.func(
|
|
545
|
+
"LIST_TRANSFORM",
|
|
546
|
+
self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
|
|
547
|
+
"(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
|
|
548
|
+
),
|
|
549
|
+
)
|
|
550
|
+
.else_(
|
|
551
|
+
self.func(
|
|
552
|
+
"LIST_TRANSFORM",
|
|
553
|
+
self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
|
|
554
|
+
"(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
|
|
555
|
+
),
|
|
556
|
+
),
|
|
557
|
+
"''",
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
|
|
562
|
+
this_sql = self.sql(expression, "this")
|
|
563
|
+
delimiters = expression.args.get("expression")
|
|
564
|
+
if delimiters is None:
|
|
565
|
+
# fallback for manually created exp.Initcap w/o delimiters arg
|
|
566
|
+
delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)
|
|
567
|
+
delimiters_sql = self.sql(delimiters)
|
|
568
|
+
|
|
569
|
+
escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)
|
|
570
|
+
|
|
571
|
+
return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _floor_sql(self: DuckDB.Generator, expression: exp.Floor) -> str:
|
|
575
|
+
decimals = expression.args.get("decimals")
|
|
576
|
+
|
|
577
|
+
if decimals is not None and expression.args.get("to") is None:
|
|
578
|
+
this = expression.this
|
|
579
|
+
if isinstance(this, exp.Binary):
|
|
580
|
+
this = exp.Paren(this=this)
|
|
581
|
+
|
|
582
|
+
n_int = decimals
|
|
583
|
+
if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
|
|
584
|
+
n_int = exp.cast(decimals, exp.DataType.Type.INT)
|
|
585
|
+
|
|
586
|
+
pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int)
|
|
587
|
+
floored = exp.Floor(this=exp.Mul(this=this, expression=pow_))
|
|
588
|
+
result = exp.Div(this=floored, expression=pow_.copy())
|
|
589
|
+
|
|
590
|
+
return self.round_sql(
|
|
591
|
+
exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True)
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
return self.ceil_floor(expression)
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _regr_val_sql(
|
|
598
|
+
self: DuckDB.Generator,
|
|
599
|
+
expression: exp.RegrValx | exp.RegrValy,
|
|
600
|
+
) -> str:
|
|
601
|
+
"""
|
|
602
|
+
Transpile Snowflake's REGR_VALX/REGR_VALY to DuckDB equivalent.
|
|
603
|
+
|
|
604
|
+
REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x.
|
|
605
|
+
REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y.
|
|
606
|
+
"""
|
|
607
|
+
from sqlglot.optimizer.annotate_types import annotate_types
|
|
608
|
+
|
|
609
|
+
y = expression.this
|
|
610
|
+
x = expression.expression
|
|
611
|
+
|
|
612
|
+
# Determine which argument to check for NULL and which to return based on expression type
|
|
613
|
+
if isinstance(expression, exp.RegrValx):
|
|
614
|
+
# REGR_VALX: check y for NULL, return x
|
|
615
|
+
check_for_null = y
|
|
616
|
+
return_value = x
|
|
617
|
+
return_value_attr = "expression"
|
|
618
|
+
else:
|
|
619
|
+
# REGR_VALY: check x for NULL, return y
|
|
620
|
+
check_for_null = x
|
|
621
|
+
return_value = y
|
|
622
|
+
return_value_attr = "this"
|
|
623
|
+
|
|
624
|
+
# Get the type from the return argument
|
|
625
|
+
result_type = return_value.type
|
|
626
|
+
|
|
627
|
+
# If no type info, annotate the expression to infer types
|
|
628
|
+
if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
|
|
629
|
+
try:
|
|
630
|
+
annotated = annotate_types(expression.copy(), dialect=self.dialect)
|
|
631
|
+
result_type = getattr(annotated, return_value_attr).type
|
|
632
|
+
except Exception:
|
|
633
|
+
pass
|
|
634
|
+
|
|
635
|
+
# Default to DOUBLE for regression functions if type still unknown
|
|
636
|
+
if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
|
|
637
|
+
result_type = exp.DataType.build("DOUBLE")
|
|
638
|
+
|
|
639
|
+
# Cast NULL to the same type as return_value to avoid DuckDB type inference issues
|
|
640
|
+
typed_null = exp.Cast(this=exp.Null(), to=result_type)
|
|
641
|
+
|
|
642
|
+
return self.sql(
|
|
643
|
+
exp.If(
|
|
644
|
+
this=exp.Is(this=check_for_null.copy(), expression=exp.Null()),
|
|
645
|
+
true=typed_null,
|
|
646
|
+
false=return_value.copy(),
|
|
647
|
+
)
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
|
|
254
651
|
class DuckDB(Dialect):
|
|
255
652
|
NULL_ORDERING = "nulls_are_last"
|
|
256
653
|
SUPPORTS_USER_DEFINED_TYPES = True
|
|
@@ -269,8 +666,13 @@ class DuckDB(Dialect):
|
|
|
269
666
|
**Dialect.DATE_PART_MAPPING,
|
|
270
667
|
"DAYOFWEEKISO": "ISODOW",
|
|
271
668
|
}
|
|
669
|
+
|
|
272
670
|
DATE_PART_MAPPING.pop("WEEKDAY")
|
|
273
671
|
|
|
672
|
+
INVERSE_TIME_MAPPING = {
|
|
673
|
+
"%e": "%-d", # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
|
|
674
|
+
}
|
|
675
|
+
|
|
274
676
|
def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
275
677
|
if isinstance(path, exp.Literal):
|
|
276
678
|
# DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
|
|
@@ -306,7 +708,9 @@ class DuckDB(Dialect):
|
|
|
306
708
|
"DETACH": TokenType.DETACH,
|
|
307
709
|
"FORCE": TokenType.FORCE,
|
|
308
710
|
"INSTALL": TokenType.INSTALL,
|
|
711
|
+
"INT8": TokenType.BIGINT,
|
|
309
712
|
"LOGICAL": TokenType.BOOLEAN,
|
|
713
|
+
"MACRO": TokenType.FUNCTION,
|
|
310
714
|
"ONLY": TokenType.ONLY,
|
|
311
715
|
"PIVOT_WIDER": TokenType.PIVOT,
|
|
312
716
|
"POSITIONAL": TokenType.POSITIONAL,
|
|
@@ -398,6 +802,7 @@ class DuckDB(Dialect):
|
|
|
398
802
|
"LIST_SORT": exp.SortArray.from_arg_list,
|
|
399
803
|
"LIST_TRANSFORM": exp.Transform.from_arg_list,
|
|
400
804
|
"LIST_VALUE": lambda args: exp.Array(expressions=args),
|
|
805
|
+
"MAKE_DATE": exp.DateFromParts.from_arg_list,
|
|
401
806
|
"MAKE_TIME": exp.TimeFromParts.from_arg_list,
|
|
402
807
|
"MAKE_TIMESTAMP": _build_make_timestamp,
|
|
403
808
|
"QUANTILE_CONT": exp.PercentileCont.from_arg_list,
|
|
@@ -411,6 +816,7 @@ class DuckDB(Dialect):
|
|
|
411
816
|
expression=seq_get(args, 1),
|
|
412
817
|
replacement=seq_get(args, 2),
|
|
413
818
|
modifiers=seq_get(args, 3),
|
|
819
|
+
single_replace=True,
|
|
414
820
|
),
|
|
415
821
|
"SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
|
|
416
822
|
"STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
|
|
@@ -561,7 +967,7 @@ class DuckDB(Dialect):
|
|
|
561
967
|
) -> t.Optional[exp.Expression]:
|
|
562
968
|
bracket = super()._parse_bracket(this)
|
|
563
969
|
|
|
564
|
-
if self.dialect.version <
|
|
970
|
+
if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
|
|
565
971
|
# https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
|
|
566
972
|
bracket.set("returns_list_for_maps", True)
|
|
567
973
|
|
|
@@ -619,11 +1025,9 @@ class DuckDB(Dialect):
|
|
|
619
1025
|
def _parse_install(self, force: bool = False) -> exp.Install:
|
|
620
1026
|
return self.expression(
|
|
621
1027
|
exp.Install,
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
"force": force,
|
|
626
|
-
},
|
|
1028
|
+
this=self._parse_id_var(),
|
|
1029
|
+
from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None,
|
|
1030
|
+
force=force,
|
|
627
1031
|
)
|
|
628
1032
|
|
|
629
1033
|
def _parse_primary(self) -> t.Optional[exp.Expression]:
|
|
@@ -661,11 +1065,16 @@ class DuckDB(Dialect):
|
|
|
661
1065
|
ARRAY_SIZE_DIM_REQUIRED = False
|
|
662
1066
|
NORMALIZE_EXTRACT_DATE_PARTS = True
|
|
663
1067
|
SUPPORTS_LIKE_QUANTIFIERS = False
|
|
1068
|
+
SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
|
|
664
1069
|
|
|
665
1070
|
TRANSFORMS = {
|
|
666
1071
|
**generator.Generator.TRANSFORMS,
|
|
1072
|
+
exp.AnyValue: _anyvalue_sql,
|
|
667
1073
|
exp.ApproxDistinct: approx_count_distinct_sql,
|
|
668
|
-
exp.Array:
|
|
1074
|
+
exp.Array: transforms.preprocess(
|
|
1075
|
+
[transforms.inherit_struct_field_names],
|
|
1076
|
+
generator=inline_array_unless_query,
|
|
1077
|
+
),
|
|
669
1078
|
exp.ArrayFilter: rename_func("LIST_FILTER"),
|
|
670
1079
|
exp.ArrayRemove: remove_from_array_using_filter,
|
|
671
1080
|
exp.ArraySort: _array_sort_sql,
|
|
@@ -673,13 +1082,13 @@ class DuckDB(Dialect):
|
|
|
673
1082
|
exp.ArrayUniqueAgg: lambda self, e: self.func(
|
|
674
1083
|
"LIST", exp.Distinct(expressions=[e.this])
|
|
675
1084
|
),
|
|
1085
|
+
exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
|
|
676
1086
|
exp.BitwiseAndAgg: rename_func("BIT_AND"),
|
|
1087
|
+
exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
|
|
677
1088
|
exp.BitwiseOrAgg: rename_func("BIT_OR"),
|
|
678
|
-
exp.BitwiseXor: rename_func("XOR"),
|
|
679
1089
|
exp.BitwiseXorAgg: rename_func("BIT_XOR"),
|
|
680
1090
|
exp.CommentColumnConstraint: no_comment_column_constraint_sql,
|
|
681
1091
|
exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
|
|
682
|
-
exp.CurrentDate: lambda *_: "CURRENT_DATE",
|
|
683
1092
|
exp.CurrentTime: lambda *_: "CURRENT_TIME",
|
|
684
1093
|
exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
|
|
685
1094
|
exp.DayOfMonth: rename_func("DAYOFMONTH"),
|
|
@@ -694,6 +1103,7 @@ class DuckDB(Dialect):
|
|
|
694
1103
|
exp.DateDiff: _date_diff_sql,
|
|
695
1104
|
exp.DateStrToDate: datestrtodate_sql,
|
|
696
1105
|
exp.Datetime: no_datetime_sql,
|
|
1106
|
+
exp.DatetimeDiff: _date_diff_sql,
|
|
697
1107
|
exp.DatetimeSub: date_delta_to_binary_interval_op(),
|
|
698
1108
|
exp.DatetimeAdd: date_delta_to_binary_interval_op(),
|
|
699
1109
|
exp.DateToDi: lambda self,
|
|
@@ -710,17 +1120,20 @@ class DuckDB(Dialect):
|
|
|
710
1120
|
exp.IntDiv: lambda self, e: self.binary(e, "//"),
|
|
711
1121
|
exp.IsInf: rename_func("ISINF"),
|
|
712
1122
|
exp.IsNan: rename_func("ISNAN"),
|
|
1123
|
+
exp.Floor: _floor_sql,
|
|
713
1124
|
exp.JSONBExists: rename_func("JSON_EXISTS"),
|
|
714
1125
|
exp.JSONExtract: _arrow_json_extract_sql,
|
|
715
1126
|
exp.JSONExtractArray: _json_extract_value_array_sql,
|
|
716
|
-
exp.JSONExtractScalar: _arrow_json_extract_sql,
|
|
717
1127
|
exp.JSONFormat: _json_format_sql,
|
|
718
1128
|
exp.JSONValueArray: _json_extract_value_array_sql,
|
|
719
1129
|
exp.Lateral: explode_to_unnest_sql,
|
|
720
1130
|
exp.LogicalOr: rename_func("BOOL_OR"),
|
|
721
1131
|
exp.LogicalAnd: rename_func("BOOL_AND"),
|
|
722
1132
|
exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
|
|
1133
|
+
exp.Initcap: _initcap_sql,
|
|
723
1134
|
exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
|
|
1135
|
+
exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)),
|
|
1136
|
+
exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)),
|
|
724
1137
|
exp.MonthsBetween: lambda self, e: self.func(
|
|
725
1138
|
"DATEDIFF",
|
|
726
1139
|
"'month'",
|
|
@@ -737,13 +1150,15 @@ class DuckDB(Dialect):
|
|
|
737
1150
|
e.this,
|
|
738
1151
|
e.expression,
|
|
739
1152
|
e.args.get("replacement"),
|
|
740
|
-
e
|
|
1153
|
+
regexp_replace_global_modifier(e),
|
|
741
1154
|
),
|
|
742
1155
|
exp.RegexpLike: rename_func("REGEXP_MATCHES"),
|
|
743
1156
|
exp.RegexpILike: lambda self, e: self.func(
|
|
744
1157
|
"REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
|
|
745
1158
|
),
|
|
746
1159
|
exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
|
|
1160
|
+
exp.RegrValx: _regr_val_sql,
|
|
1161
|
+
exp.RegrValy: _regr_val_sql,
|
|
747
1162
|
exp.Return: lambda self, e: self.sql(e, "this"),
|
|
748
1163
|
exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
|
|
749
1164
|
exp.Rand: rename_func("RANDOM"),
|
|
@@ -758,19 +1173,22 @@ class DuckDB(Dialect):
|
|
|
758
1173
|
exp.Struct: _struct_sql,
|
|
759
1174
|
exp.Transform: rename_func("LIST_TRANSFORM"),
|
|
760
1175
|
exp.TimeAdd: date_delta_to_binary_interval_op(),
|
|
1176
|
+
exp.TimeSub: date_delta_to_binary_interval_op(),
|
|
761
1177
|
exp.Time: no_time_sql,
|
|
762
1178
|
exp.TimeDiff: _timediff_sql,
|
|
763
1179
|
exp.Timestamp: no_timestamp_sql,
|
|
1180
|
+
exp.TimestampAdd: date_delta_to_binary_interval_op(),
|
|
764
1181
|
exp.TimestampDiff: lambda self, e: self.func(
|
|
765
1182
|
"DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
|
|
766
1183
|
),
|
|
767
|
-
exp.
|
|
1184
|
+
exp.TimestampSub: date_delta_to_binary_interval_op(),
|
|
768
1185
|
exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
|
|
769
1186
|
exp.TimeStrToTime: timestrtotime_sql,
|
|
770
1187
|
exp.TimeStrToUnix: lambda self, e: self.func(
|
|
771
1188
|
"EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
|
|
772
1189
|
),
|
|
773
1190
|
exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
|
|
1191
|
+
exp.ToBoolean: _to_boolean_sql,
|
|
774
1192
|
exp.TimeToUnix: rename_func("EPOCH"),
|
|
775
1193
|
exp.TsOrDiToDi: lambda self,
|
|
776
1194
|
e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
|
|
@@ -781,6 +1199,13 @@ class DuckDB(Dialect):
|
|
|
781
1199
|
exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
|
|
782
1200
|
exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
|
|
783
1201
|
),
|
|
1202
|
+
exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
|
|
1203
|
+
exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
|
|
1204
|
+
exp.UnixSeconds: lambda self, e: self.sql(
|
|
1205
|
+
exp.cast(
|
|
1206
|
+
self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT
|
|
1207
|
+
)
|
|
1208
|
+
),
|
|
784
1209
|
exp.UnixToStr: lambda self, e: self.func(
|
|
785
1210
|
"STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
|
|
786
1211
|
),
|
|
@@ -813,6 +1238,7 @@ class DuckDB(Dialect):
|
|
|
813
1238
|
exp.DataType.Type.BPCHAR: "TEXT",
|
|
814
1239
|
exp.DataType.Type.CHAR: "TEXT",
|
|
815
1240
|
exp.DataType.Type.DATETIME: "TIMESTAMP",
|
|
1241
|
+
exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)",
|
|
816
1242
|
exp.DataType.Type.FLOAT: "REAL",
|
|
817
1243
|
exp.DataType.Type.JSONB: "JSON",
|
|
818
1244
|
exp.DataType.Type.NCHAR: "TEXT",
|
|
@@ -825,6 +1251,7 @@ class DuckDB(Dialect):
|
|
|
825
1251
|
exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
|
|
826
1252
|
exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
|
|
827
1253
|
exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
|
|
1254
|
+
exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)",
|
|
828
1255
|
}
|
|
829
1256
|
|
|
830
1257
|
# https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
|
|
@@ -932,6 +1359,135 @@ class DuckDB(Dialect):
|
|
|
932
1359
|
exp.NthValue,
|
|
933
1360
|
)
|
|
934
1361
|
|
|
1362
|
+
def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
    """
    Render Snowflake's BITMAP_BIT_POSITION(n) as a modulo over a conditional.

    The generated expression follows Snowflake's behavior:
    - n > 0: (n - 1) % 32768, so the largest result is 32767
    - n <= 0: ABS(n) % 32768
    """
    operand = expression.this

    # The operand node is shared by the pieces below, so they are built in a
    # fixed order: comparison first, then the decrement, then the absolute value.
    is_positive = exp.GT(this=operand, expression=exp.Literal.number(0))
    zero_based = operand - exp.Literal.number(1)
    magnitude = exp.Abs(this=operand)

    position = exp.If(this=is_positive, true=zero_based, false=magnitude)
    wrapped = exp.Paren(this=position)

    return self.sql(exp.Mod(this=wrapped, expression=MAX_BIT_POSITION))
|
|
1384
|
+
|
|
1385
|
+
def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
    """
    Transpile Snowflake's RANDSTR to a DuckDB subquery using hash-based pseudo-randomness.

    RANDSTR(length, generator) generates a random string of the specified length.
    - With a numeric seed: HASH(i + seed) is used, so the same seed yields the same result
    - With RANDOM(): its seed is used if present, otherwise RANDOM() itself feeds the hash
    - No generator: a default seed value (RANDSTR_SEED) is used

    Returns:
        The parenthesized scalar subquery that builds the string.
    """
    length = expression.this
    generator = expression.args.get("generator")

    if not generator:
        # No generator specified, use default seed (arbitrary but deterministic)
        seed_value = exp.Literal.number(RANDSTR_SEED)
    elif isinstance(generator, exp.Rand) and generator.this:
        # RANDOM(seed): reuse the explicit seed
        seed_value = generator.this
    else:
        # Bare RANDOM(), a constant, or any other expression: use it as the seed directly
        seed_value = generator

    length_sql = self.sql(length)
    seed_sql = self.sql(seed_value)

    # Derive the multiplier from the pool itself so the SUBSTRING index can never
    # fall outside the pool if the character set is ever changed.
    pool_size = len(RANDSTR_CHAR_POOL)

    query: exp.Select = exp.maybe_parse(
        f"""
        SELECT LISTAGG(
            SUBSTRING(
                '{RANDSTR_CHAR_POOL}',
                1 + CAST(FLOOR(random_value * {pool_size}) AS INT),
                1
            ),
            ''
        )
        FROM (
            SELECT (ABS(HASH(i + {seed_sql})) % 1000) / 1000.0 AS random_value
            FROM RANGE({length_sql}) AS t(i)
        )
        """,
        dialect="duckdb",
    )
    return f"({self.sql(query)})"
|
|
1429
|
+
|
|
1430
|
+
def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
    """
    Generate DuckDB SQL for TO_BINARY(value, format).

    When the expression is typed as BINARY, the format picks the decoding function:
    - 'HEX' (also the default): TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
    - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
    - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')

    The return type can be either VARCHAR or BINARY; anything not typed as
    BINARY is emitted as a plain TO_BINARY(value) call.
    """
    operand = expression.this
    format_node = expression.args.get("format")
    encoding = format_node.name.upper() if format_node else "HEX"

    if not expression.is_type(exp.DataType.Type.BINARY):
        # Fallback, which needs to be updated if we want to support transpilation
        # from dialects other than Snowflake
        return self.func("TO_BINARY", operand)

    # Any format other than the two listed here is treated as hex.
    decoders = {"UTF-8": "ENCODE", "BASE64": "FROM_BASE64"}
    return self.func(decoders.get(encoding, "UNHEX"), operand)
|
|
1458
|
+
|
|
1459
|
+
def _greatest_least_sql(
    self: DuckDB.Generator, expression: exp.Greatest | exp.Least
) -> str:
    """
    Generate GREATEST/LEAST while honoring the source dialect's NULL semantics.

    - null_if_any_null=True (BigQuery-style): the call is wrapped in a CASE that
      yields NULL as soon as any argument is NULL
    - null_if_any_null=False (DuckDB/PostgreSQL-style): the native function is
      emitted as-is, which ignores NULLs
    """
    native_sql = self.function_fallback_sql(expression)

    if not expression.args.get("null_if_any_null"):
        # DuckDB/PostgreSQL behavior: the native GREATEST/LEAST is already correct
        return self.sql(native_sql)

    # BigQuery behavior: NULL if any argument is NULL
    operands = [expression.this, *expression.expressions]
    null_checks = [operand.is_(exp.null()) for operand in operands]
    guarded = exp.case().when(
        exp.or_(*null_checks, copy=False),
        exp.null(),
        copy=False,
    )
    guarded.set("default", native_sql)
    return self.sql(guarded)
|
|
1484
|
+
|
|
1485
|
+
def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
    """Generate GREATEST through the shared GREATEST/LEAST helper."""
    rendered = self._greatest_least_sql(expression)
    return rendered
|
|
1487
|
+
|
|
1488
|
+
def least_sql(self: DuckDB.Generator, expression: exp.Least) -> str:
    """Generate LEAST through the shared GREATEST/LEAST helper."""
    rendered = self._greatest_least_sql(expression)
    return rendered
|
|
1490
|
+
|
|
935
1491
|
def lambda_sql(
|
|
936
1492
|
self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
|
|
937
1493
|
) -> str:
|
|
@@ -951,10 +1507,16 @@ class DuckDB(Dialect):
|
|
|
951
1507
|
def install_sql(self, expression: exp.Install) -> str:
    """Generate `[FORCE ]INSTALL <this>[ FROM <from_>]` for an Install node."""
    prefix = "FORCE " if expression.args.get("force") else ""
    target = self.sql(expression, "this")

    # The optional source is interpolated directly into the statement.
    source = expression.args.get("from_")
    suffix = f" FROM {source}" if source else ""

    return "".join((prefix, "INSTALL ", target, suffix))
|
|
957
1513
|
|
|
1514
|
+
def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
    """
    Flag APPROX_TOP_K as unsupported and emit the function call unchanged.

    The unsupported warning is recorded first; the returned SQL is the generic
    function rendering of the expression.
    """
    self.unsupported(
        "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types."
    )
    return self.function_fallback_sql(expression)
|
|
1519
|
+
|
|
958
1520
|
def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
    """Render FROM_ISO8601_TIMESTAMP as a cast of its argument to TIMESTAMPTZ."""
    target_type = exp.DataType.Type.TIMESTAMPTZ
    as_timestamptz = exp.cast(expression.this, target_type)
    return self.sql(as_timestamptz)
|
|
960
1522
|
|
|
@@ -970,6 +1532,16 @@ class DuckDB(Dialect):
|
|
|
970
1532
|
return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
|
|
971
1533
|
return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
|
|
972
1534
|
|
|
1535
|
+
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    """
    Generate CURRENT_DATE, optionally evaluated in a given time zone.

    Without an argument the bare keyword is emitted. With one, the current
    timestamp is shifted with AT TIME ZONE and then cast to DATE.
    """
    zone = expression.this
    if zone:
        localized_now = exp.AtTimeZone(this=exp.CurrentTimestamp(), zone=zone)
        date_type = exp.DataType(this=exp.DataType.Type.DATE)
        return self.sql(exp.Cast(this=localized_now, to=date_type))

    return "CURRENT_DATE"
|
|
1544
|
+
|
|
973
1545
|
def parsejson_sql(self, expression: exp.ParseJSON) -> str:
|
|
974
1546
|
arg = expression.this
|
|
975
1547
|
if expression.args.get("safe"):
|
|
@@ -1051,14 +1623,14 @@ class DuckDB(Dialect):
|
|
|
1051
1623
|
return self.function_fallback_sql(expression)
|
|
1052
1624
|
|
|
1053
1625
|
def countif_sql(self, expression: exp.CountIf) -> str:
    """
    Generate COUNT_IF, rewriting it as a SUM for dialect versions before 1.2.

    From version (1, 2) onwards the function call is emitted as-is.
    """
    emit_natively = self.dialect.version >= (1, 2)
    if not emit_natively:
        # https://github.com/tobymao/sqlglot/pull/4749
        return count_if_to_sum(self, expression)

    return self.function_fallback_sql(expression)
|
|
1059
1631
|
|
|
1060
1632
|
def bracket_sql(self, expression: exp.Bracket) -> str:
|
|
1061
|
-
if self.dialect.version >=
|
|
1633
|
+
if self.dialect.version >= (1, 2):
|
|
1062
1634
|
return super().bracket_sql(expression)
|
|
1063
1635
|
|
|
1064
1636
|
# https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
|
|
@@ -1125,6 +1697,33 @@ class DuckDB(Dialect):
|
|
|
1125
1697
|
|
|
1126
1698
|
return self.sql(case)
|
|
1127
1699
|
|
|
1700
|
+
def lower_sql(self, expression: exp.Lower) -> str:
    """Generate LOWER over a VARCHAR-cast argument, then apply the shared BLOB cast helper."""
    text_arg = _cast_to_varchar(expression.this)
    lowered = self.func("LOWER", text_arg)
    return _gen_with_cast_to_blob(self, expression, lowered)
|
|
1703
|
+
|
|
1704
|
+
def upper_sql(self, expression: exp.Upper) -> str:
    """Generate UPPER over a VARCHAR-cast argument, then apply the shared BLOB cast helper."""
    text_arg = _cast_to_varchar(expression.this)
    uppered = self.func("UPPER", text_arg)
    return _gen_with_cast_to_blob(self, expression, uppered)
|
|
1707
|
+
|
|
1708
|
+
def replace_sql(self, expression: exp.Replace) -> str:
    """
    Generate REPLACE(subject, pattern, replacement) with every argument cast to VARCHAR.

    The rendered call is then passed through the shared BLOB cast helper.
    """
    operands = (
        expression.this,
        expression.expression,
        expression.args.get("replacement"),
    )
    varchar_args = [_cast_to_varchar(operand) for operand in operands]

    replaced = self.func("REPLACE", *varchar_args)
    return _gen_with_cast_to_blob(self, expression, replaced)
|
|
1716
|
+
|
|
1717
|
+
def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
    """
    Generate an infix bitwise operation.

    The operands are prepared in place first, the node is rendered as a binary
    expression with `op`, and the result goes through the shared BLOB cast helper.
    """
    _prepare_binary_bitwise_args(expression)
    infix_sql = self.binary(expression, op)
    return _gen_with_cast_to_blob(self, expression, infix_sql)
|
|
1721
|
+
|
|
1722
|
+
def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
    """
    Generate bitwise XOR as the XOR(a, b) function call.

    Operands are prepared in place before being read back, and the rendered
    call goes through the shared BLOB cast helper.
    """
    _prepare_binary_bitwise_args(expression)

    # Read the operands only after preparation, which may have changed them.
    left, right = expression.this, expression.expression
    xor_sql = self.func("XOR", left, right)
    return _gen_with_cast_to_blob(self, expression, xor_sql)
|
|
1726
|
+
|
|
1128
1727
|
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
|
|
1129
1728
|
this = expression.this
|
|
1130
1729
|
key = expression.args.get("key")
|
|
@@ -1140,6 +1739,13 @@ class DuckDB(Dialect):
|
|
|
1140
1739
|
|
|
1141
1740
|
return self.func("STRUCT_INSERT", this, kv_sql)
|
|
1142
1741
|
|
|
1742
|
+
def startswith_sql(self, expression: exp.StartsWith) -> str:
    """Generate STARTS_WITH(subject, prefix) with both arguments cast to VARCHAR."""
    subject = _cast_to_varchar(expression.this)
    prefix = _cast_to_varchar(expression.expression)
    return self.func("STARTS_WITH", subject, prefix)
|
|
1748
|
+
|
|
1143
1749
|
def unnest_sql(self, expression: exp.Unnest) -> str:
|
|
1144
1750
|
explode_array = expression.args.get("explode_array")
|
|
1145
1751
|
if explode_array:
|
|
@@ -1173,7 +1779,7 @@ class DuckDB(Dialect):
|
|
|
1173
1779
|
if isinstance(this, exp.First):
|
|
1174
1780
|
this = exp.AnyValue(this=this.this)
|
|
1175
1781
|
|
|
1176
|
-
if not isinstance(this, exp.AnyValue):
|
|
1782
|
+
if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
|
|
1177
1783
|
self.unsupported("IGNORE NULLS is not supported for non-window functions.")
|
|
1178
1784
|
|
|
1179
1785
|
return self.sql(this)
|
|
@@ -1196,10 +1802,19 @@ class DuckDB(Dialect):
|
|
|
1196
1802
|
|
|
1197
1803
|
return self.func("ARRAY_TO_STRING", this, expression.expression)
|
|
1198
1804
|
|
|
1199
|
-
@unsupported_args("position", "occurrence")
|
|
1200
1805
|
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
|
|
1806
|
+
this = expression.this
|
|
1201
1807
|
group = expression.args.get("group")
|
|
1202
1808
|
params = expression.args.get("parameters")
|
|
1809
|
+
position = expression.args.get("position")
|
|
1810
|
+
occurrence = expression.args.get("occurrence")
|
|
1811
|
+
null_if_pos_overflow = expression.args.get("null_if_pos_overflow")
|
|
1812
|
+
|
|
1813
|
+
if position and (not position.is_int or position.to_py() > 1):
|
|
1814
|
+
this = exp.Substring(this=this, start=position)
|
|
1815
|
+
|
|
1816
|
+
if null_if_pos_overflow:
|
|
1817
|
+
this = exp.Nullif(this=this, expression=exp.Literal.string(""))
|
|
1203
1818
|
|
|
1204
1819
|
# Do not render group if there is no following argument,
|
|
1205
1820
|
# and it's the default value for this dialect
|
|
@@ -1209,9 +1824,15 @@ class DuckDB(Dialect):
|
|
|
1209
1824
|
and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
|
|
1210
1825
|
):
|
|
1211
1826
|
group = None
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1827
|
+
|
|
1828
|
+
if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
|
|
1829
|
+
return self.func(
|
|
1830
|
+
"ARRAY_EXTRACT",
|
|
1831
|
+
self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
|
|
1832
|
+
exp.Literal.number(occurrence),
|
|
1833
|
+
)
|
|
1834
|
+
|
|
1835
|
+
return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)
|
|
1215
1836
|
|
|
1216
1837
|
@unsupported_args("culture")
|
|
1217
1838
|
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
|
|
@@ -1314,3 +1935,111 @@ class DuckDB(Dialect):
|
|
|
1314
1935
|
to_hex = exp.cast(self.func("TO_HEX", from_hex), exp.DataType.Type.BLOB)
|
|
1315
1936
|
|
|
1316
1937
|
return self.sql(to_hex)
|
|
1938
|
+
|
|
1939
|
+
def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
    """
    Generate DATE_TRUNC for a timestamp truncation, honoring an optional time zone.

    For date-level units with a zone, the timestamp is moved into the zone before
    truncating and the truncated result is moved back with a second AT TIME ZONE.
    """
    unit = unit_to_str(expression)
    zone = expression.args.get("zone")
    operand = expression.this

    if not (is_date_unit(unit) and zone):
        return self.func("DATE_TRUNC", unit, operand)

    # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
    # Double AT TIME ZONE needed for BigQuery compatibility:
    # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
    # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
    localized = exp.AtTimeZone(this=operand, zone=zone)
    truncated_sql = self.func("DATE_TRUNC", unit, localized)
    return self.sql(exp.AtTimeZone(this=truncated_sql, zone=zone))
|
|
1954
|
+
|
|
1955
|
+
def trim_sql(self, expression: exp.Trim) -> str:
    """
    Generate TRIM/LTRIM/RTRIM with both arguments cast to VARCHAR.

    The `position` arg selects the function: "LEADING" maps to LTRIM,
    "TRAILING" maps to RTRIM, and anything else (including no position)
    maps to TRIM. Emitting TRIM unconditionally would turn one-sided trims
    into two-sided ones.

    The rendered call is then passed through the shared BLOB cast helper.
    """
    position = self.sql(expression, "position")
    func_name = {"LEADING": "LTRIM", "TRAILING": "RTRIM"}.get(position, "TRIM")

    result_sql = self.func(
        func_name,
        _cast_to_varchar(expression.this),
        _cast_to_varchar(expression.expression),
    )
    return _gen_with_cast_to_blob(self, expression, result_sql)
|
|
1962
|
+
|
|
1963
|
+
def round_sql(self, expression: exp.Round) -> str:
    """
    Generate ROUND / ROUND_EVEN, adapting the scale and rounding-mode arguments.

    - A non-integer scale is cast to INT when the source dialect marks the node
      with `casts_non_integer_decimals`.
    - Half-to-even rounding modes switch the function to ROUND_EVEN; half-away-
      from-zero modes are dropped. Any other mode is passed through as an argument.
    """
    value = expression.this
    scale = expression.args.get("decimals")
    mode = expression.args.get("truncate")

    # DuckDB requires the scale (decimals) argument to be an INT
    # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
    if scale is not None and expression.args.get("casts_non_integer_decimals"):
        if not scale.is_int and not scale.is_type(*exp.DataType.INTEGER_TYPES):
            scale = exp.cast(scale, exp.DataType.Type.INT)

    func_name = "ROUND"
    if mode:
        mode_name = mode.this
        # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
        if mode_name in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
            func_name, mode = "ROUND_EVEN", None
        # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
        elif mode_name in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
            mode = None

    return self.func(func_name, value, scale, mode)
|
|
1985
|
+
|
|
1986
|
+
def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
    """
    Transpile BigQuery's APPROX_QUANTILES(expr, n) to an APPROX_QUANTILE call over
    an array of n+1 evenly spaced fractions (0, 1/n, ..., 1).

    BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile
    values dividing the input distribution into n equal-sized buckets.

    Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but
    BigQuery does not document the specific algorithm used so results may differ.
    DuckDB does not support RESPECT NULLS.

    If the bucket count is missing, not an integer literal, or not positive, an
    unsupported warning is recorded and the original call is emitted unchanged.
    """
    this = expression.this
    under_distinct = isinstance(this, exp.Distinct)

    if under_distinct:
        # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
        if len(this.expressions) < 2:
            self.unsupported("APPROX_QUANTILES requires a bucket count argument")
            return self.function_fallback_sql(expression)
        num_quantiles_expr = this.expressions[1]
    else:
        num_quantiles_expr = expression.expression

    if (
        not isinstance(num_quantiles_expr, exp.Literal)
        or not num_quantiles_expr.is_int
        or num_quantiles_expr.to_py() <= 0
    ):
        self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
        return self.function_fallback_sql(expression)

    num_quantiles = t.cast(int, num_quantiles_expr.to_py())

    if under_distinct:
        # Detach the bucket count from DISTINCT only once it is known to be valid,
        # so the fallback paths above still render the call with all its arguments.
        num_quantiles_expr.pop()

    quantiles = [
        exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
        for i in range(num_quantiles + 1)
    ]

    return self.sql(
        exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
    )
|
|
2022
|
+
|
|
2023
|
+
def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
    """
    Generate a scalar JSON extraction using the arrow-style helper.

    When `scalar_only` is set, the node is first rendered as a JSON_VALUE call
    and that SQL becomes the subject of a new extraction with path '$'.
    """
    if expression.args.get("scalar_only"):
        json_value_sql = rename_func("JSON_VALUE")(self, expression)
        expression = exp.JSONExtractScalar(this=json_value_sql, expression="'$'")

    return _arrow_json_extract_sql(self, expression)
|
|
2029
|
+
|
|
2030
|
+
def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
    """
    Generate the unary `~` operator over a BIT-cast operand.

    A binary operand marks the node as BINARY before rendering. A negated
    operand is wrapped in parentheses. The rendered result goes through the
    shared BLOB cast helper.
    """
    operand = expression.this

    if _is_binary(operand):
        expression.type = exp.DataType.build("BINARY")

    bit_arg = _cast_to_bit(operand)
    # The parenthesization check is made on the original operand, not on the cast result.
    wrapped_arg = exp.Paren(this=bit_arg) if isinstance(operand, exp.Neg) else bit_arg
    expression.set("this", wrapped_arg)

    negated_sql = "~" + self.sql(expression, "this")
    return _gen_with_cast_to_blob(self, expression, negated_sql)
|