sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
- sqlglot-28.4.1.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
sqlglot/dialects/duckdb.py
CHANGED
@@ -1,5 +1,7 @@
 from __future__ import annotations

+from decimal import Decimal
+from itertools import groupby
 import re
 import typing as t

@@ -9,7 +11,6 @@ from sqlglot.dialects.dialect import (
     Dialect,
     JSON_EXTRACT_TYPE,
     NormalizationStrategy,
-    Version,
     approx_count_distinct_sql,
     arrow_json_extract_sql,
     binary_from_function,
@@ -22,7 +23,6 @@ from sqlglot.dialects.dialect import (
     no_datetime_sql,
     encode_decode_sql,
     build_formatted_time,
-    inline_array_unless_query,
     no_comment_column_constraint_sql,
     no_time_sql,
     no_timestamp_sql,
@@ -31,7 +31,6 @@ from sqlglot.dialects.dialect import (
     remove_from_array_using_filter,
     strposition_sql,
     str_to_time_sql,
-    timestamptrunc_sql,
     timestrtotime_sql,
     unit_to_str,
     sha256_sql,
@@ -39,10 +38,12 @@ from sqlglot.dialects.dialect import (
     explode_to_unnest_sql,
     no_make_interval_sql,
     groupconcat_sql,
+    inline_array_unless_query,
     regexp_replace_global_modifier,
+    sha2_digest_sql,
 )
 from sqlglot.generator import unsupported_args
-from sqlglot.helper import seq_get
+from sqlglot.helper import is_date_unit, seq_get
 from sqlglot.tokens import TokenType
 from sqlglot.parser import binary_range_parser

@@ -50,6 +51,86 @@ from sqlglot.parser import binary_range_parser
 # The pattern matches timezone offsets that appear after the time portion
 TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")

+# Characters that must be escaped when building regex expressions in INITCAP
+REGEX_ESCAPE_REPLACEMENTS = {
+    "\\": "\\\\",
+    "-": r"\-",
+    "^": r"\^",
+    "[": r"\[",
+    "]": r"\]",
+}
+
+# Used in RANDSTR transpilation
+RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+RANDSTR_SEED = 123456
+
+# Whitespace control characters that DuckDB must process with `CHR({val})` calls
+WS_CONTROL_CHARS_TO_DUCK = {
+    "\u000b": 11,
+    "\u001c": 28,
+    "\u001d": 29,
+    "\u001e": 30,
+    "\u001f": 31,
+}
+
+# Days of week to ISO 8601 day-of-week numbers
+# ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
+WEEK_START_DAY_TO_DOW = {
+    "MONDAY": 1,
+    "TUESDAY": 2,
+    "WEDNESDAY": 3,
+    "THURSDAY": 4,
+    "FRIDAY": 5,
+    "SATURDAY": 6,
+    "SUNDAY": 7,
+}
+
+MAX_BIT_POSITION = exp.Literal.number(32768)
+
+
+def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
+    """
+    Transpile the TO_BOOLEAN function from Snowflake to a DuckDB equivalent.
+
+    DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
+    We need to handle the 'on'/'off' cases explicitly, plus the NaN/INF error cases.
+
+    In Snowflake, NaN and INF values cause errors. We use DuckDB's native ERROR()
+    function to replicate this behavior with a clear error message.
+    """
+    arg = expression.this
+
+    cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build("REAL"))
+
+    # Check for NaN and INF values
+    nan_inf_check = exp.Or(
+        this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
+    )
+
+    case_expr = (
+        exp.case()
+        .when(
+            nan_inf_check,
+            exp.func(
+                "ERROR",
+                exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
+            ),
+        )
+        # Handle 'on' -> TRUE (case insensitive) - only for string literals
+        .when(
+            exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("ON")),
+            exp.true(),
+        )
+        # Handle 'off' -> FALSE (case insensitive) - only for string literals
+        .when(
+            exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("OFF")),
+            exp.false(),
+        )
+        .else_(exp.cast(arg, exp.DataType.Type.BOOLEAN))
+    )
+
+    return self.sql(case_expr)
+

 # BigQuery -> DuckDB conversion for the DATE function
 def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
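
For illustration, a minimal sketch of exercising the new TO_BOOLEAN handler through sqlglot's public API (the printed output is approximate, inferred from `_to_boolean_sql` above):

import sqlglot

# Snowflake TO_BOOLEAN -> DuckDB CASE expression; per _to_boolean_sql above, the result
# should roughly check ISNAN/ISINF on TRY_CAST(v AS REAL), map 'ON'/'OFF' explicitly,
# and otherwise fall back to CAST(v AS BOOLEAN).
print(sqlglot.transpile("SELECT TO_BOOLEAN(v)", read="snowflake", write="duckdb")[0])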
@@ -231,9 +312,86 @@ def _implicit_datetime_cast(
     return arg


+def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
+    """
+    Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming
+    from other dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK unit parts.
+
+    Args:
+        unit: The unit expression (Var for ISOWEEK or WeekStart)
+
+    Returns:
+        The ISO 8601 day number (Monday=1, Sunday=7 etc.) or None if not a week unit or if the day is dynamic (not a constant).
+
+    Examples:
+        "WEEK(SUNDAY)" -> 7
+        "WEEK(MONDAY)" -> 1
+        "ISOWEEK" -> 1
+    """
+    # Handle plain Var expressions for ISOWEEK only
+    if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK":
+        return 1
+
+    # Handle WeekStart expressions with explicit day
+    if isinstance(unit, exp.WeekStart):
+        return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
+
+    return None
+
+
+def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
+    """
+    Build a DATE_TRUNC expression for week boundaries with a custom start day.
+
+    Args:
+        date_expr: The date expression to truncate
+        start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7)
+
+    DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard).
+    To align to a different start day, we shift the date before truncating.
+
+    Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
+    Examples:
+        Monday (1): shift = 0 (no shift needed)
+        Tuesday (2): shift = -1 (shift back 1 day) ...
+        Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
+    """
+    shift_days = 1 if start_dow == 7 else 1 - start_dow
+
+    # Shift date to align week boundaries with the desired start day
+    # No shift needed for Monday-based weeks (shift_days == 0)
+    shifted_date = (
+        exp.DateAdd(
+            this=date_expr,
+            expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
+        )
+        if shift_days != 0
+        else date_expr
+    )
+
+    return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)
+
+
 def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
     this = _implicit_datetime_cast(expression.this)
     expr = _implicit_datetime_cast(expression.expression)
+    unit = expression.args.get("unit")
+
+    # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
+    # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
+    # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
+    # Whereas for other units such as MONTH it does respect month boundaries:
+    # SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
+    date_part_boundary = expression.args.get("date_part_boundary")
+
+    # Extract week start day; returns None if day is dynamic (column/placeholder)
+    week_start = _week_unit_to_dow(unit)
+    if date_part_boundary and week_start and this and expr:
+        expression.set("unit", exp.Literal.string("WEEK"))
+
+        # Truncate both dates to week boundaries to respect input dialect semantics
+        this = _build_week_trunc_expression(this, week_start)
+        expr = _build_week_trunc_expression(expr, week_start)

     return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
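
A sketch of the week-boundary alignment above, assuming BigQuery's WEEK(<day>) date part parses into the exp.WeekStart unit that `_week_unit_to_dow` handles (output approximate):

import sqlglot

# BigQuery counts WEEK(SUNDAY) diffs by week-boundary crossings, so per _date_diff_sql
# both operands should come out wrapped in DATE_TRUNC('WEEK', ... + INTERVAL '1' DAY)
# to make DuckDB's (end - start) / 7 arithmetic see Sunday-aligned boundaries.
sql = "SELECT DATE_DIFF(DATE '2024-12-17', DATE '2024-12-13', WEEK(SUNDAY))"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])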
@@ -268,6 +426,228 @@ def _json_extract_value_array_sql(
     return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


+def _cast_to_varchar(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+    if arg and arg.type and not arg.is_type(exp.DataType.Type.VARCHAR, exp.DataType.Type.UNKNOWN):
+        return exp.cast(arg, exp.DataType.Type.VARCHAR)
+    return arg
+
+
+def _is_binary(arg: exp.Expression) -> bool:
+    return arg.is_type(
+        exp.DataType.Type.BINARY,
+        exp.DataType.Type.VARBINARY,
+        exp.DataType.Type.BLOB,
+    )
+
+
+def _gen_with_cast_to_blob(
+    self: DuckDB.Generator, expression: exp.Expression, result_sql: str
+) -> str:
+    if _is_binary(expression):
+        blob = exp.DataType.build("BLOB", dialect="duckdb")
+        result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
+    return result_sql
+
+
+def _cast_to_bit(arg: exp.Expression) -> exp.Expression:
+    if not _is_binary(arg):
+        return arg
+
+    if isinstance(arg, exp.HexString):
+        arg = exp.Unhex(this=exp.Literal.string(arg.this))
+
+    return exp.cast(arg, exp.DataType.Type.BIT)
+
+
+def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
+    if _is_binary(expression.this):
+        expression.set("this", _cast_to_bit(expression.this))
+    if _is_binary(expression.expression):
+        expression.set("expression", _cast_to_bit(expression.expression))
+
+
+def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
+    # Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL
+    having = expression.this
+    if isinstance(having, exp.HavingMax):
+        func_name = "ARG_MAX_NULL" if having.args.get("max") else "ARG_MIN_NULL"
+        return self.func(func_name, having.this, having.expression)
+    return self.function_fallback_sql(expression)
+
+
+def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str:
+    # DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly
+    if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
+        return self.sql(exp.Literal.string(literal))
+
+    sql_segments: t.List[str] = []
+    for is_ws_control, group in groupby(literal, key=lambda ch: ch in WS_CONTROL_CHARS_TO_DUCK):
+        if is_ws_control:
+            for ch in group:
+                duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK[ch]
+                sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code))))
+        else:
+            sql_segments.append(self.sql(exp.Literal.string("".join(group))))
+
+    sql = " || ".join(sql_segments)
+    return sql if len(sql_segments) == 1 else f"({sql})"
+
+
+def _escape_regex_metachars(
+    self: DuckDB.Generator, delimiters: t.Optional[exp.Expression], delimiters_sql: str
+) -> str:
+    r"""
+    Escapes the regex metacharacters \ - ^ [ ] for use in character-class regex expressions.
+
+    Literal strings are escaped at transpile time; other expressions are handled with REPLACE() calls.
+    """
+    if not delimiters:
+        return delimiters_sql
+
+    if delimiters.is_string:
+        literal_value = delimiters.this
+        escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value)
+        return _literal_sql_with_ws_chr(self, escaped_literal)
+
+    escaped_sql = delimiters_sql
+    for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items():
+        escaped_sql = self.func(
+            "REPLACE",
+            escaped_sql,
+            self.sql(exp.Literal.string(raw)),
+            self.sql(exp.Literal.string(escaped)),
+        )
+
+    return escaped_sql
+
+
+def _build_capitalization_sql(
+    self: DuckDB.Generator,
+    value_to_split: str,
+    delimiters_sql: str,
+) -> str:
+    # empty string delimiter --> treat value as one word, no need to split
+    if delimiters_sql == "''":
+        return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"
+
+    delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
+    split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"
+
+    # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
+    # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
+    # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
+    return self.func(
+        "ARRAY_TO_STRING",
+        exp.case()
+        .when(
+            f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})",
+            self.func(
+                "LIST_TRANSFORM",
+                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
+                "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
+            ),
+        )
+        .else_(
+            self.func(
+                "LIST_TRANSFORM",
+                self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
+                "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
+            ),
+        ),
+        "''",
+    )
+
+
+def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
+    this_sql = self.sql(expression, "this")
+    delimiters = expression.args.get("expression")
+    if delimiters is None:
+        # fallback for manually created exp.Initcap w/o delimiters arg
+        delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)
+    delimiters_sql = self.sql(delimiters)
+
+    escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)
+
+    return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
+
+
+def _floor_sql(self: DuckDB.Generator, expression: exp.Floor) -> str:
+    decimals = expression.args.get("decimals")
+
+    if decimals is not None and expression.args.get("to") is None:
+        this = expression.this
+        if isinstance(this, exp.Binary):
+            this = exp.Paren(this=this)
+
+        n_int = decimals
+        if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
+            n_int = exp.cast(decimals, exp.DataType.Type.INT)
+
+        pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int)
+        floored = exp.Floor(this=exp.Mul(this=this, expression=pow_))
+        result = exp.Div(this=floored, expression=pow_.copy())
+
+        return self.round_sql(
+            exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True)
+        )
+
+    return self.ceil_floor(expression)
+
+
+def _regr_val_sql(
+    self: DuckDB.Generator,
+    expression: exp.RegrValx | exp.RegrValy,
+) -> str:
+    """
+    Transpile Snowflake's REGR_VALX/REGR_VALY to a DuckDB equivalent.
+
+    REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x.
+    REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y.
+    """
+    from sqlglot.optimizer.annotate_types import annotate_types
+
+    y = expression.this
+    x = expression.expression
+
+    # Determine which argument to check for NULL and which to return based on expression type
+    if isinstance(expression, exp.RegrValx):
+        # REGR_VALX: check y for NULL, return x
+        check_for_null = y
+        return_value = x
+        return_value_attr = "expression"
+    else:
+        # REGR_VALY: check x for NULL, return y
+        check_for_null = x
+        return_value = y
+        return_value_attr = "this"
+
+    # Get the type from the return argument
+    result_type = return_value.type
+
+    # If no type info, annotate the expression to infer types
+    if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
+        try:
+            annotated = annotate_types(expression.copy(), dialect=self.dialect)
+            result_type = getattr(annotated, return_value_attr).type
+        except Exception:
+            pass
+
+    # Default to DOUBLE for regression functions if type still unknown
+    if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
+        result_type = exp.DataType.build("DOUBLE")
+
+    # Cast NULL to the same type as return_value to avoid DuckDB type inference issues
+    typed_null = exp.Cast(this=exp.Null(), to=result_type)
+
+    return self.sql(
+        exp.If(
+            this=exp.Is(this=check_for_null.copy(), expression=exp.Null()),
+            true=typed_null,
+            false=return_value.copy(),
+        )
+    )
+
+
 class DuckDB(Dialect):
     NULL_ORDERING = "nulls_are_last"
     SUPPORTS_USER_DEFINED_TYPES = True
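
A sketch for the new INITCAP handler, assuming Snowflake's two-argument INITCAP parses into exp.Initcap with the delimiter string as its "expression" arg (output approximate):

import sqlglot

# Per _initcap_sql above, a custom delimiter set is escaped and the string is split with
# REGEXP_EXTRACT_ALL, capitalized via LIST_TRANSFORM, and re-joined with ARRAY_TO_STRING.
print(sqlglot.transpile("SELECT INITCAP(name, '-')", read="snowflake", write="duckdb")[0])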
@@ -286,8 +666,13 @@ class DuckDB(Dialect):
         **Dialect.DATE_PART_MAPPING,
         "DAYOFWEEKISO": "ISODOW",
     }
+
     DATE_PART_MAPPING.pop("WEEKDAY")

+    INVERSE_TIME_MAPPING = {
+        "%e": "%-d",  # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
+    }
+
     def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
         if isinstance(path, exp.Literal):
             # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
@@ -323,7 +708,9 @@ class DuckDB(Dialect):
             "DETACH": TokenType.DETACH,
             "FORCE": TokenType.FORCE,
             "INSTALL": TokenType.INSTALL,
+            "INT8": TokenType.BIGINT,
             "LOGICAL": TokenType.BOOLEAN,
+            "MACRO": TokenType.FUNCTION,
             "ONLY": TokenType.ONLY,
             "PIVOT_WIDER": TokenType.PIVOT,
             "POSITIONAL": TokenType.POSITIONAL,
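
The two tokenizer keywords added above are small but user-visible; a minimal sketch (output approximate):

import sqlglot

# "INT8" now tokenizes to BIGINT when reading DuckDB SQL, so the type survives
# transpilation into dialects that lack the INT8 alias.
print(sqlglot.transpile("SELECT CAST(x AS INT8)", read="duckdb", write="snowflake")[0])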
@@ -580,7 +967,7 @@ class DuckDB(Dialect):
         ) -> t.Optional[exp.Expression]:
             bracket = super()._parse_bracket(this)

-            if self.dialect.version < Version("1.2") and isinstance(bracket, exp.Bracket):
+            if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
                 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                 bracket.set("returns_list_for_maps", True)

@@ -638,11 +1025,9 @@ class DuckDB(Dialect):
         def _parse_install(self, force: bool = False) -> exp.Install:
             return self.expression(
                 exp.Install,
-                **{
-                    "this": self._parse_id_var(),
-                    "from": self._parse_var_or_string() if self._match(TokenType.FROM) else None,
-                    "force": force,
-                },
+                this=self._parse_id_var(),
+                from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None,
+                force=force,
             )

         def _parse_primary(self) -> t.Optional[exp.Expression]:
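
A sketch of the reworked INSTALL parsing above, which now captures the extension name, optional FROM source, and FORCE flag as keyword args (round-tripping DuckDB; behavior assumed from this parser plus the install_sql generator later in the diff):

import sqlglot

# FORCE INSTALL <extension> FROM <source> should now round-trip; install_sql reads the
# new "from_" arg when regenerating the statement.
print(sqlglot.transpile("FORCE INSTALL httpfs FROM community", read="duckdb", write="duckdb")[0])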
@@ -680,11 +1065,16 @@ class DuckDB(Dialect):
         ARRAY_SIZE_DIM_REQUIRED = False
         NORMALIZE_EXTRACT_DATE_PARTS = True
         SUPPORTS_LIKE_QUANTIFIERS = False
+        SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True

         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
+            exp.AnyValue: _anyvalue_sql,
             exp.ApproxDistinct: approx_count_distinct_sql,
-            exp.Array: inline_array_unless_query,
+            exp.Array: transforms.preprocess(
+                [transforms.inherit_struct_field_names],
+                generator=inline_array_unless_query,
+            ),
             exp.ArrayFilter: rename_func("LIST_FILTER"),
             exp.ArrayRemove: remove_from_array_using_filter,
             exp.ArraySort: _array_sort_sql,
@@ -692,9 +1082,10 @@ class DuckDB(Dialect):
             exp.ArrayUniqueAgg: lambda self, e: self.func(
                 "LIST", exp.Distinct(expressions=[e.this])
             ),
+            exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
             exp.BitwiseAndAgg: rename_func("BIT_AND"),
+            exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
             exp.BitwiseOrAgg: rename_func("BIT_OR"),
-            exp.BitwiseXor: rename_func("XOR"),
             exp.BitwiseXorAgg: rename_func("BIT_XOR"),
             exp.CommentColumnConstraint: no_comment_column_constraint_sql,
             exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
@@ -729,17 +1120,20 @@ class DuckDB(Dialect):
             exp.IntDiv: lambda self, e: self.binary(e, "//"),
             exp.IsInf: rename_func("ISINF"),
             exp.IsNan: rename_func("ISNAN"),
+            exp.Floor: _floor_sql,
             exp.JSONBExists: rename_func("JSON_EXISTS"),
             exp.JSONExtract: _arrow_json_extract_sql,
             exp.JSONExtractArray: _json_extract_value_array_sql,
-            exp.JSONExtractScalar: _arrow_json_extract_sql,
             exp.JSONFormat: _json_format_sql,
             exp.JSONValueArray: _json_extract_value_array_sql,
             exp.Lateral: explode_to_unnest_sql,
             exp.LogicalOr: rename_func("BOOL_OR"),
             exp.LogicalAnd: rename_func("BOOL_AND"),
             exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
+            exp.Initcap: _initcap_sql,
             exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
+            exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)),
+            exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)),
             exp.MonthsBetween: lambda self, e: self.func(
                 "DATEDIFF",
                 "'month'",
@@ -763,6 +1157,8 @@ class DuckDB(Dialect):
                 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
             ),
             exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
+            exp.RegrValx: _regr_val_sql,
+            exp.RegrValy: _regr_val_sql,
             exp.Return: lambda self, e: self.sql(e, "this"),
             exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
             exp.Rand: rename_func("RANDOM"),
@@ -786,13 +1182,13 @@ class DuckDB(Dialect):
                 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
             ),
             exp.TimestampSub: date_delta_to_binary_interval_op(),
-            exp.TimestampTrunc: timestamptrunc_sql(),
             exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
             exp.TimeStrToTime: timestrtotime_sql,
             exp.TimeStrToUnix: lambda self, e: self.func(
                 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
             ),
             exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
+            exp.ToBoolean: _to_boolean_sql,
             exp.TimeToUnix: rename_func("EPOCH"),
             exp.TsOrDiToDi: lambda self,
             e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
@@ -804,6 +1200,12 @@ class DuckDB(Dialect):
                 exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
             ),
             exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
+            exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
+            exp.UnixSeconds: lambda self, e: self.sql(
+                exp.cast(
+                    self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT
+                )
+            ),
             exp.UnixToStr: lambda self, e: self.func(
                 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
             ),
@@ -836,6 +1238,7 @@ class DuckDB(Dialect):
             exp.DataType.Type.BPCHAR: "TEXT",
             exp.DataType.Type.CHAR: "TEXT",
             exp.DataType.Type.DATETIME: "TIMESTAMP",
+            exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)",
             exp.DataType.Type.FLOAT: "REAL",
             exp.DataType.Type.JSONB: "JSON",
             exp.DataType.Type.NCHAR: "TEXT",
@@ -848,6 +1251,7 @@ class DuckDB(Dialect):
             exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
             exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
             exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
+            exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)",
         }

         # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
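
A sketch for the UNIX_SECONDS/UNIX_MILLIS transform entries registered above (reading BigQuery; output approximate):

import sqlglot

# Per the TRANSFORMS above, UNIX_SECONDS should become CAST(EPOCH(...) AS BIGINT) and
# UNIX_MILLIS should become EPOCH_MS(...), with an implicit timestamp cast where needed.
sql = "SELECT UNIX_SECONDS(TIMESTAMP '2024-01-01 00:00:00+00')"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])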
@@ -955,6 +1359,135 @@ class DuckDB(Dialect):
             exp.NthValue,
         )

+        def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
+            """
+            Transpile Snowflake's BITMAP_BIT_POSITION to a DuckDB CASE expression.
+
+            Snowflake's BITMAP_BIT_POSITION behavior:
+            - For n <= 0: returns ABS(n) % 32768
+            - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
+            """
+            this = expression.this
+
+            return self.sql(
+                exp.Mod(
+                    this=exp.Paren(
+                        this=exp.If(
+                            this=exp.GT(this=this, expression=exp.Literal.number(0)),
+                            true=this - exp.Literal.number(1),
+                            false=exp.Abs(this=this),
+                        )
+                    ),
+                    expression=MAX_BIT_POSITION,
+                )
+            )
+
+        def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
+            """
+            Transpile Snowflake's RANDSTR to a DuckDB equivalent using deterministic hash-based random.
+
+            RANDSTR(length, generator) generates a random string of the specified length.
+            - With a numeric seed: use HASH(i + seed) for deterministic output (same seed = same result)
+            - With RANDOM(): use RANDOM() in the hash for non-deterministic output
+            - No generator: use the default seed value
+            """
+            length = expression.this
+            generator = expression.args.get("generator")
+
+            if generator:
+                if isinstance(generator, exp.Rand):
+                    # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
+                    seed_value = generator.this or generator
+                else:
+                    # Const/int or other expression - use as seed directly
+                    seed_value = generator
+            else:
+                # No generator specified, use default seed (arbitrary but deterministic)
+                seed_value = exp.Literal.number(RANDSTR_SEED)
+
+            length_sql = self.sql(length)
+            seed_sql = self.sql(seed_value)
+
+            query: exp.Select = exp.maybe_parse(
+                f"""
+                SELECT LISTAGG(
+                    SUBSTRING(
+                        '{RANDSTR_CHAR_POOL}',
+                        1 + CAST(FLOOR(random_value * 62) AS INT),
+                        1
+                    ),
+                    ''
+                )
+                FROM (
+                    SELECT (ABS(HASH(i + {seed_sql})) % 1000) / 1000.0 AS random_value
+                    FROM RANGE({length_sql}) AS t(i)
+                )
+                """,
+                dialect="duckdb",
+            )
+            return f"({self.sql(query)})"
+
+        def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
+            """
+            TO_BINARY(value, format) transpilation if the return type is BINARY:
+            - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
+            - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
+            - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')
+
+            format can be 'HEX', 'UTF-8' or 'BASE64'
+            return type can be either VARCHAR or BINARY
+            """
+            value = expression.this
+            format_arg = expression.args.get("format")
+
+            fmt = "HEX"
+            if format_arg:
+                fmt = format_arg.name.upper()
+
+            if expression.is_type(exp.DataType.Type.BINARY):
+                if fmt == "UTF-8":
+                    return self.func("ENCODE", value)
+                if fmt == "BASE64":
+                    return self.func("FROM_BASE64", value)
+
+                # Hex
+                return self.func("UNHEX", value)
+
+            # Fallback, which needs to be updated if we want to support transpilation from dialects other than Snowflake
+            return self.func("TO_BINARY", value)
+
+        def _greatest_least_sql(
+            self: DuckDB.Generator, expression: exp.Greatest | exp.Least
+        ) -> str:
+            """
+            Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
+
+            - If null_if_any_null=True (BigQuery-style): return NULL if any argument is NULL
+            - If null_if_any_null=False (DuckDB/PostgreSQL-style): ignore NULLs, return the greatest/least non-NULL value
+            """
+            # Get all arguments
+            all_args = [expression.this, *expression.expressions]
+            fallback_sql = self.function_fallback_sql(expression)

+            if expression.args.get("null_if_any_null"):
+                # BigQuery behavior: NULL if any argument is NULL
+                case_expr = exp.case().when(
+                    exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
+                    exp.null(),
+                    copy=False,
+                )
+                case_expr.set("default", fallback_sql)
+                return self.sql(case_expr)
+
+            # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
+            return self.sql(fallback_sql)
+
+        def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
+            return self._greatest_least_sql(expression)
+
+        def least_sql(self: DuckDB.Generator, expression: exp.Least) -> str:
+            return self._greatest_least_sql(expression)
+
         def lambda_sql(
             self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
         ) -> str:
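
A sketch of the NULL-propagating GREATEST path, assuming the BigQuery reader sets the null_if_any_null arg that `_greatest_least_sql` checks (output approximate):

import sqlglot

# BigQuery's GREATEST returns NULL if any argument is NULL, so the handler above should
# emit CASE WHEN a IS NULL OR b IS NULL THEN NULL ELSE GREATEST(a, b) END for DuckDB.
print(sqlglot.transpile("SELECT GREATEST(a, b)", read="bigquery", write="duckdb")[0])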
@@ -974,10 +1507,16 @@ class DuckDB(Dialect):
         def install_sql(self, expression: exp.Install) -> str:
             force = "FORCE " if expression.args.get("force") else ""
             this = self.sql(expression, "this")
-            from_clause = expression.args.get("from")
+            from_clause = expression.args.get("from_")
             from_clause = f" FROM {from_clause}" if from_clause else ""
             return f"{force}INSTALL {this}{from_clause}"

+        def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
+            self.unsupported(
+                "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
+            )
+            return self.function_fallback_sql(expression)
+
         def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
             return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

@@ -1084,14 +1623,14 @@ class DuckDB(Dialect):
             return self.function_fallback_sql(expression)

         def countif_sql(self, expression: exp.CountIf) -> str:
-            if self.dialect.version >= Version("1.2"):
+            if self.dialect.version >= (1, 2):
                 return self.function_fallback_sql(expression)

             # https://github.com/tobymao/sqlglot/pull/4749
             return count_if_to_sum(self, expression)

         def bracket_sql(self, expression: exp.Bracket) -> str:
-            if self.dialect.version >= Version("1.2"):
+            if self.dialect.version >= (1, 2):
                 return super().bracket_sql(expression)

             # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
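
The two version gates above compare against the targeted DuckDB engine version; a sketch, assuming the dialect string accepts a version setting (as the tuple comparison on `self.dialect.version` suggests):

import sqlglot

# Targeting an engine older than DuckDB 1.2 should take the count_if_to_sum fallback
# instead of emitting COUNT_IF directly (version setting assumed; see the gate above).
print(sqlglot.transpile("SELECT COUNT_IF(x > 1)", read="duckdb", write="duckdb, version=1.0")[0])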
@@ -1158,6 +1697,33 @@ class DuckDB(Dialect):

             return self.sql(case)

+        def lower_sql(self, expression: exp.Lower) -> str:
+            result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def upper_sql(self, expression: exp.Upper) -> str:
+            result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def replace_sql(self, expression: exp.Replace) -> str:
+            result_sql = self.func(
+                "REPLACE",
+                _cast_to_varchar(expression.this),
+                _cast_to_varchar(expression.expression),
+                _cast_to_varchar(expression.args.get("replacement")),
+            )
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
+            _prepare_binary_bitwise_args(expression)
+            result_sql = self.binary(expression, op)
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
+            _prepare_binary_bitwise_args(expression)
+            result_sql = self.func("XOR", expression.this, expression.expression)
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
         def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
             this = expression.this
             key = expression.args.get("key")
@@ -1173,6 +1739,13 @@ class DuckDB(Dialect):

             return self.func("STRUCT_INSERT", this, kv_sql)

+        def startswith_sql(self, expression: exp.StartsWith) -> str:
+            return self.func(
+                "STARTS_WITH",
+                _cast_to_varchar(expression.this),
+                _cast_to_varchar(expression.expression),
+            )
+
         def unnest_sql(self, expression: exp.Unnest) -> str:
             explode_array = expression.args.get("explode_array")
             if explode_array:
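
A sketch for the new startswith_sql override (Snowflake spells the function STARTSWITH; output approximate):

import sqlglot

# Snowflake STARTSWITH -> DuckDB STARTS_WITH; non-VARCHAR arguments get an implicit
# cast to TEXT via _cast_to_varchar when type information is available.
print(sqlglot.transpile("SELECT STARTSWITH(col, 'abc')", read="snowflake", write="duckdb")[0])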
@@ -1206,7 +1779,7 @@ class DuckDB(Dialect):
             if isinstance(this, exp.First):
                 this = exp.AnyValue(this=this.this)

-            if not isinstance(this, exp.AnyValue):
+            if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
                 self.unsupported("IGNORE NULLS is not supported for non-window functions.")

             return self.sql(this)
@@ -1229,10 +1802,19 @@ class DuckDB(Dialect):

             return self.func("ARRAY_TO_STRING", this, expression.expression)

-        @unsupported_args("position", "occurrence")
         def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
+            this = expression.this
             group = expression.args.get("group")
             params = expression.args.get("parameters")
+            position = expression.args.get("position")
+            occurrence = expression.args.get("occurrence")
+            null_if_pos_overflow = expression.args.get("null_if_pos_overflow")
+
+            if position and (not position.is_int or position.to_py() > 1):
+                this = exp.Substring(this=this, start=position)
+
+            if null_if_pos_overflow:
+                this = exp.Nullif(this=this, expression=exp.Literal.string(""))

             # Do not render group if there is no following argument,
             # and it's the default value for this dialect
@@ -1242,9 +1824,15 @@ class DuckDB(Dialect):
                 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
             ):
                 group = None
-
-            return self.func(
-                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
-            )
+
+            if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
+                return self.func(
+                    "ARRAY_EXTRACT",
+                    self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
+                    exp.Literal.number(occurrence),
+                )
+
+            return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)

         @unsupported_args("culture")
         def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
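
A sketch of the new position/occurrence support in regexpextract_sql, assuming Snowflake's REGEXP_SUBSTR(subject, pattern, position, occurrence) parses into exp.RegexpExtract (output approximate):

import sqlglot

# Per the handler above: position > 1 becomes SUBSTRING(subject, position), and
# occurrence > 1 becomes ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(...), occurrence).
sql = "SELECT REGEXP_SUBSTR(body, '[a-z]+', 2, 3)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])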
@@ -1347,3 +1935,111 @@ class DuckDB(Dialect):
             to_hex = exp.cast(self.func("TO_HEX", from_hex), exp.DataType.Type.BLOB)

             return self.sql(to_hex)
+
+        def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
+            unit = unit_to_str(expression)
+            zone = expression.args.get("zone")
+            timestamp = expression.this
+
+            if is_date_unit(unit) and zone:
+                # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
+                # Double AT TIME ZONE needed for BigQuery compatibility:
+                # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
+                # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
+                timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
+                result_sql = self.func("DATE_TRUNC", unit, timestamp)
+                return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))
+
+            return self.func("DATE_TRUNC", unit, timestamp)
+
+        def trim_sql(self, expression: exp.Trim) -> str:
+            result_sql = self.func(
+                "TRIM",
+                _cast_to_varchar(expression.this),
+                _cast_to_varchar(expression.expression),
+            )
+            return _gen_with_cast_to_blob(self, expression, result_sql)
+
+        def round_sql(self, expression: exp.Round) -> str:
+            this = expression.this
+            decimals = expression.args.get("decimals")
+            truncate = expression.args.get("truncate")
+
+            # DuckDB requires the scale (decimals) argument to be an INT
+            # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
+            if decimals is not None and expression.args.get("casts_non_integer_decimals"):
+                if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
+                    decimals = exp.cast(decimals, exp.DataType.Type.INT)
+
+            func = "ROUND"
+            if truncate:
+                # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
+                if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
+                    func = "ROUND_EVEN"
+                    truncate = None
+                # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
+                elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
+                    truncate = None
+
+            return self.func(func, this, decimals, truncate)
+
+        def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
+            """
+            BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
+            dividing the input distribution into n equal-sized buckets.
+
+            Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
+            does not document the specific algorithm used, so results may differ. DuckDB does not
+            support RESPECT NULLS.
+            """
+            this = expression.this
+            if isinstance(this, exp.Distinct):
+                # APPROX_QUANTILES requires 2 args and the DISTINCT node grabs both
+                if len(this.expressions) < 2:
+                    self.unsupported("APPROX_QUANTILES requires a bucket count argument")
+                    return self.function_fallback_sql(expression)
+                num_quantiles_expr = this.expressions[1].pop()
+            else:
+                num_quantiles_expr = expression.expression
+
+            if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
+                self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
+                return self.function_fallback_sql(expression)
+
+            num_quantiles = t.cast(int, num_quantiles_expr.to_py())
+            if num_quantiles <= 0:
+                self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
+                return self.function_fallback_sql(expression)
+
+            quantiles = [
+                exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
+                for i in range(num_quantiles + 1)
+            ]
+
+            return self.sql(
+                exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
+            )
+
+        def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
+            if expression.args.get("scalar_only"):
+                expression = exp.JSONExtractScalar(
+                    this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
+                )
+            return _arrow_json_extract_sql(self, expression)
+
+        def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
+            this = expression.this
+
+            if _is_binary(this):
+                expression.type = exp.DataType.build("BINARY")
+
+            arg = _cast_to_bit(this)
+
+            if isinstance(this, exp.Neg):
+                arg = exp.Paren(this=arg)
+
+            expression.set("this", arg)
+
+            result_sql = f"~{self.sql(expression, 'this')}"
+
+            return _gen_with_cast_to_blob(self, expression, result_sql)