sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.1.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from decimal import Decimal
4
+ from itertools import groupby
3
5
  import re
4
6
  import typing as t
5
7
 
@@ -9,7 +11,6 @@ from sqlglot.dialects.dialect import (
9
11
  Dialect,
10
12
  JSON_EXTRACT_TYPE,
11
13
  NormalizationStrategy,
12
- Version,
13
14
  approx_count_distinct_sql,
14
15
  arrow_json_extract_sql,
15
16
  binary_from_function,
@@ -22,7 +23,6 @@ from sqlglot.dialects.dialect import (
22
23
  no_datetime_sql,
23
24
  encode_decode_sql,
24
25
  build_formatted_time,
25
- inline_array_unless_query,
26
26
  no_comment_column_constraint_sql,
27
27
  no_time_sql,
28
28
  no_timestamp_sql,
@@ -31,7 +31,6 @@ from sqlglot.dialects.dialect import (
31
31
  remove_from_array_using_filter,
32
32
  strposition_sql,
33
33
  str_to_time_sql,
34
- timestamptrunc_sql,
35
34
  timestrtotime_sql,
36
35
  unit_to_str,
37
36
  sha256_sql,
@@ -39,10 +38,12 @@ from sqlglot.dialects.dialect import (
39
38
  explode_to_unnest_sql,
40
39
  no_make_interval_sql,
41
40
  groupconcat_sql,
41
+ inline_array_unless_query,
42
42
  regexp_replace_global_modifier,
43
+ sha2_digest_sql,
43
44
  )
44
45
  from sqlglot.generator import unsupported_args
45
- from sqlglot.helper import seq_get
46
+ from sqlglot.helper import is_date_unit, seq_get
46
47
  from sqlglot.tokens import TokenType
47
48
  from sqlglot.parser import binary_range_parser
48
49
 
@@ -50,6 +51,86 @@ from sqlglot.parser import binary_range_parser
50
51
  # The pattern matches timezone offsets that appear after the time portion
51
52
  TIMEZONE_PATTERN = re.compile(r":\d{2}.*?[+\-]\d{2}(?::\d{2})?")
52
53
 
54
+ # Characters that must be escaped when building regex expressions in INITCAP
55
+ REGEX_ESCAPE_REPLACEMENTS = {
56
+ "\\": "\\\\",
57
+ "-": r"\-",
58
+ "^": r"\^",
59
+ "[": r"\[",
60
+ "]": r"\]",
61
+ }
62
+
63
+ # Used in RANDSTR transpilation
64
+ RANDSTR_CHAR_POOL = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
65
+ RANDSTR_SEED = 123456
66
+
67
+ # Whitespace control characters that DuckDB must process with `CHR({val})` calls
68
+ WS_CONTROL_CHARS_TO_DUCK = {
69
+ "\u000b": 11,
70
+ "\u001c": 28,
71
+ "\u001d": 29,
72
+ "\u001e": 30,
73
+ "\u001f": 31,
74
+ }
75
+
76
+ # Days of week to ISO 8601 day-of-week numbers
77
+ # ISO 8601 standard: Monday=1, Tuesday=2, Wednesday=3, Thursday=4, Friday=5, Saturday=6, Sunday=7
78
+ WEEK_START_DAY_TO_DOW = {
79
+ "MONDAY": 1,
80
+ "TUESDAY": 2,
81
+ "WEDNESDAY": 3,
82
+ "THURSDAY": 4,
83
+ "FRIDAY": 5,
84
+ "SATURDAY": 6,
85
+ "SUNDAY": 7,
86
+ }
87
+
88
+ MAX_BIT_POSITION = exp.Literal.number(32768)
89
+
90
+
91
+ def _to_boolean_sql(self: DuckDB.Generator, expression: exp.ToBoolean) -> str:
92
+ """
93
+ Transpile TO_BOOLEAN function from Snowflake to DuckDB equivalent.
94
+
95
+ DuckDB's CAST to BOOLEAN supports most of Snowflake's TO_BOOLEAN strings except 'on'/'off'.
96
+ We need to handle the 'on'/'off' cases explicitly, plus NaN/INF error cases.
97
+
98
+ In Snowflake, NaN and INF values cause errors. We use DuckDB's native ERROR()
99
+ function to replicate this behavior with a clear error message.
100
+ """
101
+ arg = expression.this
102
+
103
+ cast_to_real = exp.func("TRY_CAST", arg, exp.DataType.build("REAL"))
104
+
105
+ # Check for NaN and INF values
106
+ nan_inf_check = exp.Or(
107
+ this=exp.func("ISNAN", cast_to_real), expression=exp.func("ISINF", cast_to_real)
108
+ )
109
+
110
+ case_expr = (
111
+ exp.case()
112
+ .when(
113
+ nan_inf_check,
114
+ exp.func(
115
+ "ERROR",
116
+ exp.Literal.string("TO_BOOLEAN: Non-numeric values NaN and INF are not supported"),
117
+ ),
118
+ )
119
+ # Handle 'on' -> TRUE (case insensitive) - only for string literals
120
+ .when(
121
+ exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("ON")),
122
+ exp.true(),
123
+ )
124
+ # Handle 'off' -> FALSE (case insensitive) - only for string literals
125
+ .when(
126
+ exp.Upper(this=exp.cast(arg, exp.DataType.Type.VARCHAR)).eq(exp.Literal.string("OFF")),
127
+ exp.false(),
128
+ )
129
+ .else_(exp.cast(arg, exp.DataType.Type.BOOLEAN))
130
+ )
131
+
132
+ return self.sql(case_expr)
133
+
53
134
 
54
135
  # BigQuery -> DuckDB conversion for the DATE function
55
136
  def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
@@ -231,9 +312,86 @@ def _implicit_datetime_cast(
231
312
  return arg
232
313
 
233
314
 
315
+ def _week_unit_to_dow(unit: t.Optional[exp.Expression]) -> t.Optional[int]:
316
+ """
317
+ Compute the Monday-based day shift to align DATE_DIFF('WEEK', ...) coming
318
+ from other dialects, e.g. BigQuery's WEEK(<day>) or ISOWEEK unit parts.
319
+
320
+ Args:
321
+ unit: The unit expression (Var for ISOWEEK or WeekStart)
322
+
323
+ Returns:
324
+ The ISO 8601 day number (Monday=1, Sunday=7 etc) or None if not a week unit or if day is dynamic (not a constant).
325
+
326
+ Examples:
327
+ "WEEK(SUNDAY)" -> 7
328
+ "WEEK(MONDAY)" -> 1
329
+ "ISOWEEK" -> 1
330
+ """
331
+ # Handle plain Var expressions for ISOWEEK only
332
+ if isinstance(unit, exp.Var) and unit.name.upper() in "ISOWEEK":
333
+ return 1
334
+
335
+ # Handle WeekStart expressions with explicit day
336
+ if isinstance(unit, exp.WeekStart):
337
+ return WEEK_START_DAY_TO_DOW.get(unit.name.upper())
338
+
339
+ return None
340
+
341
+
342
+ def _build_week_trunc_expression(date_expr: exp.Expression, start_dow: int) -> exp.Expression:
343
+ """
344
+ Build DATE_TRUNC expression for week boundaries with custom start day.
345
+
346
+ Args:
347
+ date_expr: The date expression to truncate
348
+ start_dow: ISO 8601 day-of-week number (Monday=1, ..., Sunday=7)
349
+
350
+ DuckDB's DATE_TRUNC('WEEK', ...) aligns weeks to Monday (ISO standard).
351
+ To align to a different start day, we shift the date before truncating.
352
+
353
+ Shift formula: Sunday (7) gets +1, others get (1 - start_dow)
354
+ Examples:
355
+ Monday (1): shift = 0 (no shift needed)
356
+ Tuesday (2): shift = -1 (shift back 1 day) ...
357
+ Sunday (7): shift = +1 (shift forward 1 day, wraps to next Monday-based week)
358
+ """
359
+ shift_days = 1 if start_dow == 7 else 1 - start_dow
360
+
361
+ # Shift date to align week boundaries with the desired start day
362
+ # No shift needed for Monday-based weeks (shift_days == 0)
363
+ shifted_date = (
364
+ exp.DateAdd(
365
+ this=date_expr,
366
+ expression=exp.Interval(this=exp.Literal.string(str(shift_days)), unit=exp.var("DAY")),
367
+ )
368
+ if shift_days != 0
369
+ else date_expr
370
+ )
371
+
372
+ return exp.DateTrunc(unit=exp.var("WEEK"), this=shifted_date)
373
+
374
+
234
375
  def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
235
376
  this = _implicit_datetime_cast(expression.this)
236
377
  expr = _implicit_datetime_cast(expression.expression)
378
+ unit = expression.args.get("unit")
379
+
380
+ # DuckDB's WEEK diff does not respect Monday crossing (week boundaries), it checks (end_day - start_day) / 7:
381
+ # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-17' AS DATE)) --> 0 (Monday crossed)
382
+ # SELECT DATE_DIFF('WEEK', CAST('2024-12-13' AS DATE), CAST('2024-12-20' AS DATE)) --> 1 (7 days difference)
383
+ # Whereas for other units such as MONTH it does respect month boundaries:
384
+ # SELECT DATE_DIFF('MONTH', CAST('2024-11-30' AS DATE), CAST('2024-12-01' AS DATE)) --> 1 (Month crossed)
385
+ date_part_boundary = expression.args.get("date_part_boundary")
386
+
387
+ # Extract week start day; returns None if day is dynamic (column/placeholder)
388
+ week_start = _week_unit_to_dow(unit)
389
+ if date_part_boundary and week_start and this and expr:
390
+ expression.set("unit", exp.Literal.string("WEEK"))
391
+
392
+ # Truncate both dates to week boundaries to respect input dialect semantics
393
+ this = _build_week_trunc_expression(this, week_start)
394
+ expr = _build_week_trunc_expression(expr, week_start)
237
395
 
238
396
  return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
239
397
 
@@ -268,6 +426,228 @@ def _json_extract_value_array_sql(
268
426
  return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
269
427
 
270
428
 
429
+ def _cast_to_varchar(arg: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
430
+ if arg and arg.type and not arg.is_type(exp.DataType.Type.VARCHAR, exp.DataType.Type.UNKNOWN):
431
+ return exp.cast(arg, exp.DataType.Type.VARCHAR)
432
+ return arg
433
+
434
+
435
+ def _is_binary(arg: exp.Expression) -> bool:
436
+ return arg.is_type(
437
+ exp.DataType.Type.BINARY,
438
+ exp.DataType.Type.VARBINARY,
439
+ exp.DataType.Type.BLOB,
440
+ )
441
+
442
+
443
+ def _gen_with_cast_to_blob(
444
+ self: DuckDB.Generator, expression: exp.Expression, result_sql: str
445
+ ) -> str:
446
+ if _is_binary(expression):
447
+ blob = exp.DataType.build("BLOB", dialect="duckdb")
448
+ result_sql = self.sql(exp.Cast(this=result_sql, to=blob))
449
+ return result_sql
450
+
451
+
452
+ def _cast_to_bit(arg: exp.Expression) -> exp.Expression:
453
+ if not _is_binary(arg):
454
+ return arg
455
+
456
+ if isinstance(arg, exp.HexString):
457
+ arg = exp.Unhex(this=exp.Literal.string(arg.this))
458
+
459
+ return exp.cast(arg, exp.DataType.Type.BIT)
460
+
461
+
462
+ def _prepare_binary_bitwise_args(expression: exp.Binary) -> None:
463
+ if _is_binary(expression.this):
464
+ expression.set("this", _cast_to_bit(expression.this))
465
+ if _is_binary(expression.expression):
466
+ expression.set("expression", _cast_to_bit(expression.expression))
467
+
468
+
469
+ def _anyvalue_sql(self: DuckDB.Generator, expression: exp.AnyValue) -> str:
470
+ # Transform ANY_VALUE(expr HAVING MAX/MIN having_expr) to ARG_MAX_NULL/ARG_MIN_NULL
471
+ having = expression.this
472
+ if isinstance(having, exp.HavingMax):
473
+ func_name = "ARG_MAX_NULL" if having.args.get("max") else "ARG_MIN_NULL"
474
+ return self.func(func_name, having.this, having.expression)
475
+ return self.function_fallback_sql(expression)
476
+
477
+
478
+ def _literal_sql_with_ws_chr(self: DuckDB.Generator, literal: str) -> str:
479
+ # DuckDB does not support \uXXXX escapes, so we must use CHR() instead of replacing them directly
480
+ if not any(ch in WS_CONTROL_CHARS_TO_DUCK for ch in literal):
481
+ return self.sql(exp.Literal.string(literal))
482
+
483
+ sql_segments: t.List[str] = []
484
+ for is_ws_control, group in groupby(literal, key=lambda ch: ch in WS_CONTROL_CHARS_TO_DUCK):
485
+ if is_ws_control:
486
+ for ch in group:
487
+ duckdb_char_code = WS_CONTROL_CHARS_TO_DUCK[ch]
488
+ sql_segments.append(self.func("CHR", exp.Literal.number(str(duckdb_char_code))))
489
+ else:
490
+ sql_segments.append(self.sql(exp.Literal.string("".join(group))))
491
+
492
+ sql = " || ".join(sql_segments)
493
+ return sql if len(sql_segments) == 1 else f"({sql})"
494
+
495
+
496
+ def _escape_regex_metachars(
497
+ self: DuckDB.Generator, delimiters: t.Optional[exp.Expression], delimiters_sql: str
498
+ ) -> str:
499
+ r"""
500
+ Escapes regex metacharacters \ - ^ [ ] for use in character classes regex expressions.
501
+
502
+ Literal strings are escaped at transpile time, expressions handled with REPLACE() calls.
503
+ """
504
+ if not delimiters:
505
+ return delimiters_sql
506
+
507
+ if delimiters.is_string:
508
+ literal_value = delimiters.this
509
+ escaped_literal = "".join(REGEX_ESCAPE_REPLACEMENTS.get(ch, ch) for ch in literal_value)
510
+ return _literal_sql_with_ws_chr(self, escaped_literal)
511
+
512
+ escaped_sql = delimiters_sql
513
+ for raw, escaped in REGEX_ESCAPE_REPLACEMENTS.items():
514
+ escaped_sql = self.func(
515
+ "REPLACE",
516
+ escaped_sql,
517
+ self.sql(exp.Literal.string(raw)),
518
+ self.sql(exp.Literal.string(escaped)),
519
+ )
520
+
521
+ return escaped_sql
522
+
523
+
524
+ def _build_capitalization_sql(
525
+ self: DuckDB.Generator,
526
+ value_to_split: str,
527
+ delimiters_sql: str,
528
+ ) -> str:
529
+ # empty string delimiter --> treat value as one word, no need to split
530
+ if delimiters_sql == "''":
531
+ return f"UPPER(LEFT({value_to_split}, 1)) || LOWER(SUBSTRING({value_to_split}, 2))"
532
+
533
+ delim_regex_sql = f"CONCAT('[', {delimiters_sql}, ']')"
534
+ split_regex_sql = f"CONCAT('([', {delimiters_sql}, ']+|[^', {delimiters_sql}, ']+)')"
535
+
536
+ # REGEXP_EXTRACT_ALL produces a list of string segments, alternating between delimiter and non-delimiter segments.
537
+ # We do not know whether the first segment is a delimiter or not, so we check the first character of the string
538
+ # with REGEXP_MATCHES. If the first char is a delimiter, we capitalize even list indexes, otherwise capitalize odd.
539
+ return self.func(
540
+ "ARRAY_TO_STRING",
541
+ exp.case()
542
+ .when(
543
+ f"REGEXP_MATCHES(LEFT({value_to_split}, 1), {delim_regex_sql})",
544
+ self.func(
545
+ "LIST_TRANSFORM",
546
+ self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
547
+ "(seg, idx) -> CASE WHEN idx % 2 = 0 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
548
+ ),
549
+ )
550
+ .else_(
551
+ self.func(
552
+ "LIST_TRANSFORM",
553
+ self.func("REGEXP_EXTRACT_ALL", value_to_split, split_regex_sql),
554
+ "(seg, idx) -> CASE WHEN idx % 2 = 1 THEN UPPER(LEFT(seg, 1)) || LOWER(SUBSTRING(seg, 2)) ELSE seg END",
555
+ ),
556
+ ),
557
+ "''",
558
+ )
559
+
560
+
561
+ def _initcap_sql(self: DuckDB.Generator, expression: exp.Initcap) -> str:
562
+ this_sql = self.sql(expression, "this")
563
+ delimiters = expression.args.get("expression")
564
+ if delimiters is None:
565
+ # fallback for manually created exp.Initcap w/o delimiters arg
566
+ delimiters = exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)
567
+ delimiters_sql = self.sql(delimiters)
568
+
569
+ escaped_delimiters_sql = _escape_regex_metachars(self, delimiters, delimiters_sql)
570
+
571
+ return _build_capitalization_sql(self, this_sql, escaped_delimiters_sql)
572
+
573
+
574
+ def _floor_sql(self: DuckDB.Generator, expression: exp.Floor) -> str:
575
+ decimals = expression.args.get("decimals")
576
+
577
+ if decimals is not None and expression.args.get("to") is None:
578
+ this = expression.this
579
+ if isinstance(this, exp.Binary):
580
+ this = exp.Paren(this=this)
581
+
582
+ n_int = decimals
583
+ if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
584
+ n_int = exp.cast(decimals, exp.DataType.Type.INT)
585
+
586
+ pow_ = exp.Pow(this=exp.Literal.number("10"), expression=n_int)
587
+ floored = exp.Floor(this=exp.Mul(this=this, expression=pow_))
588
+ result = exp.Div(this=floored, expression=pow_.copy())
589
+
590
+ return self.round_sql(
591
+ exp.Round(this=result, decimals=decimals, casts_non_integer_decimals=True)
592
+ )
593
+
594
+ return self.ceil_floor(expression)
595
+
596
+
597
+ def _regr_val_sql(
598
+ self: DuckDB.Generator,
599
+ expression: exp.RegrValx | exp.RegrValy,
600
+ ) -> str:
601
+ """
602
+ Transpile Snowflake's REGR_VALX/REGR_VALY to DuckDB equivalent.
603
+
604
+ REGR_VALX(y, x) returns NULL if y is NULL; otherwise returns x.
605
+ REGR_VALY(y, x) returns NULL if x is NULL; otherwise returns y.
606
+ """
607
+ from sqlglot.optimizer.annotate_types import annotate_types
608
+
609
+ y = expression.this
610
+ x = expression.expression
611
+
612
+ # Determine which argument to check for NULL and which to return based on expression type
613
+ if isinstance(expression, exp.RegrValx):
614
+ # REGR_VALX: check y for NULL, return x
615
+ check_for_null = y
616
+ return_value = x
617
+ return_value_attr = "expression"
618
+ else:
619
+ # REGR_VALY: check x for NULL, return y
620
+ check_for_null = x
621
+ return_value = y
622
+ return_value_attr = "this"
623
+
624
+ # Get the type from the return argument
625
+ result_type = return_value.type
626
+
627
+ # If no type info, annotate the expression to infer types
628
+ if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
629
+ try:
630
+ annotated = annotate_types(expression.copy(), dialect=self.dialect)
631
+ result_type = getattr(annotated, return_value_attr).type
632
+ except Exception:
633
+ pass
634
+
635
+ # Default to DOUBLE for regression functions if type still unknown
636
+ if not result_type or result_type.this == exp.DataType.Type.UNKNOWN:
637
+ result_type = exp.DataType.build("DOUBLE")
638
+
639
+ # Cast NULL to the same type as return_value to avoid DuckDB type inference issues
640
+ typed_null = exp.Cast(this=exp.Null(), to=result_type)
641
+
642
+ return self.sql(
643
+ exp.If(
644
+ this=exp.Is(this=check_for_null.copy(), expression=exp.Null()),
645
+ true=typed_null,
646
+ false=return_value.copy(),
647
+ )
648
+ )
649
+
650
+
271
651
  class DuckDB(Dialect):
272
652
  NULL_ORDERING = "nulls_are_last"
273
653
  SUPPORTS_USER_DEFINED_TYPES = True
@@ -286,8 +666,13 @@ class DuckDB(Dialect):
286
666
  **Dialect.DATE_PART_MAPPING,
287
667
  "DAYOFWEEKISO": "ISODOW",
288
668
  }
669
+
289
670
  DATE_PART_MAPPING.pop("WEEKDAY")
290
671
 
672
+ INVERSE_TIME_MAPPING = {
673
+ "%e": "%-d", # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
674
+ }
675
+
291
676
  def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
292
677
  if isinstance(path, exp.Literal):
293
678
  # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
@@ -323,7 +708,9 @@ class DuckDB(Dialect):
323
708
  "DETACH": TokenType.DETACH,
324
709
  "FORCE": TokenType.FORCE,
325
710
  "INSTALL": TokenType.INSTALL,
711
+ "INT8": TokenType.BIGINT,
326
712
  "LOGICAL": TokenType.BOOLEAN,
713
+ "MACRO": TokenType.FUNCTION,
327
714
  "ONLY": TokenType.ONLY,
328
715
  "PIVOT_WIDER": TokenType.PIVOT,
329
716
  "POSITIONAL": TokenType.POSITIONAL,
@@ -580,7 +967,7 @@ class DuckDB(Dialect):
580
967
  ) -> t.Optional[exp.Expression]:
581
968
  bracket = super()._parse_bracket(this)
582
969
 
583
- if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
970
+ if self.dialect.version < (1, 2) and isinstance(bracket, exp.Bracket):
584
971
  # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
585
972
  bracket.set("returns_list_for_maps", True)
586
973
 
@@ -638,11 +1025,9 @@ class DuckDB(Dialect):
638
1025
  def _parse_install(self, force: bool = False) -> exp.Install:
639
1026
  return self.expression(
640
1027
  exp.Install,
641
- **{ # type: ignore
642
- "this": self._parse_id_var(),
643
- "from": self._parse_var_or_string() if self._match(TokenType.FROM) else None,
644
- "force": force,
645
- },
1028
+ this=self._parse_id_var(),
1029
+ from_=self._parse_var_or_string() if self._match(TokenType.FROM) else None,
1030
+ force=force,
646
1031
  )
647
1032
 
648
1033
  def _parse_primary(self) -> t.Optional[exp.Expression]:
@@ -680,11 +1065,16 @@ class DuckDB(Dialect):
680
1065
  ARRAY_SIZE_DIM_REQUIRED = False
681
1066
  NORMALIZE_EXTRACT_DATE_PARTS = True
682
1067
  SUPPORTS_LIKE_QUANTIFIERS = False
1068
+ SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD = True
683
1069
 
684
1070
  TRANSFORMS = {
685
1071
  **generator.Generator.TRANSFORMS,
1072
+ exp.AnyValue: _anyvalue_sql,
686
1073
  exp.ApproxDistinct: approx_count_distinct_sql,
687
- exp.Array: inline_array_unless_query,
1074
+ exp.Array: transforms.preprocess(
1075
+ [transforms.inherit_struct_field_names],
1076
+ generator=inline_array_unless_query,
1077
+ ),
688
1078
  exp.ArrayFilter: rename_func("LIST_FILTER"),
689
1079
  exp.ArrayRemove: remove_from_array_using_filter,
690
1080
  exp.ArraySort: _array_sort_sql,
@@ -692,9 +1082,10 @@ class DuckDB(Dialect):
692
1082
  exp.ArrayUniqueAgg: lambda self, e: self.func(
693
1083
  "LIST", exp.Distinct(expressions=[e.this])
694
1084
  ),
1085
+ exp.BitwiseAnd: lambda self, e: self._bitwise_op(e, "&"),
695
1086
  exp.BitwiseAndAgg: rename_func("BIT_AND"),
1087
+ exp.BitwiseOr: lambda self, e: self._bitwise_op(e, "|"),
696
1088
  exp.BitwiseOrAgg: rename_func("BIT_OR"),
697
- exp.BitwiseXor: rename_func("XOR"),
698
1089
  exp.BitwiseXorAgg: rename_func("BIT_XOR"),
699
1090
  exp.CommentColumnConstraint: no_comment_column_constraint_sql,
700
1091
  exp.CosineDistance: rename_func("LIST_COSINE_DISTANCE"),
@@ -729,17 +1120,20 @@ class DuckDB(Dialect):
729
1120
  exp.IntDiv: lambda self, e: self.binary(e, "//"),
730
1121
  exp.IsInf: rename_func("ISINF"),
731
1122
  exp.IsNan: rename_func("ISNAN"),
1123
+ exp.Floor: _floor_sql,
732
1124
  exp.JSONBExists: rename_func("JSON_EXISTS"),
733
1125
  exp.JSONExtract: _arrow_json_extract_sql,
734
1126
  exp.JSONExtractArray: _json_extract_value_array_sql,
735
- exp.JSONExtractScalar: _arrow_json_extract_sql,
736
1127
  exp.JSONFormat: _json_format_sql,
737
1128
  exp.JSONValueArray: _json_extract_value_array_sql,
738
1129
  exp.Lateral: explode_to_unnest_sql,
739
1130
  exp.LogicalOr: rename_func("BOOL_OR"),
740
1131
  exp.LogicalAnd: rename_func("BOOL_AND"),
741
1132
  exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "),
1133
+ exp.Initcap: _initcap_sql,
742
1134
  exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
1135
+ exp.SHA1Digest: lambda self, e: self.func("UNHEX", self.func("SHA1", e.this)),
1136
+ exp.SHA2Digest: lambda self, e: self.func("UNHEX", sha2_digest_sql(self, e)),
743
1137
  exp.MonthsBetween: lambda self, e: self.func(
744
1138
  "DATEDIFF",
745
1139
  "'month'",
@@ -763,6 +1157,8 @@ class DuckDB(Dialect):
763
1157
  "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i")
764
1158
  ),
765
1159
  exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
1160
+ exp.RegrValx: _regr_val_sql,
1161
+ exp.RegrValy: _regr_val_sql,
766
1162
  exp.Return: lambda self, e: self.sql(e, "this"),
767
1163
  exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
768
1164
  exp.Rand: rename_func("RANDOM"),
@@ -786,13 +1182,13 @@ class DuckDB(Dialect):
786
1182
  "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
787
1183
  ),
788
1184
  exp.TimestampSub: date_delta_to_binary_interval_op(),
789
- exp.TimestampTrunc: timestamptrunc_sql(),
790
1185
  exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
791
1186
  exp.TimeStrToTime: timestrtotime_sql,
792
1187
  exp.TimeStrToUnix: lambda self, e: self.func(
793
1188
  "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
794
1189
  ),
795
1190
  exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
1191
+ exp.ToBoolean: _to_boolean_sql,
796
1192
  exp.TimeToUnix: rename_func("EPOCH"),
797
1193
  exp.TsOrDiToDi: lambda self,
798
1194
  e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
@@ -804,6 +1200,12 @@ class DuckDB(Dialect):
804
1200
  exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
805
1201
  ),
806
1202
  exp.UnixMicros: lambda self, e: self.func("EPOCH_US", _implicit_datetime_cast(e.this)),
1203
+ exp.UnixMillis: lambda self, e: self.func("EPOCH_MS", _implicit_datetime_cast(e.this)),
1204
+ exp.UnixSeconds: lambda self, e: self.sql(
1205
+ exp.cast(
1206
+ self.func("EPOCH", _implicit_datetime_cast(e.this)), exp.DataType.Type.BIGINT
1207
+ )
1208
+ ),
807
1209
  exp.UnixToStr: lambda self, e: self.func(
808
1210
  "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
809
1211
  ),
@@ -836,6 +1238,7 @@ class DuckDB(Dialect):
836
1238
  exp.DataType.Type.BPCHAR: "TEXT",
837
1239
  exp.DataType.Type.CHAR: "TEXT",
838
1240
  exp.DataType.Type.DATETIME: "TIMESTAMP",
1241
+ exp.DataType.Type.DECFLOAT: "DECIMAL(38, 5)",
839
1242
  exp.DataType.Type.FLOAT: "REAL",
840
1243
  exp.DataType.Type.JSONB: "JSON",
841
1244
  exp.DataType.Type.NCHAR: "TEXT",
@@ -848,6 +1251,7 @@ class DuckDB(Dialect):
848
1251
  exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
849
1252
  exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
850
1253
  exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
1254
+ exp.DataType.Type.BIGDECIMAL: "DECIMAL(38, 5)",
851
1255
  }
852
1256
 
853
1257
  # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
@@ -955,6 +1359,135 @@ class DuckDB(Dialect):
955
1359
  exp.NthValue,
956
1360
  )
957
1361
 
1362
+ def bitmapbitposition_sql(self: DuckDB.Generator, expression: exp.BitmapBitPosition) -> str:
1363
+ """
1364
+ Transpile Snowflake's BITMAP_BIT_POSITION to DuckDB CASE expression.
1365
+
1366
+ Snowflake's BITMAP_BIT_POSITION behavior:
1367
+ - For n <= 0: returns ABS(n) % 32768
1368
+ - For n > 0: returns (n - 1) % 32768 (maximum return value is 32767)
1369
+ """
1370
+ this = expression.this
1371
+
1372
+ return self.sql(
1373
+ exp.Mod(
1374
+ this=exp.Paren(
1375
+ this=exp.If(
1376
+ this=exp.GT(this=this, expression=exp.Literal.number(0)),
1377
+ true=this - exp.Literal.number(1),
1378
+ false=exp.Abs(this=this),
1379
+ )
1380
+ ),
1381
+ expression=MAX_BIT_POSITION,
1382
+ )
1383
+ )
1384
+
1385
+ def randstr_sql(self: DuckDB.Generator, expression: exp.Randstr) -> str:
1386
+ """
1387
+ Transpile Snowflake's RANDSTR to DuckDB equivalent using deterministic hash-based random.
1388
+
1389
+ RANDSTR(length, generator) generates a random string of specified length.
1390
+ - With numeric seed: Use HASH(i + seed) for deterministic output (same seed = same result)
1391
+ - With RANDOM(): Use RANDOM() in the hash for non-deterministic output
1392
+ - No generator: Use default seed value
1393
+ """
1394
+ length = expression.this
1395
+ generator = expression.args.get("generator")
1396
+
1397
+ if generator:
1398
+ if isinstance(generator, exp.Rand):
1399
+ # If it's RANDOM(), use its seed if available, otherwise use RANDOM() itself
1400
+ seed_value = generator.this or generator
1401
+ else:
1402
+ # Const/int or other expression - use as seed directly
1403
+ seed_value = generator
1404
+ else:
1405
+ # No generator specified, use default seed (arbitrary but deterministic)
1406
+ seed_value = exp.Literal.number(RANDSTR_SEED)
1407
+
1408
+ length_sql = self.sql(length)
1409
+ seed_sql = self.sql(seed_value)
1410
+
1411
+ query: exp.Select = exp.maybe_parse(
1412
+ f"""
1413
+ SELECT LISTAGG(
1414
+ SUBSTRING(
1415
+ '{RANDSTR_CHAR_POOL}',
1416
+ 1 + CAST(FLOOR(random_value * 62) AS INT),
1417
+ 1
1418
+ ),
1419
+ ''
1420
+ )
1421
+ FROM (
1422
+ SELECT (ABS(HASH(i + {seed_sql})) % 1000) / 1000.0 AS random_value
1423
+ FROM RANGE({length_sql}) AS t(i)
1424
+ )
1425
+ """,
1426
+ dialect="duckdb",
1427
+ )
1428
+ return f"({self.sql(query)})"
1429
+
1430
+ def tobinary_sql(self: DuckDB.Generator, expression: exp.ToBinary) -> str:
1431
+ """
1432
+ TO_BINARY(value, format) transpilation if the return type is BINARY:
1433
+ - 'HEX': TO_BINARY('48454C50', 'HEX') → UNHEX('48454C50')
1434
+ - 'UTF-8': TO_BINARY('TEST', 'UTF-8') → ENCODE('TEST')
1435
+ - 'BASE64': TO_BINARY('SEVMUA==', 'BASE64') → FROM_BASE64('SEVMUA==')
1436
+
1437
+ format can be 'HEX', 'UTF-8' or 'BASE64'
1438
+ return type can be either VARCHAR or BINARY
1439
+ """
1440
+ value = expression.this
1441
+ format_arg = expression.args.get("format")
1442
+
1443
+ fmt = "HEX"
1444
+ if format_arg:
1445
+ fmt = format_arg.name.upper()
1446
+
1447
+ if expression.is_type(exp.DataType.Type.BINARY):
1448
+ if fmt == "UTF-8":
1449
+ return self.func("ENCODE", value)
1450
+ if fmt == "BASE64":
1451
+ return self.func("FROM_BASE64", value)
1452
+
1453
+ # Hex
1454
+ return self.func("UNHEX", value)
1455
+
1456
+ # Fallback, which needs to be updated if we want to support transpilation from other dialects than Snowflake
1457
+ return self.func("TO_BINARY", value)
1458
+
1459
+ def _greatest_least_sql(
1460
+ self: DuckDB.Generator, expression: exp.Greatest | exp.Least
1461
+ ) -> str:
1462
+ """
1463
+ Handle GREATEST/LEAST functions with dialect-aware NULL behavior.
1464
+
1465
+ - If null_if_any_null=True (BigQuery-style): return NULL if any argument is NULL
1466
+ - If null_if_any_null=False (DuckDB/PostgreSQL-style): ignore NULLs, return greatest/least non-NULL value
1467
+ """
1468
+ # Get all arguments
1469
+ all_args = [expression.this, *expression.expressions]
1470
+ fallback_sql = self.function_fallback_sql(expression)
1471
+
1472
+ if expression.args.get("null_if_any_null"):
1473
+ # BigQuery behavior: NULL if any argument is NULL
1474
+ case_expr = exp.case().when(
1475
+ exp.or_(*[arg.is_(exp.null()) for arg in all_args], copy=False),
1476
+ exp.null(),
1477
+ copy=False,
1478
+ )
1479
+ case_expr.set("default", fallback_sql)
1480
+ return self.sql(case_expr)
1481
+
1482
+ # DuckDB/PostgreSQL behavior: use native GREATEST/LEAST (ignores NULLs)
1483
+ return self.sql(fallback_sql)
1484
+
1485
+ def greatest_sql(self: DuckDB.Generator, expression: exp.Greatest) -> str:
1486
+ return self._greatest_least_sql(expression)
1487
+
1488
+ def least_sql(self: DuckDB.Generator, expression: exp.Least) -> str:
1489
+ return self._greatest_least_sql(expression)
1490
+
958
1491
  def lambda_sql(
959
1492
  self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True
960
1493
  ) -> str:
@@ -974,10 +1507,16 @@ class DuckDB(Dialect):
974
1507
  def install_sql(self, expression: exp.Install) -> str:
975
1508
  force = "FORCE " if expression.args.get("force") else ""
976
1509
  this = self.sql(expression, "this")
977
- from_clause = expression.args.get("from")
1510
+ from_clause = expression.args.get("from_")
978
1511
  from_clause = f" FROM {from_clause}" if from_clause else ""
979
1512
  return f"{force}INSTALL {this}{from_clause}"
980
1513
 
1514
+ def approxtopk_sql(self, expression: exp.ApproxTopK) -> str:
1515
+ self.unsupported(
1516
+ "APPROX_TOP_K cannot be transpiled to DuckDB due to incompatible return types. "
1517
+ )
1518
+ return self.function_fallback_sql(expression)
1519
+
981
1520
  def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
982
1521
  return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))
983
1522
 
@@ -1084,14 +1623,14 @@ class DuckDB(Dialect):
1084
1623
  return self.function_fallback_sql(expression)
1085
1624
 
1086
1625
  def countif_sql(self, expression: exp.CountIf) -> str:
1087
- if self.dialect.version >= Version("1.2"):
1626
+ if self.dialect.version >= (1, 2):
1088
1627
  return self.function_fallback_sql(expression)
1089
1628
 
1090
1629
  # https://github.com/tobymao/sqlglot/pull/4749
1091
1630
  return count_if_to_sum(self, expression)
1092
1631
 
1093
1632
  def bracket_sql(self, expression: exp.Bracket) -> str:
1094
- if self.dialect.version >= Version("1.2"):
1633
+ if self.dialect.version >= (1, 2):
1095
1634
  return super().bracket_sql(expression)
1096
1635
 
1097
1636
  # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
@@ -1158,6 +1697,33 @@ class DuckDB(Dialect):
1158
1697
 
1159
1698
  return self.sql(case)
1160
1699
 
1700
+ def lower_sql(self, expression: exp.Lower) -> str:
1701
+ result_sql = self.func("LOWER", _cast_to_varchar(expression.this))
1702
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1703
+
1704
+ def upper_sql(self, expression: exp.Upper) -> str:
1705
+ result_sql = self.func("UPPER", _cast_to_varchar(expression.this))
1706
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1707
+
1708
+ def replace_sql(self, expression: exp.Replace) -> str:
1709
+ result_sql = self.func(
1710
+ "REPLACE",
1711
+ _cast_to_varchar(expression.this),
1712
+ _cast_to_varchar(expression.expression),
1713
+ _cast_to_varchar(expression.args.get("replacement")),
1714
+ )
1715
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1716
+
1717
+ def _bitwise_op(self, expression: exp.Binary, op: str) -> str:
1718
+ _prepare_binary_bitwise_args(expression)
1719
+ result_sql = self.binary(expression, op)
1720
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1721
+
1722
+ def bitwisexor_sql(self, expression: exp.BitwiseXor) -> str:
1723
+ _prepare_binary_bitwise_args(expression)
1724
+ result_sql = self.func("XOR", expression.this, expression.expression)
1725
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1726
+
1161
1727
  def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
1162
1728
  this = expression.this
1163
1729
  key = expression.args.get("key")
@@ -1173,6 +1739,13 @@ class DuckDB(Dialect):
1173
1739
 
1174
1740
  return self.func("STRUCT_INSERT", this, kv_sql)
1175
1741
 
1742
+ def startswith_sql(self, expression: exp.StartsWith) -> str:
1743
+ return self.func(
1744
+ "STARTS_WITH",
1745
+ _cast_to_varchar(expression.this),
1746
+ _cast_to_varchar(expression.expression),
1747
+ )
1748
+
1176
1749
  def unnest_sql(self, expression: exp.Unnest) -> str:
1177
1750
  explode_array = expression.args.get("explode_array")
1178
1751
  if explode_array:
@@ -1206,7 +1779,7 @@ class DuckDB(Dialect):
1206
1779
  if isinstance(this, exp.First):
1207
1780
  this = exp.AnyValue(this=this.this)
1208
1781
 
1209
- if not isinstance(this, exp.AnyValue):
1782
+ if not isinstance(this, (exp.AnyValue, exp.ApproxQuantiles)):
1210
1783
  self.unsupported("IGNORE NULLS is not supported for non-window functions.")
1211
1784
 
1212
1785
  return self.sql(this)
@@ -1229,10 +1802,19 @@ class DuckDB(Dialect):
1229
1802
 
1230
1803
  return self.func("ARRAY_TO_STRING", this, expression.expression)
1231
1804
 
1232
- @unsupported_args("position", "occurrence")
1233
1805
  def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
1806
+ this = expression.this
1234
1807
  group = expression.args.get("group")
1235
1808
  params = expression.args.get("parameters")
1809
+ position = expression.args.get("position")
1810
+ occurrence = expression.args.get("occurrence")
1811
+ null_if_pos_overflow = expression.args.get("null_if_pos_overflow")
1812
+
1813
+ if position and (not position.is_int or position.to_py() > 1):
1814
+ this = exp.Substring(this=this, start=position)
1815
+
1816
+ if null_if_pos_overflow:
1817
+ this = exp.Nullif(this=this, expression=exp.Literal.string(""))
1236
1818
 
1237
1819
  # Do not render group if there is no following argument,
1238
1820
  # and it's the default value for this dialect
@@ -1242,9 +1824,15 @@ class DuckDB(Dialect):
1242
1824
  and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
1243
1825
  ):
1244
1826
  group = None
1245
- return self.func(
1246
- "REGEXP_EXTRACT", expression.this, expression.expression, group, params
1247
- )
1827
+
1828
+ if occurrence and (not occurrence.is_int or occurrence.to_py() > 1):
1829
+ return self.func(
1830
+ "ARRAY_EXTRACT",
1831
+ self.func("REGEXP_EXTRACT_ALL", this, expression.expression, group, params),
1832
+ exp.Literal.number(occurrence),
1833
+ )
1834
+
1835
+ return self.func("REGEXP_EXTRACT", this, expression.expression, group, params)
1248
1836
 
1249
1837
  @unsupported_args("culture")
1250
1838
  def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
@@ -1347,3 +1935,111 @@ class DuckDB(Dialect):
1347
1935
  to_hex = exp.cast(self.func("TO_HEX", from_hex), exp.DataType.Type.BLOB)
1348
1936
 
1349
1937
  return self.sql(to_hex)
1938
+
1939
+ def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
1940
+ unit = unit_to_str(expression)
1941
+ zone = expression.args.get("zone")
1942
+ timestamp = expression.this
1943
+
1944
+ if is_date_unit(unit) and zone:
1945
+ # BigQuery's TIMESTAMP_TRUNC with timezone truncates in the target timezone and returns as UTC.
1946
+ # Double AT TIME ZONE needed for BigQuery compatibility:
1947
+ # 1. First AT TIME ZONE: ensures truncation happens in the target timezone
1948
+ # 2. Second AT TIME ZONE: converts the DATE result back to TIMESTAMPTZ (preserving time component)
1949
+ timestamp = exp.AtTimeZone(this=timestamp, zone=zone)
1950
+ result_sql = self.func("DATE_TRUNC", unit, timestamp)
1951
+ return self.sql(exp.AtTimeZone(this=result_sql, zone=zone))
1952
+
1953
+ return self.func("DATE_TRUNC", unit, timestamp)
1954
+
1955
+ def trim_sql(self, expression: exp.Trim) -> str:
1956
+ result_sql = self.func(
1957
+ "TRIM",
1958
+ _cast_to_varchar(expression.this),
1959
+ _cast_to_varchar(expression.expression),
1960
+ )
1961
+ return _gen_with_cast_to_blob(self, expression, result_sql)
1962
+
1963
+ def round_sql(self, expression: exp.Round) -> str:
1964
+ this = expression.this
1965
+ decimals = expression.args.get("decimals")
1966
+ truncate = expression.args.get("truncate")
1967
+
1968
+ # DuckDB requires the scale (decimals) argument to be an INT
1969
+ # Some dialects (e.g., Snowflake) allow non-integer scales and cast to an integer internally
1970
+ if decimals is not None and expression.args.get("casts_non_integer_decimals"):
1971
+ if not (decimals.is_int or decimals.is_type(*exp.DataType.INTEGER_TYPES)):
1972
+ decimals = exp.cast(decimals, exp.DataType.Type.INT)
1973
+
1974
+ func = "ROUND"
1975
+ if truncate:
1976
+ # BigQuery uses ROUND_HALF_EVEN; Snowflake uses HALF_TO_EVEN
1977
+ if truncate.this in ("ROUND_HALF_EVEN", "HALF_TO_EVEN"):
1978
+ func = "ROUND_EVEN"
1979
+ truncate = None
1980
+ # BigQuery uses ROUND_HALF_AWAY_FROM_ZERO; Snowflake uses HALF_AWAY_FROM_ZERO
1981
+ elif truncate.this in ("ROUND_HALF_AWAY_FROM_ZERO", "HALF_AWAY_FROM_ZERO"):
1982
+ truncate = None
1983
+
1984
+ return self.func(func, this, decimals, truncate)
1985
+
1986
+ def approxquantiles_sql(self, expression: exp.ApproxQuantiles) -> str:
1987
+ """
1988
+ BigQuery's APPROX_QUANTILES(expr, n) returns an array of n+1 approximate quantile values
1989
+ dividing the input distribution into n equal-sized buckets.
1990
+
1991
+ Both BigQuery and DuckDB use approximate algorithms for quantile estimation, but BigQuery
1992
+ does not document the specific algorithm used so results may differ. DuckDB does not
1993
+ support RESPECT NULLS.
1994
+ """
1995
+ this = expression.this
1996
+ if isinstance(this, exp.Distinct):
1997
+ # APPROX_QUANTILES requires 2 args and DISTINCT node grabs both
1998
+ if len(this.expressions) < 2:
1999
+ self.unsupported("APPROX_QUANTILES requires a bucket count argument")
2000
+ return self.function_fallback_sql(expression)
2001
+ num_quantiles_expr = this.expressions[1].pop()
2002
+ else:
2003
+ num_quantiles_expr = expression.expression
2004
+
2005
+ if not isinstance(num_quantiles_expr, exp.Literal) or not num_quantiles_expr.is_int:
2006
+ self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
2007
+ return self.function_fallback_sql(expression)
2008
+
2009
+ num_quantiles = t.cast(int, num_quantiles_expr.to_py())
2010
+ if num_quantiles <= 0:
2011
+ self.unsupported("APPROX_QUANTILES bucket count must be a positive integer")
2012
+ return self.function_fallback_sql(expression)
2013
+
2014
+ quantiles = [
2015
+ exp.Literal.number(Decimal(i) / Decimal(num_quantiles))
2016
+ for i in range(num_quantiles + 1)
2017
+ ]
2018
+
2019
+ return self.sql(
2020
+ exp.ApproxQuantile(this=this, quantile=exp.Array(expressions=quantiles))
2021
+ )
2022
+
2023
+ def jsonextractscalar_sql(self, expression: exp.JSONExtractScalar) -> str:
2024
+ if expression.args.get("scalar_only"):
2025
+ expression = exp.JSONExtractScalar(
2026
+ this=rename_func("JSON_VALUE")(self, expression), expression="'$'"
2027
+ )
2028
+ return _arrow_json_extract_sql(self, expression)
2029
+
2030
+ def bitwisenot_sql(self, expression: exp.BitwiseNot) -> str:
2031
+ this = expression.this
2032
+
2033
+ if _is_binary(this):
2034
+ expression.type = exp.DataType.build("BINARY")
2035
+
2036
+ arg = _cast_to_bit(this)
2037
+
2038
+ if isinstance(this, exp.Neg):
2039
+ arg = exp.Paren(this=arg)
2040
+
2041
+ expression.set("this", arg)
2042
+
2043
+ result_sql = f"~{self.sql(expression, 'this')}"
2044
+
2045
+ return _gen_with_cast_to_blob(self, expression, result_sql)