sqlglot 27.8.0__py3-none-any.whl → 27.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '27.8.0'
-__version_tuple__ = version_tuple = (27, 8, 0)
+__version__ = version = '27.9.0'
+__version_tuple__ = version_tuple = (27, 9, 0)

 __commit_id__ = commit_id = None
sqlglot/dialects/__init__.py CHANGED
@@ -75,6 +75,7 @@ DIALECTS = [
     "Druid",
     "DuckDB",
     "Dune",
+    "Exasol",
     "Fabric",
     "Hive",
     "Materialize",
@@ -95,7 +96,6 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
-    "Exasol",
 ]

 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
sqlglot/dialects/bigquery.py CHANGED
@@ -295,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression


+def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
+    self._annotate_args(expression)
+
+    struct_type = exp.DataType(
+        this=exp.DataType.Type.STRUCT,
+        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
+        nested=True,
+    )
+    self._set_type(
+        expression,
+        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
+    )
+
+    return expression
+
+
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
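The new annotator types APPROX_TOP_COUNT/APPROX_TOP_SUM results as an ARRAY of STRUCTs pairing the input value's type with a BIGINT count. A minimal sketch of how this surfaces through sqlglot's type annotation, assuming annotate_types accepts schema and dialect keyword arguments as in recent releases (table and column names are illustrative):

import sqlglot
from sqlglot.optimizer.annotate_types import annotate_types

ast = sqlglot.parse_one("SELECT APPROX_TOP_COUNT(word, 5) FROM t", read="bigquery")
annotated = annotate_types(ast, schema={"t": {"word": "STRING"}}, dialect="bigquery")

# The projection should be typed as an ARRAY of STRUCT<input type, BIGINT>,
# mirroring BigQuery's documented APPROX_TOP_COUNT output shape.
print(annotated.selects[0].type)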
@@ -473,17 +489,24 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
         exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BINARY
+        ),
         exp.CodePointsToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
@@ -493,6 +516,9 @@ class BigQuery(Dialect):
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
         exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
@@ -506,12 +532,20 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BIGDECIMAL
+        ),
+        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
         exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
         exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
@@ -522,8 +556,11 @@ class BigQuery(Dialect):
         ),
         exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
+        ),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }
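The scalar annotators registered above can be exercised the same way; a small sketch (the schema is illustrative):

import sqlglot
from sqlglot.optimizer.annotate_types import annotate_types

ast = sqlglot.parse_one("SELECT FARM_FINGERPRINT(s), TO_CODE_POINTS(s) FROM t", read="bigquery")
annotated = annotate_types(ast, schema={"t": {"s": "STRING"}}, dialect="bigquery")

print(annotated.selects[0].type)  # expected BIGINT
print(annotated.selects[1].type)  # expected ARRAY<BIGINT>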
@@ -596,10 +633,13 @@ class BigQuery(Dialect):
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
+           "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
+           "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
+           "WHILE": TokenType.COMMAND,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
@@ -623,6 +663,8 @@ class BigQuery(Dialect):

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
+           "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
+           "BOOL": exp.JSONBool.from_arg_list,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
@@ -689,6 +731,7 @@ class BigQuery(Dialect):
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+           "FROM_HEX": exp.Unhex.from_arg_list,
            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
        }

@@ -699,7 +742,10 @@ class BigQuery(Dialect):
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
+           "PREDICT": lambda self: self._parse_predict(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
+           "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
+           "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
        }
        FUNCTION_PARSERS.pop("TRIM")

@@ -979,13 +1025,40 @@ class BigQuery(Dialect):

            return expr

-       def _parse_features_at_time(self) -> exp.FeaturesAtTime:
-           expr = self.expression(
-               exp.FeaturesAtTime,
-               this=(self._match(TokenType.TABLE) and self._parse_table())
-               or self._parse_select(nested=True),
+       def _parse_predict(self) -> exp.Predict:
+           self._match_text_seq("MODEL")
+           this = self._parse_table()
+
+           self._match(TokenType.COMMA)
+           self._match_text_seq("TABLE")
+
+           return self.expression(
+               exp.Predict,
+               this=this,
+               expression=self._parse_table(),
+               params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
+           )
+
+       def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
+           self._match_text_seq("MODEL")
+           this = self._parse_table()
+
+           self._match(TokenType.COMMA)
+           self._match_text_seq("TABLE")
+
+           return self.expression(
+               exp.GenerateEmbedding,
+               this=this,
+               expression=self._parse_table(),
+               params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
            )

+       def _parse_features_at_time(self) -> exp.FeaturesAtTime:
+           self._match(TokenType.TABLE)
+           this = self._parse_table()
+
+           expr = self.expression(exp.FeaturesAtTime, this=this)
+
            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

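Both new parsers consume the MODEL <table>, TABLE <table>[, <struct>] argument shape used by BigQuery ML table-valued functions. A hedged parse sketch (model and table names are illustrative):

import sqlglot
from sqlglot import exp

q = "SELECT * FROM ML.PREDICT(MODEL `proj.ds.model`, TABLE `proj.ds.tbl`, STRUCT(0.5 AS threshold))"
node = sqlglot.parse_one(q, read="bigquery").find(exp.Predict)
print(node.args.get("params_struct"))  # the trailing STRUCT argument, if present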
@@ -996,6 +1069,37 @@ class BigQuery(Dialect):

            return expr

+       def _parse_vector_search(self) -> exp.VectorSearch:
+           self._match(TokenType.TABLE)
+           base_table = self._parse_table()
+
+           self._match(TokenType.COMMA)
+
+           column_to_search = self._parse_bitwise()
+           self._match(TokenType.COMMA)
+
+           self._match(TokenType.TABLE)
+           query_table = self._parse_table()
+
+           expr = self.expression(
+               exp.VectorSearch,
+               this=base_table,
+               column_to_search=column_to_search,
+               query_table=query_table,
+           )
+
+           while self._match(TokenType.COMMA):
+               # query_column_to_search can be named argument or positional
+               if self._match(TokenType.STRING, advance=False):
+                   query_column = self._parse_string()
+                   expr.set("query_column_to_search", query_column)
+               else:
+                   arg = self._parse_lambda()
+                   if arg:
+                       expr.set(arg.this.name, arg)
+
+           return expr
+
        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

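_parse_vector_search mirrors BigQuery's VECTOR_SEARCH signature: a base TABLE, the column to search, a query TABLE, then optional positional or named arguments. A hedged parse example (identifiers are illustrative):

import sqlglot
from sqlglot import exp

q = "SELECT * FROM VECTOR_SEARCH(TABLE ds.base, 'embedding', TABLE ds.queries, 'query_embedding', top_k => 5)"
node = sqlglot.parse_one(q, read="bigquery").find(exp.VectorSearch)
print(node.args.get("query_column_to_search"))  # the positional string argument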
@@ -1043,6 +1147,7 @@ class BigQuery(Dialect):

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
+           exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
@@ -1083,6 +1188,7 @@ class BigQuery(Dialect):
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
+           exp.JSONBool: rename_func("BOOL"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
sqlglot/dialects/clickhouse.py CHANGED
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
+       FUNCTIONS.pop("APPROX_TOP_SUM")

        AGG_FUNCTIONS = {
            "count",
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
            "argMax",
            "avgWeighted",
            "topK",
+           "approx_top_sum",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):

            return value

+       def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
+           # ClickHouse allows custom expressions as partition key
+           # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
+           return self.expression(
+               exp.PartitionedByProperty,
+               this=self._parse_assignment(),
+           )
+
    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
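The override lets arbitrary expressions serve as the partition key, which a plain column-list assumption would reject. A hedged round-trip sketch (engine clause per the ClickHouse docs linked above):

import sqlglot

ddl = "CREATE TABLE t (dt DateTime, x Int32) ENGINE=MergeTree ORDER BY dt PARTITION BY toYYYYMM(dt)"
print(sqlglot.transpile(ddl, read="clickhouse", write="clickhouse")[0])
# expected to round-trip with PARTITION BY toYYYYMM(dt) intact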
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
+           exp.FarmFingerprint: rename_func("farmFingerprint64"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
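This pairs with the BigQuery changes above, so FARM_FINGERPRINT can be carried across dialects. A hedged example:

import sqlglot

print(sqlglot.transpile("SELECT FARM_FINGERPRINT(col)", read="bigquery", write="clickhouse")[0])
# expected: SELECT farmFingerprint64(col)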
sqlglot/dialects/dialect.py CHANGED
@@ -668,6 +668,7 @@ class Dialect(metaclass=_Dialect):
            exp.UnixMillis,
        },
        exp.DataType.Type.BINARY: {
+           exp.FromBase32,
            exp.FromBase64,
        },
        exp.DataType.Type.BOOLEAN: {
@@ -779,6 +780,7 @@ class Dialect(metaclass=_Dialect):
            exp.TimeToStr,
            exp.TimeToTimeStr,
            exp.Trim,
+           exp.ToBase32,
            exp.ToBase64,
            exp.TsOrDsToDateStr,
            exp.UnixToStr,
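These sets feed the dialect-agnostic type annotator, so FROM_BASE32 is typed BINARY and TO_BASE32 VARCHAR by default. A minimal sketch built directly on expression nodes:

from sqlglot import exp
from sqlglot.optimizer.annotate_types import annotate_types

node = annotate_types(exp.FromBase32(this=exp.Literal.string("ORSXG5A=")))
print(node.type)  # expected BINARY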
sqlglot/dialects/doris.py CHANGED
@@ -65,7 +65,11 @@ class Doris(MySQL):
            **MySQL.Parser.PROPERTY_PARSERS,
            "PROPERTIES": lambda self: self._parse_wrapped_properties(),
            "UNIQUE": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
+           # Plain KEY without UNIQUE/DUPLICATE/AGGREGATE prefixes should be treated as UniqueKeyProperty with unique=False
+           "KEY": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
            "PARTITION BY": lambda self: self._parse_partition_by_opt_range(),
+           "BUILD": lambda self: self._parse_build_property(),
+           "REFRESH": lambda self: self._parse_refresh_property(),
        }

        def _parse_partitioning_granularity_dynamic(self) -> exp.PartitionByRangePropertyDynamic:
@@ -104,9 +108,27 @@ class Doris(MySQL):
            part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
            return self.expression(exp.Partition, expressions=[part_range])

+       def _parse_partition_definition_list(self) -> exp.Partition:
+           # PARTITION <name> VALUES IN (<value_csv>)
+           self._match_text_seq("PARTITION")
+           name = self._parse_id_var()
+           self._match_text_seq("VALUES", "IN")
+           values = self._parse_wrapped_csv(self._parse_expression)
+           part_list = self.expression(exp.PartitionList, this=name, expressions=values)
+           return self.expression(exp.Partition, expressions=[part_list])
+
        def _parse_partition_by_opt_range(
            self,
-       ) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty:
+       ) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty | exp.PartitionByListProperty:
+           if self._match_text_seq("LIST"):
+               return self.expression(
+                   exp.PartitionByListProperty,
+                   partition_expressions=self._parse_wrapped_id_vars(),
+                   create_expressions=self._parse_wrapped_csv(
+                       self._parse_partition_definition_list
+                   ),
+               )
+
            if not self._match_text_seq("RANGE"):
                return super()._parse_partitioned_by()

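With this in place, Doris LIST partitioning parses into the new PartitionByListProperty/PartitionList nodes. A hedged parse example (the schema is illustrative):

import sqlglot
from sqlglot import exp

ddl = """
CREATE TABLE t (city VARCHAR(32), v INT)
PARTITION BY LIST (city) (PARTITION p_east VALUES IN ('NY', 'BOS'), PARTITION p_west VALUES IN ('SF'))
"""
tree = sqlglot.parse_one(ddl, read="doris")
print(tree.find(exp.PartitionByListProperty) is not None)  # expected True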
@@ -128,6 +150,28 @@ class Doris(MySQL):
                create_expressions=create_expressions,
            )

+       def _parse_build_property(self) -> exp.BuildProperty:
+           return self.expression(exp.BuildProperty, this=self._parse_var(upper=True))
+
+       def _parse_refresh_property(self) -> exp.RefreshTriggerProperty:
+           method = self._parse_var(upper=True)
+
+           self._match(TokenType.ON)
+
+           kind = self._match_texts(("MANUAL", "COMMIT", "SCHEDULE")) and self._prev.text.upper()
+           every = self._match_text_seq("EVERY") and self._parse_number()
+           unit = self._parse_var(any_token=True) if every else None
+           starts = self._match_text_seq("STARTS") and self._parse_string()
+
+           return self.expression(
+               exp.RefreshTriggerProperty,
+               method=method,
+               kind=kind,
+               every=every,
+               unit=unit,
+               starts=starts,
+           )
+
    class Generator(MySQL.Generator):
        LAST_DAY_SUPPORTS_DATE_PART = False
        VARCHAR_REQUIRES_SIZE = False
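The two parsers target Doris async materialized-view DDL of the form BUILD IMMEDIATE/DEFERRED and REFRESH <method> ON MANUAL|COMMIT|SCHEDULE [EVERY n unit [STARTS '...']]. A hedged sketch of a statement they should accept (the exact DDL shape follows the Doris docs and is not verified here):

import sqlglot
from sqlglot import exp

ddl = """
CREATE MATERIALIZED VIEW mv
BUILD IMMEDIATE
REFRESH COMPLETE ON SCHEDULE EVERY 1 HOUR STARTS '2024-01-01 00:00:00'
AS SELECT city, COUNT(*) FROM t GROUP BY city
"""
node = sqlglot.parse_one(ddl, read="doris")
print(node.find(exp.RefreshTriggerProperty))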
@@ -145,7 +189,10 @@ class Doris(MySQL):
            **MySQL.Generator.PROPERTIES_LOCATION,
            exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
+           exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
+           exp.BuildProperty: exp.Properties.Location.POST_SCHEMA,
+           exp.RefreshTriggerProperty: exp.Properties.Location.POST_SCHEMA,
        }

        CAST_MAPPING = {}
@@ -662,9 +709,18 @@ class Doris(MySQL):
            "year",
        }

+       def uniquekeyproperty_sql(
+           self, expression: exp.UniqueKeyProperty, prefix: str = "UNIQUE KEY"
+       ) -> str:
+           create_stmt = expression.find_ancestor(exp.Create)
+           if create_stmt and create_stmt.args["properties"].find(exp.MaterializedProperty):
+               return super().uniquekeyproperty_sql(expression, prefix="KEY")
+
+           return super().uniquekeyproperty_sql(expression)
+
        def partition_sql(self, expression: exp.Partition) -> str:
            parent = expression.parent
-           if isinstance(parent, exp.PartitionByRangeProperty):
+           if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
                return ", ".join(self.sql(e) for e in expression.expressions)
            return super().partition_sql(expression)

@@ -685,7 +741,9 @@ class Doris(MySQL):

            return f"PARTITION {name} VALUES LESS THAN ({self.sql(values[0])})"

-       def partitionbyrangepropertydynamic_sql(self, expression):
+       def partitionbyrangepropertydynamic_sql(
+           self, expression: exp.PartitionByRangePropertyDynamic
+       ) -> str:
            # Generates: FROM ("start") TO ("end") INTERVAL N UNIT
            start = self.sql(expression, "start")
            end = self.sql(expression, "end")
@@ -699,15 +757,25 @@ class Doris(MySQL):

            return f"FROM ({start}) TO ({end}) {interval}"

-       def partitionbyrangeproperty_sql(self, expression):
-           partition_expressions = ", ".join(
-               self.sql(e) for e in expression.args.get("partition_expressions") or []
+       def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
+           partition_expressions = self.expressions(
+               expression, key="partition_expressions", indent=False
            )
-           create_expressions = expression.args.get("create_expressions") or []
-           # Handle both static and dynamic partition definitions
-           create_sql = ", ".join(self.sql(e) for e in create_expressions)
+           create_sql = self.expressions(expression, key="create_expressions", indent=False)
            return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"

+       def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
+           partition_expressions = self.expressions(
+               expression, key="partition_expressions", indent=False
+           )
+           create_sql = self.expressions(expression, key="create_expressions", indent=False)
+           return f"PARTITION BY LIST ({partition_expressions}) ({create_sql})"
+
+       def partitionlist_sql(self, expression: exp.PartitionList) -> str:
+           name = self.sql(expression, "this")
+           values = self.expressions(expression, indent=False)
+           return f"PARTITION {name} VALUES IN ({values})"
+
        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
            node = expression.this
            if isinstance(node, exp.Schema):
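Together with the parser changes above, LIST partitions should now round-trip through the Doris generator:

import sqlglot

ddl = "CREATE TABLE t (city VARCHAR(32)) PARTITION BY LIST (city) (PARTITION p1 VALUES IN ('NY', 'SF'))"
print(sqlglot.transpile(ddl, read="doris", write="doris")[0])
# expected to preserve: PARTITION BY LIST (city) (PARTITION p1 VALUES IN ('NY', 'SF'))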
sqlglot/dialects/dremio.py CHANGED
@@ -1,17 +1,17 @@
 from __future__ import annotations

 import typing as t
-
 from sqlglot import expressions as exp
 from sqlglot import parser, generator, tokens
 from sqlglot.dialects.dialect import (
     Dialect,
     build_timetostr_or_tochar,
     build_formatted_time,
+    build_date_delta,
     rename_func,
-    unit_to_var,
 )
 from sqlglot.helper import seq_get
+from sqlglot.tokens import TokenType

 if t.TYPE_CHECKING:
     from sqlglot.dialects.dialect import DialectType
@@ -21,20 +21,17 @@ DATE_DELTA = t.Union[exp.DateAdd, exp.DateSub]

 def _date_delta_sql(name: str) -> t.Callable[[Dremio.Generator, DATE_DELTA], str]:
     def _delta_sql(self: Dremio.Generator, expression: DATE_DELTA) -> str:
-        unit = expression.text("unit")
+        unit = expression.text("unit").upper()

-        if not unit or unit.upper() == "DAY":
+        # Fallback to default behavior if unit is missing or 'DAY'
+        if not unit or unit == "DAY":
             return self.func(name, expression.this, expression.expression)

-        # to support units we need to use TIMESTAMPADD function
-        increment = expression.expression
-        if isinstance(expression, exp.DateSub):
-            if isinstance(increment, exp.Literal):
-                value = increment.to_py() if increment.is_number else int(increment.name)
-                increment = exp.Literal.number(value * -1)
-            else:
-                increment *= exp.Literal.number(-1)
-        return self.func("TIMESTAMPADD", unit_to_var(expression), increment, expression.this)
+        this_sql = self.sql(expression, "this")
+        expr_sql = self.sql(expression, "expression")
+
+        interval_sql = f"CAST({expr_sql} AS INTERVAL {unit})"
+        return f"{name}({this_sql}, {interval_sql})"

     return _delta_sql

@@ -50,6 +47,33 @@ def to_char_is_numeric_handler(args: t.List, dialect: DialectType) -> exp.TimeTo
     return expression


+def build_date_delta_with_cast_interval(
+    expression_class: t.Type[DATE_DELTA],
+) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
+    fallback_builder = build_date_delta(expression_class)
+
+    def _builder(args):
+        if len(args) == 2:
+            date_arg, interval_arg = args
+
+            if (
+                isinstance(interval_arg, exp.Cast)
+                and isinstance(interval_arg.to, exp.DataType)
+                and isinstance(interval_arg.to.this, exp.Interval)
+            ):
+                return expression_class(
+                    this=date_arg,
+                    expression=interval_arg.this,
+                    unit=interval_arg.to.this.unit,
+                )
+
+            return expression_class(this=date_arg, expression=interval_arg)
+
+        return fallback_builder(args)
+
+    return _builder
+
+
 class Dremio(Dialect):
     SUPPORTS_USER_DEFINED_TYPES = False
     CONCAT_COALESCE = True
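The builder and the _date_delta_sql helper above are inverses: CAST(n AS INTERVAL unit) arguments fold into DateAdd/DateSub nodes with a unit, and non-DAY units are rendered back in the same CAST form. A hedged round-trip sketch:

import sqlglot

sql = "SELECT DATE_ADD(hire_date, CAST(3 AS INTERVAL MONTH)) FROM emp"
print(sqlglot.transpile(sql, read="dremio", write="dremio")[0])
# expected to round-trip; DAY or unit-less variants fall back to DATE_ADD(col, n)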
@@ -108,16 +132,39 @@ class Dremio(Dialect):
        "tzo": "%z",  # numeric offset (+0200)
    }

+   class Tokenizer(tokens.Tokenizer):
+       COMMENTS = ["--", "//", ("/*", "*/")]
+
    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True

+       NO_PAREN_FUNCTION_PARSERS = {
+           **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
+           "CURRENT_DATE_UTC": lambda self: self._parse_current_date_utc(),
+       }
+
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "TO_CHAR": to_char_is_numeric_handler,
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
+           "DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd),
+           "DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub),
+           "ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
        }

+       def _parse_current_date_utc(self) -> exp.Cast:
+           if self._match(TokenType.L_PAREN):
+               self._match_r_paren()
+
+           return exp.Cast(
+               this=exp.AtTimeZone(
+                   this=exp.CurrentTimestamp(),
+                   zone=exp.Literal.string("UTC"),
+               ),
+               to=exp.DataType.build("DATE"),
+           )
+
    class Generator(generator.Generator):
        NVL2_SUPPORTED = False
        SUPPORTS_CONVERT_TIMEZONE = True
@@ -148,6 +195,7 @@ class Dremio(Dialect):
            exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateSub: _date_delta_sql("DATE_SUB"),
+           exp.GenerateSeries: rename_func("ARRAY_GENERATE_RANGE"),
        }

        def datatype_sql(self, expression: exp.DataType) -> str:
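This maps Dremio's ARRAY_GENERATE_RANGE onto the generic exp.GenerateSeries node in both directions. A hedged example (argument-boundary semantics are not adjusted by the rename):

import sqlglot

print(sqlglot.transpile("SELECT GENERATE_SERIES(1, 10)", read="postgres", write="dremio")[0])
# expected: SELECT ARRAY_GENERATE_RANGE(1, 10)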
@@ -162,5 +210,17 @@ class Dremio(Dialect):

            return super().datatype_sql(expression)

-   class Tokenizer(tokens.Tokenizer):
-       COMMENTS = ["--", "//", ("/*", "*/")]
+       def cast_sql(self, expression: exp.Cast, safe_prefix: str | None = None) -> str:
+           # Match: CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
+           if expression.is_type(exp.DataType.Type.DATE):
+               at_time_zone = expression.this
+
+               if (
+                   isinstance(at_time_zone, exp.AtTimeZone)
+                   and isinstance(at_time_zone.this, exp.CurrentTimestamp)
+                   and isinstance(at_time_zone.args["zone"], exp.Literal)
+                   and at_time_zone.text("zone").upper() == "UTC"
+               ):
+                   return "CURRENT_DATE_UTC"
+
+           return super().cast_sql(expression, safe_prefix)
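The parser expands CURRENT_DATE_UTC into a portable CAST(... AT TIME ZONE 'UTC' AS DATE) tree, and cast_sql pattern-matches that tree to emit the function again when targeting Dremio. A hedged sketch:

import sqlglot

sql = "SELECT CURRENT_DATE_UTC"
print(sqlglot.transpile(sql, read="dremio", write="dremio")[0])    # expected: SELECT CURRENT_DATE_UTC
print(sqlglot.transpile(sql, read="dremio", write="postgres")[0])  # expected: the expanded CAST(... AT TIME ZONE 'UTC' AS DATE) form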
sqlglot/dialects/duckdb.py CHANGED
@@ -386,10 +386,12 @@ class DuckDB(Dialect):
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_CONTAINS": exp.ArrayContains.from_arg_list,
+           "LIST_FILTER": exp.ArrayFilter.from_arg_list,
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
+           "LIST_TRANSFORM": exp.Transform.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
@@ -643,6 +645,9 @@ class DuckDB(Dialect):
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
+           exp.ArrayUniqueAgg: lambda self, e: self.func(
+               "LIST", exp.Distinct(expressions=[e.this])
+           ),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
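exp.ArrayUniqueAgg (e.g. Snowflake's ARRAY_UNIQUE_AGG) is rendered in DuckDB as LIST over a DISTINCT argument. A hedged example:

import sqlglot

print(sqlglot.transpile("SELECT ARRAY_UNIQUE_AGG(x) FROM t", read="snowflake", write="duckdb")[0])
# expected: SELECT LIST(DISTINCT x) FROM t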
@@ -1117,15 +1122,20 @@ class DuckDB(Dialect):
            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
-           if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
+           this = expression.this
+
+           if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

-           if not isinstance(expression.this, exp.AnyValue):
+           if isinstance(this, exp.First):
+               this = exp.AnyValue(this=this.this)
+
+           if not isinstance(this, exp.AnyValue):
                self.unsupported("IGNORE NULLS is not supported for non-window functions.")

-           return self.sql(expression, "this")
+           return self.sql(this)

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
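The new branch downgrades FIRST(... IGNORE NULLS) to ANY_VALUE when it is not used as a window function, since DuckDB only accepts IGNORE NULLS on window calls. A hedged example:

import sqlglot

print(sqlglot.transpile("SELECT FIRST(x IGNORE NULLS) FROM t", read="duckdb", write="duckdb")[0])
# expected: SELECT ANY_VALUE(x) FROM t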