sqlglot 27.8.0__py3-none-any.whl → 27.10.0__py3-none-any.whl

sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '27.8.0'
-__version_tuple__ = version_tuple = (27, 8, 0)
+__version__ = version = '27.10.0'
+__version_tuple__ = version_tuple = (27, 10, 0)

 __commit_id__ = commit_id = None
sqlglot/dialects/__init__.py CHANGED
@@ -75,6 +75,7 @@ DIALECTS = [
     "Druid",
     "DuckDB",
     "Dune",
+    "Exasol",
     "Fabric",
     "Hive",
     "Materialize",
@@ -95,7 +96,6 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
-    "Exasol",
 ]

 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
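The move only alphabetizes "Exasol" within DIALECTS; since MODULE_BY_DIALECT is derived from the list itself, lookup by name is unchanged. A minimal sanity check (illustrative, not from the package's test suite):

    import sqlglot

    # The dialect still resolves by its lowercased module name,
    # regardless of its position in DIALECTS.
    dialect = sqlglot.Dialect.get_or_raise("exasol")
    print(type(dialect).__name__)  # expected: Exasol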
sqlglot/dialects/bigquery.py CHANGED
@@ -295,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression


+def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
+    self._annotate_args(expression)
+
+    struct_type = exp.DataType(
+        this=exp.DataType.Type.STRUCT,
+        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
+        nested=True,
+    )
+    self._set_type(
+        expression,
+        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
+    )
+
+    return expression
+
+
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
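A sketch of how the new annotator should surface through sqlglot's type annotation pass; the schema and printed type text here are illustrative:

    from sqlglot import parse_one
    from sqlglot.optimizer.annotate_types import annotate_types

    # APPROX_TOP_COUNT(col, 10) parses as exp.ApproxTopK (see the FUNCTIONS
    # mapping further down); after annotation it should be typed as an ARRAY
    # of STRUCTs pairing the input type with a BIGINT count.
    expr = parse_one("SELECT APPROX_TOP_COUNT(col, 10) FROM t", read="bigquery")
    annotated = annotate_types(expr, schema={"t": {"col": "STRING"}}, dialect="bigquery")
    print(annotated.selects[0].type)  # e.g. ARRAY<STRUCT<VARCHAR, BIGINT>>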
@@ -324,16 +340,13 @@ def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List]
     return _builder


-def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
-    if len(args) == 3:
-        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)
-
+def _build_contains_substring(args: t.List) -> exp.Contains:
     # Lowercase the operands in case of transpilation, as exp.Contains
     # is case-sensitive on other dialects
     this = exp.Lower(this=seq_get(args, 0))
     expr = exp.Lower(this=seq_get(args, 1))

-    return exp.Contains(this=this, expression=expr)
+    return exp.Contains(this=this, expression=expr, json_scope=seq_get(args, 2))


 def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
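With json_scope carried on exp.Contains, a three-argument CONTAINS_SUBSTR should now round-trip instead of degrading to exp.Anonymous; a sketch (column and scope values are illustrative):

    from sqlglot import exp, parse_one

    node = parse_one("CONTAINS_SUBSTR(col, 'needle', 'JSON_VALUES')", read="bigquery")
    assert isinstance(node, exp.Contains)

    # The third argument survives in args and is re-emitted by the
    # generator change further down (see contains_sql).
    print(node.args.get("json_scope"))
    print(node.sql(dialect="bigquery"))  # expected: CONTAINS_SUBSTR(col, 'needle', 'JSON_VALUES')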
@@ -473,6 +486,9 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
         exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
@@ -484,20 +500,28 @@ class BigQuery(Dialect):
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BINARY
+        ),
         exp.CodePointsToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
         exp.Concat: _annotate_concat,
+        exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
         exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
         exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
+        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
@@ -506,12 +530,21 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BIGDECIMAL
+        ),
+        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
         exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
         exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
         exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
@@ -522,8 +555,11 @@ class BigQuery(Dialect):
         ),
         exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
+        ),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }
@@ -596,10 +632,13 @@ class BigQuery(Dialect):
             "EXPORT": TokenType.EXPORT,
             "FLOAT64": TokenType.DOUBLE,
             "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
+            "LOOP": TokenType.COMMAND,
             "MODEL": TokenType.MODEL,
             "NOT DETERMINISTIC": TokenType.VOLATILE,
             "RECORD": TokenType.STRUCT,
+            "REPEAT": TokenType.COMMAND,
             "TIMESTAMP": TokenType.TIMESTAMPTZ,
+            "WHILE": TokenType.COMMAND,
         }
         KEYWORDS.pop("DIV")
         KEYWORDS.pop("VALUES")
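Tokenizing LOOP/REPEAT/WHILE as commands means BigQuery procedural blocks should degrade gracefully; a sketch, assuming the usual TokenType.COMMAND fallback that preserves the statement verbatim up to the next semicolon:

    import sqlglot
    from sqlglot import exp

    statements = sqlglot.parse("WHILE i < 10 DO SET i = i + 1 END WHILE", read="bigquery")
    print(isinstance(statements[0], exp.Command))  # expected: True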
@@ -623,6 +662,8 @@ class BigQuery(Dialect):

         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
+            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
+            "BOOL": exp.JSONBool.from_arg_list,
             "CONTAINS_SUBSTR": _build_contains_substring,
             "DATE": _build_date,
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
@@ -647,6 +688,10 @@ class BigQuery(Dialect):
             "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
             "MD5": exp.MD5Digest.from_arg_list,
+            "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
+                this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
+            ),
+            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
             "TO_HEX": _build_to_hex,
             "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                 [seq_get(args, 1), seq_get(args, 0)]
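Together with the exp.Normalize transform registered in the generator section further down, the casefolding variant should round-trip; a sketch:

    from sqlglot import parse_one

    node = parse_one("NORMALIZE_AND_CASEFOLD(s, NFKC)", read="bigquery")

    # is_casefold=True steers generation back to NORMALIZE_AND_CASEFOLD;
    # a plain NORMALIZE(s) leaves the flag unset and regenerates unchanged.
    print(node.sql(dialect="bigquery"))  # expected: NORMALIZE_AND_CASEFOLD(s, NFKC)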
@@ -689,6 +734,7 @@ class BigQuery(Dialect):
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
             "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+            "FROM_HEX": exp.Unhex.from_arg_list,
             "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
         }

@@ -699,7 +745,10 @@ class BigQuery(Dialect):
                 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
             ),
             "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
+            "PREDICT": lambda self: self._parse_predict(),
             "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
+            "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
+            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
         }
         FUNCTION_PARSERS.pop("TRIM")

@@ -979,13 +1028,40 @@ class BigQuery(Dialect):

             return expr

-        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
-            expr = self.expression(
-                exp.FeaturesAtTime,
-                this=(self._match(TokenType.TABLE) and self._parse_table())
-                or self._parse_select(nested=True),
+        def _parse_predict(self) -> exp.Predict:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.Predict,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
+            )
+
+        def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.GenerateEmbedding,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
             )

+        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
+            self._match(TokenType.TABLE)
+            this = self._parse_table()
+
+            expr = self.expression(exp.FeaturesAtTime, this=this)
+
             while self._match(TokenType.COMMA):
                 arg = self._parse_lambda()

@@ -996,6 +1072,37 @@ class BigQuery(Dialect):

             return expr

+        def _parse_vector_search(self) -> exp.VectorSearch:
+            self._match(TokenType.TABLE)
+            base_table = self._parse_table()
+
+            self._match(TokenType.COMMA)
+
+            column_to_search = self._parse_bitwise()
+            self._match(TokenType.COMMA)
+
+            self._match(TokenType.TABLE)
+            query_table = self._parse_table()
+
+            expr = self.expression(
+                exp.VectorSearch,
+                this=base_table,
+                column_to_search=column_to_search,
+                query_table=query_table,
+            )
+
+            while self._match(TokenType.COMMA):
+                # query_column_to_search can be named argument or positional
+                if self._match(TokenType.STRING, advance=False):
+                    query_column = self._parse_string()
+                    expr.set("query_column_to_search", query_column)
+                else:
+                    arg = self._parse_lambda()
+                    if arg:
+                        expr.set(arg.this.name, arg)
+
+            return expr
+
         def _parse_export_data(self) -> exp.Export:
             self._match_text_seq("DATA")

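A parse sketch for the new table-valued function parsers, following BigQuery's documented VECTOR_SEARCH signature (dataset, table, and column names are illustrative):

    from sqlglot import exp, parse_one

    sql = """
    SELECT query.id, base.id
    FROM VECTOR_SEARCH(
      TABLE mydataset.base_table, 'embedding',
      TABLE mydataset.query_table,
      top_k => 5
    )
    """
    node = parse_one(sql, read="bigquery")
    search = node.find(exp.VectorSearch)
    print(search.this.sql("bigquery"))  # the base table
    print(search.args.get("top_k"))     # the named top_k argument, if matched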
@@ -1043,6 +1150,7 @@ class BigQuery(Dialect):

         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
+            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
             exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
@@ -1050,6 +1158,7 @@ class BigQuery(Dialect):
             exp.ArrayContains: _array_contains_sql,
             exp.ArrayFilter: filter_array_using_unnest,
             exp.ArrayRemove: filter_array_using_unnest,
+            exp.ByteLength: rename_func("BYTE_LENGTH"),
             exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
             exp.CollateProperty: lambda self, e: (
                 f"DEFAULT COLLATE {self.sql(e, 'this')}"
@@ -1083,6 +1192,7 @@ class BigQuery(Dialect):
             exp.ILike: no_ilike_sql,
             exp.IntDiv: rename_func("DIV"),
             exp.Int64: rename_func("INT64"),
+            exp.JSONBool: rename_func("BOOL"),
             exp.JSONExtract: _json_extract_sql,
             exp.JSONExtractArray: _json_extract_sql,
             exp.JSONExtractScalar: _json_extract_sql,
@@ -1092,6 +1202,11 @@ class BigQuery(Dialect):
             exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
             exp.MD5Digest: rename_func("MD5"),
             exp.Min: min_or_least,
+            exp.Normalize: lambda self, e: self.func(
+                "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE",
+                e.this,
+                e.args.get("form"),
+            ),
             exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
             exp.RegexpExtract: lambda self, e: self.func(
                 "REGEXP_EXTRACT",
@@ -1427,7 +1542,7 @@ class BigQuery(Dialect):
                 this = this.this
                 expr = expr.this

-            return self.func("CONTAINS_SUBSTR", this, expr)
+            return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope"))

         def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
             this = expression.this
sqlglot/dialects/clickhouse.py CHANGED
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
             "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
         }
         FUNCTIONS.pop("TRANSFORM")
+        FUNCTIONS.pop("APPROX_TOP_SUM")

         AGG_FUNCTIONS = {
             "count",
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
             "argMax",
             "avgWeighted",
             "topK",
+            "approx_top_sum",
             "topKWeighted",
             "deltaSum",
             "deltaSumTimestamp",
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):

             return value

+        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
+            # ClickHouse allows custom expressions as partition key
+            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
+            return self.expression(
+                exp.PartitionedByProperty,
+                this=self._parse_assignment(),
+            )
+
     class Generator(generator.Generator):
         QUERY_HINTS = False
         STRUCT_DELIMITER = ("(", ")")
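A sketch of the looser partition-key parsing, which should accept an arbitrary expression rather than only column/schema forms (table layout is illustrative):

    from sqlglot import exp, parse_one

    ddl = """
    CREATE TABLE visits (d Date, x Int64)
    ENGINE=MergeTree
    PARTITION BY toYYYYMM(d)
    ORDER BY d
    """
    node = parse_one(ddl, read="clickhouse")
    prop = node.find(exp.PartitionedByProperty)
    print(prop.this.sql("clickhouse"))  # expected: toYYYYMM(d)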
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
             exp.DateStrToDate: rename_func("toDate"),
             exp.DateSub: _datetime_delta_sql("DATE_SUB"),
             exp.Explode: rename_func("arrayJoin"),
+            exp.FarmFingerprint: rename_func("farmFingerprint64"),
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
             exp.IsNan: rename_func("isNaN"),
             exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
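Paired with BigQuery's FARM_FINGERPRINT (which sqlglot parses into exp.FarmFingerprint), this transform should enable cross-dialect transpilation; a sketch with an illustrative column:

    import sqlglot

    out = sqlglot.transpile(
        "SELECT FARM_FINGERPRINT(name) FROM t",
        read="bigquery",
        write="clickhouse",
    )[0]
    print(out)  # expected: SELECT farmFingerprint64(name) FROM t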
sqlglot/dialects/dialect.py CHANGED
@@ -668,6 +668,7 @@ class Dialect(metaclass=_Dialect):
             exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
+            exp.FromBase32,
             exp.FromBase64,
         },
         exp.DataType.Type.BOOLEAN: {
@@ -779,6 +780,7 @@ class Dialect(metaclass=_Dialect):
             exp.TimeToStr,
             exp.TimeToTimeStr,
             exp.Trim,
+            exp.ToBase32,
             exp.ToBase64,
             exp.TsOrDsToDateStr,
             exp.UnixToStr,
sqlglot/dialects/doris.py CHANGED
@@ -65,7 +65,11 @@ class Doris(MySQL):
             **MySQL.Parser.PROPERTY_PARSERS,
             "PROPERTIES": lambda self: self._parse_wrapped_properties(),
             "UNIQUE": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
+            # Plain KEY without UNIQUE/DUPLICATE/AGGREGATE prefixes should be treated as UniqueKeyProperty with unique=False
+            "KEY": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
             "PARTITION BY": lambda self: self._parse_partition_by_opt_range(),
+            "BUILD": lambda self: self._parse_build_property(),
+            "REFRESH": lambda self: self._parse_refresh_property(),
         }

         def _parse_partitioning_granularity_dynamic(self) -> exp.PartitionByRangePropertyDynamic:
@@ -104,9 +108,27 @@ class Doris(MySQL):
             part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
             return self.expression(exp.Partition, expressions=[part_range])

+        def _parse_partition_definition_list(self) -> exp.Partition:
+            # PARTITION <name> VALUES IN (<value_csv>)
+            self._match_text_seq("PARTITION")
+            name = self._parse_id_var()
+            self._match_text_seq("VALUES", "IN")
+            values = self._parse_wrapped_csv(self._parse_expression)
+            part_list = self.expression(exp.PartitionList, this=name, expressions=values)
+            return self.expression(exp.Partition, expressions=[part_list])
+
         def _parse_partition_by_opt_range(
             self,
-        ) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty:
+        ) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty | exp.PartitionByListProperty:
+            if self._match_text_seq("LIST"):
+                return self.expression(
+                    exp.PartitionByListProperty,
+                    partition_expressions=self._parse_wrapped_id_vars(),
+                    create_expressions=self._parse_wrapped_csv(
+                        self._parse_partition_definition_list
+                    ),
+                )
+
             if not self._match_text_seq("RANGE"):
                 return super()._parse_partitioned_by()

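A round-trip sketch for the new LIST partitioning (the table layout is illustrative, and the regenerated SQL relies on the generator methods added further below):

    from sqlglot import parse_one

    ddl = """
    CREATE TABLE t (city VARCHAR(32), v INT)
    PARTITION BY LIST (city) (
      PARTITION p_east VALUES IN ('NYC', 'BOS'),
      PARTITION p_west VALUES IN ('SF')
    )
    """
    node = parse_one(ddl, read="doris")

    # partitionbylistproperty_sql / partitionlist_sql should regenerate
    # the same PARTITION BY LIST clause.
    print(node.sql(dialect="doris"))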
128
150
  create_expressions=create_expressions,
129
151
  )
130
152
 
153
+ def _parse_build_property(self) -> exp.BuildProperty:
154
+ return self.expression(exp.BuildProperty, this=self._parse_var(upper=True))
155
+
156
+ def _parse_refresh_property(self) -> exp.RefreshTriggerProperty:
157
+ method = self._parse_var(upper=True)
158
+
159
+ self._match(TokenType.ON)
160
+
161
+ kind = self._match_texts(("MANUAL", "COMMIT", "SCHEDULE")) and self._prev.text.upper()
162
+ every = self._match_text_seq("EVERY") and self._parse_number()
163
+ unit = self._parse_var(any_token=True) if every else None
164
+ starts = self._match_text_seq("STARTS") and self._parse_string()
165
+
166
+ return self.expression(
167
+ exp.RefreshTriggerProperty,
168
+ method=method,
169
+ kind=kind,
170
+ every=every,
171
+ unit=unit,
172
+ starts=starts,
173
+ )
174
+
131
175
  class Generator(MySQL.Generator):
132
176
  LAST_DAY_SUPPORTS_DATE_PART = False
133
177
  VARCHAR_REQUIRES_SIZE = False
@@ -145,7 +189,10 @@ class Doris(MySQL):
145
189
  **MySQL.Generator.PROPERTIES_LOCATION,
146
190
  exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
147
191
  exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
192
+ exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
148
193
  exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
194
+ exp.BuildProperty: exp.Properties.Location.POST_SCHEMA,
195
+ exp.RefreshTriggerProperty: exp.Properties.Location.POST_SCHEMA,
149
196
  }
150
197
 
151
198
  CAST_MAPPING = {}
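A parse sketch for the new materialized-view properties; the statement shape follows the Doris docs, and whether each clause lands in args depends on what _parse_refresh_property matched:

    from sqlglot import exp, parse_one

    ddl = """
    CREATE MATERIALIZED VIEW mv
    BUILD IMMEDIATE
    REFRESH AUTO ON SCHEDULE EVERY 1 HOUR STARTS '2025-01-01 00:00:00'
    AS SELECT * FROM t
    """
    node = parse_one(ddl, read="doris")
    print(node.find(exp.BuildProperty))
    print(node.find(exp.RefreshTriggerProperty))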
@@ -662,9 +709,18 @@ class Doris(MySQL):
             "year",
         }

+        def uniquekeyproperty_sql(
+            self, expression: exp.UniqueKeyProperty, prefix: str = "UNIQUE KEY"
+        ) -> str:
+            create_stmt = expression.find_ancestor(exp.Create)
+            if create_stmt and create_stmt.args["properties"].find(exp.MaterializedProperty):
+                return super().uniquekeyproperty_sql(expression, prefix="KEY")
+
+            return super().uniquekeyproperty_sql(expression)
+
         def partition_sql(self, expression: exp.Partition) -> str:
             parent = expression.parent
-            if isinstance(parent, exp.PartitionByRangeProperty):
+            if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
                 return ", ".join(self.sql(e) for e in expression.expressions)
             return super().partition_sql(expression)

@@ -685,7 +741,9 @@ class Doris(MySQL):

             return f"PARTITION {name} VALUES LESS THAN ({self.sql(values[0])})"

-        def partitionbyrangepropertydynamic_sql(self, expression):
+        def partitionbyrangepropertydynamic_sql(
+            self, expression: exp.PartitionByRangePropertyDynamic
+        ) -> str:
             # Generates: FROM ("start") TO ("end") INTERVAL N UNIT
             start = self.sql(expression, "start")
             end = self.sql(expression, "end")
@@ -699,15 +757,25 @@ class Doris(MySQL):

             return f"FROM ({start}) TO ({end}) {interval}"

-        def partitionbyrangeproperty_sql(self, expression):
-            partition_expressions = ", ".join(
-                self.sql(e) for e in expression.args.get("partition_expressions") or []
+        def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
+            partition_expressions = self.expressions(
+                expression, key="partition_expressions", indent=False
             )
-            create_expressions = expression.args.get("create_expressions") or []
-            # Handle both static and dynamic partition definitions
-            create_sql = ", ".join(self.sql(e) for e in create_expressions)
+            create_sql = self.expressions(expression, key="create_expressions", indent=False)
             return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"

+        def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
+            partition_expressions = self.expressions(
+                expression, key="partition_expressions", indent=False
+            )
+            create_sql = self.expressions(expression, key="create_expressions", indent=False)
+            return f"PARTITION BY LIST ({partition_expressions}) ({create_sql})"
+
+        def partitionlist_sql(self, expression: exp.PartitionList) -> str:
+            name = self.sql(expression, "this")
+            values = self.expressions(expression, indent=False)
+            return f"PARTITION {name} VALUES IN ({values})"
+
         def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
             node = expression.this
             if isinstance(node, exp.Schema):
sqlglot/dialects/dremio.py CHANGED
@@ -1,17 +1,17 @@
 from __future__ import annotations

 import typing as t
-
 from sqlglot import expressions as exp
 from sqlglot import parser, generator, tokens
 from sqlglot.dialects.dialect import (
     Dialect,
     build_timetostr_or_tochar,
     build_formatted_time,
+    build_date_delta,
     rename_func,
-    unit_to_var,
 )
 from sqlglot.helper import seq_get
+from sqlglot.tokens import TokenType

 if t.TYPE_CHECKING:
     from sqlglot.dialects.dialect import DialectType
@@ -21,20 +21,17 @@ DATE_DELTA = t.Union[exp.DateAdd, exp.DateSub]

 def _date_delta_sql(name: str) -> t.Callable[[Dremio.Generator, DATE_DELTA], str]:
     def _delta_sql(self: Dremio.Generator, expression: DATE_DELTA) -> str:
-        unit = expression.text("unit")
+        unit = expression.text("unit").upper()

-        if not unit or unit.upper() == "DAY":
+        # Fallback to default behavior if unit is missing or 'DAY'
+        if not unit or unit == "DAY":
             return self.func(name, expression.this, expression.expression)

-        # to support units we need to use TIMESTAMPADD function
-        increment = expression.expression
-        if isinstance(expression, exp.DateSub):
-            if isinstance(increment, exp.Literal):
-                value = increment.to_py() if increment.is_number else int(increment.name)
-                increment = exp.Literal.number(value * -1)
-            else:
-                increment *= exp.Literal.number(-1)
-        return self.func("TIMESTAMPADD", unit_to_var(expression), increment, expression.this)
+        this_sql = self.sql(expression, "this")
+        expr_sql = self.sql(expression, "expression")
+
+        interval_sql = f"CAST({expr_sql} AS INTERVAL {unit})"
+        return f"{name}({this_sql}, {interval_sql})"

     return _delta_sql

@@ -50,6 +47,33 @@ def to_char_is_numeric_handler(args: t.List, dialect: DialectType) -> exp.TimeTo
     return expression


+def build_date_delta_with_cast_interval(
+    expression_class: t.Type[DATE_DELTA],
+) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
+    fallback_builder = build_date_delta(expression_class)
+
+    def _builder(args):
+        if len(args) == 2:
+            date_arg, interval_arg = args
+
+            if (
+                isinstance(interval_arg, exp.Cast)
+                and isinstance(interval_arg.to, exp.DataType)
+                and isinstance(interval_arg.to.this, exp.Interval)
+            ):
+                return expression_class(
+                    this=date_arg,
+                    expression=interval_arg.this,
+                    unit=interval_arg.to.this.unit,
+                )
+
+            return expression_class(this=date_arg, expression=interval_arg)
+
+        return fallback_builder(args)
+
+    return _builder
+
+
 class Dremio(Dialect):
     SUPPORTS_USER_DEFINED_TYPES = False
     CONCAT_COALESCE = True
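The generator rewrite above and this builder are designed as inverses; a round-trip sketch (column name is illustrative):

    import sqlglot
    from sqlglot import exp

    # MySQL's INTERVAL form becomes Dremio's CAST(... AS INTERVAL ...) form ...
    out = sqlglot.transpile(
        "SELECT DATE_ADD(d, INTERVAL 5 MONTH)",
        read="mysql",
        write="dremio",
    )[0]
    print(out)  # expected: SELECT DATE_ADD(d, CAST(5 AS INTERVAL MONTH))

    # ... and Dremio's form should parse back into exp.DateAdd with a MONTH unit.
    node = sqlglot.parse_one(out, read="dremio")
    print(node.find(exp.DateAdd).args.get("unit"))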
@@ -108,16 +132,39 @@ class Dremio(Dialect):
         "tzo": "%z",  # numeric offset (+0200)
     }

+    class Tokenizer(tokens.Tokenizer):
+        COMMENTS = ["--", "//", ("/*", "*/")]
+
     class Parser(parser.Parser):
         LOG_DEFAULTS_TO_LN = True

+        NO_PAREN_FUNCTION_PARSERS = {
+            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
+            "CURRENT_DATE_UTC": lambda self: self._parse_current_date_utc(),
+        }
+
         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
             "TO_CHAR": to_char_is_numeric_handler,
             "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
             "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
+            "DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd),
+            "DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub),
+            "ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
         }

+        def _parse_current_date_utc(self) -> exp.Cast:
+            if self._match(TokenType.L_PAREN):
+                self._match_r_paren()
+
+            return exp.Cast(
+                this=exp.AtTimeZone(
+                    this=exp.CurrentTimestamp(),
+                    zone=exp.Literal.string("UTC"),
+                ),
+                to=exp.DataType.build("DATE"),
+            )
+
     class Generator(generator.Generator):
         NVL2_SUPPORTED = False
         SUPPORTS_CONVERT_TIMEZONE = True
@@ -148,6 +195,7 @@ class Dremio(Dialect):
             exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
             exp.DateAdd: _date_delta_sql("DATE_ADD"),
             exp.DateSub: _date_delta_sql("DATE_SUB"),
+            exp.GenerateSeries: rename_func("ARRAY_GENERATE_RANGE"),
         }

@@ -162,5 +210,17 @@ class Dremio(Dialect):

             return super().datatype_sql(expression)

-    class Tokenizer(tokens.Tokenizer):
-        COMMENTS = ["--", "//", ("/*", "*/")]
+        def cast_sql(self, expression: exp.Cast, safe_prefix: str | None = None) -> str:
+            # Match: CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
+            if expression.is_type(exp.DataType.Type.DATE):
+                at_time_zone = expression.this
+
+                if (
+                    isinstance(at_time_zone, exp.AtTimeZone)
+                    and isinstance(at_time_zone.this, exp.CurrentTimestamp)
+                    and isinstance(at_time_zone.args["zone"], exp.Literal)
+                    and at_time_zone.text("zone").upper() == "UTC"
+                ):
+                    return "CURRENT_DATE_UTC"
+
+            return super().cast_sql(expression, safe_prefix)
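
A round-trip sketch for the new pseudo-function (the DuckDB rendering shown is illustrative):

    from sqlglot import parse_one

    node = parse_one("SELECT CURRENT_DATE_UTC", read="dremio")

    # Parses into CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE), so other
    # dialects render the expanded form ...
    print(node.selects[0].sql(dialect="duckdb"))

    # ... while cast_sql folds the same shape back into CURRENT_DATE_UTC.
    print(node.sql(dialect="dremio"))  # expected: SELECT CURRENT_DATE_UTC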