sqlglot-27.8.0-py3-none-any.whl → sqlglot-27.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +1 -1
- sqlglot/dialects/bigquery.py +112 -6
- sqlglot/dialects/clickhouse.py +11 -0
- sqlglot/dialects/dialect.py +2 -0
- sqlglot/dialects/doris.py +77 -9
- sqlglot/dialects/dremio.py +75 -15
- sqlglot/dialects/duckdb.py +13 -3
- sqlglot/dialects/exasol.py +23 -0
- sqlglot/dialects/mysql.py +0 -33
- sqlglot/dialects/redshift.py +1 -0
- sqlglot/dialects/singlestore.py +165 -19
- sqlglot/dialects/tsql.py +2 -0
- sqlglot/expressions.py +115 -4
- sqlglot/generator.py +92 -26
- sqlglot/optimizer/qualify_columns.py +1 -1
- sqlglot/parser.py +43 -16
- {sqlglot-27.8.0.dist-info → sqlglot-27.9.0.dist-info}/METADATA +41 -1
- {sqlglot-27.8.0.dist-info → sqlglot-27.9.0.dist-info}/RECORD +22 -22
- {sqlglot-27.8.0.dist-info → sqlglot-27.9.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.8.0.dist-info → sqlglot-27.9.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.8.0.dist-info → sqlglot-27.9.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '27.8.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (27, 8, 0)
|
|
31
|
+
__version__ = version = '27.9.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (27, 9, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
sqlglot/dialects/__init__.py
CHANGED
|
@@ -75,6 +75,7 @@ DIALECTS = [
|
|
|
75
75
|
"Druid",
|
|
76
76
|
"DuckDB",
|
|
77
77
|
"Dune",
|
|
78
|
+
"Exasol",
|
|
78
79
|
"Fabric",
|
|
79
80
|
"Hive",
|
|
80
81
|
"Materialize",
|
|
@@ -95,7 +96,6 @@ DIALECTS = [
|
|
|
95
96
|
"Teradata",
|
|
96
97
|
"Trino",
|
|
97
98
|
"TSQL",
|
|
98
|
-
"Exasol",
|
|
99
99
|
]
|
|
100
100
|
|
|
101
101
|
MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
|
sqlglot/dialects/bigquery.py
CHANGED
|
@@ -295,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
|
|
|
295
295
|
return expression
|
|
296
296
|
|
|
297
297
|
|
|
298
|
+
def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
|
|
299
|
+
self._annotate_args(expression)
|
|
300
|
+
|
|
301
|
+
struct_type = exp.DataType(
|
|
302
|
+
this=exp.DataType.Type.STRUCT,
|
|
303
|
+
expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
|
|
304
|
+
nested=True,
|
|
305
|
+
)
|
|
306
|
+
self._set_type(
|
|
307
|
+
expression,
|
|
308
|
+
exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
return expression
|
|
312
|
+
|
|
313
|
+
|
|
298
314
|
@unsupported_args("ins_cost", "del_cost", "sub_cost")
|
|
299
315
|
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
|
|
300
316
|
max_dist = expression.args.get("max_dist")
|
|
@@ -473,17 +489,24 @@ class BigQuery(Dialect):
|
|
|
473
489
|
exp.Substring,
|
|
474
490
|
)
|
|
475
491
|
},
|
|
492
|
+
exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
493
|
+
exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
494
|
+
exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
476
495
|
exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
|
|
477
496
|
exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
|
|
478
497
|
exp.Array: _annotate_array,
|
|
479
498
|
exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
|
|
480
499
|
exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
500
|
+
exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
|
|
481
501
|
exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
482
502
|
exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
483
503
|
exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
484
504
|
exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
485
505
|
exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
486
506
|
exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
507
|
+
exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
|
|
508
|
+
e, exp.DataType.Type.BINARY
|
|
509
|
+
),
|
|
487
510
|
exp.CodePointsToString: lambda self, e: self._annotate_with_type(
|
|
488
511
|
e, exp.DataType.Type.VARCHAR
|
|
489
512
|
),
|
|
@@ -493,6 +516,9 @@ class BigQuery(Dialect):
|
|
|
493
516
|
exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
494
517
|
exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
|
|
495
518
|
exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
|
|
519
|
+
exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
520
|
+
exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
521
|
+
exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
496
522
|
exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
|
|
497
523
|
e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
|
|
498
524
|
),
|
|
@@ -506,12 +532,20 @@ class BigQuery(Dialect):
|
|
|
506
532
|
),
|
|
507
533
|
exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
508
534
|
exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
|
|
535
|
+
exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
509
536
|
exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
510
537
|
exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
511
538
|
exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
|
|
539
|
+
exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
|
|
540
|
+
e, exp.DataType.Type.BIGDECIMAL
|
|
541
|
+
),
|
|
542
|
+
exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
|
|
512
543
|
exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
513
544
|
exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
|
|
514
545
|
exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
|
|
546
|
+
exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
|
|
547
|
+
e, exp.DataType.Type.VARCHAR
|
|
548
|
+
),
|
|
515
549
|
exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
516
550
|
exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
517
551
|
exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
@@ -522,8 +556,11 @@ class BigQuery(Dialect):
|
|
|
522
556
|
),
|
|
523
557
|
exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
|
|
524
558
|
exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
525
|
-
exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
526
559
|
exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
560
|
+
exp.ToCodePoints: lambda self, e: self._annotate_with_type(
|
|
561
|
+
e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
|
|
562
|
+
),
|
|
563
|
+
exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
527
564
|
exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
|
|
528
565
|
exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
529
566
|
}
|
|
@@ -596,10 +633,13 @@ class BigQuery(Dialect):
|
|
|
596
633
|
"EXPORT": TokenType.EXPORT,
|
|
597
634
|
"FLOAT64": TokenType.DOUBLE,
|
|
598
635
|
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
|
|
636
|
+
"LOOP": TokenType.COMMAND,
|
|
599
637
|
"MODEL": TokenType.MODEL,
|
|
600
638
|
"NOT DETERMINISTIC": TokenType.VOLATILE,
|
|
601
639
|
"RECORD": TokenType.STRUCT,
|
|
640
|
+
"REPEAT": TokenType.COMMAND,
|
|
602
641
|
"TIMESTAMP": TokenType.TIMESTAMPTZ,
|
|
642
|
+
"WHILE": TokenType.COMMAND,
|
|
603
643
|
}
|
|
604
644
|
KEYWORDS.pop("DIV")
|
|
605
645
|
KEYWORDS.pop("VALUES")
|
|
@@ -623,6 +663,8 @@ class BigQuery(Dialect):
|
|
|
623
663
|
|
|
624
664
|
FUNCTIONS = {
|
|
625
665
|
**parser.Parser.FUNCTIONS,
|
|
666
|
+
"APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
|
|
667
|
+
"BOOL": exp.JSONBool.from_arg_list,
|
|
626
668
|
"CONTAINS_SUBSTR": _build_contains_substring,
|
|
627
669
|
"DATE": _build_date,
|
|
628
670
|
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
|
|
@@ -689,6 +731,7 @@ class BigQuery(Dialect):
|
|
|
689
731
|
"FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
|
|
690
732
|
"FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
|
|
691
733
|
"FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
|
|
734
|
+
"FROM_HEX": exp.Unhex.from_arg_list,
|
|
692
735
|
"WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
|
|
693
736
|
}
|
|
694
737
|
|
|
@@ -699,7 +742,10 @@ class BigQuery(Dialect):
|
|
|
699
742
|
exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
|
|
700
743
|
),
|
|
701
744
|
"MAKE_INTERVAL": lambda self: self._parse_make_interval(),
|
|
745
|
+
"PREDICT": lambda self: self._parse_predict(),
|
|
702
746
|
"FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
|
|
747
|
+
"GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
|
|
748
|
+
"VECTOR_SEARCH": lambda self: self._parse_vector_search(),
|
|
703
749
|
}
|
|
704
750
|
FUNCTION_PARSERS.pop("TRIM")
|
|
705
751
|
|
|
@@ -979,13 +1025,40 @@ class BigQuery(Dialect):
|
|
|
979
1025
|
|
|
980
1026
|
return expr
|
|
981
1027
|
|
|
982
|
-
def
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1028
|
+
def _parse_predict(self) -> exp.Predict:
|
|
1029
|
+
self._match_text_seq("MODEL")
|
|
1030
|
+
this = self._parse_table()
|
|
1031
|
+
|
|
1032
|
+
self._match(TokenType.COMMA)
|
|
1033
|
+
self._match_text_seq("TABLE")
|
|
1034
|
+
|
|
1035
|
+
return self.expression(
|
|
1036
|
+
exp.Predict,
|
|
1037
|
+
this=this,
|
|
1038
|
+
expression=self._parse_table(),
|
|
1039
|
+
params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
|
|
1040
|
+
)
|
|
1041
|
+
|
|
1042
|
+
def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
|
|
1043
|
+
self._match_text_seq("MODEL")
|
|
1044
|
+
this = self._parse_table()
|
|
1045
|
+
|
|
1046
|
+
self._match(TokenType.COMMA)
|
|
1047
|
+
self._match_text_seq("TABLE")
|
|
1048
|
+
|
|
1049
|
+
return self.expression(
|
|
1050
|
+
exp.GenerateEmbedding,
|
|
1051
|
+
this=this,
|
|
1052
|
+
expression=self._parse_table(),
|
|
1053
|
+
params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
|
|
987
1054
|
)
|
|
988
1055
|
|
|
1056
|
+
def _parse_features_at_time(self) -> exp.FeaturesAtTime:
|
|
1057
|
+
self._match(TokenType.TABLE)
|
|
1058
|
+
this = self._parse_table()
|
|
1059
|
+
|
|
1060
|
+
expr = self.expression(exp.FeaturesAtTime, this=this)
|
|
1061
|
+
|
|
989
1062
|
while self._match(TokenType.COMMA):
|
|
990
1063
|
arg = self._parse_lambda()
|
|
991
1064
|
|
|
@@ -996,6 +1069,37 @@ class BigQuery(Dialect):
|
|
|
996
1069
|
|
|
997
1070
|
return expr
|
|
998
1071
|
|
|
1072
|
+
def _parse_vector_search(self) -> exp.VectorSearch:
|
|
1073
|
+
self._match(TokenType.TABLE)
|
|
1074
|
+
base_table = self._parse_table()
|
|
1075
|
+
|
|
1076
|
+
self._match(TokenType.COMMA)
|
|
1077
|
+
|
|
1078
|
+
column_to_search = self._parse_bitwise()
|
|
1079
|
+
self._match(TokenType.COMMA)
|
|
1080
|
+
|
|
1081
|
+
self._match(TokenType.TABLE)
|
|
1082
|
+
query_table = self._parse_table()
|
|
1083
|
+
|
|
1084
|
+
expr = self.expression(
|
|
1085
|
+
exp.VectorSearch,
|
|
1086
|
+
this=base_table,
|
|
1087
|
+
column_to_search=column_to_search,
|
|
1088
|
+
query_table=query_table,
|
|
1089
|
+
)
|
|
1090
|
+
|
|
1091
|
+
while self._match(TokenType.COMMA):
|
|
1092
|
+
# query_column_to_search can be named argument or positional
|
|
1093
|
+
if self._match(TokenType.STRING, advance=False):
|
|
1094
|
+
query_column = self._parse_string()
|
|
1095
|
+
expr.set("query_column_to_search", query_column)
|
|
1096
|
+
else:
|
|
1097
|
+
arg = self._parse_lambda()
|
|
1098
|
+
if arg:
|
|
1099
|
+
expr.set(arg.this.name, arg)
|
|
1100
|
+
|
|
1101
|
+
return expr
|
|
1102
|
+
|
|
999
1103
|
def _parse_export_data(self) -> exp.Export:
|
|
1000
1104
|
self._match_text_seq("DATA")
|
|
1001
1105
|
|
|
@@ -1043,6 +1147,7 @@ class BigQuery(Dialect):
|
|
|
1043
1147
|
|
|
1044
1148
|
TRANSFORMS = {
|
|
1045
1149
|
**generator.Generator.TRANSFORMS,
|
|
1150
|
+
exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
|
|
1046
1151
|
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
|
|
1047
1152
|
exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
|
|
1048
1153
|
exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
|
|
@@ -1083,6 +1188,7 @@ class BigQuery(Dialect):
|
|
|
1083
1188
|
exp.ILike: no_ilike_sql,
|
|
1084
1189
|
exp.IntDiv: rename_func("DIV"),
|
|
1085
1190
|
exp.Int64: rename_func("INT64"),
|
|
1191
|
+
exp.JSONBool: rename_func("BOOL"),
|
|
1086
1192
|
exp.JSONExtract: _json_extract_sql,
|
|
1087
1193
|
exp.JSONExtractArray: _json_extract_sql,
|
|
1088
1194
|
exp.JSONExtractScalar: _json_extract_sql,
|
sqlglot/dialects/clickhouse.py
CHANGED
|
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
|
|
|
345
345
|
"LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
|
|
346
346
|
}
|
|
347
347
|
FUNCTIONS.pop("TRANSFORM")
|
|
348
|
+
FUNCTIONS.pop("APPROX_TOP_SUM")
|
|
348
349
|
|
|
349
350
|
AGG_FUNCTIONS = {
|
|
350
351
|
"count",
|
|
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
|
|
|
379
380
|
"argMax",
|
|
380
381
|
"avgWeighted",
|
|
381
382
|
"topK",
|
|
383
|
+
"approx_top_sum",
|
|
382
384
|
"topKWeighted",
|
|
383
385
|
"deltaSum",
|
|
384
386
|
"deltaSumTimestamp",
|
|
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):
|
|
|
977
979
|
|
|
978
980
|
return value
|
|
979
981
|
|
|
982
|
+
def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
|
|
983
|
+
# ClickHouse allows custom expressions as partition key
|
|
984
|
+
# https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
|
|
985
|
+
return self.expression(
|
|
986
|
+
exp.PartitionedByProperty,
|
|
987
|
+
this=self._parse_assignment(),
|
|
988
|
+
)
|
|
989
|
+
|
|
980
990
|
class Generator(generator.Generator):
|
|
981
991
|
QUERY_HINTS = False
|
|
982
992
|
STRUCT_DELIMITER = ("(", ")")
|
|
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
|
|
|
1094
1104
|
exp.DateStrToDate: rename_func("toDate"),
|
|
1095
1105
|
exp.DateSub: _datetime_delta_sql("DATE_SUB"),
|
|
1096
1106
|
exp.Explode: rename_func("arrayJoin"),
|
|
1107
|
+
exp.FarmFingerprint: rename_func("farmFingerprint64"),
|
|
1097
1108
|
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
|
|
1098
1109
|
exp.IsNan: rename_func("isNaN"),
|
|
1099
1110
|
exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
|
sqlglot/dialects/dialect.py
CHANGED
|
@@ -668,6 +668,7 @@ class Dialect(metaclass=_Dialect):
|
|
|
668
668
|
exp.UnixMillis,
|
|
669
669
|
},
|
|
670
670
|
exp.DataType.Type.BINARY: {
|
|
671
|
+
exp.FromBase32,
|
|
671
672
|
exp.FromBase64,
|
|
672
673
|
},
|
|
673
674
|
exp.DataType.Type.BOOLEAN: {
|
|
@@ -779,6 +780,7 @@ class Dialect(metaclass=_Dialect):
|
|
|
779
780
|
exp.TimeToStr,
|
|
780
781
|
exp.TimeToTimeStr,
|
|
781
782
|
exp.Trim,
|
|
783
|
+
exp.ToBase32,
|
|
782
784
|
exp.ToBase64,
|
|
783
785
|
exp.TsOrDsToDateStr,
|
|
784
786
|
exp.UnixToStr,
|
sqlglot/dialects/doris.py
CHANGED
|
@@ -65,7 +65,11 @@ class Doris(MySQL):
|
|
|
65
65
|
**MySQL.Parser.PROPERTY_PARSERS,
|
|
66
66
|
"PROPERTIES": lambda self: self._parse_wrapped_properties(),
|
|
67
67
|
"UNIQUE": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
|
|
68
|
+
# Plain KEY without UNIQUE/DUPLICATE/AGGREGATE prefixes should be treated as UniqueKeyProperty with unique=False
|
|
69
|
+
"KEY": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
|
|
68
70
|
"PARTITION BY": lambda self: self._parse_partition_by_opt_range(),
|
|
71
|
+
"BUILD": lambda self: self._parse_build_property(),
|
|
72
|
+
"REFRESH": lambda self: self._parse_refresh_property(),
|
|
69
73
|
}
|
|
70
74
|
|
|
71
75
|
def _parse_partitioning_granularity_dynamic(self) -> exp.PartitionByRangePropertyDynamic:
|
|
@@ -104,9 +108,27 @@ class Doris(MySQL):
|
|
|
104
108
|
part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
|
|
105
109
|
return self.expression(exp.Partition, expressions=[part_range])
|
|
106
110
|
|
|
111
|
+
def _parse_partition_definition_list(self) -> exp.Partition:
|
|
112
|
+
# PARTITION <name> VALUES IN (<value_csv>)
|
|
113
|
+
self._match_text_seq("PARTITION")
|
|
114
|
+
name = self._parse_id_var()
|
|
115
|
+
self._match_text_seq("VALUES", "IN")
|
|
116
|
+
values = self._parse_wrapped_csv(self._parse_expression)
|
|
117
|
+
part_list = self.expression(exp.PartitionList, this=name, expressions=values)
|
|
118
|
+
return self.expression(exp.Partition, expressions=[part_list])
|
|
119
|
+
|
|
107
120
|
def _parse_partition_by_opt_range(
|
|
108
121
|
self,
|
|
109
|
-
) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty:
|
|
122
|
+
) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty | exp.PartitionByListProperty:
|
|
123
|
+
if self._match_text_seq("LIST"):
|
|
124
|
+
return self.expression(
|
|
125
|
+
exp.PartitionByListProperty,
|
|
126
|
+
partition_expressions=self._parse_wrapped_id_vars(),
|
|
127
|
+
create_expressions=self._parse_wrapped_csv(
|
|
128
|
+
self._parse_partition_definition_list
|
|
129
|
+
),
|
|
130
|
+
)
|
|
131
|
+
|
|
110
132
|
if not self._match_text_seq("RANGE"):
|
|
111
133
|
return super()._parse_partitioned_by()
|
|
112
134
|
|
|
@@ -128,6 +150,28 @@ class Doris(MySQL):
|
|
|
128
150
|
create_expressions=create_expressions,
|
|
129
151
|
)
|
|
130
152
|
|
|
153
|
+
def _parse_build_property(self) -> exp.BuildProperty:
|
|
154
|
+
return self.expression(exp.BuildProperty, this=self._parse_var(upper=True))
|
|
155
|
+
|
|
156
|
+
def _parse_refresh_property(self) -> exp.RefreshTriggerProperty:
|
|
157
|
+
method = self._parse_var(upper=True)
|
|
158
|
+
|
|
159
|
+
self._match(TokenType.ON)
|
|
160
|
+
|
|
161
|
+
kind = self._match_texts(("MANUAL", "COMMIT", "SCHEDULE")) and self._prev.text.upper()
|
|
162
|
+
every = self._match_text_seq("EVERY") and self._parse_number()
|
|
163
|
+
unit = self._parse_var(any_token=True) if every else None
|
|
164
|
+
starts = self._match_text_seq("STARTS") and self._parse_string()
|
|
165
|
+
|
|
166
|
+
return self.expression(
|
|
167
|
+
exp.RefreshTriggerProperty,
|
|
168
|
+
method=method,
|
|
169
|
+
kind=kind,
|
|
170
|
+
every=every,
|
|
171
|
+
unit=unit,
|
|
172
|
+
starts=starts,
|
|
173
|
+
)
|
|
174
|
+
|
|
131
175
|
class Generator(MySQL.Generator):
|
|
132
176
|
LAST_DAY_SUPPORTS_DATE_PART = False
|
|
133
177
|
VARCHAR_REQUIRES_SIZE = False
|
|
@@ -145,7 +189,10 @@ class Doris(MySQL):
|
|
|
145
189
|
**MySQL.Generator.PROPERTIES_LOCATION,
|
|
146
190
|
exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
|
|
147
191
|
exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
|
|
192
|
+
exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
|
|
148
193
|
exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
|
|
194
|
+
exp.BuildProperty: exp.Properties.Location.POST_SCHEMA,
|
|
195
|
+
exp.RefreshTriggerProperty: exp.Properties.Location.POST_SCHEMA,
|
|
149
196
|
}
|
|
150
197
|
|
|
151
198
|
CAST_MAPPING = {}
|
|
@@ -662,9 +709,18 @@ class Doris(MySQL):
|
|
|
662
709
|
"year",
|
|
663
710
|
}
|
|
664
711
|
|
|
712
|
+
def uniquekeyproperty_sql(
|
|
713
|
+
self, expression: exp.UniqueKeyProperty, prefix: str = "UNIQUE KEY"
|
|
714
|
+
) -> str:
|
|
715
|
+
create_stmt = expression.find_ancestor(exp.Create)
|
|
716
|
+
if create_stmt and create_stmt.args["properties"].find(exp.MaterializedProperty):
|
|
717
|
+
return super().uniquekeyproperty_sql(expression, prefix="KEY")
|
|
718
|
+
|
|
719
|
+
return super().uniquekeyproperty_sql(expression)
|
|
720
|
+
|
|
665
721
|
def partition_sql(self, expression: exp.Partition) -> str:
|
|
666
722
|
parent = expression.parent
|
|
667
|
-
if isinstance(parent, exp.PartitionByRangeProperty):
|
|
723
|
+
if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
|
|
668
724
|
return ", ".join(self.sql(e) for e in expression.expressions)
|
|
669
725
|
return super().partition_sql(expression)
|
|
670
726
|
|
|
@@ -685,7 +741,9 @@ class Doris(MySQL):
|
|
|
685
741
|
|
|
686
742
|
return f"PARTITION {name} VALUES LESS THAN ({self.sql(values[0])})"
|
|
687
743
|
|
|
688
|
-
def partitionbyrangepropertydynamic_sql(
|
|
744
|
+
def partitionbyrangepropertydynamic_sql(
|
|
745
|
+
self, expression: exp.PartitionByRangePropertyDynamic
|
|
746
|
+
) -> str:
|
|
689
747
|
# Generates: FROM ("start") TO ("end") INTERVAL N UNIT
|
|
690
748
|
start = self.sql(expression, "start")
|
|
691
749
|
end = self.sql(expression, "end")
|
|
@@ -699,15 +757,25 @@ class Doris(MySQL):
|
|
|
699
757
|
|
|
700
758
|
return f"FROM ({start}) TO ({end}) {interval}"
|
|
701
759
|
|
|
702
|
-
def partitionbyrangeproperty_sql(self, expression):
|
|
703
|
-
partition_expressions =
|
|
704
|
-
|
|
760
|
+
def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
|
|
761
|
+
partition_expressions = self.expressions(
|
|
762
|
+
expression, key="partition_expressions", indent=False
|
|
705
763
|
)
|
|
706
|
-
|
|
707
|
-
# Handle both static and dynamic partition definitions
|
|
708
|
-
create_sql = ", ".join(self.sql(e) for e in create_expressions)
|
|
764
|
+
create_sql = self.expressions(expression, key="create_expressions", indent=False)
|
|
709
765
|
return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"
|
|
710
766
|
|
|
767
|
+
def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
|
|
768
|
+
partition_expressions = self.expressions(
|
|
769
|
+
expression, key="partition_expressions", indent=False
|
|
770
|
+
)
|
|
771
|
+
create_sql = self.expressions(expression, key="create_expressions", indent=False)
|
|
772
|
+
return f"PARTITION BY LIST ({partition_expressions}) ({create_sql})"
|
|
773
|
+
|
|
774
|
+
def partitionlist_sql(self, expression: exp.PartitionList) -> str:
|
|
775
|
+
name = self.sql(expression, "this")
|
|
776
|
+
values = self.expressions(expression, indent=False)
|
|
777
|
+
return f"PARTITION {name} VALUES IN ({values})"
|
|
778
|
+
|
|
711
779
|
def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
|
|
712
780
|
node = expression.this
|
|
713
781
|
if isinstance(node, exp.Schema):
|
sqlglot/dialects/dremio.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing as t
|
|
4
|
-
|
|
5
4
|
from sqlglot import expressions as exp
|
|
6
5
|
from sqlglot import parser, generator, tokens
|
|
7
6
|
from sqlglot.dialects.dialect import (
|
|
8
7
|
Dialect,
|
|
9
8
|
build_timetostr_or_tochar,
|
|
10
9
|
build_formatted_time,
|
|
10
|
+
build_date_delta,
|
|
11
11
|
rename_func,
|
|
12
|
-
unit_to_var,
|
|
13
12
|
)
|
|
14
13
|
from sqlglot.helper import seq_get
|
|
14
|
+
from sqlglot.tokens import TokenType
|
|
15
15
|
|
|
16
16
|
if t.TYPE_CHECKING:
|
|
17
17
|
from sqlglot.dialects.dialect import DialectType
|
|
@@ -21,20 +21,17 @@ DATE_DELTA = t.Union[exp.DateAdd, exp.DateSub]
|
|
|
21
21
|
|
|
22
22
|
def _date_delta_sql(name: str) -> t.Callable[[Dremio.Generator, DATE_DELTA], str]:
|
|
23
23
|
def _delta_sql(self: Dremio.Generator, expression: DATE_DELTA) -> str:
|
|
24
|
-
unit = expression.text("unit")
|
|
24
|
+
unit = expression.text("unit").upper()
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
# Fallback to default behavior if unit is missing or 'DAY'
|
|
27
|
+
if not unit or unit == "DAY":
|
|
27
28
|
return self.func(name, expression.this, expression.expression)
|
|
28
29
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
increment = exp.Literal.number(value * -1)
|
|
35
|
-
else:
|
|
36
|
-
increment *= exp.Literal.number(-1)
|
|
37
|
-
return self.func("TIMESTAMPADD", unit_to_var(expression), increment, expression.this)
|
|
30
|
+
this_sql = self.sql(expression, "this")
|
|
31
|
+
expr_sql = self.sql(expression, "expression")
|
|
32
|
+
|
|
33
|
+
interval_sql = f"CAST({expr_sql} AS INTERVAL {unit})"
|
|
34
|
+
return f"{name}({this_sql}, {interval_sql})"
|
|
38
35
|
|
|
39
36
|
return _delta_sql
|
|
40
37
|
|
|
@@ -50,6 +47,33 @@ def to_char_is_numeric_handler(args: t.List, dialect: DialectType) -> exp.TimeTo
|
|
|
50
47
|
return expression
|
|
51
48
|
|
|
52
49
|
|
|
50
|
+
def build_date_delta_with_cast_interval(
|
|
51
|
+
expression_class: t.Type[DATE_DELTA],
|
|
52
|
+
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
|
|
53
|
+
fallback_builder = build_date_delta(expression_class)
|
|
54
|
+
|
|
55
|
+
def _builder(args):
|
|
56
|
+
if len(args) == 2:
|
|
57
|
+
date_arg, interval_arg = args
|
|
58
|
+
|
|
59
|
+
if (
|
|
60
|
+
isinstance(interval_arg, exp.Cast)
|
|
61
|
+
and isinstance(interval_arg.to, exp.DataType)
|
|
62
|
+
and isinstance(interval_arg.to.this, exp.Interval)
|
|
63
|
+
):
|
|
64
|
+
return expression_class(
|
|
65
|
+
this=date_arg,
|
|
66
|
+
expression=interval_arg.this,
|
|
67
|
+
unit=interval_arg.to.this.unit,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
return expression_class(this=date_arg, expression=interval_arg)
|
|
71
|
+
|
|
72
|
+
return fallback_builder(args)
|
|
73
|
+
|
|
74
|
+
return _builder
|
|
75
|
+
|
|
76
|
+
|
|
53
77
|
class Dremio(Dialect):
|
|
54
78
|
SUPPORTS_USER_DEFINED_TYPES = False
|
|
55
79
|
CONCAT_COALESCE = True
|
|
@@ -108,16 +132,39 @@ class Dremio(Dialect):
|
|
|
108
132
|
"tzo": "%z", # numeric offset (+0200)
|
|
109
133
|
}
|
|
110
134
|
|
|
135
|
+
class Tokenizer(tokens.Tokenizer):
|
|
136
|
+
COMMENTS = ["--", "//", ("/*", "*/")]
|
|
137
|
+
|
|
111
138
|
class Parser(parser.Parser):
|
|
112
139
|
LOG_DEFAULTS_TO_LN = True
|
|
113
140
|
|
|
141
|
+
NO_PAREN_FUNCTION_PARSERS = {
|
|
142
|
+
**parser.Parser.NO_PAREN_FUNCTION_PARSERS,
|
|
143
|
+
"CURRENT_DATE_UTC": lambda self: self._parse_current_date_utc(),
|
|
144
|
+
}
|
|
145
|
+
|
|
114
146
|
FUNCTIONS = {
|
|
115
147
|
**parser.Parser.FUNCTIONS,
|
|
116
148
|
"TO_CHAR": to_char_is_numeric_handler,
|
|
117
149
|
"DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
|
|
118
150
|
"TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
|
|
151
|
+
"DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd),
|
|
152
|
+
"DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub),
|
|
153
|
+
"ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
|
|
119
154
|
}
|
|
120
155
|
|
|
156
|
+
def _parse_current_date_utc(self) -> exp.Cast:
|
|
157
|
+
if self._match(TokenType.L_PAREN):
|
|
158
|
+
self._match_r_paren()
|
|
159
|
+
|
|
160
|
+
return exp.Cast(
|
|
161
|
+
this=exp.AtTimeZone(
|
|
162
|
+
this=exp.CurrentTimestamp(),
|
|
163
|
+
zone=exp.Literal.string("UTC"),
|
|
164
|
+
),
|
|
165
|
+
to=exp.DataType.build("DATE"),
|
|
166
|
+
)
|
|
167
|
+
|
|
121
168
|
class Generator(generator.Generator):
|
|
122
169
|
NVL2_SUPPORTED = False
|
|
123
170
|
SUPPORTS_CONVERT_TIMEZONE = True
|
|
@@ -148,6 +195,7 @@ class Dremio(Dialect):
|
|
|
148
195
|
exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
|
|
149
196
|
exp.DateAdd: _date_delta_sql("DATE_ADD"),
|
|
150
197
|
exp.DateSub: _date_delta_sql("DATE_SUB"),
|
|
198
|
+
exp.GenerateSeries: rename_func("ARRAY_GENERATE_RANGE"),
|
|
151
199
|
}
|
|
152
200
|
|
|
153
201
|
def datatype_sql(self, expression: exp.DataType) -> str:
|
|
@@ -162,5 +210,17 @@ class Dremio(Dialect):
|
|
|
162
210
|
|
|
163
211
|
return super().datatype_sql(expression)
|
|
164
212
|
|
|
165
|
-
|
|
166
|
-
|
|
213
|
+
def cast_sql(self, expression: exp.Cast, safe_prefix: str | None = None) -> str:
|
|
214
|
+
# Match: CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
|
|
215
|
+
if expression.is_type(exp.DataType.Type.DATE):
|
|
216
|
+
at_time_zone = expression.this
|
|
217
|
+
|
|
218
|
+
if (
|
|
219
|
+
isinstance(at_time_zone, exp.AtTimeZone)
|
|
220
|
+
and isinstance(at_time_zone.this, exp.CurrentTimestamp)
|
|
221
|
+
and isinstance(at_time_zone.args["zone"], exp.Literal)
|
|
222
|
+
and at_time_zone.text("zone").upper() == "UTC"
|
|
223
|
+
):
|
|
224
|
+
return "CURRENT_DATE_UTC"
|
|
225
|
+
|
|
226
|
+
return super().cast_sql(expression, safe_prefix)
|
sqlglot/dialects/duckdb.py
CHANGED
|
@@ -386,10 +386,12 @@ class DuckDB(Dialect):
|
|
|
386
386
|
"JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
|
|
387
387
|
"JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
|
|
388
388
|
"LIST_CONTAINS": exp.ArrayContains.from_arg_list,
|
|
389
|
+
"LIST_FILTER": exp.ArrayFilter.from_arg_list,
|
|
389
390
|
"LIST_HAS": exp.ArrayContains.from_arg_list,
|
|
390
391
|
"LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list,
|
|
391
392
|
"LIST_REVERSE_SORT": _build_sort_array_desc,
|
|
392
393
|
"LIST_SORT": exp.SortArray.from_arg_list,
|
|
394
|
+
"LIST_TRANSFORM": exp.Transform.from_arg_list,
|
|
393
395
|
"LIST_VALUE": lambda args: exp.Array(expressions=args),
|
|
394
396
|
"MAKE_TIME": exp.TimeFromParts.from_arg_list,
|
|
395
397
|
"MAKE_TIMESTAMP": _build_make_timestamp,
|
|
@@ -643,6 +645,9 @@ class DuckDB(Dialect):
|
|
|
643
645
|
exp.ArrayRemove: remove_from_array_using_filter,
|
|
644
646
|
exp.ArraySort: _array_sort_sql,
|
|
645
647
|
exp.ArraySum: rename_func("LIST_SUM"),
|
|
648
|
+
exp.ArrayUniqueAgg: lambda self, e: self.func(
|
|
649
|
+
"LIST", exp.Distinct(expressions=[e.this])
|
|
650
|
+
),
|
|
646
651
|
exp.BitwiseXor: rename_func("XOR"),
|
|
647
652
|
exp.CommentColumnConstraint: no_comment_column_constraint_sql,
|
|
648
653
|
exp.CurrentDate: lambda *_: "CURRENT_DATE",
|
|
@@ -1117,15 +1122,20 @@ class DuckDB(Dialect):
|
|
|
1117
1122
|
return super().unnest_sql(expression)
|
|
1118
1123
|
|
|
1119
1124
|
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
|
|
1120
|
-
|
|
1125
|
+
this = expression.this
|
|
1126
|
+
|
|
1127
|
+
if isinstance(this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
|
|
1121
1128
|
# DuckDB should render IGNORE NULLS only for the general-purpose
|
|
1122
1129
|
# window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
|
|
1123
1130
|
return super().ignorenulls_sql(expression)
|
|
1124
1131
|
|
|
1125
|
-
if
|
|
1132
|
+
if isinstance(this, exp.First):
|
|
1133
|
+
this = exp.AnyValue(this=this.this)
|
|
1134
|
+
|
|
1135
|
+
if not isinstance(this, exp.AnyValue):
|
|
1126
1136
|
self.unsupported("IGNORE NULLS is not supported for non-window functions.")
|
|
1127
1137
|
|
|
1128
|
-
return self.sql(
|
|
1138
|
+
return self.sql(this)
|
|
1129
1139
|
|
|
1130
1140
|
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
|
|
1131
1141
|
if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
|