sqlglot 27.8.0__py3-none-any.whl → 27.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +1 -1
- sqlglot/dialects/bigquery.py +127 -12
- sqlglot/dialects/clickhouse.py +11 -0
- sqlglot/dialects/dialect.py +2 -0
- sqlglot/dialects/doris.py +77 -9
- sqlglot/dialects/dremio.py +75 -15
- sqlglot/dialects/duckdb.py +13 -3
- sqlglot/dialects/exasol.py +23 -0
- sqlglot/dialects/mysql.py +0 -33
- sqlglot/dialects/postgres.py +0 -1
- sqlglot/dialects/redshift.py +1 -0
- sqlglot/dialects/singlestore.py +185 -19
- sqlglot/dialects/tsql.py +2 -0
- sqlglot/expressions.py +123 -7
- sqlglot/generator.py +123 -29
- sqlglot/optimizer/qualify_columns.py +1 -1
- sqlglot/optimizer/scope.py +1 -0
- sqlglot/parser.py +83 -19
- sqlglot/tokens.py +2 -0
- {sqlglot-27.8.0.dist-info → sqlglot-27.10.0.dist-info}/METADATA +41 -1
- {sqlglot-27.8.0.dist-info → sqlglot-27.10.0.dist-info}/RECORD +25 -25
- {sqlglot-27.8.0.dist-info → sqlglot-27.10.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.8.0.dist-info → sqlglot-27.10.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.8.0.dist-info → sqlglot-27.10.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '27.
|
|
32
|
-
__version_tuple__ = version_tuple = (27,
|
|
31
|
+
__version__ = version = '27.10.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (27, 10, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
sqlglot/dialects/__init__.py
CHANGED
|
@@ -75,6 +75,7 @@ DIALECTS = [
|
|
|
75
75
|
"Druid",
|
|
76
76
|
"DuckDB",
|
|
77
77
|
"Dune",
|
|
78
|
+
"Exasol",
|
|
78
79
|
"Fabric",
|
|
79
80
|
"Hive",
|
|
80
81
|
"Materialize",
|
|
@@ -95,7 +96,6 @@ DIALECTS = [
|
|
|
95
96
|
"Teradata",
|
|
96
97
|
"Trino",
|
|
97
98
|
"TSQL",
|
|
98
|
-
"Exasol",
|
|
99
99
|
]
|
|
100
100
|
|
|
101
101
|
MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
|
sqlglot/dialects/bigquery.py
CHANGED
|
@@ -295,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
|
|
|
295
295
|
return expression
|
|
296
296
|
|
|
297
297
|
|
|
298
|
+
def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
|
|
299
|
+
self._annotate_args(expression)
|
|
300
|
+
|
|
301
|
+
struct_type = exp.DataType(
|
|
302
|
+
this=exp.DataType.Type.STRUCT,
|
|
303
|
+
expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
|
|
304
|
+
nested=True,
|
|
305
|
+
)
|
|
306
|
+
self._set_type(
|
|
307
|
+
expression,
|
|
308
|
+
exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
return expression
|
|
312
|
+
|
|
313
|
+
|
|
298
314
|
@unsupported_args("ins_cost", "del_cost", "sub_cost")
|
|
299
315
|
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
|
|
300
316
|
max_dist = expression.args.get("max_dist")
|
|
@@ -324,16 +340,13 @@ def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List]
|
|
|
324
340
|
return _builder
|
|
325
341
|
|
|
326
342
|
|
|
327
|
-
def _build_contains_substring(args: t.List) -> exp.Contains
|
|
328
|
-
if len(args) == 3:
|
|
329
|
-
return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)
|
|
330
|
-
|
|
343
|
+
def _build_contains_substring(args: t.List) -> exp.Contains:
|
|
331
344
|
# Lowercase the operands in case of transpilation, as exp.Contains
|
|
332
345
|
# is case-sensitive on other dialects
|
|
333
346
|
this = exp.Lower(this=seq_get(args, 0))
|
|
334
347
|
expr = exp.Lower(this=seq_get(args, 1))
|
|
335
348
|
|
|
336
|
-
return exp.Contains(this=this, expression=expr)
|
|
349
|
+
return exp.Contains(this=this, expression=expr, json_scope=seq_get(args, 2))
|
|
337
350
|
|
|
338
351
|
|
|
339
352
|
def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
|
|
@@ -473,6 +486,9 @@ class BigQuery(Dialect):
|
|
|
473
486
|
exp.Substring,
|
|
474
487
|
)
|
|
475
488
|
},
|
|
489
|
+
exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
490
|
+
exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
491
|
+
exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
476
492
|
exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
|
|
477
493
|
exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
|
|
478
494
|
exp.Array: _annotate_array,
|
|
@@ -484,20 +500,28 @@ class BigQuery(Dialect):
|
|
|
484
500
|
exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
485
501
|
exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
486
502
|
exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
503
|
+
exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
|
|
504
|
+
e, exp.DataType.Type.BINARY
|
|
505
|
+
),
|
|
487
506
|
exp.CodePointsToString: lambda self, e: self._annotate_with_type(
|
|
488
507
|
e, exp.DataType.Type.VARCHAR
|
|
489
508
|
),
|
|
490
509
|
exp.Concat: _annotate_concat,
|
|
510
|
+
exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
|
|
491
511
|
exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
492
512
|
exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
493
513
|
exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
494
514
|
exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
|
|
495
515
|
exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
|
|
516
|
+
exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
517
|
+
exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
518
|
+
exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
|
|
496
519
|
exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
|
|
497
520
|
e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
|
|
498
521
|
),
|
|
499
522
|
exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
500
523
|
exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
|
|
524
|
+
exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
|
|
501
525
|
exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
|
|
502
526
|
e, exp.DataType.Type.VARCHAR
|
|
503
527
|
),
|
|
@@ -506,12 +530,21 @@ class BigQuery(Dialect):
|
|
|
506
530
|
),
|
|
507
531
|
exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
508
532
|
exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
|
|
533
|
+
exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
509
534
|
exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
535
|
+
exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
510
536
|
exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
511
537
|
exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
|
|
538
|
+
exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
|
|
539
|
+
e, exp.DataType.Type.BIGDECIMAL
|
|
540
|
+
),
|
|
541
|
+
exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
|
|
512
542
|
exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
513
543
|
exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
|
|
514
544
|
exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
|
|
545
|
+
exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
|
|
546
|
+
e, exp.DataType.Type.VARCHAR
|
|
547
|
+
),
|
|
515
548
|
exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
|
|
516
549
|
exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
517
550
|
exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
|
|
@@ -522,8 +555,11 @@ class BigQuery(Dialect):
|
|
|
522
555
|
),
|
|
523
556
|
exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
|
|
524
557
|
exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
525
|
-
exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
526
558
|
exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
559
|
+
exp.ToCodePoints: lambda self, e: self._annotate_with_type(
|
|
560
|
+
e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
|
|
561
|
+
),
|
|
562
|
+
exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
|
|
527
563
|
exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
|
|
528
564
|
exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
|
|
529
565
|
}
|
|
@@ -596,10 +632,13 @@ class BigQuery(Dialect):
|
|
|
596
632
|
"EXPORT": TokenType.EXPORT,
|
|
597
633
|
"FLOAT64": TokenType.DOUBLE,
|
|
598
634
|
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
|
|
635
|
+
"LOOP": TokenType.COMMAND,
|
|
599
636
|
"MODEL": TokenType.MODEL,
|
|
600
637
|
"NOT DETERMINISTIC": TokenType.VOLATILE,
|
|
601
638
|
"RECORD": TokenType.STRUCT,
|
|
639
|
+
"REPEAT": TokenType.COMMAND,
|
|
602
640
|
"TIMESTAMP": TokenType.TIMESTAMPTZ,
|
|
641
|
+
"WHILE": TokenType.COMMAND,
|
|
603
642
|
}
|
|
604
643
|
KEYWORDS.pop("DIV")
|
|
605
644
|
KEYWORDS.pop("VALUES")
|
|
@@ -623,6 +662,8 @@ class BigQuery(Dialect):
|
|
|
623
662
|
|
|
624
663
|
FUNCTIONS = {
|
|
625
664
|
**parser.Parser.FUNCTIONS,
|
|
665
|
+
"APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
|
|
666
|
+
"BOOL": exp.JSONBool.from_arg_list,
|
|
626
667
|
"CONTAINS_SUBSTR": _build_contains_substring,
|
|
627
668
|
"DATE": _build_date,
|
|
628
669
|
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
|
|
@@ -647,6 +688,10 @@ class BigQuery(Dialect):
|
|
|
647
688
|
"JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
|
|
648
689
|
"LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
|
|
649
690
|
"MD5": exp.MD5Digest.from_arg_list,
|
|
691
|
+
"NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
|
|
692
|
+
this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
|
|
693
|
+
),
|
|
694
|
+
"OCTET_LENGTH": exp.ByteLength.from_arg_list,
|
|
650
695
|
"TO_HEX": _build_to_hex,
|
|
651
696
|
"PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
|
|
652
697
|
[seq_get(args, 1), seq_get(args, 0)]
|
|
@@ -689,6 +734,7 @@ class BigQuery(Dialect):
|
|
|
689
734
|
"FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
|
|
690
735
|
"FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
|
|
691
736
|
"FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
|
|
737
|
+
"FROM_HEX": exp.Unhex.from_arg_list,
|
|
692
738
|
"WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
|
|
693
739
|
}
|
|
694
740
|
|
|
@@ -699,7 +745,10 @@ class BigQuery(Dialect):
|
|
|
699
745
|
exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
|
|
700
746
|
),
|
|
701
747
|
"MAKE_INTERVAL": lambda self: self._parse_make_interval(),
|
|
748
|
+
"PREDICT": lambda self: self._parse_predict(),
|
|
702
749
|
"FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
|
|
750
|
+
"GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
|
|
751
|
+
"VECTOR_SEARCH": lambda self: self._parse_vector_search(),
|
|
703
752
|
}
|
|
704
753
|
FUNCTION_PARSERS.pop("TRIM")
|
|
705
754
|
|
|
@@ -979,13 +1028,40 @@ class BigQuery(Dialect):
|
|
|
979
1028
|
|
|
980
1029
|
return expr
|
|
981
1030
|
|
|
982
|
-
def
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1031
|
+
def _parse_predict(self) -> exp.Predict:
|
|
1032
|
+
self._match_text_seq("MODEL")
|
|
1033
|
+
this = self._parse_table()
|
|
1034
|
+
|
|
1035
|
+
self._match(TokenType.COMMA)
|
|
1036
|
+
self._match_text_seq("TABLE")
|
|
1037
|
+
|
|
1038
|
+
return self.expression(
|
|
1039
|
+
exp.Predict,
|
|
1040
|
+
this=this,
|
|
1041
|
+
expression=self._parse_table(),
|
|
1042
|
+
params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
|
|
1043
|
+
)
|
|
1044
|
+
|
|
1045
|
+
def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
|
|
1046
|
+
self._match_text_seq("MODEL")
|
|
1047
|
+
this = self._parse_table()
|
|
1048
|
+
|
|
1049
|
+
self._match(TokenType.COMMA)
|
|
1050
|
+
self._match_text_seq("TABLE")
|
|
1051
|
+
|
|
1052
|
+
return self.expression(
|
|
1053
|
+
exp.GenerateEmbedding,
|
|
1054
|
+
this=this,
|
|
1055
|
+
expression=self._parse_table(),
|
|
1056
|
+
params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
|
|
987
1057
|
)
|
|
988
1058
|
|
|
1059
|
+
def _parse_features_at_time(self) -> exp.FeaturesAtTime:
|
|
1060
|
+
self._match(TokenType.TABLE)
|
|
1061
|
+
this = self._parse_table()
|
|
1062
|
+
|
|
1063
|
+
expr = self.expression(exp.FeaturesAtTime, this=this)
|
|
1064
|
+
|
|
989
1065
|
while self._match(TokenType.COMMA):
|
|
990
1066
|
arg = self._parse_lambda()
|
|
991
1067
|
|
|
@@ -996,6 +1072,37 @@ class BigQuery(Dialect):
|
|
|
996
1072
|
|
|
997
1073
|
return expr
|
|
998
1074
|
|
|
1075
|
+
def _parse_vector_search(self) -> exp.VectorSearch:
|
|
1076
|
+
self._match(TokenType.TABLE)
|
|
1077
|
+
base_table = self._parse_table()
|
|
1078
|
+
|
|
1079
|
+
self._match(TokenType.COMMA)
|
|
1080
|
+
|
|
1081
|
+
column_to_search = self._parse_bitwise()
|
|
1082
|
+
self._match(TokenType.COMMA)
|
|
1083
|
+
|
|
1084
|
+
self._match(TokenType.TABLE)
|
|
1085
|
+
query_table = self._parse_table()
|
|
1086
|
+
|
|
1087
|
+
expr = self.expression(
|
|
1088
|
+
exp.VectorSearch,
|
|
1089
|
+
this=base_table,
|
|
1090
|
+
column_to_search=column_to_search,
|
|
1091
|
+
query_table=query_table,
|
|
1092
|
+
)
|
|
1093
|
+
|
|
1094
|
+
while self._match(TokenType.COMMA):
|
|
1095
|
+
# query_column_to_search can be named argument or positional
|
|
1096
|
+
if self._match(TokenType.STRING, advance=False):
|
|
1097
|
+
query_column = self._parse_string()
|
|
1098
|
+
expr.set("query_column_to_search", query_column)
|
|
1099
|
+
else:
|
|
1100
|
+
arg = self._parse_lambda()
|
|
1101
|
+
if arg:
|
|
1102
|
+
expr.set(arg.this.name, arg)
|
|
1103
|
+
|
|
1104
|
+
return expr
|
|
1105
|
+
|
|
999
1106
|
def _parse_export_data(self) -> exp.Export:
|
|
1000
1107
|
self._match_text_seq("DATA")
|
|
1001
1108
|
|
|
@@ -1043,6 +1150,7 @@ class BigQuery(Dialect):
|
|
|
1043
1150
|
|
|
1044
1151
|
TRANSFORMS = {
|
|
1045
1152
|
**generator.Generator.TRANSFORMS,
|
|
1153
|
+
exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
|
|
1046
1154
|
exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
|
|
1047
1155
|
exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
|
|
1048
1156
|
exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
|
|
@@ -1050,6 +1158,7 @@ class BigQuery(Dialect):
|
|
|
1050
1158
|
exp.ArrayContains: _array_contains_sql,
|
|
1051
1159
|
exp.ArrayFilter: filter_array_using_unnest,
|
|
1052
1160
|
exp.ArrayRemove: filter_array_using_unnest,
|
|
1161
|
+
exp.ByteLength: rename_func("BYTE_LENGTH"),
|
|
1053
1162
|
exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
|
|
1054
1163
|
exp.CollateProperty: lambda self, e: (
|
|
1055
1164
|
f"DEFAULT COLLATE {self.sql(e, 'this')}"
|
|
@@ -1083,6 +1192,7 @@ class BigQuery(Dialect):
|
|
|
1083
1192
|
exp.ILike: no_ilike_sql,
|
|
1084
1193
|
exp.IntDiv: rename_func("DIV"),
|
|
1085
1194
|
exp.Int64: rename_func("INT64"),
|
|
1195
|
+
exp.JSONBool: rename_func("BOOL"),
|
|
1086
1196
|
exp.JSONExtract: _json_extract_sql,
|
|
1087
1197
|
exp.JSONExtractArray: _json_extract_sql,
|
|
1088
1198
|
exp.JSONExtractScalar: _json_extract_sql,
|
|
@@ -1092,6 +1202,11 @@ class BigQuery(Dialect):
|
|
|
1092
1202
|
exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
|
|
1093
1203
|
exp.MD5Digest: rename_func("MD5"),
|
|
1094
1204
|
exp.Min: min_or_least,
|
|
1205
|
+
exp.Normalize: lambda self, e: self.func(
|
|
1206
|
+
"NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE",
|
|
1207
|
+
e.this,
|
|
1208
|
+
e.args.get("form"),
|
|
1209
|
+
),
|
|
1095
1210
|
exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
|
|
1096
1211
|
exp.RegexpExtract: lambda self, e: self.func(
|
|
1097
1212
|
"REGEXP_EXTRACT",
|
|
@@ -1427,7 +1542,7 @@ class BigQuery(Dialect):
|
|
|
1427
1542
|
this = this.this
|
|
1428
1543
|
expr = expr.this
|
|
1429
1544
|
|
|
1430
|
-
return self.func("CONTAINS_SUBSTR", this, expr)
|
|
1545
|
+
return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope"))
|
|
1431
1546
|
|
|
1432
1547
|
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
|
|
1433
1548
|
this = expression.this
|
sqlglot/dialects/clickhouse.py
CHANGED
|
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
|
|
|
345
345
|
"LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
|
|
346
346
|
}
|
|
347
347
|
FUNCTIONS.pop("TRANSFORM")
|
|
348
|
+
FUNCTIONS.pop("APPROX_TOP_SUM")
|
|
348
349
|
|
|
349
350
|
AGG_FUNCTIONS = {
|
|
350
351
|
"count",
|
|
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
|
|
|
379
380
|
"argMax",
|
|
380
381
|
"avgWeighted",
|
|
381
382
|
"topK",
|
|
383
|
+
"approx_top_sum",
|
|
382
384
|
"topKWeighted",
|
|
383
385
|
"deltaSum",
|
|
384
386
|
"deltaSumTimestamp",
|
|
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):
|
|
|
977
979
|
|
|
978
980
|
return value
|
|
979
981
|
|
|
982
|
+
def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
|
|
983
|
+
# ClickHouse allows custom expressions as partition key
|
|
984
|
+
# https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
|
|
985
|
+
return self.expression(
|
|
986
|
+
exp.PartitionedByProperty,
|
|
987
|
+
this=self._parse_assignment(),
|
|
988
|
+
)
|
|
989
|
+
|
|
980
990
|
class Generator(generator.Generator):
|
|
981
991
|
QUERY_HINTS = False
|
|
982
992
|
STRUCT_DELIMITER = ("(", ")")
|
|
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
|
|
|
1094
1104
|
exp.DateStrToDate: rename_func("toDate"),
|
|
1095
1105
|
exp.DateSub: _datetime_delta_sql("DATE_SUB"),
|
|
1096
1106
|
exp.Explode: rename_func("arrayJoin"),
|
|
1107
|
+
exp.FarmFingerprint: rename_func("farmFingerprint64"),
|
|
1097
1108
|
exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
|
|
1098
1109
|
exp.IsNan: rename_func("isNaN"),
|
|
1099
1110
|
exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
|
sqlglot/dialects/dialect.py
CHANGED
|
@@ -668,6 +668,7 @@ class Dialect(metaclass=_Dialect):
|
|
|
668
668
|
exp.UnixMillis,
|
|
669
669
|
},
|
|
670
670
|
exp.DataType.Type.BINARY: {
|
|
671
|
+
exp.FromBase32,
|
|
671
672
|
exp.FromBase64,
|
|
672
673
|
},
|
|
673
674
|
exp.DataType.Type.BOOLEAN: {
|
|
@@ -779,6 +780,7 @@ class Dialect(metaclass=_Dialect):
|
|
|
779
780
|
exp.TimeToStr,
|
|
780
781
|
exp.TimeToTimeStr,
|
|
781
782
|
exp.Trim,
|
|
783
|
+
exp.ToBase32,
|
|
782
784
|
exp.ToBase64,
|
|
783
785
|
exp.TsOrDsToDateStr,
|
|
784
786
|
exp.UnixToStr,
|
sqlglot/dialects/doris.py
CHANGED
|
@@ -65,7 +65,11 @@ class Doris(MySQL):
|
|
|
65
65
|
**MySQL.Parser.PROPERTY_PARSERS,
|
|
66
66
|
"PROPERTIES": lambda self: self._parse_wrapped_properties(),
|
|
67
67
|
"UNIQUE": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
|
|
68
|
+
# Plain KEY without UNIQUE/DUPLICATE/AGGREGATE prefixes should be treated as UniqueKeyProperty with unique=False
|
|
69
|
+
"KEY": lambda self: self._parse_composite_key_property(exp.UniqueKeyProperty),
|
|
68
70
|
"PARTITION BY": lambda self: self._parse_partition_by_opt_range(),
|
|
71
|
+
"BUILD": lambda self: self._parse_build_property(),
|
|
72
|
+
"REFRESH": lambda self: self._parse_refresh_property(),
|
|
69
73
|
}
|
|
70
74
|
|
|
71
75
|
def _parse_partitioning_granularity_dynamic(self) -> exp.PartitionByRangePropertyDynamic:
|
|
@@ -104,9 +108,27 @@ class Doris(MySQL):
|
|
|
104
108
|
part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
|
|
105
109
|
return self.expression(exp.Partition, expressions=[part_range])
|
|
106
110
|
|
|
111
|
+
def _parse_partition_definition_list(self) -> exp.Partition:
|
|
112
|
+
# PARTITION <name> VALUES IN (<value_csv>)
|
|
113
|
+
self._match_text_seq("PARTITION")
|
|
114
|
+
name = self._parse_id_var()
|
|
115
|
+
self._match_text_seq("VALUES", "IN")
|
|
116
|
+
values = self._parse_wrapped_csv(self._parse_expression)
|
|
117
|
+
part_list = self.expression(exp.PartitionList, this=name, expressions=values)
|
|
118
|
+
return self.expression(exp.Partition, expressions=[part_list])
|
|
119
|
+
|
|
107
120
|
def _parse_partition_by_opt_range(
|
|
108
121
|
self,
|
|
109
|
-
) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty:
|
|
122
|
+
) -> exp.PartitionedByProperty | exp.PartitionByRangeProperty | exp.PartitionByListProperty:
|
|
123
|
+
if self._match_text_seq("LIST"):
|
|
124
|
+
return self.expression(
|
|
125
|
+
exp.PartitionByListProperty,
|
|
126
|
+
partition_expressions=self._parse_wrapped_id_vars(),
|
|
127
|
+
create_expressions=self._parse_wrapped_csv(
|
|
128
|
+
self._parse_partition_definition_list
|
|
129
|
+
),
|
|
130
|
+
)
|
|
131
|
+
|
|
110
132
|
if not self._match_text_seq("RANGE"):
|
|
111
133
|
return super()._parse_partitioned_by()
|
|
112
134
|
|
|
@@ -128,6 +150,28 @@ class Doris(MySQL):
|
|
|
128
150
|
create_expressions=create_expressions,
|
|
129
151
|
)
|
|
130
152
|
|
|
153
|
+
def _parse_build_property(self) -> exp.BuildProperty:
|
|
154
|
+
return self.expression(exp.BuildProperty, this=self._parse_var(upper=True))
|
|
155
|
+
|
|
156
|
+
def _parse_refresh_property(self) -> exp.RefreshTriggerProperty:
|
|
157
|
+
method = self._parse_var(upper=True)
|
|
158
|
+
|
|
159
|
+
self._match(TokenType.ON)
|
|
160
|
+
|
|
161
|
+
kind = self._match_texts(("MANUAL", "COMMIT", "SCHEDULE")) and self._prev.text.upper()
|
|
162
|
+
every = self._match_text_seq("EVERY") and self._parse_number()
|
|
163
|
+
unit = self._parse_var(any_token=True) if every else None
|
|
164
|
+
starts = self._match_text_seq("STARTS") and self._parse_string()
|
|
165
|
+
|
|
166
|
+
return self.expression(
|
|
167
|
+
exp.RefreshTriggerProperty,
|
|
168
|
+
method=method,
|
|
169
|
+
kind=kind,
|
|
170
|
+
every=every,
|
|
171
|
+
unit=unit,
|
|
172
|
+
starts=starts,
|
|
173
|
+
)
|
|
174
|
+
|
|
131
175
|
class Generator(MySQL.Generator):
|
|
132
176
|
LAST_DAY_SUPPORTS_DATE_PART = False
|
|
133
177
|
VARCHAR_REQUIRES_SIZE = False
|
|
@@ -145,7 +189,10 @@ class Doris(MySQL):
|
|
|
145
189
|
**MySQL.Generator.PROPERTIES_LOCATION,
|
|
146
190
|
exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
|
|
147
191
|
exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
|
|
192
|
+
exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
|
|
148
193
|
exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
|
|
194
|
+
exp.BuildProperty: exp.Properties.Location.POST_SCHEMA,
|
|
195
|
+
exp.RefreshTriggerProperty: exp.Properties.Location.POST_SCHEMA,
|
|
149
196
|
}
|
|
150
197
|
|
|
151
198
|
CAST_MAPPING = {}
|
|
@@ -662,9 +709,18 @@ class Doris(MySQL):
|
|
|
662
709
|
"year",
|
|
663
710
|
}
|
|
664
711
|
|
|
712
|
+
def uniquekeyproperty_sql(
|
|
713
|
+
self, expression: exp.UniqueKeyProperty, prefix: str = "UNIQUE KEY"
|
|
714
|
+
) -> str:
|
|
715
|
+
create_stmt = expression.find_ancestor(exp.Create)
|
|
716
|
+
if create_stmt and create_stmt.args["properties"].find(exp.MaterializedProperty):
|
|
717
|
+
return super().uniquekeyproperty_sql(expression, prefix="KEY")
|
|
718
|
+
|
|
719
|
+
return super().uniquekeyproperty_sql(expression)
|
|
720
|
+
|
|
665
721
|
def partition_sql(self, expression: exp.Partition) -> str:
|
|
666
722
|
parent = expression.parent
|
|
667
|
-
if isinstance(parent, exp.PartitionByRangeProperty):
|
|
723
|
+
if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
|
|
668
724
|
return ", ".join(self.sql(e) for e in expression.expressions)
|
|
669
725
|
return super().partition_sql(expression)
|
|
670
726
|
|
|
@@ -685,7 +741,9 @@ class Doris(MySQL):
|
|
|
685
741
|
|
|
686
742
|
return f"PARTITION {name} VALUES LESS THAN ({self.sql(values[0])})"
|
|
687
743
|
|
|
688
|
-
def partitionbyrangepropertydynamic_sql(
|
|
744
|
+
def partitionbyrangepropertydynamic_sql(
|
|
745
|
+
self, expression: exp.PartitionByRangePropertyDynamic
|
|
746
|
+
) -> str:
|
|
689
747
|
# Generates: FROM ("start") TO ("end") INTERVAL N UNIT
|
|
690
748
|
start = self.sql(expression, "start")
|
|
691
749
|
end = self.sql(expression, "end")
|
|
@@ -699,15 +757,25 @@ class Doris(MySQL):
|
|
|
699
757
|
|
|
700
758
|
return f"FROM ({start}) TO ({end}) {interval}"
|
|
701
759
|
|
|
702
|
-
def partitionbyrangeproperty_sql(self, expression):
|
|
703
|
-
partition_expressions =
|
|
704
|
-
|
|
760
|
+
def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
|
|
761
|
+
partition_expressions = self.expressions(
|
|
762
|
+
expression, key="partition_expressions", indent=False
|
|
705
763
|
)
|
|
706
|
-
|
|
707
|
-
# Handle both static and dynamic partition definitions
|
|
708
|
-
create_sql = ", ".join(self.sql(e) for e in create_expressions)
|
|
764
|
+
create_sql = self.expressions(expression, key="create_expressions", indent=False)
|
|
709
765
|
return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"
|
|
710
766
|
|
|
767
|
+
def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
|
|
768
|
+
partition_expressions = self.expressions(
|
|
769
|
+
expression, key="partition_expressions", indent=False
|
|
770
|
+
)
|
|
771
|
+
create_sql = self.expressions(expression, key="create_expressions", indent=False)
|
|
772
|
+
return f"PARTITION BY LIST ({partition_expressions}) ({create_sql})"
|
|
773
|
+
|
|
774
|
+
def partitionlist_sql(self, expression: exp.PartitionList) -> str:
|
|
775
|
+
name = self.sql(expression, "this")
|
|
776
|
+
values = self.expressions(expression, indent=False)
|
|
777
|
+
return f"PARTITION {name} VALUES IN ({values})"
|
|
778
|
+
|
|
711
779
|
def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
|
|
712
780
|
node = expression.this
|
|
713
781
|
if isinstance(node, exp.Schema):
|
sqlglot/dialects/dremio.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing as t
|
|
4
|
-
|
|
5
4
|
from sqlglot import expressions as exp
|
|
6
5
|
from sqlglot import parser, generator, tokens
|
|
7
6
|
from sqlglot.dialects.dialect import (
|
|
8
7
|
Dialect,
|
|
9
8
|
build_timetostr_or_tochar,
|
|
10
9
|
build_formatted_time,
|
|
10
|
+
build_date_delta,
|
|
11
11
|
rename_func,
|
|
12
|
-
unit_to_var,
|
|
13
12
|
)
|
|
14
13
|
from sqlglot.helper import seq_get
|
|
14
|
+
from sqlglot.tokens import TokenType
|
|
15
15
|
|
|
16
16
|
if t.TYPE_CHECKING:
|
|
17
17
|
from sqlglot.dialects.dialect import DialectType
|
|
@@ -21,20 +21,17 @@ DATE_DELTA = t.Union[exp.DateAdd, exp.DateSub]
|
|
|
21
21
|
|
|
22
22
|
def _date_delta_sql(name: str) -> t.Callable[[Dremio.Generator, DATE_DELTA], str]:
|
|
23
23
|
def _delta_sql(self: Dremio.Generator, expression: DATE_DELTA) -> str:
|
|
24
|
-
unit = expression.text("unit")
|
|
24
|
+
unit = expression.text("unit").upper()
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
# Fallback to default behavior if unit is missing or 'DAY'
|
|
27
|
+
if not unit or unit == "DAY":
|
|
27
28
|
return self.func(name, expression.this, expression.expression)
|
|
28
29
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
increment = exp.Literal.number(value * -1)
|
|
35
|
-
else:
|
|
36
|
-
increment *= exp.Literal.number(-1)
|
|
37
|
-
return self.func("TIMESTAMPADD", unit_to_var(expression), increment, expression.this)
|
|
30
|
+
this_sql = self.sql(expression, "this")
|
|
31
|
+
expr_sql = self.sql(expression, "expression")
|
|
32
|
+
|
|
33
|
+
interval_sql = f"CAST({expr_sql} AS INTERVAL {unit})"
|
|
34
|
+
return f"{name}({this_sql}, {interval_sql})"
|
|
38
35
|
|
|
39
36
|
return _delta_sql
|
|
40
37
|
|
|
@@ -50,6 +47,33 @@ def to_char_is_numeric_handler(args: t.List, dialect: DialectType) -> exp.TimeTo
|
|
|
50
47
|
return expression
|
|
51
48
|
|
|
52
49
|
|
|
50
|
+
def build_date_delta_with_cast_interval(
|
|
51
|
+
expression_class: t.Type[DATE_DELTA],
|
|
52
|
+
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
|
|
53
|
+
fallback_builder = build_date_delta(expression_class)
|
|
54
|
+
|
|
55
|
+
def _builder(args):
|
|
56
|
+
if len(args) == 2:
|
|
57
|
+
date_arg, interval_arg = args
|
|
58
|
+
|
|
59
|
+
if (
|
|
60
|
+
isinstance(interval_arg, exp.Cast)
|
|
61
|
+
and isinstance(interval_arg.to, exp.DataType)
|
|
62
|
+
and isinstance(interval_arg.to.this, exp.Interval)
|
|
63
|
+
):
|
|
64
|
+
return expression_class(
|
|
65
|
+
this=date_arg,
|
|
66
|
+
expression=interval_arg.this,
|
|
67
|
+
unit=interval_arg.to.this.unit,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
return expression_class(this=date_arg, expression=interval_arg)
|
|
71
|
+
|
|
72
|
+
return fallback_builder(args)
|
|
73
|
+
|
|
74
|
+
return _builder
|
|
75
|
+
|
|
76
|
+
|
|
53
77
|
class Dremio(Dialect):
|
|
54
78
|
SUPPORTS_USER_DEFINED_TYPES = False
|
|
55
79
|
CONCAT_COALESCE = True
|
|
@@ -108,16 +132,39 @@ class Dremio(Dialect):
|
|
|
108
132
|
"tzo": "%z", # numeric offset (+0200)
|
|
109
133
|
}
|
|
110
134
|
|
|
135
|
+
class Tokenizer(tokens.Tokenizer):
|
|
136
|
+
COMMENTS = ["--", "//", ("/*", "*/")]
|
|
137
|
+
|
|
111
138
|
class Parser(parser.Parser):
|
|
112
139
|
LOG_DEFAULTS_TO_LN = True
|
|
113
140
|
|
|
141
|
+
NO_PAREN_FUNCTION_PARSERS = {
|
|
142
|
+
**parser.Parser.NO_PAREN_FUNCTION_PARSERS,
|
|
143
|
+
"CURRENT_DATE_UTC": lambda self: self._parse_current_date_utc(),
|
|
144
|
+
}
|
|
145
|
+
|
|
114
146
|
FUNCTIONS = {
|
|
115
147
|
**parser.Parser.FUNCTIONS,
|
|
116
148
|
"TO_CHAR": to_char_is_numeric_handler,
|
|
117
149
|
"DATE_FORMAT": build_formatted_time(exp.TimeToStr, "dremio"),
|
|
118
150
|
"TO_DATE": build_formatted_time(exp.TsOrDsToDate, "dremio"),
|
|
151
|
+
"DATE_ADD": build_date_delta_with_cast_interval(exp.DateAdd),
|
|
152
|
+
"DATE_SUB": build_date_delta_with_cast_interval(exp.DateSub),
|
|
153
|
+
"ARRAY_GENERATE_RANGE": exp.GenerateSeries.from_arg_list,
|
|
119
154
|
}
|
|
120
155
|
|
|
156
|
+
def _parse_current_date_utc(self) -> exp.Cast:
|
|
157
|
+
if self._match(TokenType.L_PAREN):
|
|
158
|
+
self._match_r_paren()
|
|
159
|
+
|
|
160
|
+
return exp.Cast(
|
|
161
|
+
this=exp.AtTimeZone(
|
|
162
|
+
this=exp.CurrentTimestamp(),
|
|
163
|
+
zone=exp.Literal.string("UTC"),
|
|
164
|
+
),
|
|
165
|
+
to=exp.DataType.build("DATE"),
|
|
166
|
+
)
|
|
167
|
+
|
|
121
168
|
class Generator(generator.Generator):
|
|
122
169
|
NVL2_SUPPORTED = False
|
|
123
170
|
SUPPORTS_CONVERT_TIMEZONE = True
|
|
@@ -148,6 +195,7 @@ class Dremio(Dialect):
|
|
|
148
195
|
exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
|
|
149
196
|
exp.DateAdd: _date_delta_sql("DATE_ADD"),
|
|
150
197
|
exp.DateSub: _date_delta_sql("DATE_SUB"),
|
|
198
|
+
exp.GenerateSeries: rename_func("ARRAY_GENERATE_RANGE"),
|
|
151
199
|
}
|
|
152
200
|
|
|
153
201
|
def datatype_sql(self, expression: exp.DataType) -> str:
|
|
@@ -162,5 +210,17 @@ class Dremio(Dialect):
|
|
|
162
210
|
|
|
163
211
|
return super().datatype_sql(expression)
|
|
164
212
|
|
|
165
|
-
|
|
166
|
-
|
|
213
|
+
def cast_sql(self, expression: exp.Cast, safe_prefix: str | None = None) -> str:
|
|
214
|
+
# Match: CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
|
|
215
|
+
if expression.is_type(exp.DataType.Type.DATE):
|
|
216
|
+
at_time_zone = expression.this
|
|
217
|
+
|
|
218
|
+
if (
|
|
219
|
+
isinstance(at_time_zone, exp.AtTimeZone)
|
|
220
|
+
and isinstance(at_time_zone.this, exp.CurrentTimestamp)
|
|
221
|
+
and isinstance(at_time_zone.args["zone"], exp.Literal)
|
|
222
|
+
and at_time_zone.text("zone").upper() == "UTC"
|
|
223
|
+
):
|
|
224
|
+
return "CURRENT_DATE_UTC"
|
|
225
|
+
|
|
226
|
+
return super().cast_sql(expression, safe_prefix)
|