sqlglot 27.7.0__py3-none-any.whl → 27.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +16 -3
- sqlglot/dialects/__init__.py +1 -1
- sqlglot/dialects/bigquery.py +129 -9
- sqlglot/dialects/clickhouse.py +11 -0
- sqlglot/dialects/databricks.py +5 -1
- sqlglot/dialects/dialect.py +74 -23
- sqlglot/dialects/doris.py +77 -9
- sqlglot/dialects/dremio.py +102 -21
- sqlglot/dialects/duckdb.py +20 -43
- sqlglot/dialects/exasol.py +28 -0
- sqlglot/dialects/mysql.py +0 -48
- sqlglot/dialects/presto.py +0 -2
- sqlglot/dialects/redshift.py +1 -0
- sqlglot/dialects/singlestore.py +252 -13
- sqlglot/dialects/spark.py +6 -0
- sqlglot/dialects/trino.py +1 -0
- sqlglot/dialects/tsql.py +2 -0
- sqlglot/expressions.py +143 -7
- sqlglot/generator.py +98 -27
- sqlglot/jsonpath.py +10 -3
- sqlglot/optimizer/qualify_columns.py +1 -1
- sqlglot/parser.py +58 -17
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/METADATA +42 -2
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/RECORD +27 -27
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
```diff
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control

-__all__ = [
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]

 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union

     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object

 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID

-__version__ = version = '27.7.0'
-__version_tuple__ = version_tuple = (27, 7, 0)
+__version__ = version = '27.9.0'
+__version_tuple__ = version_tuple = (27, 9, 0)
+
+__commit_id__ = commit_id = None
```
sqlglot/dialects/__init__.py
CHANGED
```diff
@@ -75,6 +75,7 @@ DIALECTS = [
     "Druid",
     "DuckDB",
     "Dune",
+    "Exasol",
     "Fabric",
     "Hive",
     "Materialize",
@@ -95,7 +96,6 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
-    "Exasol",
 ]

 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
```
sqlglot/dialects/bigquery.py
CHANGED
```diff
@@ -4,7 +4,7 @@ import logging
 import re
 import typing as t

-from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -30,7 +30,6 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
-    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -296,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression


+def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
+    self._annotate_args(expression)
+
+    struct_type = exp.DataType(
+        this=exp.DataType.Type.STRUCT,
+        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
+        nested=True,
+    )
+    self._set_type(
+        expression,
+        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
+    )
+
+    return expression
+
+
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
@@ -474,15 +489,24 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BINARY
+        ),
         exp.CodePointsToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
@@ -492,9 +516,13 @@ class BigQuery(Dialect):
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
         exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
+        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
@@ -504,9 +532,21 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BIGDECIMAL
+        ),
+        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
+        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
+        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
@@ -516,8 +556,12 @@ class BigQuery(Dialect):
         ),
         exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
+        ),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }

@@ -550,6 +594,12 @@ class BigQuery(Dialect):

         return super().normalize_identifier(expression)

+    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
+        VAR_TOKENS = {
+            TokenType.DASH,
+            TokenType.VAR,
+        }
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
@@ -583,10 +633,13 @@ class BigQuery(Dialect):
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
+           "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
+           "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
+           "WHILE": TokenType.COMMAND,
         }
         KEYWORDS.pop("DIV")
         KEYWORDS.pop("VALUES")
@@ -610,6 +663,8 @@ class BigQuery(Dialect):

         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
+            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
+            "BOOL": exp.JSONBool.from_arg_list,
             "CONTAINS_SUBSTR": _build_contains_substring,
             "DATE": _build_date,
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
@@ -676,6 +731,7 @@ class BigQuery(Dialect):
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
             "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+            "FROM_HEX": exp.Unhex.from_arg_list,
             "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
         }

@@ -686,7 +742,10 @@ class BigQuery(Dialect):
                 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
             ),
             "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
+            "PREDICT": lambda self: self._parse_predict(),
             "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
+            "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
+            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
         }
         FUNCTION_PARSERS.pop("TRIM")

@@ -966,13 +1025,40 @@ class BigQuery(Dialect):

            return expr

-        def
-
-
-
-
+        def _parse_predict(self) -> exp.Predict:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.Predict,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
+            )
+
+        def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.GenerateEmbedding,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
             )

+        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
+            self._match(TokenType.TABLE)
+            this = self._parse_table()
+
+            expr = self.expression(exp.FeaturesAtTime, this=this)
+
             while self._match(TokenType.COMMA):
                 arg = self._parse_lambda()

@@ -983,6 +1069,37 @@ class BigQuery(Dialect):

             return expr

+        def _parse_vector_search(self) -> exp.VectorSearch:
+            self._match(TokenType.TABLE)
+            base_table = self._parse_table()
+
+            self._match(TokenType.COMMA)
+
+            column_to_search = self._parse_bitwise()
+            self._match(TokenType.COMMA)
+
+            self._match(TokenType.TABLE)
+            query_table = self._parse_table()
+
+            expr = self.expression(
+                exp.VectorSearch,
+                this=base_table,
+                column_to_search=column_to_search,
+                query_table=query_table,
+            )
+
+            while self._match(TokenType.COMMA):
+                # query_column_to_search can be named argument or positional
+                if self._match(TokenType.STRING, advance=False):
+                    query_column = self._parse_string()
+                    expr.set("query_column_to_search", query_column)
+                else:
+                    arg = self._parse_lambda()
+                    if arg:
+                        expr.set(arg.this.name, arg)
+
+            return expr
+
         def _parse_export_data(self) -> exp.Export:
             self._match_text_seq("DATA")

@@ -1019,6 +1136,8 @@ class BigQuery(Dialect):
         EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
         SUPPORTS_UNIX_SECONDS = True

+        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")
+
         TS_OR_DS_TYPES = (
             exp.TsOrDsToDatetime,
             exp.TsOrDsToTimestamp,
@@ -1028,6 +1147,7 @@ class BigQuery(Dialect):

         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
+            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
             exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
@@ -1068,6 +1188,7 @@ class BigQuery(Dialect):
             exp.ILike: no_ilike_sql,
             exp.IntDiv: rename_func("DIV"),
             exp.Int64: rename_func("INT64"),
+            exp.JSONBool: rename_func("BOOL"),
             exp.JSONExtract: _json_extract_sql,
             exp.JSONExtractArray: _json_extract_sql,
             exp.JSONExtractScalar: _json_extract_sql,
@@ -1107,7 +1228,6 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
-            exp.Space: space_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
```
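Taken together, the BigQuery changes register `APPROX_TOP_COUNT`, `BOOL`, and `FROM_HEX` as known functions, add dedicated parsers for the ML/vector table functions (`PREDICT`, `GENERATE_EMBEDDING`, `VECTOR_SEARCH`), and widen type-annotation coverage. A sketch of the round-trip behavior these hunks imply (expected outputs are my reading of the diff, not verified against the release):

```python
import sqlglot
from sqlglot import exp

# APPROX_TOP_COUNT(x, 10) should parse into exp.ApproxTopK and generate back unchanged,
# since the parser maps the name to ApproxTopK and the generator renames it back
ast = sqlglot.parse_one("SELECT APPROX_TOP_COUNT(x, 10) FROM t", read="bigquery")
assert ast.find(exp.ApproxTopK) is not None
print(ast.sql(dialect="bigquery"))  # expected: SELECT APPROX_TOP_COUNT(x, 10) FROM t

# VECTOR_SEARCH now captures the base table, the column to search, and the query
# table as structured args instead of falling back to a generic anonymous function
vs = sqlglot.parse_one(
    "SELECT * FROM VECTOR_SEARCH(TABLE db.base, 'embedding', TABLE db.queries)",
    read="bigquery",
)
print(vs.find(exp.VectorSearch))
```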
sqlglot/dialects/clickhouse.py
CHANGED
```diff
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
             "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
         }
         FUNCTIONS.pop("TRANSFORM")
+        FUNCTIONS.pop("APPROX_TOP_SUM")

         AGG_FUNCTIONS = {
             "count",
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
             "argMax",
             "avgWeighted",
             "topK",
+            "approx_top_sum",
             "topKWeighted",
             "deltaSum",
             "deltaSumTimestamp",
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):

             return value

+        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
+            # ClickHouse allows custom expressions as partition key
+            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
+            return self.expression(
+                exp.PartitionedByProperty,
+                this=self._parse_assignment(),
+            )
+
     class Generator(generator.Generator):
         QUERY_HINTS = False
         STRUCT_DELIMITER = ("(", ")")
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
             exp.DateStrToDate: rename_func("toDate"),
             exp.DateSub: _datetime_delta_sql("DATE_SUB"),
             exp.Explode: rename_func("arrayJoin"),
+            exp.FarmFingerprint: rename_func("farmFingerprint64"),
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
             exp.IsNan: rename_func("isNaN"),
             exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
```
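The `_parse_partitioned_by` override matters because ClickHouse accepts arbitrary expressions, not just column lists, as the partition key, and the new `FarmFingerprint` transform gives BigQuery's `FARM_FINGERPRINT` a ClickHouse spelling. A sketch of both (expected outputs inferred from the diff):

```python
import sqlglot

# PARTITION BY now goes through _parse_assignment, so expression keys
# like toYYYYMM(ts) should survive a ClickHouse round trip
ddl = """
CREATE TABLE visits (ts DateTime, user_id UInt64)
ENGINE=MergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY ts
"""
print(sqlglot.transpile(ddl, read="clickhouse", write="clickhouse")[0])

# FarmFingerprint is rendered with ClickHouse's native function name
print(sqlglot.transpile("SELECT FARM_FINGERPRINT(x)", read="bigquery", write="clickhouse")[0])
# expected: SELECT farmFingerprint64(x)
```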
sqlglot/dialects/databricks.py
CHANGED
```diff
@@ -99,7 +99,11 @@ class Databricks(Spark):
             exp.JSONExtract: _jsonextract_sql,
             exp.JSONExtractScalar: _jsonextract_sql,
             exp.JSONPathRoot: lambda *_: "",
-            exp.ToChar: lambda self, e:
+            exp.ToChar: lambda self, e: (
+                self.cast_sql(exp.Cast(this=e.this, to=exp.DataType(this="STRING")))
+                if e.args.get("is_numeric")
+                else self.function_fallback_sql(e)
+            ),
         }

         TRANSFORMS.pop(exp.TryCast)
```
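The reworked `exp.ToChar` transform renders numeric conversions as a plain cast to STRING and leaves everything else on the `TO_CHAR` function fallback. A hypothetical construction to illustrate the branch (the `is_numeric` flag is normally set by the source dialect's parser; setting it by hand here is only for demonstration):

```python
from sqlglot import exp
from sqlglot.dialects.databricks import Databricks

e = exp.ToChar(this=exp.column("x"))
e.set("is_numeric", exp.true())  # hand-set for illustration; parsers set this for numeric formats
print(Databricks().generate(e))  # expected: CAST(x AS STRING)

e2 = exp.ToChar(this=exp.column("d"), format=exp.Literal.string("yyyy-MM-dd"))
print(Databricks().generate(e2))  # expected fallback: TO_CHAR(d, 'yyyy-MM-dd')
```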
sqlglot/dialects/dialect.py
CHANGED
```diff
@@ -35,8 +35,18 @@ DATE_ADD_OR_DIFF = t.Union[
     exp.TsOrDsDiff,
 ]
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
-JSON_EXTRACT_TYPE = t.Union[
-
+JSON_EXTRACT_TYPE = t.Union[
+    exp.JSONExtract, exp.JSONExtractScalar, exp.JSONBExtract, exp.JSONBExtractScalar
+]
+DATETIME_DELTA = t.Union[
+    exp.DateAdd,
+    exp.DatetimeAdd,
+    exp.DatetimeSub,
+    exp.TimeAdd,
+    exp.TimeSub,
+    exp.TimestampSub,
+    exp.TsOrDsAdd,
+]

 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E, F
@@ -658,6 +668,7 @@ class Dialect(metaclass=_Dialect):
             exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
+            exp.FromBase32,
             exp.FromBase64,
         },
         exp.DataType.Type.BOOLEAN: {
@@ -769,6 +780,7 @@ class Dialect(metaclass=_Dialect):
             exp.TimeToStr,
             exp.TimeToTimeStr,
             exp.Trim,
+            exp.ToBase32,
             exp.ToBase64,
             exp.TsOrDsToDateStr,
             exp.UnixToStr,
@@ -1059,7 +1071,9 @@ class Dialect(metaclass=_Dialect):
         try:
             return parse_json_path(path_text, self)
         except ParseError as e:
-            if self.STRICT_JSON_PATH_SYNTAX:
+            if self.STRICT_JSON_PATH_SYNTAX and not path_text.lstrip().startswith(
+                ("lax", "strict")
+            ):
                 logger.warning(f"Invalid JSON path syntax. {str(e)}")

         return path
@@ -1643,14 +1657,49 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE
     return _delta_sql


+def date_delta_to_binary_interval_op(
+    cast: bool = True,
+) -> t.Callable[[Generator, DATETIME_DELTA], str]:
+    def date_delta_to_binary_interval_op_sql(self: Generator, expression: DATETIME_DELTA) -> str:
+        this = expression.this
+        unit = unit_to_var(expression)
+        op = (
+            "+"
+            if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
+            else "-"
+        )
+
+        to_type: t.Optional[exp.DATA_TYPE] = None
+        if cast:
+            if isinstance(expression, exp.TsOrDsAdd):
+                to_type = expression.return_type
+            elif this.is_string:
+                # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
+                to_type = (
+                    exp.DataType.Type.DATETIME
+                    if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
+                    else exp.DataType.Type.DATE
+                )
+
+        this = exp.cast(this, to_type) if to_type else this
+
+        expr = expression.expression
+        interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)
+
+        return f"{self.sql(this)} {op} {self.sql(interval)}"
+
+    return date_delta_to_binary_interval_op_sql
+
+
 def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
+    if not unit:
+        return exp.Literal.string(default) if default else None

-    if isinstance(unit, exp.Placeholder):
+    if isinstance(unit, exp.Placeholder) or type(unit) not in (exp.Var, exp.Literal):
         return unit
-
-
-    return exp.Literal.string(default) if default else None
+
+    return exp.Literal.string(unit.name)


 def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
@@ -1730,7 +1779,10 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:


 def build_json_extract_path(
-    expr_type: t.Type[F],
+    expr_type: t.Type[F],
+    zero_based_indexing: bool = True,
+    arrow_req_json_type: bool = False,
+    json_type: t.Optional[str] = None,
 ) -> t.Callable[[t.List], F]:
     def _builder(args: t.List) -> F:
         segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
@@ -1750,11 +1802,19 @@ def build_json_extract_path(

         # This is done to avoid failing in the expression validator due to the arg count
         del args[2:]
-
-        this
-        expression
-
-
+        kwargs = {
+            "this": seq_get(args, 0),
+            "expression": exp.JSONPath(expressions=segments),
+        }
+
+        is_jsonb = issubclass(expr_type, (exp.JSONBExtract, exp.JSONBExtractScalar))
+        if not is_jsonb:
+            kwargs["only_json_types"] = arrow_req_json_type
+
+        if json_type is not None:
+            kwargs["json_type"] = json_type
+
+        return expr_type(**kwargs)

     return _builder

@@ -1962,7 +2022,7 @@ def groupconcat_sql(
     return self.sql(listagg)


-def build_timetostr_or_tochar(args: t.List, dialect:
+def build_timetostr_or_tochar(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar:
     if len(args) == 2:
         this = args[0]
         if not this.type:
@@ -1983,12 +2043,3 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
         expression=seq_get(args, 1),
         replacement=seq_get(args, 2) or exp.Literal.string(""),
     )
-
-
-def space_sql(self: Generator, expression: exp.Space) -> str:
-    return self.sql(
-        exp.Repeat(
-            this=exp.Literal.string(" "),
-            times=expression.this,
-        )
-    )
```
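The new `date_delta_to_binary_interval_op` factory turns `DATE_ADD`-style expressions into binary `+`/`-` interval arithmetic, casting string operands first so the operator is well-typed. A hypothetical dialect wiring it up (`MyDialect` and the expected output are illustrative, not part of the release):

```python
from sqlglot import exp, parse_one
from sqlglot.dialects.dialect import date_delta_to_binary_interval_op
from sqlglot.dialects.duckdb import DuckDB

class MyDialect(DuckDB):  # hypothetical dialect, for illustration only
    class Generator(DuckDB.Generator):
        TRANSFORMS = {
            **DuckDB.Generator.TRANSFORMS,
            # Render DATE_ADD as `<this> + <interval>`; string operands are cast to DATE
            exp.DateAdd: date_delta_to_binary_interval_op(),
        }

sql = parse_one("DATE_ADD('2020-01-01', INTERVAL 1 DAY)").sql(dialect=MyDialect)
print(sql)  # expected: CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY
```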