sqlglot 27.7.0__py3-none-any.whl → 27.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control
 
-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 
 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union
 
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
 
-__version__ = version = '27.7.0'
-__version_tuple__ = version_tuple = (27, 7, 0)
+__version__ = version = '27.9.0'
+__version_tuple__ = version_tuple = (27, 9, 0)
+
+__commit_id__ = commit_id = None
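
The regenerated _version.py now exports commit metadata alongside the version. A minimal sketch of what consumers see (values taken from this diff; the commit id is None because none is embedded in this wheel build):

    import sqlglot._version as v

    print(v.__version__)     # '27.9.0'
    print(v.version_tuple)   # (27, 9, 0)
    print(v.__commit_id__)   # None (no commit id embedded in this build)
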
sqlglot/dialects/__init__.py CHANGED
@@ -75,6 +75,7 @@ DIALECTS = [
     "Druid",
     "DuckDB",
     "Dune",
+    "Exasol",
     "Fabric",
     "Hive",
     "Materialize",
@@ -95,7 +96,6 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
-    "Exasol",
 ]
 
 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
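
These two hunks only move "Exasol" into alphabetical position within DIALECTS; the dialect itself was already registered. A hedged check that it still resolves by module name (target dialect chosen arbitrarily):

    import sqlglot

    print(sqlglot.transpile("SELECT 1", read="exasol", write="duckdb")[0])
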
sqlglot/dialects/bigquery.py CHANGED
@@ -4,7 +4,7 @@ import logging
 import re
 import typing as t
 
-from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -30,7 +30,6 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
-    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -296,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression
 
 
+def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
+    self._annotate_args(expression)
+
+    struct_type = exp.DataType(
+        this=exp.DataType.Type.STRUCT,
+        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
+        nested=True,
+    )
+    self._set_type(
+        expression,
+        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
+    )
+
+    return expression
+
+
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
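
The helper types approximate-top aggregates as an array of (value, count) structs, with the value type taken from the first argument. A hedged sketch of the effect, assuming annotate_types accepts schema and dialect arguments as in current releases:

    from sqlglot import parse_one
    from sqlglot.optimizer.annotate_types import annotate_types

    ast = annotate_types(
        parse_one("SELECT APPROX_TOP_COUNT(x, 5) FROM t", read="bigquery"),
        schema={"t": {"x": "STRING"}},
        dialect="bigquery",
    )
    # the ApproxTopK node should now be typed ARRAY<STRUCT<STRING, BIGINT>>
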
@@ -474,15 +489,24 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
         exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BINARY
+        ),
         exp.CodePointsToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
@@ -492,9 +516,13 @@ class BigQuery(Dialect):
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
         exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
+        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
@@ -504,9 +532,21 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BIGDECIMAL
+        ),
+        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
+        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
+        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
@@ -516,8 +556,12 @@ class BigQuery(Dialect):
         ),
         exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
+        ),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }
 
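
Most of these new entries pin a fixed scalar return type. A hedged sketch of one surfacing through the annotator (FARM_FINGERPRINT is BIGINT per the hunk above):

    from sqlglot import parse_one
    from sqlglot.optimizer.annotate_types import annotate_types

    ast = annotate_types(parse_one("SELECT FARM_FINGERPRINT('a')", read="bigquery"), dialect="bigquery")
    print(ast.selects[0].type)  # expected: BIGINT
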
@@ -550,6 +594,12 @@ class BigQuery(Dialect):
 
         return super().normalize_identifier(expression)
 
+    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
+        VAR_TOKENS = {
+            TokenType.DASH,
+            TokenType.VAR,
+        }
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
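
Adding DASH to VAR_TOKENS lets BigQuery JSON paths carry unquoted hyphenated keys. A hedged sketch:

    import sqlglot

    # a key like my-key should now tokenize as a single path segment
    sqlglot.parse_one("SELECT JSON_EXTRACT(payload, '$.my-key')", read="bigquery")
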
@@ -583,10 +633,13 @@ class BigQuery(Dialect):
             "EXPORT": TokenType.EXPORT,
             "FLOAT64": TokenType.DOUBLE,
             "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
+            "LOOP": TokenType.COMMAND,
             "MODEL": TokenType.MODEL,
             "NOT DETERMINISTIC": TokenType.VOLATILE,
             "RECORD": TokenType.STRUCT,
+            "REPEAT": TokenType.COMMAND,
             "TIMESTAMP": TokenType.TIMESTAMPTZ,
+            "WHILE": TokenType.COMMAND,
         }
         KEYWORDS.pop("DIV")
         KEYWORDS.pop("VALUES")
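
Mapping LOOP, REPEAT and WHILE to TokenType.COMMAND makes BigQuery's procedural statements tokenize as opaque commands instead of erroring. A minimal check against the tokenizer itself:

    from sqlglot.dialects.bigquery import BigQuery
    from sqlglot.tokens import TokenType

    tokens = BigQuery().tokenize("WHILE i < 10 DO SET i = i + 1")
    print(tokens[0].token_type is TokenType.COMMAND)  # expected: True
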
@@ -610,6 +663,8 @@
 
     FUNCTIONS = {
         **parser.Parser.FUNCTIONS,
+        "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
+        "BOOL": exp.JSONBool.from_arg_list,
         "CONTAINS_SUBSTR": _build_contains_substring,
         "DATE": _build_date,
         "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
@@ -676,6 +731,7 @@
         "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
         "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
         "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+        "FROM_HEX": exp.Unhex.from_arg_list,
         "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
     }
 
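
Since FROM_HEX now builds exp.Unhex, it can transpile to any dialect that renders Unhex. A hedged sketch (DuckDB output name assumed to be UNHEX):

    import sqlglot

    print(sqlglot.transpile("SELECT FROM_HEX('6f')", read="bigquery", write="duckdb")[0])
    # expected: SELECT UNHEX('6f')
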
@@ -686,7 +742,10 @@
             exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
         ),
         "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
+        "PREDICT": lambda self: self._parse_predict(),
         "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
+        "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
+        "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
     }
     FUNCTION_PARSERS.pop("TRIM")
 
@@ -966,13 +1025,40 @@
 
         return expr
 
-    def _parse_features_at_time(self) -> exp.FeaturesAtTime:
-        expr = self.expression(
-            exp.FeaturesAtTime,
-            this=(self._match(TokenType.TABLE) and self._parse_table())
-            or self._parse_select(nested=True),
+    def _parse_predict(self) -> exp.Predict:
+        self._match_text_seq("MODEL")
+        this = self._parse_table()
+
+        self._match(TokenType.COMMA)
+        self._match_text_seq("TABLE")
+
+        return self.expression(
+            exp.Predict,
+            this=this,
+            expression=self._parse_table(),
+            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
+        )
+
+    def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
+        self._match_text_seq("MODEL")
+        this = self._parse_table()
+
+        self._match(TokenType.COMMA)
+        self._match_text_seq("TABLE")
+
+        return self.expression(
+            exp.GenerateEmbedding,
+            this=this,
+            expression=self._parse_table(),
+            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
         )
 
+    def _parse_features_at_time(self) -> exp.FeaturesAtTime:
+        self._match(TokenType.TABLE)
+        this = self._parse_table()
+
+        expr = self.expression(exp.FeaturesAtTime, this=this)
+
         while self._match(TokenType.COMMA):
             arg = self._parse_lambda()
 
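
The dedicated parsers capture the MODEL and TABLE arguments into structured nodes instead of a generic anonymous function. A hedged sketch of BigQuery ML syntax that should now parse into exp.Predict:

    import sqlglot

    sql = """
    SELECT *
    FROM ML.PREDICT(
      MODEL `proj.ds.my_model`,
      TABLE `proj.ds.inputs`,
      STRUCT(0.5 AS threshold)
    )
    """
    sqlglot.parse_one(sql, read="bigquery")
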
@@ -983,6 +1069,37 @@
 
         return expr
 
+    def _parse_vector_search(self) -> exp.VectorSearch:
+        self._match(TokenType.TABLE)
+        base_table = self._parse_table()
+
+        self._match(TokenType.COMMA)
+
+        column_to_search = self._parse_bitwise()
+        self._match(TokenType.COMMA)
+
+        self._match(TokenType.TABLE)
+        query_table = self._parse_table()
+
+        expr = self.expression(
+            exp.VectorSearch,
+            this=base_table,
+            column_to_search=column_to_search,
+            query_table=query_table,
+        )
+
+        while self._match(TokenType.COMMA):
+            # query_column_to_search can be named argument or positional
+            if self._match(TokenType.STRING, advance=False):
+                query_column = self._parse_string()
+                expr.set("query_column_to_search", query_column)
+            else:
+                arg = self._parse_lambda()
+                if arg:
+                    expr.set(arg.this.name, arg)
+
+        return expr
+
     def _parse_export_data(self) -> exp.Export:
         self._match_text_seq("DATA")
 
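
A hedged sketch of the VECTOR_SEARCH shape the parser above accepts, with the positional string form of query_column_to_search followed by a named argument:

    import sqlglot

    sql = """
    SELECT *
    FROM VECTOR_SEARCH(
      TABLE ds.base, 'embedding',
      TABLE ds.queries, 'query_embedding',
      top_k => 5
    )
    """
    sqlglot.parse_one(sql, read="bigquery")
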
@@ -1019,6 +1136,8 @@
     EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
     SUPPORTS_UNIX_SECONDS = True
 
+    SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")
+
     TS_OR_DS_TYPES = (
         exp.TsOrDsToDatetime,
         exp.TsOrDsToTimestamp,
@@ -1028,6 +1147,7 @@
 
     TRANSFORMS = {
         **generator.Generator.TRANSFORMS,
+        exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
         exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
         exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
         exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
@@ -1068,6 +1188,7 @@
         exp.ILike: no_ilike_sql,
         exp.IntDiv: rename_func("DIV"),
         exp.Int64: rename_func("INT64"),
+        exp.JSONBool: rename_func("BOOL"),
         exp.JSONExtract: _json_extract_sql,
         exp.JSONExtractArray: _json_extract_sql,
         exp.JSONExtractScalar: _json_extract_sql,
@@ -1107,7 +1228,6 @@
         ),
         exp.SHA: rename_func("SHA1"),
         exp.SHA2: sha256_sql,
-        exp.Space: space_sql,
         exp.StabilityProperty: lambda self, e: (
             "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
         ),
sqlglot/dialects/clickhouse.py CHANGED
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
         "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
     }
     FUNCTIONS.pop("TRANSFORM")
+    FUNCTIONS.pop("APPROX_TOP_SUM")
 
     AGG_FUNCTIONS = {
         "count",
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
         "argMax",
         "avgWeighted",
         "topK",
+        "approx_top_sum",
         "topKWeighted",
         "deltaSum",
         "deltaSumTimestamp",
@@ -977,6 +979,14 @@
 
         return value
 
+    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
+        # ClickHouse allows custom expressions as partition key
+        # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
+        return self.expression(
+            exp.PartitionedByProperty,
+            this=self._parse_assignment(),
+        )
+
     class Generator(generator.Generator):
         QUERY_HINTS = False
         STRUCT_DELIMITER = ("(", ")")
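
Overriding _parse_partitioned_by with _parse_assignment allows arbitrary expressions as the partition key. A hedged sketch:

    import sqlglot

    sqlglot.parse_one(
        "CREATE TABLE t (d Date, x Int32) ENGINE=MergeTree PARTITION BY toYYYYMM(d) ORDER BY x",
        read="clickhouse",
    )
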
@@ -1094,6 +1104,7 @@
         exp.DateStrToDate: rename_func("toDate"),
         exp.DateSub: _datetime_delta_sql("DATE_SUB"),
         exp.Explode: rename_func("arrayJoin"),
+        exp.FarmFingerprint: rename_func("farmFingerprint64"),
         exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
         exp.IsNan: rename_func("isNaN"),
         exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
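
Combined with the BigQuery annotator entry earlier in this diff, FARM_FINGERPRINT gains a cross-dialect path. A hedged sketch:

    import sqlglot

    print(sqlglot.transpile("SELECT FARM_FINGERPRINT('a')", read="bigquery", write="clickhouse")[0])
    # expected: SELECT farmFingerprint64('a')
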
sqlglot/dialects/databricks.py CHANGED
@@ -99,7 +99,11 @@ class Databricks(Spark):
         exp.JSONExtract: _jsonextract_sql,
         exp.JSONExtractScalar: _jsonextract_sql,
         exp.JSONPathRoot: lambda *_: "",
-        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
+        exp.ToChar: lambda self, e: (
+            self.cast_sql(exp.Cast(this=e.this, to=exp.DataType(this="STRING")))
+            if e.args.get("is_numeric")
+            else self.function_fallback_sql(e)
+        ),
     }
 
     TRANSFORMS.pop(exp.TryCast)
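
When a ToChar node carries the is_numeric flag, Databricks output becomes a plain cast instead of a TO_CHAR call. A hedged sketch built directly on the node (is_numeric is normally set by the reading dialect):

    from sqlglot import exp
    from sqlglot.dialects.databricks import Databricks

    node = exp.ToChar(this=exp.column("x"), is_numeric=True)
    print(Databricks().generate(exp.select(node)))  # expected: SELECT CAST(x AS STRING)
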
sqlglot/dialects/dialect.py CHANGED
@@ -35,8 +35,18 @@ DATE_ADD_OR_DIFF = t.Union[
     exp.TsOrDsDiff,
 ]
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
-JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
-
+JSON_EXTRACT_TYPE = t.Union[
+    exp.JSONExtract, exp.JSONExtractScalar, exp.JSONBExtract, exp.JSONBExtractScalar
+]
+DATETIME_DELTA = t.Union[
+    exp.DateAdd,
+    exp.DatetimeAdd,
+    exp.DatetimeSub,
+    exp.TimeAdd,
+    exp.TimeSub,
+    exp.TimestampSub,
+    exp.TsOrDsAdd,
+]
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E, F
@@ -658,6 +668,7 @@ class Dialect(metaclass=_Dialect):
             exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
+            exp.FromBase32,
             exp.FromBase64,
         },
         exp.DataType.Type.BOOLEAN: {
@@ -769,6 +780,7 @@ class Dialect(metaclass=_Dialect):
             exp.TimeToStr,
             exp.TimeToTimeStr,
             exp.Trim,
+            exp.ToBase32,
             exp.ToBase64,
             exp.TsOrDsToDateStr,
             exp.UnixToStr,
@@ -1059,7 +1071,9 @@
         try:
             return parse_json_path(path_text, self)
         except ParseError as e:
-            if self.STRICT_JSON_PATH_SYNTAX:
+            if self.STRICT_JSON_PATH_SYNTAX and not path_text.lstrip().startswith(
+                ("lax", "strict")
+            ):
                 logger.warning(f"Invalid JSON path syntax. {str(e)}")
 
         return path
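
The added guard compares the raw path text, so mode-prefixed JSONPaths fall through without a warning. A minimal sketch of the predicate itself:

    path_text = "lax $.a"
    exempt = path_text.lstrip().startswith(("lax", "strict"))
    print(exempt)  # True: no strict-syntax warning is logged for this path
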
@@ -1643,14 +1657,49 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE
     return _delta_sql
 
 
+def date_delta_to_binary_interval_op(
+    cast: bool = True,
+) -> t.Callable[[Generator, DATETIME_DELTA], str]:
+    def date_delta_to_binary_interval_op_sql(self: Generator, expression: DATETIME_DELTA) -> str:
+        this = expression.this
+        unit = unit_to_var(expression)
+        op = (
+            "+"
+            if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
+            else "-"
+        )
+
+        to_type: t.Optional[exp.DATA_TYPE] = None
+        if cast:
+            if isinstance(expression, exp.TsOrDsAdd):
+                to_type = expression.return_type
+            elif this.is_string:
+                # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
+                to_type = (
+                    exp.DataType.Type.DATETIME
+                    if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
+                    else exp.DataType.Type.DATE
+                )
+
+        this = exp.cast(this, to_type) if to_type else this
+
+        expr = expression.expression
+        interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)
+
+        return f"{self.sql(this)} {op} {self.sql(interval)}"
+
+    return date_delta_to_binary_interval_op_sql
+
+
 def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
+    if not unit:
+        return exp.Literal.string(default) if default else None
 
-    if isinstance(unit, exp.Placeholder):
+    if isinstance(unit, exp.Placeholder) or type(unit) not in (exp.Var, exp.Literal):
         return unit
-    if unit:
-        return exp.Literal.string(unit.name)
-    return exp.Literal.string(default) if default else None
+
+    return exp.Literal.string(unit.name)
 
 
 def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
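
date_delta_to_binary_interval_op is a generator helper, so a dialect opts in by registering it per expression type. A hypothetical wiring sketch (this mapping is not one the diff itself adds):

    from sqlglot import exp
    from sqlglot.dialects.dialect import date_delta_to_binary_interval_op

    # e.g. DATE_ADD('2020-01-01', 1, 'DAY') -> CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY
    TRANSFORMS = {
        exp.DateAdd: date_delta_to_binary_interval_op(),
        exp.DatetimeSub: date_delta_to_binary_interval_op(cast=False),
    }
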
@@ -1730,7 +1779,10 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
 
 
 def build_json_extract_path(
-    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
+    expr_type: t.Type[F],
+    zero_based_indexing: bool = True,
+    arrow_req_json_type: bool = False,
+    json_type: t.Optional[str] = None,
 ) -> t.Callable[[t.List], F]:
     def _builder(args: t.List) -> F:
         segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
@@ -1750,11 +1802,19 @@ def build_json_extract_path(
 
         # This is done to avoid failing in the expression validator due to the arg count
         del args[2:]
-        return expr_type(
-            this=seq_get(args, 0),
-            expression=exp.JSONPath(expressions=segments),
-            only_json_types=arrow_req_json_type,
-        )
+        kwargs = {
+            "this": seq_get(args, 0),
+            "expression": exp.JSONPath(expressions=segments),
+        }
+
+        is_jsonb = issubclass(expr_type, (exp.JSONBExtract, exp.JSONBExtractScalar))
+        if not is_jsonb:
+            kwargs["only_json_types"] = arrow_req_json_type
+
+        if json_type is not None:
+            kwargs["json_type"] = json_type
+
+        return expr_type(**kwargs)
 
     return _builder
 
@@ -1962,7 +2022,7 @@ def groupconcat_sql(
     return self.sql(listagg)
 
 
-def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
+def build_timetostr_or_tochar(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar:
     if len(args) == 2:
         this = args[0]
         if not this.type:
@@ -1983,12 +2043,3 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
         expression=seq_get(args, 1),
         replacement=seq_get(args, 2) or exp.Literal.string(""),
     )
-
-
-def space_sql(self: Generator, expression: exp.Space) -> str:
-    return self.sql(
-        exp.Repeat(
-            this=exp.Literal.string(" "),
-            times=expression.this,
-        )
-    )