sqlglot 27.6.0__py3-none-any.whl → 27.8.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control
 
-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 
 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union
 
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
 
-__version__ = version = '27.6.0'
-__version_tuple__ = version_tuple = (27, 6, 0)
+__version__ = version = '27.8.0'
+__version_tuple__ = version_tuple = (27, 8, 0)
+
+__commit_id__ = commit_id = None
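
The version module now also exposes commit metadata. A quick illustrative check (not part of the diff; `__commit_id__` is `None` in builds without embedded commit info):

```python
from sqlglot import _version

print(_version.__version__)    # '27.8.0'
print(_version.__commit_id__)  # None for this build
```
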
sqlglot/dialects/bigquery.py CHANGED
@@ -4,7 +4,7 @@ import logging
 import re
 import typing as t
 
-from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -30,7 +30,6 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
-    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -474,6 +473,8 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
@@ -481,10 +482,21 @@ class BigQuery(Dialect):
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
         exp.Concat: _annotate_concat,
         exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
+        exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
+        ),
+        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
@@ -494,6 +506,13 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
@@ -501,6 +520,11 @@ class BigQuery(Dialect):
         exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.DATETIME
         ),
+        exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }
 
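
Taken together, these entries teach the BigQuery type annotator about more functions. A minimal sketch of how they surface through `annotate_types` (illustrative, assuming the dialect-aware `dialect=` parameter; output hedged):

```python
from sqlglot import parse_one
from sqlglot.optimizer.annotate_types import annotate_types

# exp.Soundex is one of the newly annotated expressions
expr = annotate_types(parse_one("SELECT SOUNDEX('x')", read="bigquery"), dialect="bigquery")
print(expr.selects[0].type.sql())  # expected: VARCHAR
```
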
@@ -533,6 +557,12 @@ class BigQuery(Dialect):
 
         return super().normalize_identifier(expression)
 
+    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
+        VAR_TOKENS = {
+            TokenType.DASH,
+            TokenType.VAR,
+        }
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
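
The new `JSONPathTokenizer` adds `TokenType.DASH` to the variable tokens, so unquoted dashed keys in BigQuery JSON paths can tokenize. An illustrative round-trip (expected behavior, not asserted by the diff):

```python
from sqlglot import transpile

sql = "SELECT JSON_EXTRACT(j, '$.sub-key') FROM t"
# expected to round-trip with the dashed key left unquoted
print(transpile(sql, read="bigquery", write="bigquery")[0])
```
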
@@ -621,7 +651,13 @@ class BigQuery(Dialect):
             "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                 [seq_get(args, 1), seq_get(args, 0)]
             ),
+            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
+                [seq_get(args, 1), seq_get(args, 0)]
+            ),
             "PARSE_TIMESTAMP": _build_parse_timestamp,
+            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
+                [seq_get(args, 1), seq_get(args, 0)]
+            ),
             "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
             "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
             "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
@@ -652,6 +688,8 @@ class BigQuery(Dialect):
             "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
+            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
         }
 
         FUNCTION_PARSERS = {
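
With `PARSE_TIME` and `PARSE_DATETIME` mapped to dedicated expressions here, and matching generator entries further down, both functions should round-trip instead of degrading to anonymous functions. A hedged sketch:

```python
from sqlglot import transpile

for sql in (
    "SELECT PARSE_TIME('%H:%M:%S', '12:34:56')",
    "SELECT PARSE_DATETIME('%Y-%m-%d', '2024-01-01')",
):
    # each statement is expected to print back unchanged
    print(transpile(sql, read="bigquery", write="bigquery")[0])
```
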
@@ -994,6 +1032,15 @@ class BigQuery(Dialect):
         EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
         SUPPORTS_UNIX_SECONDS = True
 
+        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")
+
+        TS_OR_DS_TYPES = (
+            exp.TsOrDsToDatetime,
+            exp.TsOrDsToTimestamp,
+            exp.TsOrDsToTime,
+            exp.TsOrDsToDate,
+        )
+
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
@@ -1022,6 +1069,7 @@ class BigQuery(Dialect):
             exp.DateSub: date_add_interval_sql("DATE", "SUB"),
             exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
             exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
+            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
             exp.FromTimeZone: lambda self, e: self.func(
                 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
             ),
@@ -1059,6 +1107,10 @@ class BigQuery(Dialect):
             exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
             exp.ReturnsProperty: _returnsproperty_sql,
             exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
+            exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
+            exp.ParseDatetime: lambda self, e: self.func(
+                "PARSE_DATETIME", self.format_time(e), e.this
+            ),
             exp.Select: transforms.preprocess(
                 [
                     transforms.explode_projection_to_unnest(),
@@ -1070,7 +1122,6 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
-            exp.Space: space_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
@@ -1297,14 +1348,12 @@ class BigQuery(Dialect):
                 func_name = "FORMAT_DATETIME"
             elif isinstance(this, exp.TsOrDsToTimestamp):
                 func_name = "FORMAT_TIMESTAMP"
+            elif isinstance(this, exp.TsOrDsToTime):
+                func_name = "FORMAT_TIME"
             else:
                 func_name = "FORMAT_DATE"
 
-            time_expr = (
-                this
-                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
-                else expression
-            )
+            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
             return self.func(
                 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
             )
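
The new `FORMAT_TIME` branch completes the `FORMAT_*` family in this generator method. An illustrative round-trip (expected output, not asserted by the diff):

```python
from sqlglot import transpile

sql = "SELECT FORMAT_TIME('%H:%M', TIME '12:34:56')"
# expected to re-emit FORMAT_TIME rather than falling back to FORMAT_DATE
print(transpile(sql, read="bigquery", write="bigquery")[0])
```
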
sqlglot/dialects/clickhouse.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 import typing as t
 import datetime
 from sqlglot import exp, generator, parser, tokens
+from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
     NormalizationStrategy,
@@ -31,14 +32,19 @@ from sqlglot.generator import unsupported_args
 DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]
 
 
-def _build_date_format(args: t.List) -> exp.TimeToStr:
-    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)
+def _build_datetime_format(
+    expr_type: t.Type[E],
+) -> t.Callable[[t.List], E]:
+    def _builder(args: t.List) -> E:
+        expr = build_formatted_time(expr_type, "clickhouse")(args)
 
-    timezone = seq_get(args, 2)
-    if timezone:
-        expr.set("zone", timezone)
+        timezone = seq_get(args, 2)
+        if timezone:
+            expr.set("zone", timezone)
 
-    return expr
+        return expr
+
+    return _builder
 
 
 def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
@@ -310,16 +316,17 @@ class ClickHouse(Dialect):
             "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
             "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
             "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
-            "DATE_FORMAT": _build_date_format,
+            "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
             "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
             "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
-            "FORMATDATETIME": _build_date_format,
+            "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
             "JSONEXTRACTSTRING": build_json_extract_path(
                 exp.JSONExtractScalar, zero_based_indexing=False
             ),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
             "MAP": parser.build_var_map,
             "MATCH": exp.RegexpLike.from_arg_list,
+            "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
             "RANDCANONICAL": exp.Rand.from_arg_list,
             "STR_TO_DATE": _build_str_to_date,
             "TUPLE": exp.Struct.from_arg_list,
@@ -1141,6 +1148,7 @@ class ClickHouse(Dialect):
             exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                 rename_func("editDistance")
             ),
+            exp.ParseDatetime: rename_func("parseDateTime"),
         }
 
         PROPERTIES_LOCATION = {
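
`parseDateTime` now parses into `exp.ParseDatetime` and is rendered back by name via the transform above. A hedged round-trip sketch:

```python
from sqlglot import transpile

sql = "SELECT parseDateTime('2024-01-01', '%Y-%m-%d')"
# expected to round-trip as parseDateTime(...)
print(transpile(sql, read="clickhouse", write="clickhouse")[0])
```
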
@@ -1177,6 +1185,17 @@ class ClickHouse(Dialect):
             exp.DataType.Type.MULTIPOLYGON,
         }
 
+        def offset_sql(self, expression: exp.Offset) -> str:
+            offset = super().offset_sql(expression)
+
+            # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
+            # https://clickhouse.com/docs/sql-reference/statements/select/offset
+            parent = expression.parent
+            if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
+                offset = f"{offset} ROWS"
+
+            return offset
+
         def strtodate_sql(self, expression: exp.StrToDate) -> str:
             strtodate_sql = self.function_fallback_sql(expression)
 
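
An illustrative consequence of the `offset_sql` override (expected output, not part of the diff):

```python
from sqlglot import transpile

sql = "SELECT * FROM t OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY"
# expected: SELECT * FROM t OFFSET 1 ROWS FETCH FIRST 3 ROWS ONLY
print(transpile(sql, read="clickhouse", write="clickhouse")[0])
```
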
sqlglot/dialects/databricks.py CHANGED
@@ -99,7 +99,11 @@ class Databricks(Spark):
             exp.JSONExtract: _jsonextract_sql,
             exp.JSONExtractScalar: _jsonextract_sql,
             exp.JSONPathRoot: lambda *_: "",
-            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
+            exp.ToChar: lambda self, e: (
+                self.cast_sql(exp.Cast(this=e.this, to=exp.DataType(this="STRING")))
+                if e.args.get("is_numeric")
+                else self.function_fallback_sql(e)
+            ),
         }
 
         TRANSFORMS.pop(exp.TryCast)
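
The new branch renders numeric `TO_CHAR` calls as plain casts. A direct-construction sketch (the `is_numeric` flag is normally set by parsers; here it is forced by hand for illustration):

```python
from sqlglot import exp
from sqlglot.dialects.databricks import Databricks

node = exp.ToChar(this=exp.column("x"), is_numeric=True)
print(Databricks().generate(node))  # expected: CAST(x AS STRING)
```
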
sqlglot/dialects/dialect.py CHANGED
@@ -35,8 +35,18 @@ DATE_ADD_OR_DIFF = t.Union[
     exp.TsOrDsDiff,
 ]
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
-JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
-
+JSON_EXTRACT_TYPE = t.Union[
+    exp.JSONExtract, exp.JSONExtractScalar, exp.JSONBExtract, exp.JSONBExtractScalar
+]
+DATETIME_DELTA = t.Union[
+    exp.DateAdd,
+    exp.DatetimeAdd,
+    exp.DatetimeSub,
+    exp.TimeAdd,
+    exp.TimeSub,
+    exp.TimestampSub,
+    exp.TsOrDsAdd,
+]
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E, F
@@ -654,6 +664,8 @@ class Dialect(metaclass=_Dialect):
             exp.Length,
             exp.UnixDate,
             exp.UnixSeconds,
+            exp.UnixMicros,
+            exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
             exp.FromBase64,
@@ -674,6 +686,7 @@ class Dialect(metaclass=_Dialect):
             exp.DateFromParts,
             exp.DateStrToDate,
             exp.DiToDate,
+            exp.LastDay,
             exp.StrToDate,
             exp.TimeStrToDate,
             exp.TsOrDsToDate,
@@ -718,6 +731,9 @@ class Dialect(metaclass=_Dialect):
         },
         exp.DataType.Type.INTERVAL: {
             exp.Interval,
+            exp.JustifyDays,
+            exp.JustifyHours,
+            exp.JustifyInterval,
             exp.MakeInterval,
         },
         exp.DataType.Type.JSON: {
@@ -1053,7 +1069,9 @@ class Dialect(metaclass=_Dialect):
         try:
             return parse_json_path(path_text, self)
         except ParseError as e:
-            if self.STRICT_JSON_PATH_SYNTAX:
+            if self.STRICT_JSON_PATH_SYNTAX and not path_text.lstrip().startswith(
+                ("lax", "strict")
+            ):
                 logger.warning(f"Invalid JSON path syntax. {str(e)}")
 
         return path
@@ -1637,22 +1655,59 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE
     return _delta_sql
 
 
+def date_delta_to_binary_interval_op(
+    cast: bool = True,
+) -> t.Callable[[Generator, DATETIME_DELTA], str]:
+    def date_delta_to_binary_interval_op_sql(self: Generator, expression: DATETIME_DELTA) -> str:
+        this = expression.this
+        unit = unit_to_var(expression)
+        op = (
+            "+"
+            if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
+            else "-"
+        )
+
+        to_type: t.Optional[exp.DATA_TYPE] = None
+        if cast:
+            if isinstance(expression, exp.TsOrDsAdd):
+                to_type = expression.return_type
+            elif this.is_string:
+                # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
+                to_type = (
+                    exp.DataType.Type.DATETIME
+                    if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
+                    else exp.DataType.Type.DATE
+                )
+
+        this = exp.cast(this, to_type) if to_type else this
+
+        expr = expression.expression
+        interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)
+
+        return f"{self.sql(this)} {op} {self.sql(interval)}"
+
+    return date_delta_to_binary_interval_op_sql
+
+
 def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
+    if not unit:
+        return exp.Literal.string(default) if default else None
 
-    if isinstance(unit, exp.Placeholder):
+    if isinstance(unit, exp.Placeholder) or type(unit) not in (exp.Var, exp.Literal):
         return unit
-    if unit:
-        return exp.Literal.string(unit.name)
-    return exp.Literal.string(default) if default else None
+
+    return exp.Literal.string(unit.name)
 
 
 def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
 
-    if isinstance(unit, (exp.Var, exp.Placeholder)):
+    if isinstance(unit, (exp.Var, exp.Placeholder, exp.WeekStart)):
         return unit
-    return exp.Var(this=default) if default else None
+
+    value = unit.name if unit else default
+    return exp.Var(this=value) if value else None
 
 
 @t.overload
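
A hand-constructed sketch of the helper changes above (outputs hedged):

```python
from sqlglot import exp
from sqlglot.dialects.dialect import date_delta_to_binary_interval_op, unit_to_var
from sqlglot.generator import Generator

# unit_to_var now lifts a literal unit into a Var instead of
# falling back to the "DAY" default
trunc = exp.DateTrunc(this=exp.column("d"), unit=exp.Literal.string("month"))
print(unit_to_var(trunc))  # Var(this=month)

# the new helper renders date arithmetic as a binary +/- INTERVAL op
add = exp.DateAdd(
    this=exp.Literal.string("2020-01-01"),
    expression=exp.Literal.number(1),
    unit=exp.var("DAY"),
)
print(date_delta_to_binary_interval_op()(Generator(), add))
# expected: CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY
```
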
@@ -1722,7 +1777,10 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
 
 
 def build_json_extract_path(
-    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
+    expr_type: t.Type[F],
+    zero_based_indexing: bool = True,
+    arrow_req_json_type: bool = False,
+    json_type: t.Optional[str] = None,
 ) -> t.Callable[[t.List], F]:
     def _builder(args: t.List) -> F:
         segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
@@ -1742,11 +1800,19 @@ def build_json_extract_path(
 
         # This is done to avoid failing in the expression validator due to the arg count
         del args[2:]
-        return expr_type(
-            this=seq_get(args, 0),
-            expression=exp.JSONPath(expressions=segments),
-            only_json_types=arrow_req_json_type,
-        )
+        kwargs = {
+            "this": seq_get(args, 0),
+            "expression": exp.JSONPath(expressions=segments),
+        }
+
+        is_jsonb = issubclass(expr_type, (exp.JSONBExtract, exp.JSONBExtractScalar))
+        if not is_jsonb:
+            kwargs["only_json_types"] = arrow_req_json_type
+
+        if json_type is not None:
+            kwargs["json_type"] = json_type
+
+        return expr_type(**kwargs)
 
     return _builder
 
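
The kwargs are now assembled conditionally so JSONB expression types, which do not take `only_json_types`, can share the builder. A small sketch:

```python
from sqlglot import exp
from sqlglot.dialects.dialect import build_json_extract_path

build_jsonb = build_json_extract_path(exp.JSONBExtract)
node = build_jsonb([exp.column("doc"), exp.Literal.string("a")])
print(type(node).__name__)               # JSONBExtract
print(node.args.get("only_json_types"))  # expected: None, the kwarg is omitted for JSONB
```
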
@@ -1954,7 +2020,7 @@ def groupconcat_sql(
     return self.sql(listagg)
 
 
-def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
+def build_timetostr_or_tochar(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar:
     if len(args) == 2:
         this = args[0]
         if not this.type:
@@ -1975,12 +2041,3 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
         expression=seq_get(args, 1),
         replacement=seq_get(args, 2) or exp.Literal.string(""),
     )
-
-
-def space_sql(self: Generator, expression: exp.Space) -> str:
-    return self.sql(
-        exp.Repeat(
-            this=exp.Literal.string(" "),
-            times=expression.this,
-        )
-    )
sqlglot/dialects/doris.py CHANGED
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
+import typing as t
+
 from sqlglot import exp
 from sqlglot.dialects.dialect import (
     approx_count_distinct_sql,
-    build_timestamp_trunc,
     property_sql,
     rename_func,
     time_format,
     unit_to_str,
 )
 from sqlglot.dialects.mysql import MySQL
+from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
 
 
@@ -22,6 +24,22 @@ def _lag_lead_sql(self, expression: exp.Lag | exp.Lead) -> str:
     )
 
 
+# Accept both DATE_TRUNC(datetime, unit) and DATE_TRUNC(unit, datetime)
+def _build_date_trunc(args: t.List[exp.Expression]) -> exp.Expression:
+    a0, a1 = seq_get(args, 0), seq_get(args, 1)
+
+    def _is_unit_like(e: exp.Expression | None) -> bool:
+        if not (isinstance(e, exp.Literal) and e.is_string):
+            return False
+        text = e.this
+        return not any(ch.isdigit() for ch in text)
+
+    # Determine which argument is the unit
+    unit, this = (a0, a1) if _is_unit_like(a0) else (a1, a0)
+
+    return exp.TimestampTrunc(this=this, unit=unit)
+
+
 class Doris(MySQL):
     DATE_FORMAT = "'yyyy-MM-dd'"
     DATEINT_FORMAT = "'yyyyMMdd'"
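
An illustrative effect of the order-insensitive builder (expected normalization, not asserted by the diff):

```python
from sqlglot import transpile

for sql in (
    "SELECT DATE_TRUNC(dt, 'month')",
    "SELECT DATE_TRUNC('month', dt)",
):
    # both argument orders are expected to normalize to DATE_TRUNC(dt, 'month')
    print(transpile(sql, read="doris", write="doris")[0])
```
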
@@ -31,7 +49,7 @@ class Doris(MySQL):
         FUNCTIONS = {
             **MySQL.Parser.FUNCTIONS,
             "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
-            "DATE_TRUNC": build_timestamp_trunc,
+            "DATE_TRUNC": _build_date_trunc,
             "MONTHS_ADD": exp.AddMonths.from_arg_list,
             "REGEXP": exp.RegexpLike.from_arg_list,
             "TO_DATE": exp.TsOrDsToDate.from_arg_list,
@@ -40,6 +58,9 @@ class Doris(MySQL):
         FUNCTION_PARSERS = MySQL.Parser.FUNCTION_PARSERS.copy()
         FUNCTION_PARSERS.pop("GROUP_CONCAT")
 
+        NO_PAREN_FUNCTIONS = MySQL.Parser.NO_PAREN_FUNCTIONS.copy()
+        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_DATE)
+
         PROPERTY_PARSERS = {
             **MySQL.Parser.PROPERTY_PARSERS,
             "PROPERTIES": lambda self: self._parse_wrapped_properties(),
@@ -111,6 +132,7 @@ class Doris(MySQL):
         LAST_DAY_SUPPORTS_DATE_PART = False
         VARCHAR_REQUIRES_SIZE = False
         WITH_PROPERTIES_PREFIX = "PROPERTIES"
+        RENAME_TABLE_WITH_DB = False
 
         TYPE_MAPPING = {
             **MySQL.Generator.TYPE_MAPPING,
@@ -123,6 +145,7 @@ class Doris(MySQL):
             **MySQL.Generator.PROPERTIES_LOCATION,
             exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
             exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
+            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
         }
 
         CAST_MAPPING = {}
@@ -137,6 +160,7 @@ class Doris(MySQL):
             exp.ArrayAgg: rename_func("COLLECT_LIST"),
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
             exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
+            exp.CurrentDate: lambda self, _: self.func("CURRENT_DATE"),
             exp.CurrentTimestamp: lambda self, _: self.func("NOW"),
             exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, unit_to_str(e)),
             exp.GroupConcat: lambda self, e: self.func(
@@ -683,3 +707,20 @@ class Doris(MySQL):
             # Handle both static and dynamic partition definitions
             create_sql = ", ".join(self.sql(e) for e in create_expressions)
             return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"
+
+        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
+            node = expression.this
+            if isinstance(node, exp.Schema):
+                parts = ", ".join(self.sql(e) for e in node.expressions)
+                return f"PARTITION BY ({parts})"
+            return f"PARTITION BY ({self.sql(node)})"
+
+        def table_sql(self, expression: exp.Table, sep: str = " AS ") -> str:
+            """Override table_sql to avoid AS keyword in UPDATE and DELETE statements."""
+            ancestor = expression.find_ancestor(exp.Update, exp.Delete, exp.Select)
+            if not isinstance(ancestor, exp.Select):
+                sep = " "
+            return super().table_sql(expression, sep=sep)
+
+        def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str:
+            return super().alterrename_sql(expression, include_to=False)
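
A hedged sketch of the `table_sql` override's effect on UPDATE aliases:

```python
from sqlglot import transpile

# expected: UPDATE t a SET x = 1 (no AS keyword outside of SELECT)
print(transpile("UPDATE t AS a SET x = 1", read="doris", write="doris")[0])
```
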