sqlglot 27.6.0__py3-none-any.whl → 27.8.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
sqlglot/_version.py CHANGED
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control
 
-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 
 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union
 
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
 
-__version__ = version = '27.6.0'
-__version_tuple__ = version_tuple = (27, 6, 0)
+__version__ = version = '27.8.0'
+__version_tuple__ = version_tuple = (27, 8, 0)
+
+__commit_id__ = commit_id = None
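
The version module now also exposes commit metadata. A quick illustrative check (not part of the diff; `__commit_id__` is `None` in builds without embedded commit info):

```python
from sqlglot import _version

print(_version.__version__)    # '27.8.0'
print(_version.__commit_id__)  # None for this build
```
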
sqlglot/dialects/bigquery.py CHANGED
@@ -4,7 +4,7 @@ import logging
 import re
 import typing as t
 
-from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -30,7 +30,6 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
-    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -474,6 +473,8 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
@@ -481,10 +482,21 @@ class BigQuery(Dialect):
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
         exp.Concat: _annotate_concat,
         exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
+        exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
+        exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
+        ),
+        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
@@ -494,6 +506,13 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
@@ -501,6 +520,11 @@ class BigQuery(Dialect):
         exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.DATETIME
         ),
+        exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }
 
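
Taken together, these entries teach the BigQuery type annotator about more functions. A minimal sketch of how they surface through `annotate_types` (illustrative, assuming the dialect-aware `dialect=` parameter; output hedged):

```python
from sqlglot import parse_one
from sqlglot.optimizer.annotate_types import annotate_types

# exp.Soundex is one of the newly annotated expressions
expr = annotate_types(parse_one("SELECT SOUNDEX('x')", read="bigquery"), dialect="bigquery")
print(expr.selects[0].type.sql())  # expected: VARCHAR
```
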
@@ -533,6 +557,12 @@ class BigQuery(Dialect):
 
         return super().normalize_identifier(expression)
 
+    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
+        VAR_TOKENS = {
+            TokenType.DASH,
+            TokenType.VAR,
+        }
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
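
The new `JSONPathTokenizer` adds `TokenType.DASH` to the variable tokens, so unquoted dashed keys in BigQuery JSON paths can tokenize. An illustrative round-trip (expected behavior, not asserted by the diff):

```python
from sqlglot import transpile

sql = "SELECT JSON_EXTRACT(j, '$.sub-key') FROM t"
# expected to round-trip with the dashed key left unquoted
print(transpile(sql, read="bigquery", write="bigquery")[0])
```
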
@@ -621,7 +651,13 @@ class BigQuery(Dialect):
             "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                 [seq_get(args, 1), seq_get(args, 0)]
             ),
+            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
+                [seq_get(args, 1), seq_get(args, 0)]
+            ),
             "PARSE_TIMESTAMP": _build_parse_timestamp,
+            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
+                [seq_get(args, 1), seq_get(args, 0)]
+            ),
             "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
             "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
             "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
@@ -652,6 +688,8 @@ class BigQuery(Dialect):
             "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
+            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
         }
 
         FUNCTION_PARSERS = {
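
With `PARSE_TIME` and `PARSE_DATETIME` mapped to dedicated expressions here, and matching generator entries further down, both functions should round-trip instead of degrading to anonymous functions. A hedged sketch:

```python
from sqlglot import transpile

for sql in (
    "SELECT PARSE_TIME('%H:%M:%S', '12:34:56')",
    "SELECT PARSE_DATETIME('%Y-%m-%d', '2024-01-01')",
):
    # each statement is expected to print back unchanged
    print(transpile(sql, read="bigquery", write="bigquery")[0])
```
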
@@ -994,6 +1032,15 @@ class BigQuery(Dialect):
         EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
         SUPPORTS_UNIX_SECONDS = True
 
+        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")
+
+        TS_OR_DS_TYPES = (
+            exp.TsOrDsToDatetime,
+            exp.TsOrDsToTimestamp,
+            exp.TsOrDsToTime,
+            exp.TsOrDsToDate,
+        )
+
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
@@ -1022,6 +1069,7 @@ class BigQuery(Dialect):
             exp.DateSub: date_add_interval_sql("DATE", "SUB"),
             exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
             exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
+            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
             exp.FromTimeZone: lambda self, e: self.func(
                 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
             ),
@@ -1059,6 +1107,10 @@ class BigQuery(Dialect):
             exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
             exp.ReturnsProperty: _returnsproperty_sql,
             exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
+            exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this),
+            exp.ParseDatetime: lambda self, e: self.func(
+                "PARSE_DATETIME", self.format_time(e), e.this
+            ),
             exp.Select: transforms.preprocess(
                 [
                     transforms.explode_projection_to_unnest(),
@@ -1070,7 +1122,6 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
-            exp.Space: space_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
@@ -1297,14 +1348,12 @@ class BigQuery(Dialect):
                 func_name = "FORMAT_DATETIME"
             elif isinstance(this, exp.TsOrDsToTimestamp):
                 func_name = "FORMAT_TIMESTAMP"
+            elif isinstance(this, exp.TsOrDsToTime):
+                func_name = "FORMAT_TIME"
             else:
                 func_name = "FORMAT_DATE"
 
-            time_expr = (
-                this
-                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
-                else expression
-            )
+            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
             return self.func(
                 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
             )
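
The new `FORMAT_TIME` branch completes the `FORMAT_*` family in this generator method. An illustrative round-trip (expected output, not asserted by the diff):

```python
from sqlglot import transpile

sql = "SELECT FORMAT_TIME('%H:%M', TIME '12:34:56')"
# expected to re-emit FORMAT_TIME rather than falling back to FORMAT_DATE
print(transpile(sql, read="bigquery", write="bigquery")[0])
```
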
sqlglot/dialects/clickhouse.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 import typing as t
 import datetime
 from sqlglot import exp, generator, parser, tokens
+from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
     NormalizationStrategy,
@@ -31,14 +32,19 @@ from sqlglot.generator import unsupported_args
 DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]
 
 
-def _build_date_format(args: t.List) -> exp.TimeToStr:
-    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)
+def _build_datetime_format(
+    expr_type: t.Type[E],
+) -> t.Callable[[t.List], E]:
+    def _builder(args: t.List) -> E:
+        expr = build_formatted_time(expr_type, "clickhouse")(args)
 
-    timezone = seq_get(args, 2)
-    if timezone:
-        expr.set("zone", timezone)
+        timezone = seq_get(args, 2)
+        if timezone:
+            expr.set("zone", timezone)
 
-    return expr
+        return expr
+
+    return _builder
 
 
 def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
@@ -310,16 +316,17 @@ class ClickHouse(Dialect):
             "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
             "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
             "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
-            "DATE_FORMAT": _build_date_format,
+            "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
             "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
             "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
-            "FORMATDATETIME": _build_date_format,
+            "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
             "JSONEXTRACTSTRING": build_json_extract_path(
                 exp.JSONExtractScalar, zero_based_indexing=False
             ),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
             "MAP": parser.build_var_map,
             "MATCH": exp.RegexpLike.from_arg_list,
+            "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
             "RANDCANONICAL": exp.Rand.from_arg_list,
             "STR_TO_DATE": _build_str_to_date,
             "TUPLE": exp.Struct.from_arg_list,
@@ -1141,6 +1148,7 @@ class ClickHouse(Dialect):
             exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                 rename_func("editDistance")
             ),
+            exp.ParseDatetime: rename_func("parseDateTime"),
         }
 
         PROPERTIES_LOCATION = {
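
`parseDateTime` now parses into `exp.ParseDatetime` and is rendered back by name via the transform above. A hedged round-trip sketch:

```python
from sqlglot import transpile

sql = "SELECT parseDateTime('2024-01-01', '%Y-%m-%d')"
# expected to round-trip as parseDateTime(...)
print(transpile(sql, read="clickhouse", write="clickhouse")[0])
```
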
@@ -1177,6 +1185,17 @@ class ClickHouse(Dialect):
             exp.DataType.Type.MULTIPOLYGON,
         }
 
+        def offset_sql(self, expression: exp.Offset) -> str:
+            offset = super().offset_sql(expression)
+
+            # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
+            # https://clickhouse.com/docs/sql-reference/statements/select/offset
+            parent = expression.parent
+            if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
+                offset = f"{offset} ROWS"
+
+            return offset
+
         def strtodate_sql(self, expression: exp.StrToDate) -> str:
             strtodate_sql = self.function_fallback_sql(expression)
 
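
An illustrative consequence of the `offset_sql` override (expected output, not part of the diff):

```python
from sqlglot import transpile

sql = "SELECT * FROM t OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY"
# expected: SELECT * FROM t OFFSET 1 ROWS FETCH FIRST 3 ROWS ONLY
print(transpile(sql, read="clickhouse", write="clickhouse")[0])
```
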
sqlglot/dialects/databricks.py CHANGED
@@ -99,7 +99,11 @@ class Databricks(Spark):
             exp.JSONExtract: _jsonextract_sql,
             exp.JSONExtractScalar: _jsonextract_sql,
             exp.JSONPathRoot: lambda *_: "",
-            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
+            exp.ToChar: lambda self, e: (
+                self.cast_sql(exp.Cast(this=e.this, to=exp.DataType(this="STRING")))
+                if e.args.get("is_numeric")
+                else self.function_fallback_sql(e)
+            ),
         }
 
         TRANSFORMS.pop(exp.TryCast)
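
The new branch renders numeric `TO_CHAR` calls as plain casts. A direct-construction sketch (the `is_numeric` flag is normally set by parsers; here it is forced by hand for illustration):

```python
from sqlglot import exp
from sqlglot.dialects.databricks import Databricks

node = exp.ToChar(this=exp.column("x"), is_numeric=True)
print(Databricks().generate(node))  # expected: CAST(x AS STRING)
```
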
sqlglot/dialects/dialect.py CHANGED
@@ -35,8 +35,18 @@ DATE_ADD_OR_DIFF = t.Union[
     exp.TsOrDsDiff,
 ]
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
-JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]
-
+JSON_EXTRACT_TYPE = t.Union[
+    exp.JSONExtract, exp.JSONExtractScalar, exp.JSONBExtract, exp.JSONBExtractScalar
+]
+DATETIME_DELTA = t.Union[
+    exp.DateAdd,
+    exp.DatetimeAdd,
+    exp.DatetimeSub,
+    exp.TimeAdd,
+    exp.TimeSub,
+    exp.TimestampSub,
+    exp.TsOrDsAdd,
+]
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E, F
@@ -654,6 +664,8 @@ class Dialect(metaclass=_Dialect):
             exp.Length,
             exp.UnixDate,
             exp.UnixSeconds,
+            exp.UnixMicros,
+            exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
             exp.FromBase64,
@@ -674,6 +686,7 @@ class Dialect(metaclass=_Dialect):
             exp.DateFromParts,
             exp.DateStrToDate,
             exp.DiToDate,
+            exp.LastDay,
             exp.StrToDate,
             exp.TimeStrToDate,
             exp.TsOrDsToDate,
@@ -718,6 +731,9 @@ class Dialect(metaclass=_Dialect):
         },
         exp.DataType.Type.INTERVAL: {
             exp.Interval,
+            exp.JustifyDays,
+            exp.JustifyHours,
+            exp.JustifyInterval,
             exp.MakeInterval,
         },
         exp.DataType.Type.JSON: {
@@ -1053,7 +1069,9 @@ class Dialect(metaclass=_Dialect):
         try:
             return parse_json_path(path_text, self)
         except ParseError as e:
-            if self.STRICT_JSON_PATH_SYNTAX:
+            if self.STRICT_JSON_PATH_SYNTAX and not path_text.lstrip().startswith(
+                ("lax", "strict")
+            ):
                 logger.warning(f"Invalid JSON path syntax. {str(e)}")
 
         return path
@@ -1637,22 +1655,59 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE
     return _delta_sql
 
 
+def date_delta_to_binary_interval_op(
+    cast: bool = True,
+) -> t.Callable[[Generator, DATETIME_DELTA], str]:
+    def date_delta_to_binary_interval_op_sql(self: Generator, expression: DATETIME_DELTA) -> str:
+        this = expression.this
+        unit = unit_to_var(expression)
+        op = (
+            "+"
+            if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
+            else "-"
+        )
+
+        to_type: t.Optional[exp.DATA_TYPE] = None
+        if cast:
+            if isinstance(expression, exp.TsOrDsAdd):
+                to_type = expression.return_type
+            elif this.is_string:
+                # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
+                to_type = (
+                    exp.DataType.Type.DATETIME
+                    if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
+                    else exp.DataType.Type.DATE
+                )
+
+        this = exp.cast(this, to_type) if to_type else this
+
+        expr = expression.expression
+        interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)
+
+        return f"{self.sql(this)} {op} {self.sql(interval)}"
+
+    return date_delta_to_binary_interval_op_sql
+
+
 def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
+    if not unit:
+        return exp.Literal.string(default) if default else None
 
-    if isinstance(unit, exp.Placeholder):
+    if isinstance(unit, exp.Placeholder) or type(unit) not in (exp.Var, exp.Literal):
         return unit
-    if unit:
-        return exp.Literal.string(unit.name)
-    return exp.Literal.string(default) if default else None
+
+    return exp.Literal.string(unit.name)
 
 
 def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
 
-    if isinstance(unit, (exp.Var, exp.Placeholder)):
+    if isinstance(unit, (exp.Var, exp.Placeholder, exp.WeekStart)):
         return unit
-    return exp.Var(this=default) if default else None
+
+    value = unit.name if unit else default
+    return exp.Var(this=value) if value else None
 
 
 @t.overload
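
A hand-constructed sketch of the helper changes above (outputs hedged):

```python
from sqlglot import exp
from sqlglot.dialects.dialect import date_delta_to_binary_interval_op, unit_to_var
from sqlglot.generator import Generator

# unit_to_var now lifts a literal unit into a Var instead of
# falling back to the "DAY" default
trunc = exp.DateTrunc(this=exp.column("d"), unit=exp.Literal.string("month"))
print(unit_to_var(trunc))  # Var(this=month)

# the new helper renders date arithmetic as a binary +/- INTERVAL op
add = exp.DateAdd(
    this=exp.Literal.string("2020-01-01"),
    expression=exp.Literal.number(1),
    unit=exp.var("DAY"),
)
print(date_delta_to_binary_interval_op()(Generator(), add))
# expected: CAST('2020-01-01' AS DATE) + INTERVAL 1 DAY
```
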
@@ -1722,7 +1777,10 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:
 
 
 def build_json_extract_path(
-    expr_type: t.Type[F], zero_based_indexing: bool = True, arrow_req_json_type: bool = False
+    expr_type: t.Type[F],
+    zero_based_indexing: bool = True,
+    arrow_req_json_type: bool = False,
+    json_type: t.Optional[str] = None,
 ) -> t.Callable[[t.List], F]:
     def _builder(args: t.List) -> F:
         segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
@@ -1742,11 +1800,19 @@ def build_json_extract_path(
 
         # This is done to avoid failing in the expression validator due to the arg count
         del args[2:]
-        return expr_type(
-            this=seq_get(args, 0),
-            expression=exp.JSONPath(expressions=segments),
-            only_json_types=arrow_req_json_type,
-        )
+        kwargs = {
+            "this": seq_get(args, 0),
+            "expression": exp.JSONPath(expressions=segments),
+        }
+
+        is_jsonb = issubclass(expr_type, (exp.JSONBExtract, exp.JSONBExtractScalar))
+        if not is_jsonb:
+            kwargs["only_json_types"] = arrow_req_json_type
+
+        if json_type is not None:
+            kwargs["json_type"] = json_type
+
+        return expr_type(**kwargs)
 
     return _builder
 
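
The kwargs are now assembled conditionally so JSONB expression types, which do not take `only_json_types`, can share the builder. A small sketch:

```python
from sqlglot import exp
from sqlglot.dialects.dialect import build_json_extract_path

build_jsonb = build_json_extract_path(exp.JSONBExtract)
node = build_jsonb([exp.column("doc"), exp.Literal.string("a")])
print(type(node).__name__)               # JSONBExtract
print(node.args.get("only_json_types"))  # expected: None, the kwarg is omitted for JSONB
```
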
@@ -1954,7 +2020,7 @@ def groupconcat_sql(
     return self.sql(listagg)
 
 
-def build_timetostr_or_tochar(args: t.List, dialect: Dialect) -> exp.TimeToStr | exp.ToChar:
+def build_timetostr_or_tochar(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar:
     if len(args) == 2:
         this = args[0]
         if not this.type:
@@ -1975,12 +2041,3 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
         expression=seq_get(args, 1),
         replacement=seq_get(args, 2) or exp.Literal.string(""),
     )
-
-
-def space_sql(self: Generator, expression: exp.Space) -> str:
-    return self.sql(
-        exp.Repeat(
-            this=exp.Literal.string(" "),
-            times=expression.this,
-        )
-    )
sqlglot/dialects/doris.py CHANGED
@@ -1,15 +1,17 @@
 from __future__ import annotations
 
+import typing as t
+
 from sqlglot import exp
 from sqlglot.dialects.dialect import (
     approx_count_distinct_sql,
-    build_timestamp_trunc,
     property_sql,
     rename_func,
     time_format,
     unit_to_str,
 )
 from sqlglot.dialects.mysql import MySQL
+from sqlglot.helper import seq_get
 from sqlglot.tokens import TokenType
 
 
@@ -22,6 +24,22 @@ def _lag_lead_sql(self, expression: exp.Lag | exp.Lead) -> str:
     )
 
 
+# Accept both DATE_TRUNC(datetime, unit) and DATE_TRUNC(unit, datetime)
+def _build_date_trunc(args: t.List[exp.Expression]) -> exp.Expression:
+    a0, a1 = seq_get(args, 0), seq_get(args, 1)
+
+    def _is_unit_like(e: exp.Expression | None) -> bool:
+        if not (isinstance(e, exp.Literal) and e.is_string):
+            return False
+        text = e.this
+        return not any(ch.isdigit() for ch in text)
+
+    # Determine which argument is the unit
+    unit, this = (a0, a1) if _is_unit_like(a0) else (a1, a0)
+
+    return exp.TimestampTrunc(this=this, unit=unit)
+
+
 class Doris(MySQL):
     DATE_FORMAT = "'yyyy-MM-dd'"
     DATEINT_FORMAT = "'yyyyMMdd'"
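
An illustrative effect of the order-insensitive builder (expected normalization, not asserted by the diff):

```python
from sqlglot import transpile

for sql in (
    "SELECT DATE_TRUNC(dt, 'month')",
    "SELECT DATE_TRUNC('month', dt)",
):
    # both argument orders are expected to normalize to DATE_TRUNC(dt, 'month')
    print(transpile(sql, read="doris", write="doris")[0])
```
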
@@ -31,7 +49,7 @@ class Doris(MySQL):
         FUNCTIONS = {
             **MySQL.Parser.FUNCTIONS,
             "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
-            "DATE_TRUNC": build_timestamp_trunc,
+            "DATE_TRUNC": _build_date_trunc,
             "MONTHS_ADD": exp.AddMonths.from_arg_list,
             "REGEXP": exp.RegexpLike.from_arg_list,
             "TO_DATE": exp.TsOrDsToDate.from_arg_list,
@@ -40,6 +58,9 @@ class Doris(MySQL):
         FUNCTION_PARSERS = MySQL.Parser.FUNCTION_PARSERS.copy()
         FUNCTION_PARSERS.pop("GROUP_CONCAT")
 
+        NO_PAREN_FUNCTIONS = MySQL.Parser.NO_PAREN_FUNCTIONS.copy()
+        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_DATE)
+
         PROPERTY_PARSERS = {
             **MySQL.Parser.PROPERTY_PARSERS,
             "PROPERTIES": lambda self: self._parse_wrapped_properties(),
@@ -111,6 +132,7 @@ class Doris(MySQL):
         LAST_DAY_SUPPORTS_DATE_PART = False
         VARCHAR_REQUIRES_SIZE = False
         WITH_PROPERTIES_PREFIX = "PROPERTIES"
+        RENAME_TABLE_WITH_DB = False
 
         TYPE_MAPPING = {
             **MySQL.Generator.TYPE_MAPPING,
@@ -123,6 +145,7 @@ class Doris(MySQL):
             **MySQL.Generator.PROPERTIES_LOCATION,
             exp.UniqueKeyProperty: exp.Properties.Location.POST_SCHEMA,
             exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
+            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
         }
 
         CAST_MAPPING = {}
@@ -137,6 +160,7 @@ class Doris(MySQL):
             exp.ArrayAgg: rename_func("COLLECT_LIST"),
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
             exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
+            exp.CurrentDate: lambda self, _: self.func("CURRENT_DATE"),
             exp.CurrentTimestamp: lambda self, _: self.func("NOW"),
             exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, unit_to_str(e)),
             exp.GroupConcat: lambda self, e: self.func(
@@ -683,3 +707,20 @@ class Doris(MySQL):
             # Handle both static and dynamic partition definitions
             create_sql = ", ".join(self.sql(e) for e in create_expressions)
             return f"PARTITION BY RANGE ({partition_expressions}) ({create_sql})"
+
+        def partitionedbyproperty_sql(self, expression: exp.PartitionedByProperty) -> str:
+            node = expression.this
+            if isinstance(node, exp.Schema):
+                parts = ", ".join(self.sql(e) for e in node.expressions)
+                return f"PARTITION BY ({parts})"
+            return f"PARTITION BY ({self.sql(node)})"
+
+        def table_sql(self, expression: exp.Table, sep: str = " AS ") -> str:
+            """Override table_sql to avoid AS keyword in UPDATE and DELETE statements."""
+            ancestor = expression.find_ancestor(exp.Update, exp.Delete, exp.Select)
+            if not isinstance(ancestor, exp.Select):
+                sep = " "
+            return super().table_sql(expression, sep=sep)
+
+        def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str:
+            return super().alterrename_sql(expression, include_to=False)
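
A hedged sketch of the `table_sql` override's effect on UPDATE aliases:

```python
from sqlglot import transpile

# expected: UPDATE t a SET x = 1 (no AS keyword outside of SELECT)
print(transpile("UPDATE t AS a SET x = 1", read="doris", write="doris")[0])
```
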