sqlglot 27.7.0__py3-none-any.whl → 27.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +16 -3
- sqlglot/dialects/__init__.py +1 -1
- sqlglot/dialects/bigquery.py +129 -9
- sqlglot/dialects/clickhouse.py +11 -0
- sqlglot/dialects/databricks.py +5 -1
- sqlglot/dialects/dialect.py +74 -23
- sqlglot/dialects/doris.py +77 -9
- sqlglot/dialects/dremio.py +102 -21
- sqlglot/dialects/duckdb.py +20 -43
- sqlglot/dialects/exasol.py +28 -0
- sqlglot/dialects/mysql.py +0 -48
- sqlglot/dialects/presto.py +0 -2
- sqlglot/dialects/redshift.py +1 -0
- sqlglot/dialects/singlestore.py +252 -13
- sqlglot/dialects/spark.py +6 -0
- sqlglot/dialects/trino.py +1 -0
- sqlglot/dialects/tsql.py +2 -0
- sqlglot/expressions.py +143 -7
- sqlglot/generator.py +98 -27
- sqlglot/jsonpath.py +10 -3
- sqlglot/optimizer/qualify_columns.py +1 -1
- sqlglot/parser.py +58 -17
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/METADATA +42 -2
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/RECORD +27 -27
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/WHEEL +0 -0
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.7.0.dist-info → sqlglot-27.9.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py
CHANGED
```diff
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control

-__all__ = [
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]

 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union

     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object

 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID

-__version__ = version = '27.7.0'
-__version_tuple__ = version_tuple = (27, 7, 0)
+__version__ = version = '27.9.0'
+__version_tuple__ = version_tuple = (27, 9, 0)
+
+__commit_id__ = commit_id = None
```
sqlglot/dialects/__init__.py
CHANGED
```diff
@@ -75,6 +75,7 @@ DIALECTS = [
     "Druid",
     "DuckDB",
     "Dune",
+    "Exasol",
     "Fabric",
     "Hive",
     "Materialize",
@@ -95,7 +96,6 @@ DIALECTS = [
     "Teradata",
     "Trino",
     "TSQL",
-    "Exasol",
 ]

 MODULE_BY_DIALECT = {name: name.lower() for name in DIALECTS}
```
sqlglot/dialects/bigquery.py
CHANGED
```diff
@@ -4,7 +4,7 @@ import logging
 import re
 import typing as t

-from sqlglot import exp, generator, parser, tokens, transforms
+from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
 from sqlglot._typing import E
 from sqlglot.dialects.dialect import (
     Dialect,
@@ -30,7 +30,6 @@ from sqlglot.dialects.dialect import (
     unit_to_var,
     strposition_sql,
     groupconcat_sql,
-    space_sql,
 )
 from sqlglot.helper import seq_get, split_num_words
 from sqlglot.tokens import TokenType
@@ -296,6 +295,22 @@ def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
     return expression


+def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
+    self._annotate_args(expression)
+
+    struct_type = exp.DataType(
+        this=exp.DataType.Type.STRUCT,
+        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
+        nested=True,
+    )
+    self._set_type(
+        expression,
+        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
+    )
+
+    return expression
+
+
 @unsupported_args("ins_cost", "del_cost", "sub_cost")
 def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
     max_dist = expression.args.get("max_dist")
@@ -474,15 +489,24 @@ class BigQuery(Dialect):
                 exp.Substring,
             )
         },
+        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
+        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Array: _annotate_array,
         exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
         exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BINARY
+        ),
         exp.CodePointsToString: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
         ),
@@ -492,9 +516,13 @@ class BigQuery(Dialect):
         exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
         exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
+        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
+        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
         exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
             e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
         ),
+        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
         exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
         exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
             e, exp.DataType.Type.VARCHAR
@@ -504,9 +532,21 @@ class BigQuery(Dialect):
         ),
         exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
+        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
+        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
+        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.BIGDECIMAL
+        ),
+        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
+        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
+        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
+        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.Type.VARCHAR
+        ),
+        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
         exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
         exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
@@ -516,8 +556,12 @@ class BigQuery(Dialect):
         ),
         exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
         exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
-        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
         exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
+            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
+        ),
+        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
+        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
         exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
     }

@@ -550,6 +594,12 @@ class BigQuery(Dialect):

         return super().normalize_identifier(expression)

+    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
+        VAR_TOKENS = {
+            TokenType.DASH,
+            TokenType.VAR,
+        }
+
     class Tokenizer(tokens.Tokenizer):
         QUOTES = ["'", '"', '"""', "'''"]
         COMMENTS = ["--", "#", ("/*", "*/")]
@@ -583,10 +633,13 @@ class BigQuery(Dialect):
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
+           "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
+           "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
+           "WHILE": TokenType.COMMAND,
         }
         KEYWORDS.pop("DIV")
         KEYWORDS.pop("VALUES")
@@ -610,6 +663,8 @@ class BigQuery(Dialect):

         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
+            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
+            "BOOL": exp.JSONBool.from_arg_list,
             "CONTAINS_SUBSTR": _build_contains_substring,
             "DATE": _build_date,
             "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
@@ -676,6 +731,7 @@ class BigQuery(Dialect):
             "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
             "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
             "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
+            "FROM_HEX": exp.Unhex.from_arg_list,
             "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
         }

@@ -686,7 +742,10 @@ class BigQuery(Dialect):
                 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
             ),
             "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
+            "PREDICT": lambda self: self._parse_predict(),
             "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
+            "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
+            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
         }
         FUNCTION_PARSERS.pop("TRIM")

@@ -966,13 +1025,40 @@ class BigQuery(Dialect):

            return expr

-        def
-
-
-
-
+        def _parse_predict(self) -> exp.Predict:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.Predict,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
+            )
+
+        def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
+            self._match_text_seq("MODEL")
+            this = self._parse_table()
+
+            self._match(TokenType.COMMA)
+            self._match_text_seq("TABLE")
+
+            return self.expression(
+                exp.GenerateEmbedding,
+                this=this,
+                expression=self._parse_table(),
+                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
             )

+        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
+            self._match(TokenType.TABLE)
+            this = self._parse_table()
+
+            expr = self.expression(exp.FeaturesAtTime, this=this)
+
             while self._match(TokenType.COMMA):
                 arg = self._parse_lambda()

@@ -983,6 +1069,37 @@ class BigQuery(Dialect):

             return expr

+        def _parse_vector_search(self) -> exp.VectorSearch:
+            self._match(TokenType.TABLE)
+            base_table = self._parse_table()
+
+            self._match(TokenType.COMMA)
+
+            column_to_search = self._parse_bitwise()
+            self._match(TokenType.COMMA)
+
+            self._match(TokenType.TABLE)
+            query_table = self._parse_table()
+
+            expr = self.expression(
+                exp.VectorSearch,
+                this=base_table,
+                column_to_search=column_to_search,
+                query_table=query_table,
+            )
+
+            while self._match(TokenType.COMMA):
+                # query_column_to_search can be named argument or positional
+                if self._match(TokenType.STRING, advance=False):
+                    query_column = self._parse_string()
+                    expr.set("query_column_to_search", query_column)
+                else:
+                    arg = self._parse_lambda()
+                    if arg:
+                        expr.set(arg.this.name, arg)
+
+            return expr
+
         def _parse_export_data(self) -> exp.Export:
             self._match_text_seq("DATA")

@@ -1019,6 +1136,8 @@ class BigQuery(Dialect):
         EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
         SUPPORTS_UNIX_SECONDS = True

+        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")
+
         TS_OR_DS_TYPES = (
             exp.TsOrDsToDatetime,
             exp.TsOrDsToTimestamp,
@@ -1028,6 +1147,7 @@ class BigQuery(Dialect):

         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
+            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
             exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
             exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
             exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
@@ -1068,6 +1188,7 @@ class BigQuery(Dialect):
             exp.ILike: no_ilike_sql,
             exp.IntDiv: rename_func("DIV"),
             exp.Int64: rename_func("INT64"),
+            exp.JSONBool: rename_func("BOOL"),
             exp.JSONExtract: _json_extract_sql,
             exp.JSONExtractArray: _json_extract_sql,
             exp.JSONExtractScalar: _json_extract_sql,
@@ -1107,7 +1228,6 @@ class BigQuery(Dialect):
             ),
             exp.SHA: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
-            exp.Space: space_sql,
             exp.StabilityProperty: lambda self, e: (
                 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
             ),
```
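Taken together, the BigQuery changes register `APPROX_TOP_COUNT`, `BOOL`, and `FROM_HEX` as known functions, add dedicated parsers for the ML/vector table functions (`PREDICT`, `GENERATE_EMBEDDING`, `VECTOR_SEARCH`), and widen type-annotation coverage. A sketch of the round-trip behavior these hunks imply (expected outputs are my reading of the diff, not verified against the release):

```python
import sqlglot
from sqlglot import exp

# APPROX_TOP_COUNT(x, 10) should parse into exp.ApproxTopK and generate back unchanged,
# since the parser maps the name to ApproxTopK and the generator renames it back
ast = sqlglot.parse_one("SELECT APPROX_TOP_COUNT(x, 10) FROM t", read="bigquery")
assert ast.find(exp.ApproxTopK) is not None
print(ast.sql(dialect="bigquery"))  # expected: SELECT APPROX_TOP_COUNT(x, 10) FROM t

# VECTOR_SEARCH now captures the base table, the column to search, and the query
# table as structured args instead of falling back to a generic anonymous function
vs = sqlglot.parse_one(
    "SELECT * FROM VECTOR_SEARCH(TABLE db.base, 'embedding', TABLE db.queries)",
    read="bigquery",
)
print(vs.find(exp.VectorSearch))
```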
sqlglot/dialects/clickhouse.py
CHANGED
```diff
@@ -345,6 +345,7 @@ class ClickHouse(Dialect):
             "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
         }
         FUNCTIONS.pop("TRANSFORM")
+        FUNCTIONS.pop("APPROX_TOP_SUM")

         AGG_FUNCTIONS = {
             "count",
@@ -379,6 +380,7 @@ class ClickHouse(Dialect):
             "argMax",
             "avgWeighted",
             "topK",
+            "approx_top_sum",
             "topKWeighted",
             "deltaSum",
             "deltaSumTimestamp",
@@ -977,6 +979,14 @@ class ClickHouse(Dialect):

             return value

+        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
+            # ClickHouse allows custom expressions as partition key
+            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
+            return self.expression(
+                exp.PartitionedByProperty,
+                this=self._parse_assignment(),
+            )
+
     class Generator(generator.Generator):
         QUERY_HINTS = False
         STRUCT_DELIMITER = ("(", ")")
@@ -1094,6 +1104,7 @@ class ClickHouse(Dialect):
             exp.DateStrToDate: rename_func("toDate"),
             exp.DateSub: _datetime_delta_sql("DATE_SUB"),
             exp.Explode: rename_func("arrayJoin"),
+            exp.FarmFingerprint: rename_func("farmFingerprint64"),
             exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
             exp.IsNan: rename_func("isNaN"),
             exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
```
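The `_parse_partitioned_by` override matters because ClickHouse accepts arbitrary expressions, not just column lists, as the partition key, and the new `FarmFingerprint` transform gives BigQuery's `FARM_FINGERPRINT` a ClickHouse spelling. A sketch of both (expected outputs inferred from the diff):

```python
import sqlglot

# PARTITION BY now goes through _parse_assignment, so expression keys
# like toYYYYMM(ts) should survive a ClickHouse round trip
ddl = """
CREATE TABLE visits (ts DateTime, user_id UInt64)
ENGINE=MergeTree
PARTITION BY toYYYYMM(ts)
ORDER BY ts
"""
print(sqlglot.transpile(ddl, read="clickhouse", write="clickhouse")[0])

# FarmFingerprint is rendered with ClickHouse's native function name
print(sqlglot.transpile("SELECT FARM_FINGERPRINT(x)", read="bigquery", write="clickhouse")[0])
# expected: SELECT farmFingerprint64(x)
```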
sqlglot/dialects/databricks.py
CHANGED
```diff
@@ -99,7 +99,11 @@ class Databricks(Spark):
             exp.JSONExtract: _jsonextract_sql,
             exp.JSONExtractScalar: _jsonextract_sql,
             exp.JSONPathRoot: lambda *_: "",
-            exp.ToChar: lambda self, e:
+            exp.ToChar: lambda self, e: (
+                self.cast_sql(exp.Cast(this=e.this, to=exp.DataType(this="STRING")))
+                if e.args.get("is_numeric")
+                else self.function_fallback_sql(e)
+            ),
         }

         TRANSFORMS.pop(exp.TryCast)
```
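The reworked `exp.ToChar` transform renders numeric conversions as a plain cast to STRING and leaves everything else on the `TO_CHAR` function fallback. A hypothetical construction to illustrate the branch (the `is_numeric` flag is normally set by the source dialect's parser; setting it by hand here is only for demonstration):

```python
from sqlglot import exp
from sqlglot.dialects.databricks import Databricks

e = exp.ToChar(this=exp.column("x"))
e.set("is_numeric", exp.true())  # hand-set for illustration; parsers set this for numeric formats
print(Databricks().generate(e))  # expected: CAST(x AS STRING)

e2 = exp.ToChar(this=exp.column("d"), format=exp.Literal.string("yyyy-MM-dd"))
print(Databricks().generate(e2))  # expected fallback: TO_CHAR(d, 'yyyy-MM-dd')
```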
sqlglot/dialects/dialect.py
CHANGED
```diff
@@ -35,8 +35,18 @@ DATE_ADD_OR_DIFF = t.Union[
     exp.TsOrDsDiff,
 ]
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
-JSON_EXTRACT_TYPE = t.Union[
-
+JSON_EXTRACT_TYPE = t.Union[
+    exp.JSONExtract, exp.JSONExtractScalar, exp.JSONBExtract, exp.JSONBExtractScalar
+]
+DATETIME_DELTA = t.Union[
+    exp.DateAdd,
+    exp.DatetimeAdd,
+    exp.DatetimeSub,
+    exp.TimeAdd,
+    exp.TimeSub,
+    exp.TimestampSub,
+    exp.TsOrDsAdd,
+]

 if t.TYPE_CHECKING:
     from sqlglot._typing import B, E, F
@@ -658,6 +668,7 @@ class Dialect(metaclass=_Dialect):
             exp.UnixMillis,
         },
         exp.DataType.Type.BINARY: {
+            exp.FromBase32,
             exp.FromBase64,
         },
         exp.DataType.Type.BOOLEAN: {
@@ -769,6 +780,7 @@ class Dialect(metaclass=_Dialect):
             exp.TimeToStr,
             exp.TimeToTimeStr,
             exp.Trim,
+            exp.ToBase32,
             exp.ToBase64,
             exp.TsOrDsToDateStr,
             exp.UnixToStr,
@@ -1059,7 +1071,9 @@ class Dialect(metaclass=_Dialect):
         try:
             return parse_json_path(path_text, self)
         except ParseError as e:
-            if self.STRICT_JSON_PATH_SYNTAX:
+            if self.STRICT_JSON_PATH_SYNTAX and not path_text.lstrip().startswith(
+                ("lax", "strict")
+            ):
                 logger.warning(f"Invalid JSON path syntax. {str(e)}")

         return path
@@ -1643,14 +1657,49 @@ def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE
     return _delta_sql


+def date_delta_to_binary_interval_op(
+    cast: bool = True,
+) -> t.Callable[[Generator, DATETIME_DELTA], str]:
+    def date_delta_to_binary_interval_op_sql(self: Generator, expression: DATETIME_DELTA) -> str:
+        this = expression.this
+        unit = unit_to_var(expression)
+        op = (
+            "+"
+            if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
+            else "-"
+        )
+
+        to_type: t.Optional[exp.DATA_TYPE] = None
+        if cast:
+            if isinstance(expression, exp.TsOrDsAdd):
+                to_type = expression.return_type
+            elif this.is_string:
+                # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work
+                to_type = (
+                    exp.DataType.Type.DATETIME
+                    if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
+                    else exp.DataType.Type.DATE
+                )
+
+        this = exp.cast(this, to_type) if to_type else this
+
+        expr = expression.expression
+        interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)
+
+        return f"{self.sql(this)} {op} {self.sql(interval)}"
+
+    return date_delta_to_binary_interval_op_sql
+
+
 def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
     unit = expression.args.get("unit")
+    if not unit:
+        return exp.Literal.string(default) if default else None

-    if isinstance(unit, exp.Placeholder):
+    if isinstance(unit, exp.Placeholder) or type(unit) not in (exp.Var, exp.Literal):
         return unit
-
-
-    return exp.Literal.string(default) if default else None
+
+    return exp.Literal.string(unit.name)


 def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
@@ -1730,7 +1779,10 @@ def merge_without_target_sql(self: Generator, expression: exp.Merge) -> str:


 def build_json_extract_path(
-    expr_type: t.Type[F],
+    expr_type: t.Type[F],
+    zero_based_indexing: bool = True,
+    arrow_req_json_type: bool = False,
+    json_type: t.Optional[str] = None,
 ) -> t.Callable[[t.List], F]:
     def _builder(args: t.List) -> F:
         segments: t.List[exp.JSONPathPart] = [exp.JSONPathRoot()]
@@ -1750,11 +1802,19 @@ def build_json_extract_path(

         # This is done to avoid failing in the expression validator due to the arg count
         del args[2:]
-
-        this
-        expression
-
-
+        kwargs = {
+            "this": seq_get(args, 0),
+            "expression": exp.JSONPath(expressions=segments),
+        }
+
+        is_jsonb = issubclass(expr_type, (exp.JSONBExtract, exp.JSONBExtractScalar))
+        if not is_jsonb:
+            kwargs["only_json_types"] = arrow_req_json_type
+
+        if json_type is not None:
+            kwargs["json_type"] = json_type
+
+        return expr_type(**kwargs)

     return _builder

@@ -1962,7 +2022,7 @@ def groupconcat_sql(
     return self.sql(listagg)


-def build_timetostr_or_tochar(args: t.List, dialect:
+def build_timetostr_or_tochar(args: t.List, dialect: DialectType) -> exp.TimeToStr | exp.ToChar:
     if len(args) == 2:
         this = args[0]
         if not this.type:
@@ -1983,12 +2043,3 @@ def build_replace_with_optional_replacement(args: t.List) -> exp.Replace:
         expression=seq_get(args, 1),
         replacement=seq_get(args, 2) or exp.Literal.string(""),
     )
-
-
-def space_sql(self: Generator, expression: exp.Space) -> str:
-    return self.sql(
-        exp.Repeat(
-            this=exp.Literal.string(" "),
-            times=expression.this,
-        )
-    )
```
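The new `date_delta_to_binary_interval_op` factory turns `DATE_ADD`-style expressions into binary `+`/`-` interval arithmetic, casting string operands first so the operator is well-typed. A hypothetical dialect wiring it up (`MyDialect` and the expected output are illustrative, not part of the release):

```python
from sqlglot import exp, parse_one
from sqlglot.dialects.dialect import date_delta_to_binary_interval_op
from sqlglot.dialects.duckdb import DuckDB

class MyDialect(DuckDB):  # hypothetical dialect, for illustration only
    class Generator(DuckDB.Generator):
        TRANSFORMS = {
            **DuckDB.Generator.TRANSFORMS,
            # Render DATE_ADD as `<this> + <interval>`; string operands are cast to DATE
            exp.DateAdd: date_delta_to_binary_interval_op(),
        }

sql = parse_one("DATE_ADD('2020-01-01', INTERVAL 1 DAY)").sql(dialect=MyDialect)
print(sql)  # expected: CAST('2020-01-01' AS DATE) + INTERVAL '1' DAY
```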