sqlglot 28.4.0__py3-none-any.whl → 28.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +20 -23
- sqlglot/dialects/clickhouse.py +2 -0
- sqlglot/dialects/dialect.py +355 -18
- sqlglot/dialects/doris.py +38 -90
- sqlglot/dialects/druid.py +1 -0
- sqlglot/dialects/duckdb.py +1739 -163
- sqlglot/dialects/exasol.py +17 -1
- sqlglot/dialects/hive.py +27 -2
- sqlglot/dialects/mysql.py +103 -11
- sqlglot/dialects/oracle.py +38 -1
- sqlglot/dialects/postgres.py +142 -33
- sqlglot/dialects/presto.py +6 -2
- sqlglot/dialects/redshift.py +7 -1
- sqlglot/dialects/singlestore.py +13 -3
- sqlglot/dialects/snowflake.py +271 -21
- sqlglot/dialects/spark.py +25 -0
- sqlglot/dialects/spark2.py +4 -3
- sqlglot/dialects/starrocks.py +152 -17
- sqlglot/dialects/trino.py +1 -0
- sqlglot/dialects/tsql.py +5 -0
- sqlglot/diff.py +1 -1
- sqlglot/expressions.py +239 -47
- sqlglot/generator.py +173 -44
- sqlglot/optimizer/annotate_types.py +129 -60
- sqlglot/optimizer/merge_subqueries.py +13 -2
- sqlglot/optimizer/qualify_columns.py +7 -0
- sqlglot/optimizer/resolver.py +19 -0
- sqlglot/optimizer/scope.py +12 -0
- sqlglot/optimizer/unnest_subqueries.py +7 -0
- sqlglot/parser.py +251 -58
- sqlglot/schema.py +186 -14
- sqlglot/tokens.py +36 -6
- sqlglot/transforms.py +6 -5
- sqlglot/typing/__init__.py +29 -10
- sqlglot/typing/bigquery.py +5 -10
- sqlglot/typing/duckdb.py +39 -0
- sqlglot/typing/hive.py +50 -1
- sqlglot/typing/mysql.py +32 -0
- sqlglot/typing/presto.py +0 -1
- sqlglot/typing/snowflake.py +80 -17
- sqlglot/typing/spark.py +29 -0
- sqlglot/typing/spark2.py +9 -1
- sqlglot/typing/tsql.py +21 -0
- {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/METADATA +47 -2
- sqlglot-28.8.0.dist-info/RECORD +95 -0
- {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/WHEEL +1 -1
- sqlglot-28.4.0.dist-info/RECORD +0 -92
- {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/postgres.py
CHANGED
@@ -8,11 +8,14 @@ from sqlglot.dialects.dialect import (
     Dialect,
     JSON_EXTRACT_TYPE,
     any_value_to_max_sql,
+    array_append_sql,
+    array_concat_sql,
     binary_from_function,
     bool_xor_sql,
     datestrtodate_sql,
     build_formatted_time,
     filter_array_using_unnest,
+    getbit_sql,
     inline_array_sql,
     json_extract_segments,
     json_path_key_only_name,
@@ -345,6 +348,7 @@ class Postgres(Dialect):
         BIT_STRINGS = [("b'", "'"), ("B'", "'")]
         HEX_STRINGS = [("x'", "'"), ("X'", "'")]
         BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
+        BYTE_STRING_ESCAPES = ["'", "\\"]
         HEREDOC_STRINGS = ["$"]

         HEREDOC_TAG_IS_IDENTIFIER = True
@@ -358,8 +362,6 @@ class Postgres(Dialect):
             "<@": TokenType.LT_AT,
             "?&": TokenType.QMARK_AMP,
             "?|": TokenType.QMARK_PIPE,
-            "&<": TokenType.AMP_LT,
-            "&>": TokenType.AMP_GT,
             "#-": TokenType.HASH_DASH,
             "|/": TokenType.PIPE_SLASH,
             "||/": TokenType.DPIPE_SLASH,
@@ -376,7 +378,7 @@ class Postgres(Dialect):
             "NAME": TokenType.NAME,
             "OID": TokenType.OBJECT_IDENTIFIER,
             "ONLY": TokenType.ONLY,
-            "
+            "POINT": TokenType.POINT,
             "REFRESH": TokenType.COMMAND,
             "REINDEX": TokenType.COMMAND,
             "RESET": TokenType.COMMAND,
@@ -396,6 +398,8 @@ class Postgres(Dialect):
             "REGTYPE": TokenType.OBJECT_IDENTIFIER,
             "FLOAT": TokenType.DOUBLE,
             "XML": TokenType.XML,
+            "VARIADIC": TokenType.VARIADIC,
+            "INOUT": TokenType.INOUT,
         }
         KEYWORDS.pop("/*+")
         KEYWORDS.pop("DIV")
@@ -424,6 +428,9 @@ class Postgres(Dialect):

         FUNCTIONS = {
             **parser.Parser.FUNCTIONS,
+            "ARRAY_PREPEND": lambda args: exp.ArrayPrepend(
+                this=seq_get(args, 1), expression=seq_get(args, 0)
+            ),
             "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
             "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
             "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
@@ -432,6 +439,9 @@ class Postgres(Dialect):
                 binary_from_function(exp.IntDiv)(args), exp.DataType.Type.DECIMAL
             ),
             "GENERATE_SERIES": _build_generate_series,
+            "GET_BIT": lambda args: exp.Getbit(
+                this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
+            ),
             "JSON_EXTRACT_PATH": build_json_extract_path(exp.JSONExtract),
             "JSON_EXTRACT_PATH_TEXT": build_json_extract_path(exp.JSONExtractScalar),
             "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), encoding=seq_get(args, 1)),
@@ -449,6 +459,16 @@ class Postgres(Dialect):
             "LEVENSHTEIN_LESS_EQUAL": _build_levenshtein_less_equal,
             "JSON_OBJECT_AGG": lambda args: exp.JSONObjectAgg(expressions=args),
             "JSONB_OBJECT_AGG": exp.JSONBObjectAgg.from_arg_list,
+            "WIDTH_BUCKET": lambda args: exp.WidthBucket(
+                this=seq_get(args, 0), threshold=seq_get(args, 1)
+            )
+            if len(args) == 2
+            else exp.WidthBucket.from_arg_list(args),
+        }
+
+        NO_PAREN_FUNCTION_PARSERS = {
+            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
+            "VARIADIC": lambda self: self.expression(exp.Variadic, this=self._parse_bitwise()),
         }

         NO_PAREN_FUNCTIONS = {
@@ -479,12 +499,9 @@ class Postgres(Dialect):
         RANGE_PARSERS = {
             **parser.Parser.RANGE_PARSERS,
             TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
-            TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
-            TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
             TokenType.DAT: lambda self, this: self.expression(
                 exp.MatchAgainst, this=self._parse_bitwise(), expressions=[this]
             ),
-            TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
         }

         STATEMENT_PARSERS = {
@@ -492,6 +509,12 @@ class Postgres(Dialect):
             TokenType.END: lambda self: self._parse_commit_or_rollback(),
         }

+        UNARY_PARSERS = {
+            **parser.Parser.UNARY_PARSERS,
+            # The `~` token is remapped from TILDE to RLIKE in Postgres due to the binary REGEXP LIKE operator
+            TokenType.RLIKE: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
+        }
+
         JSON_ARROWS_REQUIRE_JSON_TYPE = True

         COLUMN_OPERATORS = {
@@ -508,6 +531,88 @@ class Postgres(Dialect):
             ),
         }

+        ARG_MODE_TOKENS = {TokenType.IN, TokenType.OUT, TokenType.INOUT, TokenType.VARIADIC}
+
+        def _parse_parameter_mode(self) -> t.Optional[TokenType]:
+            """
+            Parse PostgreSQL function parameter mode (IN, OUT, INOUT, VARIADIC).
+
+            Disambiguates between mode keywords and identifiers with the same name:
+            - MODE TYPE → keyword is identifier (e.g., "out INT")
+            - MODE NAME TYPE → keyword is mode (e.g., "OUT x INT")
+
+            Returns:
+                Mode token type if current token is a mode keyword, None otherwise.
+            """
+            if not self._match_set(self.ARG_MODE_TOKENS, advance=False) or not self._next:
+                return None
+
+            mode_token = self._curr
+
+            # Check Pattern 1: MODE TYPE
+            # Try parsing next token as a built-in type (not UDT)
+            # If successful, the keyword is an identifier, not a mode
+            is_followed_by_builtin_type = self._try_parse(
+                lambda: self._advance()  # type: ignore
+                or self._parse_types(check_func=False, allow_identifiers=False),
+                retreat=True,
+            )
+            if is_followed_by_builtin_type:
+                return None  # Pattern: "out INT" → out is parameter name
+
+            # Check Pattern 2: MODE NAME TYPE
+            # If next token is an identifier, check if there's a type after it
+            # The type can be built-in or user-defined (allow_identifiers=True)
+            if self._next.token_type not in self.ID_VAR_TOKENS:
+                return None
+
+            is_followed_by_any_type = self._try_parse(
+                lambda: self._advance(2)  # type: ignore
+                or self._parse_types(check_func=False, allow_identifiers=True),
+                retreat=True,
+            )
+
+            if is_followed_by_any_type:
+                return mode_token.token_type  # Pattern: "OUT x INT" → OUT is mode
+
+            return None
+
+        def _create_mode_constraint(self, param_mode: TokenType) -> exp.InOutColumnConstraint:
+            """
+            Create parameter mode constraint for function parameters.
+
+            Args:
+                param_mode: The parameter mode token (IN, OUT, INOUT, or VARIADIC).
+
+            Returns:
+                InOutColumnConstraint expression representing the parameter mode.
+            """
+            return self.expression(
+                exp.InOutColumnConstraint,
+                input_=(param_mode in {TokenType.IN, TokenType.INOUT}),
+                output=(param_mode in {TokenType.OUT, TokenType.INOUT}),
+                variadic=(param_mode == TokenType.VARIADIC),
+            )
+
+        def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
+            param_mode = self._parse_parameter_mode()
+
+            if param_mode:
+                self._advance()
+
+            # Parse parameter name and type
+            param_name = self._parse_id_var()
+            column_def = self._parse_column_def(this=param_name, computed_column=False)
+
+            # Attach mode as constraint
+            if param_mode and column_def:
+                constraint = self._create_mode_constraint(param_mode)
+                if not column_def.args.get("constraints"):
+                    column_def.set("constraints", [])
+                column_def.args["constraints"].insert(0, constraint)
+
+            return column_def
+
         def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
             this = (
                 self._parse_wrapped(self._parse_id_var)
@@ -517,29 +622,6 @@ class Postgres(Dialect):
             self._match_text_seq("S")
             return self.expression(exp.Placeholder, this=this)

-        def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
-            while True:
-                if not self._match(TokenType.L_PAREN):
-                    break
-
-                op = ""
-                while self._curr and not self._match(TokenType.R_PAREN):
-                    op += self._curr.text
-                    self._advance()
-
-                this = self.expression(
-                    exp.Operator,
-                    comments=self._prev_comments,
-                    this=this,
-                    operator=op,
-                    expression=self._parse_bitwise(),
-                )
-
-                if not self._match(TokenType.OPERATOR):
-                    break
-
-            return this
-
         def _parse_date_part(self) -> exp.Expression:
             part = self._parse_type()
             self._match(TokenType.COMMA)
@@ -611,6 +693,7 @@ class Postgres(Dialect):
         SUPPORTS_MEDIAN = False
         ARRAY_SIZE_DIM_REQUIRED = True
         SUPPORTS_BETWEEN_FLAGS = True
+        INOUT_SEPARATOR = ""  # PostgreSQL uses "INOUT" (no space)

         SUPPORTED_JSON_PATH_PARTS = {
             exp.JSONPathKey,
@@ -618,6 +701,14 @@ class Postgres(Dialect):
             exp.JSONPathSubscript,
         }

+        def lateral_sql(self, expression: exp.Lateral) -> str:
+            sql = super().lateral_sql(expression)
+
+            if expression.args.get("cross_apply") is not None:
+                sql = f"{sql} ON TRUE"
+
+            return sql
+
         TYPE_MAPPING = {
             **generator.Generator.TYPE_MAPPING,
             exp.DataType.Type.TINYINT: "SMALLINT",
@@ -634,8 +725,10 @@ class Postgres(Dialect):
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
             exp.AnyValue: _versioned_anyvalue_sql,
-            exp.ArrayConcat:
+            exp.ArrayConcat: array_concat_sql("ARRAY_CAT"),
             exp.ArrayFilter: filter_array_using_unnest,
+            exp.ArrayAppend: array_append_sql("ARRAY_APPEND"),
+            exp.ArrayPrepend: array_append_sql("ARRAY_PREPEND", swap_params=True),
             exp.BitwiseAndAgg: rename_func("BIT_AND"),
             exp.BitwiseOrAgg: rename_func("BIT_OR"),
             exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
@@ -650,6 +743,7 @@ class Postgres(Dialect):
             exp.DateSub: _date_add_sql("-"),
             exp.Explode: rename_func("UNNEST"),
             exp.ExplodingGenerateSeries: rename_func("GENERATE_SERIES"),
+            exp.Getbit: getbit_sql,
             exp.GroupConcat: lambda self, e: groupconcat_sql(
                 self, e, func_name="STRING_AGG", within_group=False
             ),
@@ -714,7 +808,9 @@ class Postgres(Dialect):
             exp.TimestampTrunc: timestamptrunc_sql(zone=True),
             exp.TimeStrToTime: timestrtotime_sql,
             exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
-            exp.ToChar: lambda self, e: self.function_fallback_sql(e)
+            exp.ToChar: lambda self, e: self.function_fallback_sql(e)
+            if e.args.get("format")
+            else self.tochar_sql(e),
             exp.Trim: trim_sql,
             exp.TryCast: no_trycast_sql,
             exp.TsOrDsAdd: _date_add_sql("+"),
@@ -752,6 +848,18 @@ class Postgres(Dialect):
             self.unsupported("Column comments are not supported in the CREATE statement")
             return ""

+        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
+            # PostgreSQL places parameter modes BEFORE parameter name
+            param_constraint = expression.find(exp.InOutColumnConstraint)
+
+            if param_constraint:
+                mode_sql = self.sql(param_constraint)
+                param_constraint.pop()  # Remove to prevent double-rendering
+                base_sql = super().columndef_sql(expression, sep)
+                return f"{mode_sql} {base_sql}"
+
+            return super().columndef_sql(expression, sep)
+
         def unnest_sql(self, expression: exp.Unnest) -> str:
             if len(expression.expressions) == 1:
                 arg = expression.expressions[0]
@@ -865,8 +973,9 @@ class Postgres(Dialect):
         def interval_sql(self, expression: exp.Interval) -> str:
             unit = expression.text("unit").lower()

-
-
+            this = expression.this
+            if unit.startswith("quarter") and isinstance(this, exp.Literal):
+                this.replace(exp.Literal.string(int(this.to_py()) * 3))
                 expression.args["unit"].replace(exp.var("MONTH"))

             return super().interval_sql(expression)
sqlglot/dialects/presto.py
CHANGED
@@ -267,6 +267,7 @@ class Presto(Dialect):
     TABLESAMPLE_SIZE_IS_PERCENT = True
     LOG_BASE_FIRST: t.Optional[bool] = None
     SUPPORTS_VALUES_DEFAULT = False
+    LEAST_GREATEST_IGNORES_NULLS = False

     TIME_MAPPING = MySQL.TIME_MAPPING

@@ -373,6 +374,7 @@ class Presto(Dialect):
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
+           "WEEK": exp.WeekOfYear.from_arg_list,
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
@@ -707,9 +709,11 @@ class Presto(Dialect):
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
-
+           if not expression.type:
+               from sqlglot.optimizer.annotate_types import annotate_types
+
+               annotate_types(expression, dialect=self.dialect)

-           expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False
sqlglot/dialects/redshift.py
CHANGED
@@ -5,6 +5,7 @@ import typing as t
 from sqlglot import exp, transforms
 from sqlglot.dialects.dialect import (
     NormalizationStrategy,
+    array_concat_sql,
     concat_to_dpipe_sql,
     concat_ws_to_dpipe_sql,
     date_delta_sql,
@@ -49,6 +50,7 @@ class Redshift(Postgres):
     HAS_DISTINCT_ARRAY_CONSTRUCTORS = True
     COALESCE_COMPARISON_NON_STANDARD = True
     REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL = False
+    ARRAY_FUNCS_PROPAGATES_NULLS = True

     # ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
     TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
@@ -82,6 +84,7 @@ class Redshift(Postgres):
             ),
             "STRTOL": exp.FromBase.from_arg_list,
         }
+        FUNCTIONS.pop("GET_BIT")

         NO_PAREN_FUNCTION_PARSERS = {
             **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
@@ -189,7 +192,7 @@ class Redshift(Postgres):

         TRANSFORMS = {
             **Postgres.Generator.TRANSFORMS,
-            exp.ArrayConcat:
+            exp.ArrayConcat: array_concat_sql("ARRAY_CONCAT"),
             exp.Concat: concat_to_dpipe_sql,
             exp.ConcatWs: concat_ws_to_dpipe_sql,
             exp.ApproxDistinct: lambda self,
@@ -244,6 +247,9 @@ class Redshift(Postgres):
         TRANSFORMS.pop(exp.LastDay)
         TRANSFORMS.pop(exp.SHA2)

+        # Postgres and Redshift have different semantics for Getbit
+        TRANSFORMS.pop(exp.Getbit)
+
         # Postgres does not permit a double precision argument in ROUND; Redshift does
         TRANSFORMS.pop(exp.Round)

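A minimal sketch of the ArrayConcat change above, assumed rather than run against 28.8.0: array concatenation should now render through ARRAY_CONCAT in the Redshift dialect.

import sqlglot

# Assumption: ARRAY_CAT parses to an exp.ArrayConcat node in the Postgres reader,
# which the Redshift writer should now spell as ARRAY_CONCAT.
print(sqlglot.transpile("SELECT ARRAY_CAT(a, b)", read="postgres", write="redshift"))
# expected (assumption): ['SELECT ARRAY_CONCAT(a, b)']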
sqlglot/dialects/singlestore.py
CHANGED
@@ -82,6 +82,7 @@ class SingleStore(MySQL):
             "::$": TokenType.DCOLONDOLLAR,
             "::%": TokenType.DCOLONPERCENT,
             "::?": TokenType.DCOLONQMARK,
+            "RECORD": TokenType.STRUCT,
         }

     class Parser(MySQL.Parser):
@@ -176,6 +177,10 @@ class SingleStore(MySQL):
                 expression=seq_get(args, 0),
                 json_type="JSON",
             ),
+            "JSON_KEYS": lambda args: exp.JSONKeys(
+                this=seq_get(args, 0),
+                expressions=args[1:],
+            ),
             "JSON_PRETTY": exp.JSONFormat.from_arg_list,
             "JSON_BUILD_ARRAY": lambda args: exp.JSONArray(expressions=args),
             "JSON_BUILD_OBJECT": lambda args: exp.JSONObject(expressions=args),
@@ -328,6 +333,7 @@ class SingleStore(MySQL):
         SUPPORTS_UESCAPE = False
         NULL_ORDERING_SUPPORTED = True
         MATCH_AGAINST_TABLE_PREFIX = "TABLE "
+        STRUCT_DELIMITER = ("(", ")")

         @staticmethod
         def _unicode_substitute(m: re.Match[str]) -> str:
@@ -497,7 +503,6 @@ class SingleStore(MySQL):
             ),
             exp.IsAscii: lambda self, e: f"({self.sql(e, 'this')} RLIKE '^[\x00-\x7f]*$')",
             exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
-            exp.Chr: rename_func("CHAR"),
             exp.Contains: rename_func("INSTR"),
             exp.RegexpExtractAll: unsupported_args("position", "occurrence", "group")(
                 lambda self, e: self.func(
@@ -613,7 +618,6 @@ class SingleStore(MySQL):
             exp.DataType.Type.SERIAL,
             exp.DataType.Type.SMALLSERIAL,
             exp.DataType.Type.SMALLMONEY,
-            exp.DataType.Type.STRUCT,
             exp.DataType.Type.SUPER,
             exp.DataType.Type.TIMETZ,
             exp.DataType.Type.TIMESTAMPNTZ,
@@ -654,6 +658,7 @@ class SingleStore(MySQL):
             exp.DataType.Type.LINESTRING: "GEOGRAPHY",
             exp.DataType.Type.POLYGON: "GEOGRAPHY",
             exp.DataType.Type.MULTIPOLYGON: "GEOGRAPHY",
+            exp.DataType.Type.STRUCT: "RECORD",
             exp.DataType.Type.JSONB: "BSON",
             exp.DataType.Type.TIMESTAMP: "TIMESTAMP",
             exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP",
@@ -1760,8 +1765,13 @@ class SingleStore(MySQL):
                 self.func("TO_JSON", expression.this),
             )

-        @unsupported_args("kind", "
+        @unsupported_args("kind", "values")
         def datatype_sql(self, expression: exp.DataType) -> str:
+            if expression.args.get("nested") and not expression.is_type(exp.DataType.Type.STRUCT):
+                self.unsupported(
+                    f"Argument 'nested' is not supported for representation of '{expression.this.value}' in SingleStore"
+                )
+
             if expression.is_type(exp.DataType.Type.VARBINARY) and not expression.expressions:
                 # `VARBINARY` must always have a size - if it doesn't, we always generate `BLOB`
                 return "BLOB"
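A minimal sketch of the RECORD/STRUCT mapping added above, with assumed behavior: RECORD tokenizes to a STRUCT type, STRUCT renders back as RECORD, and STRUCT_DELIMITER = ("(", ")") keeps the field list parenthesized. The example DDL and the printed outputs are assumptions, not verified against 28.8.0.

import sqlglot

ddl = "CREATE TABLE t (c RECORD(a INT, b TEXT))"
ast = sqlglot.parse_one(ddl, read="singlestore")

print(ast.sql(dialect="singlestore"))  # expected (assumption): round-trips as RECORD(...)
print(ast.sql(dialect="duckdb"))       # expected (assumption): a DuckDB STRUCT(...) type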