PyPI - sqlglot - Versions diffs - 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl - Mend

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

sqlglot/__main__.py +6 -4
sqlglot/_version.py +2 -2
sqlglot/dialects/bigquery.py +116 -295
sqlglot/dialects/clickhouse.py +67 -2
sqlglot/dialects/databricks.py +38 -1
sqlglot/dialects/dialect.py +327 -286
sqlglot/dialects/dremio.py +4 -1
sqlglot/dialects/duckdb.py +718 -22
sqlglot/dialects/exasol.py +243 -10
sqlglot/dialects/hive.py +8 -8
sqlglot/dialects/mysql.py +11 -2
sqlglot/dialects/oracle.py +29 -0
sqlglot/dialects/postgres.py +46 -24
sqlglot/dialects/presto.py +47 -16
sqlglot/dialects/redshift.py +16 -0
sqlglot/dialects/risingwave.py +3 -0
sqlglot/dialects/singlestore.py +12 -3
sqlglot/dialects/snowflake.py +199 -271
sqlglot/dialects/spark.py +2 -2
sqlglot/dialects/spark2.py +11 -48
sqlglot/dialects/sqlite.py +9 -0
sqlglot/dialects/teradata.py +5 -8
sqlglot/dialects/trino.py +6 -0
sqlglot/dialects/tsql.py +61 -25
sqlglot/diff.py +4 -2
sqlglot/errors.py +69 -0
sqlglot/expressions.py +484 -84
sqlglot/generator.py +143 -41
sqlglot/helper.py +2 -2
sqlglot/optimizer/annotate_types.py +247 -140
sqlglot/optimizer/canonicalize.py +6 -1
sqlglot/optimizer/eliminate_joins.py +1 -1
sqlglot/optimizer/eliminate_subqueries.py +2 -2
sqlglot/optimizer/merge_subqueries.py +5 -5
sqlglot/optimizer/normalize.py +20 -13
sqlglot/optimizer/normalize_identifiers.py +17 -3
sqlglot/optimizer/optimizer.py +4 -0
sqlglot/optimizer/pushdown_predicates.py +1 -1
sqlglot/optimizer/qualify.py +14 -6
sqlglot/optimizer/qualify_columns.py +113 -352
sqlglot/optimizer/qualify_tables.py +112 -70
sqlglot/optimizer/resolver.py +374 -0
sqlglot/optimizer/scope.py +27 -16
sqlglot/optimizer/simplify.py +1074 -964
sqlglot/optimizer/unnest_subqueries.py +12 -2
sqlglot/parser.py +276 -160
sqlglot/planner.py +2 -2
sqlglot/schema.py +15 -4
sqlglot/tokens.py +42 -7
sqlglot/transforms.py +77 -22
sqlglot/typing/__init__.py +316 -0
sqlglot/typing/bigquery.py +376 -0
sqlglot/typing/hive.py +12 -0
sqlglot/typing/presto.py +24 -0
sqlglot/typing/snowflake.py +505 -0
sqlglot/typing/spark2.py +58 -0
sqlglot/typing/tsql.py +9 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
sqlglot-28.4.0.dist-info/RECORD +92 -0
sqlglot-27.29.0.dist-info/RECORD +0 -84
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
{sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0

sqlglot/dialects/presto.py CHANGED Viewed

@@ -31,6 +31,7 @@ from sqlglot.dialects.dialect import (
     sequence_sql,
     build_regexp_extract,
     explode_to_unnest_sql,
+    sha2_digest_sql,
 )
 from sqlglot.dialects.hive import Hive
 from sqlglot.dialects.mysql import MySQL
@@ -39,11 +40,18 @@ from sqlglot.optimizer.scope import find_all_in_scope
 from sqlglot.tokens import TokenType
 from sqlglot.transforms import unqualify_columns
 from sqlglot.generator import unsupported_args
+from sqlglot.typing.presto import EXPRESSION_METADATA
 DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]
 def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
+    delimiters = expression.expression
+    if delimiters and not (
+        delimiters.is_string and delimiters.this == self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS
+    ):
+        self.unsupported("INITCAP does not support custom delimiters")
     regex = r"(\w)(\w*)"
     return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"
@@ -267,20 +275,7 @@ class Presto(Dialect):
     # https://github.com/prestodb/presto/issues/2863
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
-    # The result of certain math functions in Presto/Trino is of type
-    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
-    ANNOTATORS = {
-        **Dialect.ANNOTATORS,
-        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
-        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
-        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
-        if e.this
-        else self._set_type(e, exp.DataType.Type.DOUBLE),
-    }
+    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
     SUPPORTED_SETTINGS = {
         *Dialect.SUPPORTED_SETTINGS,
@@ -429,10 +424,19 @@ class Presto(Dialect):
         TRANSFORMS = {
             **generator.Generator.TRANSFORMS,
             exp.AnyValue: rename_func("ARBITRARY"),
-            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
+            exp.ApproxQuantile: lambda self, e: self.func(
+                "APPROX_PERCENTILE",
+                e.this,
+                e.args.get("weight"),
+                e.args.get("quantile"),
+                e.args.get("accuracy"),
+            ),
             exp.ArgMax: rename_func("MAX_BY"),
             exp.ArgMin: rename_func("MIN_BY"),
-            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
+            exp.Array: transforms.preprocess(
+                [transforms.inherit_struct_field_names],
+                generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
+            ),
             exp.ArrayAny: rename_func("ANY_MATCH"),
             exp.ArrayConcat: rename_func("CONCAT"),
             exp.ArrayContains: rename_func("CONTAINS"),
@@ -542,7 +546,9 @@ class Presto(Dialect):
             exp.Xor: bool_xor_sql,
             exp.MD5Digest: rename_func("MD5"),
             exp.SHA: rename_func("SHA1"),
+            exp.SHA1Digest: rename_func("SHA1"),
             exp.SHA2: sha256_sql,
+            exp.SHA2Digest: sha2_digest_sql,
         }
         RESERVED_KEYWORDS = {
@@ -606,6 +612,31 @@ class Presto(Dialect):
             "with",
         }
+        def extract_sql(self, expression: exp.Extract) -> str:
+            date_part = expression.name
+            if not date_part.startswith("EPOCH"):
+                return super().extract_sql(expression)
+            if date_part == "EPOCH_MILLISECOND":
+                scale = 10**3
+            elif date_part == "EPOCH_MICROSECOND":
+                scale = 10**6
+            elif date_part == "EPOCH_NANOSECOND":
+                scale = 10**9
+            else:
+                scale = None
+            value = expression.expression
+            ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
+            to_unix: exp.Expression = exp.TimeToUnix(this=ts)
+            if scale:
+                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))
+            return self.sql(to_unix)
         def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
             this = expression.this
             is_json = expression.args.get("is_json")

sqlglot/dialects/redshift.py CHANGED Viewed

@@ -47,6 +47,8 @@ class Redshift(Postgres):
     COPY_PARAMS_ARE_CSV = False
     HEX_LOWERCASE = True
     HAS_DISTINCT_ARRAY_CONSTRUCTORS = True
+    COALESCE_COMPARISON_NON_STANDARD = True
+    REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL = False
     # ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
     TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
@@ -68,6 +70,13 @@ class Redshift(Postgres):
             "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
             "GETDATE": exp.CurrentTimestamp.from_arg_list,
             "LISTAGG": exp.GroupConcat.from_arg_list,
+            "REGEXP_SUBSTR": lambda args: exp.RegexpExtract(
+                this=seq_get(args, 0),
+                expression=seq_get(args, 1),
+                position=seq_get(args, 2),
+                occurrence=seq_get(args, 3),
+                parameters=seq_get(args, 4),
+            ),
             "SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
                 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
             ),
@@ -200,6 +209,7 @@ class Redshift(Postgres):
             exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
             exp.GroupConcat: rename_func("LISTAGG"),
             exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
+            exp.RegexpExtract: rename_func("REGEXP_SUBSTR"),
             exp.Select: transforms.preprocess(
                 [
                     transforms.eliminate_window_clause,
@@ -218,6 +228,9 @@ class Redshift(Postgres):
             exp.TsOrDsAdd: date_delta_sql("DATEADD"),
             exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
             exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
+            exp.SHA2Digest: lambda self, e: self.func(
+                "SHA2", e.this, e.args.get("length") or exp.Literal.number(256)
+            ),
         }
         # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -231,6 +244,9 @@ class Redshift(Postgres):
         TRANSFORMS.pop(exp.LastDay)
         TRANSFORMS.pop(exp.SHA2)
+        # Postgres does not permit a double precision argument in ROUND; Redshift does
+        TRANSFORMS.pop(exp.Round)
         RESERVED_KEYWORDS = {
             "aes128",
             "aes256",

sqlglot/dialects/risingwave.py CHANGED Viewed

@@ -8,6 +8,9 @@ from sqlglot import exp
 class RisingWave(Postgres):
+    REQUIRES_PARENTHESIZED_STRUCT_ACCESS = True
+    SUPPORTS_STRUCT_STAR_EXPANSION = True
     class Tokenizer(Postgres.Tokenizer):
         KEYWORDS = {
             **Postgres.Tokenizer.KEYWORDS,

sqlglot/dialects/singlestore.py CHANGED Viewed

@@ -81,6 +81,7 @@ class SingleStore(MySQL):
             "!:>": TokenType.NCOLON_GT,
             "::$": TokenType.DCOLONDOLLAR,
             "::%": TokenType.DCOLONPERCENT,
+            "::?": TokenType.DCOLONQMARK,
         }
     class Parser(MySQL.Parser):
@@ -253,6 +254,12 @@ class SingleStore(MySQL):
             TokenType.DCOLONPERCENT: lambda self, this, path: build_json_extract_path(
                 exp.JSONExtractScalar, json_type="DOUBLE"
             )([this, exp.Literal.string(path.name)]),
+            TokenType.DCOLONQMARK: lambda self, this, path: self.expression(
+                exp.JSONExists,
+                this=this,
+                path=path.name,
+                from_dcolonqmark=True,
+            ),
         }
         COLUMN_OPERATORS.pop(TokenType.ARROW)
         COLUMN_OPERATORS.pop(TokenType.DARROW)
@@ -452,8 +459,10 @@ class SingleStore(MySQL):
             exp.JSONBExists: lambda self, e: self.func(
                 "BSON_MATCH_ANY_EXISTS", e.this, e.args.get("path")
             ),
-            exp.JSONExists: unsupported_args("passing", "on_condition")(
-                lambda self, e: self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path"))
+            exp.JSONExists: lambda self, e: (
+                f"{self.sql(e.this)}::?{self.sql(e.args.get('path'))}"
+                if e.args.get("from_dcolonqmark")
+                else self.func("JSON_MATCH_ANY_EXISTS", e.this, e.args.get("path"))
             ),
             exp.JSONObject: unsupported_args(
                 "null_handling", "unique_keys", "return_type", "encoding"
@@ -542,7 +551,7 @@ class SingleStore(MySQL):
                 "offset",
                 "starts_with",
                 "limit",
-                "from",
+                "from_",
                 "scope",
                 "scope_kind",
                 "mutex",

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl