sqlglot 26.30.0__py3-none-any.whl → 26.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/_version.py +2 -2
- sqlglot/dialects/__init__.py +2 -0
- sqlglot/dialects/athena.py +237 -116
- sqlglot/dialects/bigquery.py +9 -4
- sqlglot/dialects/clickhouse.py +5 -0
- sqlglot/dialects/databricks.py +2 -0
- sqlglot/dialects/dialect.py +39 -19
- sqlglot/dialects/dremio.py +53 -0
- sqlglot/dialects/duckdb.py +45 -0
- sqlglot/dialects/exasol.py +89 -0
- sqlglot/dialects/fabric.py +60 -33
- sqlglot/dialects/presto.py +6 -0
- sqlglot/dialects/redshift.py +10 -2
- sqlglot/dialects/snowflake.py +3 -1
- sqlglot/dialects/spark2.py +2 -0
- sqlglot/dialects/tsql.py +7 -5
- sqlglot/expressions.py +44 -2
- sqlglot/generator.py +3 -3
- sqlglot/jsonpath.py +1 -1
- sqlglot/optimizer/annotate_types.py +13 -0
- sqlglot/optimizer/pushdown_predicates.py +2 -1
- sqlglot/optimizer/scope.py +13 -3
- sqlglot/parser.py +4 -3
- sqlglot/tokens.py +7 -1
- sqlglot/transforms.py +15 -1
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/METADATA +2 -2
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/RECORD +30 -28
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/WHEEL +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-26.30.0.dist-info → sqlglot-26.32.0.dist-info}/top_level.txt +0 -0
sqlglot/dialects/duckdb.py
CHANGED
@@ -1165,3 +1165,48 @@ class DuckDB(Dialect):
         def autoincrementcolumnconstraint_sql(self, _) -> str:
             self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
             return ""
+
+        def aliases_sql(self, expression: exp.Aliases) -> str:
+            this = expression.this
+            if isinstance(this, exp.Posexplode):
+                return self.posexplode_sql(this)
+
+            return super().aliases_sql(expression)
+
+        def posexplode_sql(self, expression: exp.Posexplode) -> str:
+            this = expression.this
+            parent = expression.parent
+
+            # The default Spark aliases are "pos" and "col", unless specified otherwise
+            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")
+
+            if isinstance(parent, exp.Aliases):
+                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
+                pos, col = parent.expressions
+            elif isinstance(parent, exp.Table):
+                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
+                alias = parent.args.get("alias")
+                if alias:
+                    pos, col = alias.columns or [pos, col]
+                    alias.pop()
+
+            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
+            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
+            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
+            gen_subscripts = self.sql(
+                exp.Alias(
+                    this=exp.Anonymous(
+                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
+                    )
+                    - exp.Literal.number(1),
+                    alias=pos,
+                )
+            )
+
+            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)
+
+            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
+                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
+                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))
+
+            return posexplode_sql
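A minimal sketch to exercise the new POSEXPLODE handling; the printed SQL shows the expected shape only and is not copied from this release:

    import sqlglot

    # Spark's POSEXPLODE has no direct DuckDB equivalent; the generator above
    # rewrites it to GENERATE_SUBSCRIPTS(a, 1) - 1 (keeping Spark's 0-based
    # positions) paired with UNNEST(a).
    print(sqlglot.transpile("SELECT POSEXPLODE(a) FROM t", read="spark", write="duckdb")[0])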
sqlglot/dialects/exasol.py
ADDED
@@ -0,0 +1,89 @@
+from __future__ import annotations
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect, rename_func, binary_from_function
+from sqlglot.helper import seq_get
+from sqlglot.generator import unsupported_args
+
+
+class Exasol(Dialect):
+    class Parser(parser.Parser):
+        FUNCTIONS = {
+            **parser.Parser.FUNCTIONS,
+            "BIT_AND": binary_from_function(exp.BitwiseAnd),
+            "BIT_OR": binary_from_function(exp.BitwiseOr),
+            "BIT_XOR": binary_from_function(exp.BitwiseXor),
+            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
+            "BIT_LSHIFT": binary_from_function(exp.BitwiseLeftShift),
+            "BIT_RSHIFT": binary_from_function(exp.BitwiseRightShift),
+            "EVERY": lambda args: exp.All(this=seq_get(args, 0)),
+            "EDIT_DISTANCE": exp.Levenshtein.from_arg_list,
+            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
+                this=seq_get(args, 0),
+                expression=seq_get(args, 1),
+                replacement=seq_get(args, 2),
+                position=seq_get(args, 3),
+                occurrence=seq_get(args, 4),
+            ),
+        }
+
+    class Generator(generator.Generator):
+        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType
+        STRING_TYPE_MAPPING = {
+            exp.DataType.Type.BLOB: "VARCHAR",
+            exp.DataType.Type.LONGBLOB: "VARCHAR",
+            exp.DataType.Type.LONGTEXT: "VARCHAR",
+            exp.DataType.Type.MEDIUMBLOB: "VARCHAR",
+            exp.DataType.Type.MEDIUMTEXT: "VARCHAR",
+            exp.DataType.Type.TINYBLOB: "VARCHAR",
+            exp.DataType.Type.TINYTEXT: "VARCHAR",
+            exp.DataType.Type.TEXT: "VARCHAR",
+            exp.DataType.Type.VARBINARY: "VARCHAR",
+        }
+
+        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm
+        TYPE_MAPPING = {
+            **generator.Generator.TYPE_MAPPING,
+            **STRING_TYPE_MAPPING,
+            exp.DataType.Type.TINYINT: "SMALLINT",
+            exp.DataType.Type.MEDIUMINT: "INT",
+            exp.DataType.Type.DECIMAL32: "DECIMAL",
+            exp.DataType.Type.DECIMAL64: "DECIMAL",
+            exp.DataType.Type.DECIMAL128: "DECIMAL",
+            exp.DataType.Type.DECIMAL256: "DECIMAL",
+            exp.DataType.Type.DATETIME: "TIMESTAMP",
+        }
+
+        def datatype_sql(self, expression: exp.DataType) -> str:
+            # Exasol supports a fixed default precision of 3 for TIMESTAMP WITH LOCAL TIME ZONE
+            # and does not allow specifying a different custom precision
+            if expression.is_type(exp.DataType.Type.TIMESTAMPLTZ):
+                return "TIMESTAMP WITH LOCAL TIME ZONE"
+
+            return super().datatype_sql(expression)
+
+        TRANSFORMS = {
+            **generator.Generator.TRANSFORMS,
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
+            exp.All: rename_func("EVERY"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_and.htm
+            exp.BitwiseAnd: rename_func("BIT_AND"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_or.htm
+            exp.BitwiseOr: rename_func("BIT_OR"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_not.htm
+            exp.BitwiseNot: rename_func("BIT_NOT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_lshift.htm
+            exp.BitwiseLeftShift: rename_func("BIT_LSHIFT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_rshift.htm
+            exp.BitwiseRightShift: rename_func("BIT_RSHIFT"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_xor.htm
+            exp.BitwiseXor: rename_func("BIT_XOR"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
+            exp.All: rename_func("EVERY"),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/edit_distance.htm#EDIT_DISTANCE
+            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
+                rename_func("EDIT_DISTANCE")
+            ),
+            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm
+            exp.Mod: rename_func("MOD"),
+            exp.RegexpReplace: unsupported_args("modifiers")(rename_func("REGEXP_REPLACE")),
+        }
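A short usage sketch for the new Exasol dialect; the rendered outputs are indicative, not verified against this exact release:

    import sqlglot

    # EDIT_DISTANCE parses to exp.Levenshtein, so it can be respelled for other engines.
    print(sqlglot.transpile("SELECT EDIT_DISTANCE(a, b)", read="exasol", write="duckdb")[0])

    # Text/blob-ish types collapse to VARCHAR when generating Exasol DDL.
    print(sqlglot.transpile("CREATE TABLE t (x TEXT)", read="mysql", write="exasol")[0])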
sqlglot/dialects/fabric.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from sqlglot import exp
 from sqlglot.dialects.dialect import NormalizationStrategy
 from sqlglot.dialects.tsql import TSQL
+from sqlglot.tokens import TokenType
 
 
 class Fabric(TSQL):
@@ -28,61 +29,87 @@ class Fabric(TSQL):
     # Fabric is case-sensitive unlike T-SQL which is case-insensitive
     NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
 
+    class Tokenizer(TSQL.Tokenizer):
+        # Override T-SQL tokenizer to handle TIMESTAMP differently
+        # In T-SQL, TIMESTAMP is a synonym for ROWVERSION, but in Fabric we want it to be a datetime type
+        # Also add UTINYINT keyword mapping since T-SQL doesn't have it
+        KEYWORDS = {
+            **TSQL.Tokenizer.KEYWORDS,
+            "TIMESTAMP": TokenType.TIMESTAMP,
+            "UTINYINT": TokenType.UTINYINT,
+        }
+
     class Generator(TSQL.Generator):
         # Fabric-specific type mappings - override T-SQL types that aren't supported
         # Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
         TYPE_MAPPING = {
             **TSQL.Generator.TYPE_MAPPING,
-
+            exp.DataType.Type.DATETIME: "DATETIME2",
+            exp.DataType.Type.DECIMAL: "DECIMAL",
+            exp.DataType.Type.IMAGE: "VARBINARY",
+            exp.DataType.Type.INT: "INT",
+            exp.DataType.Type.JSON: "VARCHAR",
             exp.DataType.Type.MONEY: "DECIMAL",
-            exp.DataType.Type.SMALLMONEY: "DECIMAL",
-            exp.DataType.Type.DATETIME: "DATETIME2(6)",
-            exp.DataType.Type.SMALLDATETIME: "DATETIME2(6)",
             exp.DataType.Type.NCHAR: "CHAR",
             exp.DataType.Type.NVARCHAR: "VARCHAR",
-            exp.DataType.Type.
-            exp.DataType.Type.
+            exp.DataType.Type.ROWVERSION: "ROWVERSION",
+            exp.DataType.Type.SMALLDATETIME: "DATETIME2",
+            exp.DataType.Type.SMALLMONEY: "DECIMAL",
+            exp.DataType.Type.TIMESTAMP: "DATETIME2",
+            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME2",
+            exp.DataType.Type.TIMESTAMPTZ: "DATETIMEOFFSET",
             exp.DataType.Type.TINYINT: "SMALLINT",
-            exp.DataType.Type.UTINYINT: "SMALLINT",
-            exp.DataType.Type.
+            exp.DataType.Type.UTINYINT: "SMALLINT",
+            exp.DataType.Type.UUID: "VARBINARY(MAX)",
             exp.DataType.Type.XML: "VARCHAR",
-            exp.DataType.Type.UUID: "VARBINARY(MAX)", # UNIQUEIDENTIFIER has limitations in Fabric
-            # Override T-SQL mappings that use different names in Fabric
-            exp.DataType.Type.DECIMAL: "DECIMAL", # T-SQL uses NUMERIC
-            exp.DataType.Type.DOUBLE: "FLOAT",
-            exp.DataType.Type.INT: "INT", # T-SQL uses INTEGER
         }
 
         def datatype_sql(self, expression: exp.DataType) -> str:
-
-
-
-
-
-            """
-            if expression.is_type(
-                exp.DataType.Type.TIME,
-                exp.DataType.Type.DATETIME2,
-                exp.DataType.Type.TIMESTAMPTZ, # DATETIMEOFFSET in Fabric
+            # Check if this is a temporal type that needs precision handling. Fabric limits temporal
+            # types to max 6 digits precision. When no precision is specified, we default to 6 digits.
+            if (
+                expression.is_type(*exp.DataType.TEMPORAL_TYPES)
+                and expression.this != exp.DataType.Type.DATE
             ):
                 # Get the current precision (first expression if it exists)
-
+                precision_param = expression.find(exp.DataTypeParam)
+                target_precision = 6
 
-
-                if precision is None:
-                    # No precision specified, default to 6
-                    target_precision = 6
-                elif precision.this.is_int:
+                if precision_param and precision_param.this.is_int:
                     # Cap precision at 6
-                    current_precision =
+                    current_precision = precision_param.this.to_py()
                     target_precision = min(current_precision, 6)
+                else:
+                    # If precision exists but is not an integer, default to 6
+                    target_precision = 6
 
                 # Create a new expression with the target precision
-
+                expression = exp.DataType(
                     this=expression.this,
                     expressions=[exp.DataTypeParam(this=exp.Literal.number(target_precision))],
                 )
 
-            return super().datatype_sql(new_expression)
-
             return super().datatype_sql(expression)
+
+        def unixtotime_sql(self, expression: exp.UnixToTime) -> str:
+            scale = expression.args.get("scale")
+            timestamp = expression.this
+
+            if scale not in (None, exp.UnixToTime.SECONDS):
+                self.unsupported(f"UnixToTime scale {scale} is not supported by Fabric")
+                return ""
+
+            # Convert unix timestamp (seconds) to microseconds and round to avoid decimals
+            microseconds = timestamp * exp.Literal.number("1e6")
+            rounded = exp.func("round", microseconds, 0)
+            rounded_ms_as_bigint = exp.cast(rounded, exp.DataType.Type.BIGINT)
+
+            # Create the base datetime as '1970-01-01' cast to DATETIME2(6)
+            epoch_start = exp.cast("'1970-01-01'", "datetime2(6)", dialect="fabric")
+
+            dateadd = exp.DateAdd(
+                this=epoch_start,
+                expression=rounded_ms_as_bigint,
+                unit=exp.Literal.string("MICROSECONDS"),
+            )
+            return self.sql(dateadd)
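A sketch of the precision capping above; per the new datatype_sql logic, out-of-range precision should be capped at 6 and a missing precision filled in (outputs indicative):

    import sqlglot

    print(sqlglot.transpile("CAST(x AS DATETIME2(7))", read="tsql", write="fabric")[0])
    # expect DATETIME2(6)
    print(sqlglot.transpile("CAST(x AS DATETIME)", read="tsql", write="fabric")[0])
    # expect DATETIME2(6) as well, since no precision was specified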
sqlglot/dialects/presto.py
CHANGED
@@ -8,6 +8,7 @@ from sqlglot.dialects.dialect import (
     NormalizationStrategy,
     binary_from_function,
     bool_xor_sql,
+    build_replace_with_optional_replacement,
     date_trunc_to_time,
     datestrtodate_sql,
     encode_decode_sql,
@@ -30,6 +31,7 @@ from sqlglot.dialects.dialect import (
     sequence_sql,
     build_regexp_extract,
     explode_to_unnest_sql,
+    space_sql,
 )
 from sqlglot.dialects.hive import Hive
 from sqlglot.dialects.mysql import MySQL
@@ -360,6 +362,7 @@ class Presto(Dialect):
                 expression=seq_get(args, 1),
                 replacement=seq_get(args, 2) or exp.Literal.string(""),
             ),
+            "REPLACE": build_replace_with_optional_replacement,
             "ROW": exp.Struct.from_arg_list,
             "SEQUENCE": exp.GenerateSeries.from_arg_list,
             "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
@@ -367,6 +370,7 @@ class Presto(Dialect):
             "STRPOS": lambda args: exp.StrPosition(
                 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
             ),
+            "SLICE": exp.ArraySlice.from_arg_list,
             "TO_CHAR": _build_to_char,
             "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
             "TO_UTF8": lambda args: exp.Encode(
@@ -435,6 +439,7 @@ class Presto(Dialect):
             exp.ArrayContains: rename_func("CONTAINS"),
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
             exp.ArrayUniqueAgg: rename_func("SET_AGG"),
+            exp.ArraySlice: rename_func("SLICE"),
             exp.AtTimeZone: rename_func("AT_TIMEZONE"),
             exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
             exp.BitwiseLeftShift: lambda self, e: self.func(
@@ -501,6 +506,7 @@ class Presto(Dialect):
                     amend_exploded_column_table,
                 ]
             ),
+            exp.Space: space_sql,
             exp.SortArray: _no_sort_array,
             exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
             exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
sqlglot/dialects/redshift.py
CHANGED
@@ -213,8 +213,7 @@ class Redshift(Postgres):
         exp.TableSample: no_tablesample_sql,
         exp.TsOrDsAdd: date_delta_sql("DATEADD"),
         exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
-        exp.UnixToTime: lambda self,
-        e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
+        exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
     }
 
     # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -447,3 +446,12 @@ class Redshift(Postgres):
     def explode_sql(self, expression: exp.Explode) -> str:
         self.unsupported("Unsupported EXPLODE() function")
         return ""
+
+    def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
+        scale = expression.args.get("scale")
+        this = self.sql(expression.this)
+
+        if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
+            this = f"({this} / POWER(10, {scale.to_py()}))"
+
+        return f"(TIMESTAMP 'epoch' + {this} * INTERVAL '1 SECOND')"
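Building an exp.UnixToTime node directly shows the new scale handling; the output shape follows from the helper above:

    from sqlglot import exp

    # A millisecond-scale epoch is divided down before the interval arithmetic.
    node = exp.UnixToTime(this=exp.column("ts"), scale=exp.Literal.number(3))
    print(node.sql(dialect="redshift"))
    # (TIMESTAMP 'epoch' + (ts / POWER(10, 3)) * INTERVAL '1 SECOND')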
sqlglot/dialects/snowflake.py
CHANGED
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
     build_timetostr_or_tochar,
     binary_from_function,
     build_default_decimal_type,
+    build_replace_with_optional_replacement,
     build_timestamp_from_parts,
     date_delta_sql,
     date_trunc_to_time,
@@ -484,6 +485,7 @@ class Snowflake(Dialect):
             "REGEXP_REPLACE": _build_regexp_replace,
             "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
             "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
+            "REPLACE": build_replace_with_optional_replacement,
             "RLIKE": exp.RegexpLike.from_arg_list,
             "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
             "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
@@ -1416,7 +1418,7 @@ class Snowflake(Dialect):
 
     def timetostr_sql(self, expression: exp.TimeToStr) -> str:
         this = expression.this
-        if
+        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
 
        return self.func("TO_CHAR", this, self.format_time(expression))
sqlglot/dialects/spark2.py
CHANGED
@@ -201,6 +201,7 @@ class Spark2(Hive):
             "SHIFTLEFT": binary_from_function(exp.BitwiseLeftShift),
             "SHIFTRIGHT": binary_from_function(exp.BitwiseRightShift),
             "STRING": _build_as_cast("string"),
+            "SLICE": exp.ArraySlice.from_arg_list,
             "TIMESTAMP": _build_as_cast("timestamp"),
             "TO_TIMESTAMP": lambda args: (
                 _build_as_cast("timestamp")(args)
@@ -261,6 +262,7 @@ class Spark2(Hive):
             exp.ArraySum: lambda self,
             e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
             exp.ArrayToString: rename_func("ARRAY_JOIN"),
+            exp.ArraySlice: rename_func("SLICE"),
             exp.AtTimeZone: lambda self, e: self.func(
                 "FROM_UTC_TIMESTAMP", e.this, e.args.get("zone")
             ),
sqlglot/dialects/tsql.py
CHANGED
@@ -612,6 +612,7 @@ class TSQL(Dialect):
             "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
             "SUSER_NAME": exp.CurrentUser.from_arg_list,
             "SUSER_SNAME": exp.CurrentUser.from_arg_list,
+            "SYSDATETIMEOFFSET": exp.CurrentTimestampLTZ.from_arg_list,
             "SYSTEM_USER": exp.CurrentUser.from_arg_list,
             "TIMEFROMPARTS": _build_timefromparts,
             "DATETRUNC": _build_datetrunc,
@@ -1020,6 +1021,7 @@ class TSQL(Dialect):
             exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
             exp.CurrentDate: rename_func("GETDATE"),
             exp.CurrentTimestamp: rename_func("GETDATE"),
+            exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),
             exp.DateStrToDate: datestrtodate_sql,
             exp.Extract: rename_func("DATEPART"),
             exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
@@ -1249,15 +1251,15 @@ class TSQL(Dialect):
         sql_with_ctes = self.prepend_ctes(expression, sql)
         sql_literal = self.sql(exp.Literal.string(sql_with_ctes))
         if kind == "SCHEMA":
-            return f"""IF NOT EXISTS (SELECT * FROM
+            return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = {identifier}) EXEC({sql_literal})"""
         elif kind == "TABLE":
             assert table
             where = exp.and_(
-                exp.column("
-                exp.column("
-                exp.column("
+                exp.column("TABLE_NAME").eq(table.name),
+                exp.column("TABLE_SCHEMA").eq(table.db) if table.db else None,
+                exp.column("TABLE_CATALOG").eq(table.catalog) if table.catalog else None,
             )
-            return f"""IF NOT EXISTS (SELECT * FROM
+            return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE {where}) EXEC({sql_literal})"""
         elif kind == "INDEX":
             index = self.sql(exp.Literal.string(expression.this.text("this")))
             return f"""IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id({identifier}) AND name = {index}) EXEC({sql_literal})"""
sqlglot/expressions.py
CHANGED
@@ -5569,6 +5569,22 @@ class ArrayFilter(Func):
     _sql_names = ["FILTER", "ARRAY_FILTER"]
 
 
+class ArrayFirst(Func):
+    pass
+
+
+class ArrayLast(Func):
+    pass
+
+
+class ArrayReverse(Func):
+    pass
+
+
+class ArraySlice(Func):
+    arg_types = {"this": True, "start": True, "end": False, "step": False}
+
+
 class ArrayToString(Func):
     arg_types = {"this": True, "expression": True, "null": False}
     _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
@@ -5806,6 +5822,10 @@ class CurrentTimestamp(Func):
     arg_types = {"this": False, "sysdate": False}
 
 
+class CurrentTimestampLTZ(Func):
+    arg_types = {}
+
+
 class CurrentSchema(Func):
     arg_types = {"this": False}
 
@@ -5846,8 +5866,6 @@ class DateTrunc(Func):
             unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name]
 
             args["unit"] = Literal.string(unit_name)
-        elif isinstance(unit, Week):
-            unit.set("this", Literal.string(unit.this.name.upper()))
 
         super().__init__(**args)
 
@@ -6669,6 +6687,11 @@ class Repeat(Func):
     arg_types = {"this": True, "times": True}
 
 
+# Some dialects like Snowflake support two argument replace
+class Replace(Func):
+    arg_types = {"this": True, "expression": True, "replacement": False}
+
+
 # https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
 # tsql third argument function == trunctaion if not 0
 class Round(Func):
@@ -6716,6 +6739,17 @@ class Substring(Func):
     arg_types = {"this": True, "start": False, "length": False}
 
 
+class SubstringIndex(Func):
+    """
+    SUBSTRING_INDEX(str, delim, count)
+
+    *count* > 0 → left slice before the *count*-th delimiter
+    *count* < 0 → right slice after the |count|-th delimiter
+    """
+
+    arg_types = {"this": True, "delimiter": True, "count": True}
+
+
 class StandardHash(Func):
     arg_types = {"this": True, "expression": False}
 
@@ -6772,6 +6806,14 @@ class FromBase(Func):
     arg_types = {"this": True, "expression": True}
 
 
+class Space(Func):
+    """
+    SPACE(n) → string consisting of n blank characters
+    """
+
+    pass
+
+
 class Struct(Func):
     arg_types = {"expressions": False}
     is_var_len_args = True
sqlglot/generator.py
CHANGED
@@ -3480,7 +3480,7 @@ class Generator(metaclass=_Generator):
 
             actions_list.append(action_sql)
 
-        actions_sql = self.format_args(*actions_list)
+        actions_sql = self.format_args(*actions_list).lstrip("\n")
 
         exists = " IF EXISTS" if expression.args.get("exists") else ""
         on_cluster = self.sql(expression, "cluster")
@@ -3491,7 +3491,7 @@ class Generator(metaclass=_Generator):
         kind = self.sql(expression, "kind")
         not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
 
-        return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}
+        return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}{self.sep()}{actions_sql}{not_valid}{options}"
 
     def add_column_sql(self, expression: exp.Expression) -> str:
         sql = self.sql(expression)
@@ -3510,7 +3510,7 @@ class Generator(metaclass=_Generator):
         return f"DROP{exists}{expressions}"
 
     def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
-        return f"ADD {self.expressions(expression)}"
+        return f"ADD {self.expressions(expression, indent=False)}"
 
     def addpartition_sql(self, expression: exp.AddPartition) -> str:
         exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
sqlglot/jsonpath.py
CHANGED
@@ -41,7 +41,7 @@ def parse(path: str, dialect: DialectType = None) -> exp.JSONPath:
     """Takes in a JSON path string and parses it into a JSONPath expression."""
     from sqlglot.dialects import Dialect
 
-    jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer
+    jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer()
     tokens = jsonpath_tokenizer.tokenize(path)
     size = len(tokens)
 
sqlglot/optimizer/annotate_types.py
CHANGED
@@ -329,6 +329,7 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
             ],
             nested=True,
         )
+
         if not any(
             cd.kind.is_type(exp.DataType.Type.UNKNOWN)
             for cd in struct_type.expressions
@@ -630,3 +631,15 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
         else:
             self._set_type(expression, exp.DataType.Type.INT)
         return expression
+
+    def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
+        self._annotate_args(expression)
+
+        array_arg = expression.this
+        if array_arg.type.is_type(exp.DataType.Type.ARRAY):
+            element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
+            self._set_type(expression, element_type)
+        else:
+            self._set_type(expression, exp.DataType.Type.UNKNOWN)
+
+        return expression
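A sketch of the element-based annotation, assuming functions such as the new ARRAY_FIRST are wired to _annotate_by_array_element in this release (the wiring itself is not shown in this hunk):

    import sqlglot
    from sqlglot.optimizer.annotate_types import annotate_types

    ast = sqlglot.parse_one("SELECT ARRAY_FIRST([1, 2, 3])", read="duckdb")
    print(annotate_types(ast).selects[0].type)  # expect an INT-like type, else UNKNOWN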
sqlglot/optimizer/pushdown_predicates.py
CHANGED
@@ -21,12 +21,13 @@ def pushdown_predicates(expression, dialect=None):
     Returns:
         sqlglot.Expression: optimized expression
     """
+    from sqlglot.dialects.athena import Athena
     from sqlglot.dialects.presto import Presto
 
     root = build_scope(expression)
 
     dialect = Dialect.get_or_raise(dialect)
-    unnest_requires_cross_join = isinstance(dialect, Presto)
+    unnest_requires_cross_join = isinstance(dialect, (Athena, Presto))
 
     if root:
         scope_ref_count = root.ref_count()
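Athena now takes the same conservative path as Presto, so predicates are not pushed into sources that would require an UNNEST cross join; a minimal sketch:

    import sqlglot
    from sqlglot.optimizer.pushdown_predicates import pushdown_predicates

    sql = "SELECT * FROM (SELECT * FROM t CROSS JOIN UNNEST(xs) AS _u(x)) AS s WHERE s.x > 1"
    ast = pushdown_predicates(sqlglot.parse_one(sql, read="athena"), dialect="athena")
    print(ast.sql(dialect="athena"))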
sqlglot/optimizer/scope.py
CHANGED
@@ -358,7 +358,7 @@ class Scope:
         for expression in itertools.chain(self.derived_tables, self.udtfs):
             self._references.append(
                 (
-                    expression
+                    _get_source_alias(expression),
                     expression if expression.args.get("pivots") else expression.unnest(),
                 )
             )
@@ -785,7 +785,7 @@ def _traverse_tables(scope):
         # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
         # Until then, this means that only a single, unaliased derived table is allowed (rather,
         # the latest one wins.
-        sources[expression
+        sources[_get_source_alias(expression)] = child_scope
 
     # append the final child_scope yielded
     if child_scope:
@@ -825,7 +825,7 @@ def _traverse_udtfs(scope):
         ):
             yield child_scope
             top = child_scope
-        sources[expression
+        sources[_get_source_alias(expression)] = child_scope
 
     scope.subquery_scopes.append(top)
 
@@ -915,3 +915,13 @@ def find_in_scope(expression, expression_types, bfs=True):
         the criteria was found.
     """
     return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
+
+
+def _get_source_alias(expression):
+    alias_arg = expression.args.get("alias")
+    alias_name = expression.alias
+
+    if not alias_name and isinstance(alias_arg, exp.TableAlias) and len(alias_arg.columns) == 1:
+        alias_name = alias_arg.columns[0].name
+
+    return alias_name
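Source maps are now keyed through the helper above; for a plain aliased UDTF the key is still the table alias, while aliases that carry only a single column name fall back to that column. A sketch:

    import sqlglot
    from sqlglot.optimizer.scope import build_scope

    ast = sqlglot.parse_one("SELECT t.x FROM UNNEST([1, 2, 3]) AS t(x)", read="duckdb")
    root = build_scope(ast)
    print(list(root.sources))  # expect ['t']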
sqlglot/parser.py
CHANGED
@@ -1895,7 +1895,7 @@ class Parser(metaclass=_Parser):
             stmt.add_comments(comments, prepend=True)
             return stmt
 
-        if self._match_set(self.dialect.
+        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
             return self._parse_command()
 
         expression = self._parse_expression()
@@ -7362,8 +7362,9 @@ class Parser(metaclass=_Parser):
 
             return None
 
-        if not self.
-
+        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
+            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
+            or self._match_text_seq("COLUMNS")
         ):
             schema = self._parse_schema()
 
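The reworked check targets Spark/Databricks-style ALTER TABLE ... ADD COLUMNS, where ADD is not repeated per column; a sketch under the assumption that the Spark reader accepts this form:

    import sqlglot

    print(sqlglot.transpile("ALTER TABLE t ADD COLUMNS (x INT, y STRING)", read="spark")[0])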