sqlglot 28.4.0__py3-none-any.whl → 28.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. sqlglot/_version.py +2 -2
  2. sqlglot/dialects/bigquery.py +20 -23
  3. sqlglot/dialects/clickhouse.py +2 -0
  4. sqlglot/dialects/dialect.py +355 -18
  5. sqlglot/dialects/doris.py +38 -90
  6. sqlglot/dialects/druid.py +1 -0
  7. sqlglot/dialects/duckdb.py +1739 -163
  8. sqlglot/dialects/exasol.py +17 -1
  9. sqlglot/dialects/hive.py +27 -2
  10. sqlglot/dialects/mysql.py +103 -11
  11. sqlglot/dialects/oracle.py +38 -1
  12. sqlglot/dialects/postgres.py +142 -33
  13. sqlglot/dialects/presto.py +6 -2
  14. sqlglot/dialects/redshift.py +7 -1
  15. sqlglot/dialects/singlestore.py +13 -3
  16. sqlglot/dialects/snowflake.py +271 -21
  17. sqlglot/dialects/spark.py +25 -0
  18. sqlglot/dialects/spark2.py +4 -3
  19. sqlglot/dialects/starrocks.py +152 -17
  20. sqlglot/dialects/trino.py +1 -0
  21. sqlglot/dialects/tsql.py +5 -0
  22. sqlglot/diff.py +1 -1
  23. sqlglot/expressions.py +239 -47
  24. sqlglot/generator.py +173 -44
  25. sqlglot/optimizer/annotate_types.py +129 -60
  26. sqlglot/optimizer/merge_subqueries.py +13 -2
  27. sqlglot/optimizer/qualify_columns.py +7 -0
  28. sqlglot/optimizer/resolver.py +19 -0
  29. sqlglot/optimizer/scope.py +12 -0
  30. sqlglot/optimizer/unnest_subqueries.py +7 -0
  31. sqlglot/parser.py +251 -58
  32. sqlglot/schema.py +186 -14
  33. sqlglot/tokens.py +36 -6
  34. sqlglot/transforms.py +6 -5
  35. sqlglot/typing/__init__.py +29 -10
  36. sqlglot/typing/bigquery.py +5 -10
  37. sqlglot/typing/duckdb.py +39 -0
  38. sqlglot/typing/hive.py +50 -1
  39. sqlglot/typing/mysql.py +32 -0
  40. sqlglot/typing/presto.py +0 -1
  41. sqlglot/typing/snowflake.py +80 -17
  42. sqlglot/typing/spark.py +29 -0
  43. sqlglot/typing/spark2.py +9 -1
  44. sqlglot/typing/tsql.py +21 -0
  45. {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/METADATA +47 -2
  46. sqlglot-28.8.0.dist-info/RECORD +95 -0
  47. {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/WHEEL +1 -1
  48. sqlglot-28.4.0.dist-info/RECORD +0 -92
  49. {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/licenses/LICENSE +0 -0
  50. {sqlglot-28.4.0.dist-info → sqlglot-28.8.0.dist-info}/top_level.txt +0 -0
sqlglot/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID
 
- __version__ = version = '28.4.0'
- __version_tuple__ = version_tuple = (28, 4, 0)
+ __version__ = version = '28.8.0'
+ __version_tuple__ = version_tuple = (28, 8, 0)
 
  __commit_id__ = commit_id = None
sqlglot/dialects/bigquery.py CHANGED
@@ -51,6 +51,8 @@ JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtr
 
  DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")
 
+ MAKE_INTERVAL_KWARGS = ["year", "month", "day", "hour", "minute", "second"]
+
 
  def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
      if not expression.find_ancestor(exp.From, exp.Join):
@@ -389,7 +391,9 @@ class BigQuery(Dialect):
      EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
      QUERY_RESULTS_ARE_STRUCTS = True
      JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
+     LEAST_GREATEST_IGNORES_NULLS = False
      DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
+     PRIORITIZE_NON_LITERAL_TYPES = True
 
      # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
      INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
@@ -602,12 +606,6 @@ class BigQuery(Dialect):
      "EDIT_DISTANCE": _build_levenshtein,
      "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
      "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
-     "GREATEST": lambda args: exp.Greatest(
-         this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
-     ),
-     "LEAST": lambda args: exp.Least(
-         this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
-     ),
      "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
      "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
      "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
@@ -964,7 +962,7 @@ class BigQuery(Dialect):
      def _parse_make_interval(self) -> exp.MakeInterval:
          expr = exp.MakeInterval()
 
-         for arg_key in expr.arg_types:
+         for arg_key in MAKE_INTERVAL_KWARGS:
              value = self._parse_lambda()
 
              if not value:
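
The parser now walks the explicit MAKE_INTERVAL_KWARGS list rather than exp.MakeInterval.arg_types, presumably so the positional-to-keyword mapping stays stable regardless of how the expression's arg_types are ordered or extended. A minimal sketch of what this parses (the SQL is illustrative; BigQuery passes named arguments with =>):

    import sqlglot

    # Positional and named MAKE_INTERVAL arguments map onto
    # year/month/day/hour/minute/second, in that order:
    ast = sqlglot.parse_one("SELECT MAKE_INTERVAL(1, 2, day => 3)", read="bigquery")
    print(ast.sql(dialect="bigquery"))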
@@ -1069,20 +1067,23 @@ class BigQuery(Dialect):
          )
 
      def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
+         func_index = self._index + 1
          this = super()._parse_column_ops(this)
 
-         if isinstance(this, exp.Dot):
-             prefix_name = this.this.name.upper()
-             func_name = this.name.upper()
-             if prefix_name == "NET":
-                 if func_name == "HOST":
-                     this = self.expression(
-                         exp.NetHost, this=seq_get(this.expression.expressions, 0)
-                     )
-             elif prefix_name == "SAFE":
-                 if func_name == "TIMESTAMP":
-                     this = _build_timestamp(this.expression.expressions)
-                     this.set("safe", True)
+         if isinstance(this, exp.Dot) and isinstance(this.expression, exp.Func):
+             prefix = this.this.name.upper()
+
+             func: t.Optional[t.Type[exp.Func]] = None
+             if prefix == "NET":
+                 func = exp.NetFunc
+             elif prefix == "SAFE":
+                 func = exp.SafeFunc
+
+             if func:
+                 # Retreat to try and parse a known function instead of an anonymous one,
+                 # which is parsed by the base column ops parser due to anonymous_func=true
+                 self._retreat(func_index)
+                 this = func(this=self._parse_function(any_token=True))
 
          return this
 
@@ -1551,7 +1552,3 @@ class BigQuery(Dialect):
          kind = f" {kind}" if kind else ""
 
          return f"{variables}{kind}{default}"
-
-     def timestamp_sql(self, expression: exp.Timestamp) -> str:
-         prefix = "SAFE." if expression.args.get("safe") else ""
-         return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))
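
With SAFE.- and NET.-prefixed calls now parsed generically into exp.SafeFunc/exp.NetFunc wrappers, the bespoke timestamp_sql override above becomes dead code, hence its removal. A quick sketch of the round-trip this generalization enables (the expected output is an assumption, not taken from this diff):

    import sqlglot

    ast = sqlglot.parse_one("SELECT SAFE.TIMESTAMP('2024-01-01')", read="bigquery")
    print(ast.sql(dialect="bigquery"))  # expected to round-trip the SAFE. prefix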
sqlglot/dialects/clickhouse.py CHANGED
@@ -565,6 +565,8 @@ class ClickHouse(Dialect):
      "MEDIAN": lambda self: self._parse_quantile(),
      "COLUMNS": lambda self: self._parse_columns(),
      "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
+     "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
+     "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
  }
 
  FUNCTION_PARSERS.pop("MATCH")
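
These parsers fold ClickHouse's variadic and()/or() functions into ordinary boolean connectives, which then transpile naturally to other dialects. An illustrative check (the output shape is assumed, not verified against this release):

    import sqlglot

    print(sqlglot.transpile("SELECT and(a, b, c) FROM t", read="clickhouse", write="duckdb")[0])
    # Something along the lines of: SELECT a AND b AND c FROM t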
sqlglot/dialects/dialect.py CHANGED
@@ -19,6 +19,7 @@ from sqlglot.helper import (
      seq_get,
      suggest_closest_match_and_fail,
      to_bool,
+     ensure_list,
  )
  from sqlglot.jsonpath import JSONPathTokenizer, parse as parse_json_path
  from sqlglot.parser import Parser
@@ -27,6 +28,8 @@ from sqlglot.tokens import Token, Tokenizer, TokenType
  from sqlglot.trie import new_trie
  from sqlglot.typing import EXPRESSION_METADATA
 
+ from importlib.metadata import entry_points
+
  DATE_ADD_OR_DIFF = t.Union[
      exp.DateAdd,
      exp.DateDiff,
@@ -66,6 +69,8 @@ UNESCAPED_SEQUENCES = {
      "\\\\": "\\",
  }
 
+ PLUGIN_GROUP_NAME = "sqlglot.dialects"
+
 
  class Dialects(str, Enum):
      """Dialects supported by SQLGLot."""
@@ -153,12 +158,54 @@ class _Dialect(type):
          if isinstance(key, Dialects):
              key = key.value
 
-         # This import will lead to a new dialect being loaded, and hence, registered.
-         # We check that the key is an actual sqlglot module to avoid blindly importing
-         # files. Custom user dialects need to be imported at the top-level package, in
-         # order for them to be registered as soon as possible.
+         # 1. Try standard sqlglot modules first
          if key in DIALECT_MODULE_NAMES:
+             module = importlib.import_module(f"sqlglot.dialects.{key}")
+             # If module was already imported, the class may not be in _classes
+             # Find and register the dialect class from the module
+             if key not in cls._classes:
+                 for attr_name in dir(module):
+                     attr = getattr(module, attr_name, None)
+                     if (
+                         isinstance(attr, type)
+                         and issubclass(attr, Dialect)
+                         and attr.__name__.lower() == key
+                     ):
+                         cls._classes[key] = attr
+                         break
+             return
+
+         # 2. Try entry points (for plugins)
+         try:
+             all_eps = entry_points()
+             # Python 3.10+ has select() method, older versions use dict-like access
+             if hasattr(all_eps, "select"):
+                 eps = all_eps.select(group=PLUGIN_GROUP_NAME, name=key)
+             else:
+                 # For older Python versions, entry_points() returns a dict-like object
+                 group_eps = all_eps.get(PLUGIN_GROUP_NAME, [])  # type: ignore
+                 eps = [ep for ep in group_eps if ep.name == key]  # type: ignore
+
+             for entry_point in eps:
+                 dialect_class = entry_point.load()
+                 # Verify it's a Dialect subclass
+                 # issubclass() returns False if not a subclass, TypeError only if not a class at all
+                 if isinstance(dialect_class, type) and issubclass(dialect_class, Dialect):
+                     # Register the dialect using the entry point name (key)
+                     # The metaclass may have registered it by class name, but we need it by entry point name
+                     if key not in cls._classes:
+                         cls._classes[key] = dialect_class
+                     return
+         except ImportError:
+             # entry_point.load() failed (bad plugin - module/class doesn't exist)
+             pass
+
+         # 3. Try direct import (for backward compatibility)
+         # This allows namespace packages or explicit imports to work
+         try:
              importlib.import_module(f"sqlglot.dialects.{key}")
+         except ImportError:
+             pass
 
      @classmethod
      def __getitem__(cls, key: str) -> t.Type[Dialect]:
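
Step 2 introduces a plugin mechanism: third-party dialects can be exposed through the "sqlglot.dialects" entry-point group (the PLUGIN_GROUP_NAME above) and resolved lazily by name. A hypothetical plugin sketch, with the package and class names invented for illustration:

    # In the plugin's pyproject.toml:
    #
    #   [project.entry-points."sqlglot.dialects"]
    #   mydb = "my_pkg.dialect:MyDB"
    #
    # In my_pkg/dialect.py:
    from sqlglot.dialects.dialect import Dialect

    class MyDB(Dialect):
        pass

    # Once installed, Dialect.get_or_raise("mydb") should find MyDB via
    # step 2 above, with no explicit import of my_pkg required.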
@@ -235,7 +282,12 @@ class _Dialect(type):
          klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
          klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)
 
-         if "\\" in klass.tokenizer_class.STRING_ESCAPES:
+         klass.STRINGS_SUPPORT_ESCAPED_SEQUENCES = "\\" in klass.tokenizer_class.STRING_ESCAPES
+         klass.BYTE_STRINGS_SUPPORT_ESCAPED_SEQUENCES = (
+             "\\" in klass.tokenizer_class.BYTE_STRING_ESCAPES
+         )
+
+         if klass.STRINGS_SUPPORT_ESCAPED_SEQUENCES or klass.BYTE_STRINGS_SUPPORT_ESCAPED_SEQUENCES:
              klass.UNESCAPED_SEQUENCES = {
                  **UNESCAPED_SEQUENCES,
                  **klass.UNESCAPED_SEQUENCES,
@@ -650,6 +702,9 @@ class Dialect(metaclass=_Dialect):
      ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True
      """Whether ArrayAgg needs to filter NULL values."""
 
+     ARRAY_FUNCS_PROPAGATES_NULLS = False
+     """Whether Array update functions return NULL when the input array is NULL."""
+
      PROMOTE_TO_INFERRED_DATETIME_TYPE = False
      """
      This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted
@@ -741,6 +796,18 @@ class Dialect(metaclass=_Dialect):
      For example, in BigQuery the default type of the NULL value is INT64.
      """
 
+     LEAST_GREATEST_IGNORES_NULLS = True
+     """
+     Whether LEAST/GREATEST functions ignore NULL values, e.g:
+     - BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
+     - Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
+     """
+
+     PRIORITIZE_NON_LITERAL_TYPES = False
+     """
+     Whether to prioritize non-literal types over literals during type annotation.
+     """
+
      # --- Autofilled ---
 
      tokenizer_class = Tokenizer
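
This flag captures a real engine split, which BigQuery opts out of in its own hunk above (LEAST_GREATEST_IGNORES_NULLS = False). An illustration of the divergence it has to bridge (the transpiled output is not shown here, since it depends on the generator):

    import sqlglot

    # BigQuery: LEAST(1, NULL, 2) evaluates to NULL
    # Postgres: LEAST(1, NULL, 2) evaluates to 1
    print(sqlglot.transpile("SELECT LEAST(1, NULL, 2)", read="bigquery", write="postgres")[0])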
@@ -935,7 +1002,9 @@ class Dialect(metaclass=_Dialect):
 
          result = cls.get(dialect_name.strip())
          if not result:
-             suggest_closest_match_and_fail("dialect", dialect_name, list(DIALECT_MODULE_NAMES))
+             # Include both built-in dialects and any loaded dialects for better error messages
+             all_dialects = set(DIALECT_MODULE_NAMES) | set(cls._classes.keys())
+             suggest_closest_match_and_fail("dialect", dialect_name, all_dialects)
 
          assert result is not None
          return result(**kwargs)
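
Since plugin dialects never appear in DIALECT_MODULE_NAMES, the error path now also consults the runtime registry, so typos against plugin names can get sensible suggestions too. Sketch:

    from sqlglot.dialects.dialect import Dialect

    try:
        Dialect.get_or_raise("duckdbb")  # typo
    except Exception as e:
        print(e)  # should suggest "duckdb" (and plugin names, if any are loaded)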
@@ -1282,6 +1351,138 @@ def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str:
      )
 
 
+ def array_append_sql(
+     name: str, swap_params: bool = False
+ ) -> t.Callable[[Generator, exp.ArrayAppend | exp.ArrayPrepend], str]:
+     """
+     Transpile ARRAY_APPEND/ARRAY_PREPEND between dialects with different NULL propagation semantics.
+
+     Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
+     Others (DuckDB, Postgres) create a new single-element array instead.
+
+     Args:
+         name: Target dialect's function name (e.g., "ARRAY_APPEND", "ARRAY_PREPEND")
+         swap_params: If True, generate (element, array) order instead of (array, element).
+             DuckDB LIST_PREPEND and Postgres ARRAY_PREPEND use (element, array).
+
+     Returns:
+         A callable that generates SQL with appropriate NULL handling for the target dialect.
+         Dialects that propagate NULLs need to set `ARRAY_FUNCS_PROPAGATES_NULLS` to True.
+     """
+
+     def _array_append_sql(self: Generator, expression: exp.ArrayAppend | exp.ArrayPrepend) -> str:
+         this = expression.this
+         element = expression.expression
+         args = [element, this] if swap_params else [this, element]
+         func_sql = self.func(name, *args)
+
+         source_null_propagation = bool(expression.args.get("null_propagation"))
+         target_null_propagation = self.dialect.ARRAY_FUNCS_PROPAGATES_NULLS
+
+         # No transpilation needed when source and target have matching NULL semantics
+         if source_null_propagation == target_null_propagation:
+             return func_sql
+
+         # Source propagates NULLs, target doesn't: wrap in conditional to return NULL explicitly
+         if source_null_propagation:
+             return self.sql(
+                 exp.If(
+                     this=exp.Is(this=this, expression=exp.Null()),
+                     true=exp.Null(),
+                     false=func_sql,
+                 )
+             )
+
+         # Source doesn't propagate NULLs, target does: use COALESCE to convert NULL to empty array
+         this = exp.Coalesce(expressions=[this, exp.Array(expressions=[])])
+         args = [element, this] if swap_params else [this, element]
+         return self.func(name, *args)
+
+     return _array_append_sql
+
+
+ def array_concat_sql(
+     name: str,
+ ) -> t.Callable[[Generator, exp.ArrayConcat], str]:
+     """
+     Transpile ARRAY_CONCAT/ARRAY_CAT between dialects with different NULL propagation semantics.
+
+     Some dialects (Redshift, Snowflake, Spark) return NULL when ANY input array is NULL.
+     Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
+
+     Args:
+         name: Target dialect's function name (e.g., "ARRAY_CAT", "ARRAY_CONCAT", "LIST_CONCAT")
+
+     Returns:
+         A callable that generates SQL with appropriate NULL handling for the target dialect.
+         Dialects that propagate NULLs need to set `ARRAY_FUNCS_PROPAGATES_NULLS` to True.
+     """
+
+     def _build_func_call(self: Generator, func_name: str, args: t.Sequence[exp.Expression]) -> str:
+         """Build ARRAY_CONCAT call from a list of arguments, handling variadic vs binary nesting."""
+         if self.ARRAY_CONCAT_IS_VAR_LEN:
+             return self.func(func_name, *args)
+         elif len(args) == 1:
+             # Single arg gets empty array to preserve semantics
+             return self.func(func_name, args[0], exp.Array(expressions=[]))
+         else:
+             # Snowflake/PostgreSQL/Redshift require binary nesting: ARRAY_CAT(a, ARRAY_CAT(b, c))
+             # Build right-deep tree recursively to avoid creating new ArrayConcat expressions
+             result = self.func(func_name, args[-2], args[-1])
+             for arg in reversed(args[:-2]):
+                 result = f"{func_name}({self.sql(arg)}, {result})"
+             return result
+
+     def _array_concat_sql(self: Generator, expression: exp.ArrayConcat) -> str:
+         this = expression.this
+         exprs = expression.expressions
+         all_args = [this] + exprs
+
+         source_null_propagation = bool(expression.args.get("null_propagation"))
+         target_null_propagation = self.dialect.ARRAY_FUNCS_PROPAGATES_NULLS
+
+         # Skip wrapper when source and target have matching NULL semantics,
+         # or when the first argument is an array literal (which can never be NULL),
+         # or when it's a single-argument call (empty array is added, preserving NULL semantics)
+         if (
+             source_null_propagation == target_null_propagation
+             or isinstance(this, exp.Array)
+             or len(exprs) == 0
+         ):
+             return _build_func_call(self, name, all_args)
+
+         # Case 1: Source propagates NULLs, target doesn't (Snowflake → DuckDB)
+         # Check if ANY argument is NULL and return NULL explicitly
+         if source_null_propagation:
+             # Build OR-chain: a IS NULL OR b IS NULL OR c IS NULL
+             null_checks: t.List[exp.Expression] = [
+                 exp.Is(this=arg.copy(), expression=exp.Null()) for arg in all_args
+             ]
+             combined_check: exp.Expression = reduce(
+                 lambda a, b: exp.Or(this=a, expression=b), null_checks
+             )
+
+             func_sql = _build_func_call(self, name, all_args)
+
+             return self.sql(
+                 exp.If(
+                     this=combined_check,
+                     true=exp.Null(),
+                     false=func_sql,
+                 )
+             )
+
+         # Case 2: Source doesn't propagate NULLs, target does (DuckDB → Snowflake)
+         # Wrap ALL arguments in COALESCE to convert NULL → empty array
+         wrapped_args = [
+             exp.Coalesce(expressions=[arg.copy(), exp.Array(expressions=[])]) for arg in all_args
+         ]
+
+         return _build_func_call(self, name, wrapped_args)
+
+     return _array_concat_sql
+
+
  def var_map_sql(
      self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP"
  ) -> str:
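
Both helpers are factories: a dialect binds its local function name (and argument order) once and gets back a generator method that patches over the NULL-semantics gap. A hypothetical wiring plus transpile check (the TRANSFORMS keys and the exact output are assumptions, not taken from this diff):

    from sqlglot import exp, transpile
    from sqlglot.dialects.dialect import array_append_sql, array_concat_sql

    # e.g. in a dialect's Generator:
    #     TRANSFORMS = {
    #         exp.ArrayAppend: array_append_sql("LIST_APPEND"),
    #         exp.ArrayPrepend: array_append_sql("LIST_PREPEND", swap_params=True),
    #         exp.ArrayConcat: array_concat_sql("LIST_CONCAT"),
    #     }

    # Snowflake propagates NULLs, DuckDB does not, so Case 1 should add a guard:
    print(transpile("SELECT ARRAY_CAT(a, b)", read="snowflake", write="duckdb")[0])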
@@ -1300,6 +1501,59 @@ def var_map_sql(
      return self.func(map_func_name, *args)
 
 
+ def months_between_sql(self: Generator, expression: exp.MonthsBetween) -> str:
+     """
+     Transpile MONTHS_BETWEEN to dialects that don't have native support.
+
+     Snowflake's MONTHS_BETWEEN returns whole months + fractional part where:
+     - Fractional part = (DAY(date1) - DAY(date2)) / 31
+     - Special case: If both dates are last day of month, fractional part = 0
+
+     Formula: DATEDIFF('month', date2, date1) + (DAY(date1) - DAY(date2)) / 31.0
+     """
+     date1 = expression.this
+     date2 = expression.expression
+
+     # Cast to DATE to ensure consistent behavior
+     date1_cast = exp.cast(date1, exp.DataType.Type.DATE, copy=False)
+     date2_cast = exp.cast(date2, exp.DataType.Type.DATE, copy=False)
+
+     # Whole months: DATEDIFF('month', date2, date1)
+     whole_months = exp.DateDiff(this=date1_cast, expression=date2_cast, unit=exp.var("month"))
+
+     # Day components
+     day1 = exp.Day(this=date1_cast.copy())
+     day2 = exp.Day(this=date2_cast.copy())
+
+     # Last day of month components
+     last_day_of_month1 = exp.LastDay(this=date1_cast.copy())
+     last_day_of_month2 = exp.LastDay(this=date2_cast.copy())
+
+     day_of_last_day1 = exp.Day(this=last_day_of_month1)
+     day_of_last_day2 = exp.Day(this=last_day_of_month2)
+
+     # Check if both are last day of month
+     last_day1 = exp.EQ(this=day1.copy(), expression=day_of_last_day1)
+     last_day2 = exp.EQ(this=day2.copy(), expression=day_of_last_day2)
+     both_last_day = exp.And(this=last_day1, expression=last_day2)
+
+     # Fractional part: (DAY(date1) - DAY(date2)) / 31.0
+     fractional = exp.Div(
+         this=exp.Paren(this=exp.Sub(this=day1.copy(), expression=day2.copy())),
+         expression=exp.Literal.number("31.0"),
+     )
+
+     # If both are last day of month, fractional = 0, else calculate fractional
+     fractional_with_check = exp.If(
+         this=both_last_day, true=exp.Literal.number("0"), false=fractional
+     )
+
+     # Final result: whole_months + fractional
+     result = exp.Add(this=whole_months, expression=fractional_with_check)
+
+     return self.sql(result)
+
+
  def build_formatted_time(
      exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None
  ) -> t.Callable[[t.List], E]:
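
To sanity-check the formula, a worked example under the semantics stated in the docstring:

    MONTHS_BETWEEN('2019-03-01', '2019-02-15')
      whole_months = DATEDIFF('month', '2019-02-15', '2019-03-01') = 1
      fractional   = (DAY('2019-03-01') - DAY('2019-02-15')) / 31.0
                   = (1 - 15) / 31.0 ≈ -0.4516
      result       ≈ 0.5484

    MONTHS_BETWEEN('2019-03-31', '2019-02-28')
      both dates are month-ends, so the fractional part is forced to 0
      and the result is exactly the whole-month difference, 1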
@@ -1899,15 +2153,54 @@ def filter_array_using_unnest(
      return self.sql(exp.Array(expressions=[filtered]))
 
 
+ def array_compact_sql(self: Generator, expression: exp.ArrayCompact) -> str:
+     lambda_id = exp.to_identifier("_u")
+     cond = exp.Is(this=lambda_id, expression=exp.null()).not_()
+     return self.sql(
+         exp.ArrayFilter(
+             this=expression.this,
+             expression=exp.Lambda(this=cond, expressions=[lambda_id]),
+         )
+     )
+
+
  def remove_from_array_using_filter(self: Generator, expression: exp.ArrayRemove) -> str:
      lambda_id = exp.to_identifier("_u")
      cond = exp.NEQ(this=lambda_id, expression=expression.expression)
-     return self.sql(
+
+     filter_sql = self.sql(
          exp.ArrayFilter(
-             this=expression.this, expression=exp.Lambda(this=cond, expressions=[lambda_id])
+             this=expression.this,
+             expression=exp.Lambda(this=cond, expressions=[lambda_id]),
          )
      )
 
+     # Handle NULL propagation for ArrayRemove
+     source_null_propagation = bool(expression.args.get("null_propagation"))
+     target_null_propagation = self.dialect.ARRAY_FUNCS_PROPAGATES_NULLS
+
+     # Source propagates NULLs (Snowflake), target doesn't (DuckDB):
+     # When removal value is NULL, return NULL instead of applying filter
+     if source_null_propagation and not target_null_propagation:
+         removal_value = expression.expression
+
+         # Optimization: skip wrapper if removal value is a non-NULL literal
+         # (e.g., 5, 'a', TRUE) or an array literal (e.g., [1, 2])
+         if (
+             isinstance(removal_value, exp.Literal) and not isinstance(removal_value, exp.Null)
+         ) or isinstance(removal_value, exp.Array):
+             return filter_sql
+
+         return self.sql(
+             exp.If(
+                 this=exp.Is(this=removal_value, expression=exp.Null()),
+                 true=exp.Null(),
+                 false=filter_sql,
+             )
+         )
+
+     return filter_sql
+
 
  def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str:
      return self.func(
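
ARRAY_COMPACT lowers to a plain IS NOT NULL filter, while ARRAY_REMOVE only needs the IS NULL guard when the removal value could itself be NULL. An illustrative transpile (output shape assumed, not verified):

    import sqlglot

    # Literal removal values skip the guard entirely; a column reference should
    # trigger the IF(x IS NULL, NULL, <filter>) wrapper described above.
    print(sqlglot.transpile("SELECT ARRAY_REMOVE(arr, x)", read="snowflake", write="duckdb")[0])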
@@ -2036,17 +2329,40 @@ def build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect],
 
 
  def explode_to_unnest_sql(self: Generator, expression: exp.Lateral) -> str:
-     if isinstance(expression.this, exp.Explode):
-         return self.sql(
-             exp.Join(
-                 this=exp.Unnest(
-                     expressions=[expression.this.this],
-                     alias=expression.args.get("alias"),
-                     offset=isinstance(expression.this, exp.Posexplode),
-                 ),
-                 kind="cross",
-             )
+     this = expression.this
+     alias = expression.args.get("alias")
+
+     cross_join_expr: t.Optional[exp.Expression] = None
+     if isinstance(this, exp.Posexplode) and alias:
+         # Spark's `FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, col` has the following semantics:
+         # - The first column is the position and the rest (1 for array, 2 for maps) are the exploded values
+         # - The position is 0-based whereas WITH ORDINALITY is 1-based
+         # For that matter, we must (1) subtract 1 from the ORDINALITY position and (2) rearrange the columns accordingly, returning:
+         # `FROM x CROSS JOIN LATERAL (SELECT pos - 1 AS pos, col FROM UNNEST(y) WITH ORDINALITY AS t(col, pos))`
+         pos, cols = alias.columns[0], alias.columns[1:]
+
+         cols = ensure_list(cols)
+         lateral_subquery = exp.select(
+             exp.alias_(pos - 1, pos),
+             *cols,
+         ).from_(
+             exp.Unnest(
+                 expressions=[this.this],
+                 offset=True,
+                 alias=exp.TableAlias(this=alias.this, columns=[*cols, pos]),
+             ),
+         )
+
+         cross_join_expr = exp.Lateral(this=lateral_subquery.subquery())
+     elif isinstance(this, exp.Explode):
+         cross_join_expr = exp.Unnest(
+             expressions=[this.this],
+             alias=alias,
          )
+
+     if cross_join_expr:
+         return self.sql(exp.Join(this=cross_join_expr, kind="cross"))
+
      return self.lateral_sql(expression)
 
 
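The POSEXPLODE path rewrites Spark's 0-based pos column on top of 1-based WITH ORDINALITY, as the inline comment spells out. An illustrative transpile, with the expected shape taken from that comment rather than from test output:

    import sqlglot

    sql = "SELECT * FROM x LATERAL VIEW POSEXPLODE(y) t AS pos, col"
    print(sqlglot.transpile(sql, read="spark", write="presto")[0])
    # Roughly: SELECT * FROM x CROSS JOIN LATERAL (
    #   SELECT pos - 1 AS pos, col FROM UNNEST(y) WITH ORDINALITY AS t(col, pos))
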
@@ -2154,3 +2470,24 @@ def regexp_replace_global_modifier(expression: exp.RegexpReplace) -> exp.Express
          modifiers = exp.Literal.string(value + "g")
 
      return modifiers
+
+
+ def getbit_sql(self: Generator, expression: exp.Getbit) -> str:
+     """
+     Generates SQL for Getbit according to DuckDB and Postgres, transpiling it if either:
+
+     1. The zero index corresponds to the least-significant bit
+     2. The input type is an integer value
+     """
+     value = expression.this
+     position = expression.expression
+
+     if not expression.args.get("zero_is_msb") and expression.is_type(
+         *exp.DataType.SIGNED_INTEGER_TYPES, *exp.DataType.UNSIGNED_INTEGER_TYPES
+     ):
+         # Use bitwise operations: (value >> position) & 1
+         shifted = exp.BitwiseRightShift(this=value, expression=position)
+         masked = exp.BitwiseAnd(this=shifted, expression=exp.Literal.number(1))
+         return self.sql(masked)
+
+     return self.func("GET_BIT", value, position)
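
A worked example of the bitwise fallback: with a 0-based least-significant-bit index, GETBIT(11, 2) becomes (11 >> 2) & 1. Since 11 is 1011 in binary, bit 2 is 0, and the rewrite agrees:

    value, position = 11, 2   # 11 == 0b1011
    assert (value >> position) & 1 == 0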