sqlglot 27.29.0__py3-none-any.whl → 28.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. sqlglot/__main__.py +6 -4
  2. sqlglot/_version.py +2 -2
  3. sqlglot/dialects/bigquery.py +116 -295
  4. sqlglot/dialects/clickhouse.py +67 -2
  5. sqlglot/dialects/databricks.py +38 -1
  6. sqlglot/dialects/dialect.py +327 -286
  7. sqlglot/dialects/dremio.py +4 -1
  8. sqlglot/dialects/duckdb.py +718 -22
  9. sqlglot/dialects/exasol.py +243 -10
  10. sqlglot/dialects/hive.py +8 -8
  11. sqlglot/dialects/mysql.py +11 -2
  12. sqlglot/dialects/oracle.py +29 -0
  13. sqlglot/dialects/postgres.py +46 -24
  14. sqlglot/dialects/presto.py +47 -16
  15. sqlglot/dialects/redshift.py +16 -0
  16. sqlglot/dialects/risingwave.py +3 -0
  17. sqlglot/dialects/singlestore.py +12 -3
  18. sqlglot/dialects/snowflake.py +199 -271
  19. sqlglot/dialects/spark.py +2 -2
  20. sqlglot/dialects/spark2.py +11 -48
  21. sqlglot/dialects/sqlite.py +9 -0
  22. sqlglot/dialects/teradata.py +5 -8
  23. sqlglot/dialects/trino.py +6 -0
  24. sqlglot/dialects/tsql.py +61 -25
  25. sqlglot/diff.py +4 -2
  26. sqlglot/errors.py +69 -0
  27. sqlglot/expressions.py +484 -84
  28. sqlglot/generator.py +143 -41
  29. sqlglot/helper.py +2 -2
  30. sqlglot/optimizer/annotate_types.py +247 -140
  31. sqlglot/optimizer/canonicalize.py +6 -1
  32. sqlglot/optimizer/eliminate_joins.py +1 -1
  33. sqlglot/optimizer/eliminate_subqueries.py +2 -2
  34. sqlglot/optimizer/merge_subqueries.py +5 -5
  35. sqlglot/optimizer/normalize.py +20 -13
  36. sqlglot/optimizer/normalize_identifiers.py +17 -3
  37. sqlglot/optimizer/optimizer.py +4 -0
  38. sqlglot/optimizer/pushdown_predicates.py +1 -1
  39. sqlglot/optimizer/qualify.py +14 -6
  40. sqlglot/optimizer/qualify_columns.py +113 -352
  41. sqlglot/optimizer/qualify_tables.py +112 -70
  42. sqlglot/optimizer/resolver.py +374 -0
  43. sqlglot/optimizer/scope.py +27 -16
  44. sqlglot/optimizer/simplify.py +1074 -964
  45. sqlglot/optimizer/unnest_subqueries.py +12 -2
  46. sqlglot/parser.py +276 -160
  47. sqlglot/planner.py +2 -2
  48. sqlglot/schema.py +15 -4
  49. sqlglot/tokens.py +42 -7
  50. sqlglot/transforms.py +77 -22
  51. sqlglot/typing/__init__.py +316 -0
  52. sqlglot/typing/bigquery.py +376 -0
  53. sqlglot/typing/hive.py +12 -0
  54. sqlglot/typing/presto.py +24 -0
  55. sqlglot/typing/snowflake.py +505 -0
  56. sqlglot/typing/spark2.py +58 -0
  57. sqlglot/typing/tsql.py +9 -0
  58. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/METADATA +2 -2
  59. sqlglot-28.4.0.dist-info/RECORD +92 -0
  60. sqlglot-27.29.0.dist-info/RECORD +0 -84
  61. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/WHEEL +0 -0
  62. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/licenses/LICENSE +0 -0
  63. {sqlglot-27.29.0.dist-info → sqlglot-28.4.0.dist-info}/top_level.txt +0 -0
sqlglot/optimizer/qualify_columns.py
@@ -5,9 +5,10 @@ import typing as t
 
 from sqlglot import alias, exp
 from sqlglot.dialects.dialect import Dialect, DialectType
-from sqlglot.errors import OptimizeError
-from sqlglot.helper import seq_get, SingleValuedMapping
+from sqlglot.errors import OptimizeError, highlight_sql
+from sqlglot.helper import seq_get
 from sqlglot.optimizer.annotate_types import TypeAnnotator
+from sqlglot.optimizer.resolver import Resolver
 from sqlglot.optimizer.scope import Scope, build_scope, traverse_scope, walk_in_scope
 from sqlglot.optimizer.simplify import simplify_parens
 from sqlglot.schema import Schema, ensure_schema
@@ -54,22 +55,17 @@ def qualify_columns(
     schema = ensure_schema(schema, dialect=dialect)
     annotator = TypeAnnotator(schema)
     infer_schema = schema.empty if infer_schema is None else infer_schema
-    dialect = Dialect.get_or_raise(schema.dialect)
+    dialect = schema.dialect or Dialect()
     pseudocolumns = dialect.PSEUDOCOLUMNS
-    bigquery = dialect == "bigquery"
 
     for scope in traverse_scope(expression):
+        if dialect.PREFER_CTE_ALIAS_COLUMN:
+            pushdown_cte_alias_columns(scope)
+
         scope_expression = scope.expression
         is_select = isinstance(scope_expression, exp.Select)
 
-        if is_select and scope_expression.args.get("connect"):
-            # In Snowflake / Oracle queries that have a CONNECT BY clause, one can use the LEVEL
-            # pseudocolumn, which doesn't belong to a table, so we change it into an identifier
-            scope_expression.transform(
-                lambda n: n.this if isinstance(n, exp.Column) and n.name == "LEVEL" else n,
-                copy=False,
-            )
-            scope.clear_cache()
+        _separate_pseudocolumns(scope, pseudocolumns)
 
         resolver = Resolver(scope, schema, infer_schema=infer_schema)
         _pop_table_column_aliases(scope.ctes)
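
Note: CTE alias-column pushdown now runs per scope, gated by the new `PREFER_CTE_ALIAS_COLUMN` dialect flag (Snowflake sets it, per the `pushdown_cte_alias_columns` docstring further down). A minimal sketch of the observable effect, reusing the doctest that this release removes from that function (the `qualify` wiring is an assumption):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y (c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    qualified = qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake")
    # Expected shape, per the removed doctest: the CTE alias column "c" is pushed
    # into the projection, i.e. ... (SELECT SUM(a) AS c ... HAVING c > 0) ...
    print(qualified.sql("snowflake"))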
@@ -81,11 +77,15 @@ def qualify_columns(
                 scope,
                 resolver,
                 dialect,
-                expand_only_groupby=bigquery,
+                expand_only_groupby=dialect.EXPAND_ONLY_GROUP_ALIAS_REF,
             )
 
         _convert_columns_to_dots(scope, resolver)
-        _qualify_columns(scope, resolver, allow_partial_qualification=allow_partial_qualification)
+        _qualify_columns(
+            scope,
+            resolver,
+            allow_partial_qualification=allow_partial_qualification,
+        )
 
         if not schema.empty and expand_alias_refs:
             _expand_alias_refs(scope, resolver, dialect)
@@ -107,13 +107,13 @@ def qualify_columns(
         # https://www.postgresql.org/docs/current/sql-select.html#SQL-DISTINCT
         _expand_order_by_and_distinct_on(scope, resolver)
 
-        if bigquery:
+        if dialect.ANNOTATE_ALL_SCOPES:
             annotator.annotate_scope(scope)
 
     return expression
 
 
-def validate_qualify_columns(expression: E) -> E:
+def validate_qualify_columns(expression: E, sql: t.Optional[str] = None) -> E:
     """Raise an `OptimizeError` if any columns aren't qualified"""
     all_unqualified_columns = []
     for scope in traverse_scope(expression):
@@ -123,7 +123,19 @@ def validate_qualify_columns(expression: E) -> E:
         if scope.external_columns and not scope.is_correlated_subquery and not scope.pivots:
             column = scope.external_columns[0]
             for_table = f" for table: '{column.table}'" if column.table else ""
-            raise OptimizeError(f"Column '{column}' could not be resolved{for_table}")
+            line = column.this.meta.get("line")
+            col = column.this.meta.get("col")
+            start = column.this.meta.get("start")
+            end = column.this.meta.get("end")
+
+            error_msg = f"Column '{column.name}' could not be resolved{for_table}."
+            if line and col:
+                error_msg += f" Line: {line}, Col: {col}"
+            if sql and start is not None and end is not None:
+                formatted_sql = highlight_sql(sql, [(start, end)])[0]
+                error_msg += f"\n  {formatted_sql}"
+
+            raise OptimizeError(error_msg)
 
         if unqualified_columns and scope.pivots and scope.pivots[0].unpivot:
             # New columns produced by the UNPIVOT can't be qualified, but there may be columns
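
Note: `validate_qualify_columns` gains an optional `sql` argument. When the original SQL text is supplied and the column's identifier carries `line`/`col`/`start`/`end` positions in its `meta`, the error (both this path and the "ambiguous column" path in the next hunk) includes the location and a snippet rendered by the new `sqlglot.errors.highlight_sql` helper. A minimal sketch, assuming validation is disabled inside `qualify` so the standalone call can receive the raw SQL:

    import sqlglot
    from sqlglot.errors import OptimizeError
    from sqlglot.optimizer.qualify import qualify
    from sqlglot.optimizer.qualify_columns import validate_qualify_columns

    sql = "SELECT x FROM a CROSS JOIN b"  # ambiguous: both a and b have column x
    expression = qualify(
        sqlglot.parse_one(sql),
        schema={"a": {"x": "int"}, "b": {"x": "int"}},
        validate_qualify_columns=False,
    )
    try:
        validate_qualify_columns(expression, sql=sql)
    except OptimizeError as e:
        print(e)  # e.g. "Ambiguous column 'x' (Line: 1, Col: ...)" plus the highlighted SQL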
@@ -135,11 +147,46 @@ def validate_qualify_columns(expression: E) -> E:
         all_unqualified_columns.extend(unqualified_columns)
 
     if all_unqualified_columns:
-        raise OptimizeError(f"Ambiguous columns: {all_unqualified_columns}")
+        first_column = all_unqualified_columns[0]
+        line = first_column.this.meta.get("line")
+        col = first_column.this.meta.get("col")
+        start = first_column.this.meta.get("start")
+        end = first_column.this.meta.get("end")
+
+        error_msg = f"Ambiguous column '{first_column.name}'"
+        if line and col:
+            error_msg += f" (Line: {line}, Col: {col})"
+        if sql and start is not None and end is not None:
+            formatted_sql = highlight_sql(sql, [(start, end)])[0]
+            error_msg += f"\n  {formatted_sql}"
+
+        raise OptimizeError(error_msg)
 
     return expression
 
 
+def _separate_pseudocolumns(scope: Scope, pseudocolumns: t.Set[str]) -> None:
+    if not pseudocolumns:
+        return
+
+    has_pseudocolumns = False
+    scope_expression = scope.expression
+
+    for column in scope.columns:
+        name = column.name.upper()
+        if name not in pseudocolumns:
+            continue
+
+        if name != "LEVEL" or (
+            isinstance(scope_expression, exp.Select) and scope_expression.args.get("connect")
+        ):
+            column.replace(exp.Pseudocolumn(**column.args))
+            has_pseudocolumns = True
+
+    if has_pseudocolumns:
+        scope.clear_cache()
+
+
 def _unpivot_columns(unpivot: exp.Pivot) -> t.Iterator[exp.Column]:
     name_columns = [
         field.this
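
Note: pseudocolumn handling is now centralized in `_separate_pseudocolumns`: any column whose name appears in `dialect.PSEUDOCOLUMNS` becomes an `exp.Pseudocolumn` node (with `LEVEL` only treated that way under a `CONNECT BY` clause), replacing the old Snowflake/Oracle-specific rewrite of `LEVEL` into a bare identifier. A rough sketch of what this means downstream (assumes `exp.Pseudocolumn` is exposed in 28.x, as this diff indicates):

    import sqlglot
    from sqlglot import exp
    from sqlglot.optimizer.qualify import qualify

    ast = qualify(
        sqlglot.parse_one("SELECT LEVEL FROM t CONNECT BY PRIOR id = parent_id", read="oracle"),
        schema={"t": {"id": "int", "parent_id": "int"}},
        dialect="oracle",
    )
    # LEVEL survives qualification as a Pseudocolumn instead of a plain identifier
    print(any(isinstance(node, exp.Pseudocolumn) for node in ast.walk()))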
@@ -274,12 +321,11 @@ def _expand_alias_refs(
     """
     expression = scope.expression
 
-    if not isinstance(expression, exp.Select) or dialect == "oracle":
+    if not isinstance(expression, exp.Select) or dialect.DISABLES_ALIAS_REF_EXPANSION:
        return
 
     alias_to_expression: t.Dict[str, t.Tuple[exp.Expression, int]] = {}
     projections = {s.alias_or_name for s in expression.selects}
-    is_bigquery = dialect == "bigquery"
     replaced = False
 
     def replace_columns(
@@ -317,12 +363,12 @@ def _expand_alias_refs(
             # SELECT x.a, max(x.b) as x FROM x GROUP BY 1 HAVING x > 1;
             # If "HAVING x" is expanded to "HAVING max(x.b)", BQ would blindly replace the "x" reference with the projection MAX(x.b)
             # i.e HAVING MAX(MAX(x.b).b), resulting in the error: "Aggregations of aggregations are not allowed"
-            if is_having and is_bigquery:
+            if is_having and dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES:
                 skip_replace = skip_replace or any(
                     node.parts[0].name in projections
                     for node in alias_expr.find_all(exp.Column)
                 )
-            elif is_bigquery and (is_group_by or is_having):
+            elif dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES and (is_group_by or is_having):
                 column_table = table.name if table else column.table
                 if column_table in projections:
                     # BigQuery's GROUP BY and HAVING clauses get confused if the column name
@@ -375,9 +421,7 @@ def _expand_alias_refs(
     replace_columns(expression.args.get("having"), resolve_table=True)
     replace_columns(expression.args.get("qualify"), resolve_table=True)
 
-    # Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
-    # https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
-    if dialect == "snowflake":
+    if dialect.SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS:
         for join in expression.args.get("joins") or []:
             replace_columns(join)
 
@@ -385,7 +429,7 @@
         scope.clear_cache()
 
 
-def _expand_group_by(scope: Scope, dialect: DialectType) -> None:
+def _expand_group_by(scope: Scope, dialect: Dialect) -> None:
     expression = scope.expression
     group = expression.args.get("group")
     if not group:
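
Note: a recurring theme in this release is visible in the hunks above: internal helpers now take a concrete `Dialect` instance instead of `DialectType`, and behavior that used to be keyed on dialect names is keyed on class-level capability flags. A hypothetical dialect illustrating the flags (the flag names are real per this diff; the subclass is for illustration only):

    from sqlglot.dialects.dialect import Dialect

    class MyDialect(Dialect):  # hypothetical
        DISABLES_ALIAS_REF_EXPANSION = True  # was: dialect == "oracle"
        SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True  # was: dialect == "snowflake"
        PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True  # was: dialect == "bigquery"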
@@ -411,7 +455,7 @@ def _expand_order_by_and_distinct_on(scope: Scope, resolver: Resolver) -> None:
     for original, expanded in zip(
         modifier_expressions,
         _expand_positional_references(
-            scope, modifier_expressions, resolver.schema.dialect, alias=True
+            scope, modifier_expressions, resolver.dialect, alias=True
         ),
     ):
         for agg in original.find_all(exp.AggFunc):
@@ -433,7 +477,7 @@ def _expand_order_by_and_distinct_on(scope: Scope, resolver: Resolver) -> None:
 
 
 def _expand_positional_references(
-    scope: Scope, expressions: t.Iterable[exp.Expression], dialect: DialectType, alias: bool = False
+    scope: Scope, expressions: t.Iterable[exp.Expression], dialect: Dialect, alias: bool = False
 ) -> t.List[exp.Expression]:
     new_nodes: t.List[exp.Expression] = []
     ambiguous_projections = None
@@ -447,7 +491,7 @@ def _expand_positional_references(
         else:
             select = select.this
 
-        if dialect == "bigquery":
+        if dialect.PROJECTION_ALIASES_SHADOW_SOURCE_NAMES:
             if ambiguous_projections is None:
                 # When a projection name is also a source name and it is referenced in the
                 # GROUP BY clause, BQ can't understand what the identifier corresponds to
@@ -488,10 +532,10 @@ def _select_by_pos(scope: Scope, node: exp.Literal) -> exp.Alias:
 
 def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None:
     """
-    Converts `Column` instances that represent struct field lookup into chained `Dots`.
+    Converts `Column` instances that represent STRUCT or JSON field lookup into chained `Dots`.
 
-    Struct field lookups look like columns (e.g. "struct"."field"), but they need to be
-    qualified separately and represented as Dot(Dot(...(<table>.<column>, field1), field2, ...)).
+    These lookups may be parsed as columns (e.g. "col"."field"."field2"), but they need to be
+    normalized to `Dot(Dot(...(<table>.<column>, field1), field2, ...))` to be qualified properly.
     """
     converted = False
     for column in itertools.chain(scope.columns, scope.stars):
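
Note: a sketch of the normalization this docstring describes, assuming a BigQuery STRUCT column (exact output depends on identifier quoting settings):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    ast = qualify(
        sqlglot.parse_one("SELECT col.field FROM t", read="bigquery"),
        schema={"t": {"col": "STRUCT<field INT64>"}},
        dialect="bigquery",
    )
    # "col"."field" was parsed as a Column, but is now Dot(Column(t.col), field),
    # so it renders roughly as: SELECT t.col.field AS field FROM t AS t
    print(ast.sql("bigquery"))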
@@ -499,6 +543,7 @@ def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None:
             continue
 
         column_table: t.Optional[str | exp.Identifier] = column.table
+        dot_parts = column.meta.pop("dot_parts", [])
         if (
             column_table
             and column_table not in scope.sources
@@ -514,12 +559,20 @@ def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None:
                 # The struct is already qualified, but we still need to change the AST
                 column_table = root
                 root, *parts = parts
+                was_qualified = True
             else:
                 column_table = resolver.get_table(root.name)
+                was_qualified = False
 
             if column_table:
                 converted = True
-                column.replace(exp.Dot.build([exp.column(root, table=column_table), *parts]))
+                new_column = exp.column(root, table=column_table)
+
+                if dot_parts:
+                    # Remove the actual column parts from the rest of dot parts
+                    new_column.meta["dot_parts"] = dot_parts[2 if was_qualified else 1 :]
+
+                column.replace(exp.Dot.build([new_column, *parts]))
 
     if converted:
         # We want to re-aggregate the converted columns, otherwise they'd be skipped in
@@ -527,7 +580,11 @@ def _convert_columns_to_dots(scope: Scope, resolver: Resolver) -> None:
         scope.clear_cache()
 
 
-def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualification: bool) -> None:
+def _qualify_columns(
+    scope: Scope,
+    resolver: Resolver,
+    allow_partial_qualification: bool,
+) -> None:
     """Disambiguate columns, ensuring each column specifies a source"""
     for column in scope.columns:
         column_table = column.table
@@ -556,11 +613,11 @@ def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualificati
         if column_table:
             column.set("table", column_table)
         elif (
-            resolver.schema.dialect == "bigquery"
+            resolver.dialect.TABLES_REFERENCEABLE_AS_COLUMNS
             and len(column.parts) == 1
             and column_name in scope.selected_sources
         ):
-            # BigQuery allows tables to be referenced as columns, treating them as structs
+            # BigQuery and Postgres allow tables to be referenced as columns, treating them as structs/records
             scope.replace(column, exp.TableColumn(this=column.this))
 
     for pivot in scope.pivots:
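
Note: table-as-column references are now flag-gated too; per the updated comment this covers Postgres records in addition to BigQuery structs. A rough sketch (assumes `TABLES_REFERENCEABLE_AS_COLUMNS` is set on the BigQuery dialect):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    ast = qualify(
        sqlglot.parse_one("SELECT t FROM t", read="bigquery"),
        schema={"t": {"a": "int"}},
        dialect="bigquery",
    )
    # The bare "t" projection should be an exp.TableColumn node, not an exp.Column
    print(type(ast.selects[0]).__name__)  # expected: TableColumn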
@@ -571,7 +628,7 @@ def _qualify_columns(scope: Scope, resolver: Resolver, allow_partial_qualificati
             column.set("table", column_table)
 
 
-def _expand_struct_stars_bigquery(
+def _expand_struct_stars_no_parens(
     expression: exp.Dot,
 ) -> t.List[exp.Alias]:
     """[BigQuery] Expand/Flatten foo.bar.* where bar is a struct column"""
@@ -625,7 +682,7 @@ def _expand_struct_stars_no_parens(
     return new_selections
 
 
-def _expand_struct_stars_risingwave(expression: exp.Dot) -> t.List[exp.Alias]:
+def _expand_struct_stars_with_parens(expression: exp.Dot) -> t.List[exp.Alias]:
     """[RisingWave] Expand/Flatten (<exp>.bar).*, where bar is a struct column"""
 
     # it is not (<sub_exp>).* pattern, which means we can't expand
@@ -702,7 +759,7 @@ def _expand_stars(
     rename_columns: t.Dict[int, t.Dict[str, str]] = {}
 
     coalesced_columns = set()
-    dialect = resolver.schema.dialect
+    dialect = resolver.dialect
 
     pivot_output_columns = None
     pivot_exclude_columns: t.Set[str] = set()
@@ -725,10 +782,9 @@ def _expand_stars(
         if not pivot_output_columns:
             pivot_output_columns = [c.alias_or_name for c in pivot.expressions]
 
-    is_bigquery = dialect == "bigquery"
-    is_risingwave = dialect == "risingwave"
-
-    if (is_bigquery or is_risingwave) and any(isinstance(col, exp.Dot) for col in scope.stars):
+    if dialect.SUPPORTS_STRUCT_STAR_EXPANSION and any(
+        isinstance(col, exp.Dot) for col in scope.stars
+    ):
         # Found struct expansion, annotate scope ahead of time
         annotator.annotate_scope(scope)
@@ -745,13 +801,16 @@ def _expand_stars(
                 _add_except_columns(expression.this, tables, except_columns)
                 _add_replace_columns(expression.this, tables, replace_columns)
                 _add_rename_columns(expression.this, tables, rename_columns)
-            elif is_bigquery:
-                struct_fields = _expand_struct_stars_bigquery(expression)
+            elif (
+                dialect.SUPPORTS_STRUCT_STAR_EXPANSION
+                and not dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS
+            ):
+                struct_fields = _expand_struct_stars_no_parens(expression)
                 if struct_fields:
                     new_selections.extend(struct_fields)
                     continue
-            elif is_risingwave:
-                struct_fields = _expand_struct_stars_risingwave(expression)
+            elif dialect.REQUIRES_PARENTHESIZED_STRUCT_ACCESS:
+                struct_fields = _expand_struct_stars_with_parens(expression)
                 if struct_fields:
                     new_selections.extend(struct_fields)
                     continue
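
Note: the struct-star helpers are renamed by access style rather than by dialect, and selection is flag-driven: `SUPPORTS_STRUCT_STAR_EXPANSION` alone picks the unparenthesized `foo.bar.*` form (BigQuery), while `REQUIRES_PARENTHESIZED_STRUCT_ACCESS` picks the `(<expr>.bar).*` form (RisingWave, which presumably sets both flags). A rough sketch of the parenthesized case:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    ast = qualify(
        sqlglot.parse_one("SELECT (t.col).* FROM t", read="risingwave"),
        schema={"t": {"col": "STRUCT<a INT, b INT>"}},
        dialect="risingwave",
    )
    # roughly: SELECT (t.col).a AS a, (t.col).b AS b FROM t AS t
    print(ast.sql("risingwave"))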
@@ -767,7 +826,7 @@ def _expand_stars(
             columns = resolver.get_source_columns(table, only_visible=True)
             columns = columns or scope.outer_columns
 
-            if pseudocolumns:
+            if pseudocolumns and dialect.EXCLUDES_PSEUDOCOLUMNS_FROM_STAR:
                 columns = [name for name in columns if name.upper() not in pseudocolumns]
 
            if not columns or "*" in columns:
@@ -821,7 +880,7 @@ def _expand_stars(
 def _add_except_columns(
     expression: exp.Expression, tables, except_columns: t.Dict[int, t.Set[str]]
 ) -> None:
-    except_ = expression.args.get("except")
+    except_ = expression.args.get("except_")
 
     if not except_:
         return
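
Note: the arg key for the star EXCEPT list changed from "except" to "except_". Code that inspected these args directly needs updating; a minimal sketch, assuming the key was renamed on `exp.Star` as this hunk suggests:

    import sqlglot

    star = sqlglot.parse_one("SELECT * EXCEPT (a) FROM t", read="bigquery").selects[0]
    print(star.args.get("except_"))  # was star.args.get("except") in 27.x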
@@ -901,320 +960,22 @@ def quote_identifiers(expression: E, dialect: DialectType = None, identify: bool
     )  # type: ignore
 
 
-def pushdown_cte_alias_columns(expression: exp.Expression) -> exp.Expression:
+def pushdown_cte_alias_columns(scope: Scope) -> None:
     """
     Pushes down the CTE alias columns into the projection,
 
     This step is useful in Snowflake where the CTE alias columns can be referenced in the HAVING.
 
-    Example:
-        >>> import sqlglot
-        >>> expression = sqlglot.parse_one("WITH y (c) AS (SELECT SUM(a) FROM ( SELECT 1 a ) AS x HAVING c > 0) SELECT c FROM y")
-        >>> pushdown_cte_alias_columns(expression).sql()
-        'WITH y(c) AS (SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0) SELECT c FROM y'
-
     Args:
-        expression: Expression to pushdown.
-
-    Returns:
-        The expression with the CTE aliases pushed down into the projection.
+        scope: Scope to find ctes to pushdown aliases.
     """
-    for cte in expression.find_all(exp.CTE):
-        if cte.alias_column_names:
+    for cte in scope.ctes:
+        if cte.alias_column_names and isinstance(cte.this, exp.Select):
             new_expressions = []
             for _alias, projection in zip(cte.alias_column_names, cte.this.expressions):
                 if isinstance(projection, exp.Alias):
-                    projection.set("alias", _alias)
+                    projection.set("alias", exp.to_identifier(_alias))
                 else:
                     projection = alias(projection, alias=_alias)
                 new_expressions.append(projection)
             cte.this.set("expressions", new_expressions)
-
-    return expression
-
-
-class Resolver:
-    """
-    Helper for resolving columns.
-
-    This is a class so we can lazily load some things and easily share them across functions.
-    """
-
-    def __init__(self, scope: Scope, schema: Schema, infer_schema: bool = True):
-        self.scope = scope
-        self.schema = schema
-        self._source_columns: t.Optional[t.Dict[str, t.Sequence[str]]] = None
-        self._unambiguous_columns: t.Optional[t.Mapping[str, str]] = None
-        self._all_columns: t.Optional[t.Set[str]] = None
-        self._infer_schema = infer_schema
-        self._get_source_columns_cache: t.Dict[t.Tuple[str, bool], t.Sequence[str]] = {}
-
-    def get_table(self, column: str | exp.Column) -> t.Optional[exp.Identifier]:
-        """
-        Get the table for a column name.
-
-        Args:
-            column: The column expression (or column name) to find the table for.
-        Returns:
-            The table name if it can be found/inferred.
-        """
-        column_name = column if isinstance(column, str) else column.name
-
-        table_name = self._get_table_name_from_sources(column_name)
-
-        if not table_name and isinstance(column, exp.Column):
-            # Fall-back case: If we couldn't find the `table_name` from ALL of the sources,
-            # attempt to disambiguate the column based on other characteristics e.g if this column is in a join condition,
-            # we may be able to disambiguate based on the source order.
-            if join_context := self._get_column_join_context(column):
-                # In this case, the return value will be the join that _may_ be able to disambiguate the column
-                # and we can use the source columns available at that join to get the table name
-                table_name = self._get_table_name_from_sources(
-                    column_name, self._get_available_source_columns(join_context)
-                )
-
-        if not table_name and self._infer_schema:
-            sources_without_schema = tuple(
-                source
-                for source, columns in self._get_all_source_columns().items()
-                if not columns or "*" in columns
-            )
-            if len(sources_without_schema) == 1:
-                table_name = sources_without_schema[0]
-
-        if table_name not in self.scope.selected_sources:
-            return exp.to_identifier(table_name)
-
-        node, _ = self.scope.selected_sources.get(table_name)
-
-        if isinstance(node, exp.Query):
-            while node and node.alias != table_name:
-                node = node.parent
-
-            node_alias = node.args.get("alias")
-            if node_alias:
-                return exp.to_identifier(node_alias.this)
-
-        return exp.to_identifier(table_name)
-
-    @property
-    def all_columns(self) -> t.Set[str]:
-        """All available columns of all sources in this scope"""
-        if self._all_columns is None:
-            self._all_columns = {
-                column for columns in self._get_all_source_columns().values() for column in columns
-            }
-        return self._all_columns
-
-    def get_source_columns_from_set_op(self, expression: exp.Expression) -> t.List[str]:
-        if isinstance(expression, exp.Select):
-            return expression.named_selects
-        if isinstance(expression, exp.Subquery) and isinstance(expression.this, exp.SetOperation):
-            # Different types of SET modifiers can be chained together if they're explicitly grouped by nesting
-            return self.get_source_columns_from_set_op(expression.this)
-        if not isinstance(expression, exp.SetOperation):
-            raise OptimizeError(f"Unknown set operation: {expression}")
-
-        set_op = expression
-
-        # BigQuery specific set operations modifiers, e.g INNER UNION ALL BY NAME
-        on_column_list = set_op.args.get("on")
-
-        if on_column_list:
-            # The resulting columns are the columns in the ON clause:
-            # {INNER | LEFT | FULL} UNION ALL BY NAME ON (col1, col2, ...)
-            columns = [col.name for col in on_column_list]
-        elif set_op.side or set_op.kind:
-            side = set_op.side
-            kind = set_op.kind
-
-            # Visit the children UNIONs (if any) in a post-order traversal
-            left = self.get_source_columns_from_set_op(set_op.left)
-            right = self.get_source_columns_from_set_op(set_op.right)
-
-            # We use dict.fromkeys to deduplicate keys and maintain insertion order
-            if side == "LEFT":
-                columns = left
-            elif side == "FULL":
-                columns = list(dict.fromkeys(left + right))
-            elif kind == "INNER":
-                columns = list(dict.fromkeys(left).keys() & dict.fromkeys(right).keys())
-        else:
-            columns = set_op.named_selects
-
-        return columns
-
-    def get_source_columns(self, name: str, only_visible: bool = False) -> t.Sequence[str]:
-        """Resolve the source columns for a given source `name`."""
-        cache_key = (name, only_visible)
-        if cache_key not in self._get_source_columns_cache:
-            if name not in self.scope.sources:
-                raise OptimizeError(f"Unknown table: {name}")
-
-            source = self.scope.sources[name]
-
-            if isinstance(source, exp.Table):
-                columns = self.schema.column_names(source, only_visible)
-            elif isinstance(source, Scope) and isinstance(
-                source.expression, (exp.Values, exp.Unnest)
-            ):
-                columns = source.expression.named_selects
-
-                # in bigquery, unnest structs are automatically scoped as tables, so you can
-                # directly select a struct field in a query.
-                # this handles the case where the unnest is statically defined.
-                if self.schema.dialect == "bigquery":
-                    if source.expression.is_type(exp.DataType.Type.STRUCT):
-                        for k in source.expression.type.expressions:  # type: ignore
-                            columns.append(k.name)
-            elif isinstance(source, Scope) and isinstance(source.expression, exp.SetOperation):
-                columns = self.get_source_columns_from_set_op(source.expression)
-
-            else:
-                select = seq_get(source.expression.selects, 0)
-
-                if isinstance(select, exp.QueryTransform):
-                    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-qry-select-transform.html
-                    schema = select.args.get("schema")
-                    columns = [c.name for c in schema.expressions] if schema else ["key", "value"]
-                else:
-                    columns = source.expression.named_selects
-
-            node, _ = self.scope.selected_sources.get(name) or (None, None)
-            if isinstance(node, Scope):
-                column_aliases = node.expression.alias_column_names
-            elif isinstance(node, exp.Expression):
-                column_aliases = node.alias_column_names
-            else:
-                column_aliases = []
-
-            if column_aliases:
-                # If the source's columns are aliased, their aliases shadow the corresponding column names.
-                # This can be expensive if there are lots of columns, so only do this if column_aliases exist.
-                columns = [
-                    alias or name
-                    for (name, alias) in itertools.zip_longest(columns, column_aliases)
-                ]
-
-            self._get_source_columns_cache[cache_key] = columns
-
-        return self._get_source_columns_cache[cache_key]
-
-    def _get_all_source_columns(self) -> t.Dict[str, t.Sequence[str]]:
-        if self._source_columns is None:
-            self._source_columns = {
-                source_name: self.get_source_columns(source_name)
-                for source_name, source in itertools.chain(
-                    self.scope.selected_sources.items(), self.scope.lateral_sources.items()
-                )
-            }
-        return self._source_columns
-
-    def _get_table_name_from_sources(
-        self, column_name: str, source_columns: t.Optional[t.Dict[str, t.Sequence[str]]] = None
-    ) -> t.Optional[str]:
-        if not source_columns:
-            # If not supplied, get all sources to calculate unambiguous columns
-            if self._unambiguous_columns is None:
-                self._unambiguous_columns = self._get_unambiguous_columns(
-                    self._get_all_source_columns()
-                )
-
-            unambiguous_columns = self._unambiguous_columns
-        else:
-            unambiguous_columns = self._get_unambiguous_columns(source_columns)
-
-        return unambiguous_columns.get(column_name)
-
-    def _get_column_join_context(self, column: exp.Column) -> t.Optional[exp.Join]:
-        """
-        Check if a column participating in a join can be qualified based on the source order.
-        """
-        args = self.scope.expression.args
-        joins = args.get("joins")
-
-        if not joins or args.get("laterals") or args.get("pivots"):
-            # Feature gap: We currently don't try to disambiguate columns if other sources
-            # (e.g laterals, pivots) exist alongside joins
-            return None
-
-        join_ancestor = column.find_ancestor(exp.Join, exp.Select)
-
-        if (
-            isinstance(join_ancestor, exp.Join)
-            and join_ancestor.alias_or_name in self.scope.selected_sources
-        ):
-            # Ensure that the found ancestor is a join that contains an actual source,
-            # e.g in Clickhouse `b` is an array expression in `a ARRAY JOIN b`
-            return join_ancestor
-
-        return None
-
-    def _get_available_source_columns(
-        self, join_ancestor: exp.Join
-    ) -> t.Dict[str, t.Sequence[str]]:
-        """
-        Get the source columns that are available at the point where a column is referenced.
-
-        For columns in JOIN conditions, this only includes tables that have been joined
-        up to that point. Example:
-
-        ```
-        SELECT * FROM t_1 INNER JOIN ... INNER JOIN t_n ON t_1.a = c INNER JOIN t_n+1 ON ...
-                                                                    ^
-        ```
-
-        The unqualified column `c` is not ambiguous if no other sources up until that
-        join i.e t_1, ..., t_n, contain a column named `c`.
-        """
-        args = self.scope.expression.args
-
-        # Collect tables in order: FROM clause tables + joined tables up to current join
-        from_name = args["from"].alias_or_name
-        available_sources = {from_name: self.get_source_columns(from_name)}
-
-        for join in args["joins"][: t.cast(int, join_ancestor.index) + 1]:
-            available_sources[join.alias_or_name] = self.get_source_columns(join.alias_or_name)
-
-        return available_sources
-
-    def _get_unambiguous_columns(
-        self, source_columns: t.Dict[str, t.Sequence[str]]
-    ) -> t.Mapping[str, str]:
-        """
-        Find all the unambiguous columns in sources.
-
-        Args:
-            source_columns: Mapping of names to source columns.
-
-        Returns:
-            Mapping of column name to source name.
-        """
-        if not source_columns:
-            return {}
-
-        source_columns_pairs = list(source_columns.items())
-
-        first_table, first_columns = source_columns_pairs[0]
-
-        if len(source_columns_pairs) == 1:
-            # Performance optimization - avoid copying first_columns if there is only one table.
-            return SingleValuedMapping(first_columns, first_table)
-
-        unambiguous_columns = {col: first_table for col in first_columns}
-        all_columns = set(unambiguous_columns)
-
-        for table, columns in source_columns_pairs[1:]:
-            unique = set(columns)
-            ambiguous = all_columns.intersection(unique)
-            all_columns.update(columns)
-
-            for column in ambiguous:
-                unambiguous_columns.pop(column, None)
-            for column in unique.difference(ambiguous):
-                unambiguous_columns[column] = table
-
-        return unambiguous_columns
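
Note: the roughly 320 lines removed here are the `Resolver` class, which moved to the new module sqlglot/optimizer/resolver.py (file 42 in the list above, +374 lines); the hunks above already import it from there, and it now exposes the `resolver.dialect` property used throughout. External code should switch imports; a minimal sketch of standalone usage, assuming the constructor kept the signature of the removed class:

    import sqlglot
    from sqlglot.optimizer.resolver import Resolver
    from sqlglot.optimizer.scope import build_scope
    from sqlglot.schema import ensure_schema

    expression = sqlglot.parse_one("SELECT a FROM t")
    scope = build_scope(expression)
    resolver = Resolver(scope, ensure_schema({"t": {"a": "int"}}))
    print(resolver.get_table("a"))  # expected: t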