sqlframe 3.39.3__py3-none-any.whl → 3.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
  commit_id: COMMIT_ID
  __commit_id__: COMMIT_ID

- __version__ = version = '3.39.3'
- __version_tuple__ = version_tuple = (3, 39, 3)
+ __version__ = version = '3.40.0'
+ __version_tuple__ = version_tuple = (3, 40, 0)

- __commit_id__ = commit_id = 'g9d915cb1e'
+ __commit_id__ = commit_id = 'g93abcd907'
sqlframe/base/dataframe.py CHANGED
@@ -16,7 +16,6 @@ from dataclasses import dataclass
  from uuid import uuid4

  import sqlglot
- from more_itertools import partition
  from prettytable import PrettyTable
  from sqlglot import Dialect, maybe_parse
  from sqlglot import expressions as exp
@@ -397,12 +396,21 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  return Column.ensure_cols(ensure_list(cols)) # type: ignore

  def _ensure_and_normalize_cols(
- self, cols, expression: t.Optional[exp.Select] = None, skip_star_expansion: bool = False
+ self,
+ cols,
+ expression: t.Optional[exp.Select] = None,
+ skip_star_expansion: bool = False,
+ remove_identifier_if_possible: bool = True,
  ) -> t.List[Column]:
  from sqlframe.base.normalize import normalize

  cols = self._ensure_list_of_columns(cols)
- normalize(self.session, expression or self.expression, cols)
+ normalize(
+ self.session,
+ expression or self.expression,
+ cols,
+ remove_identifier_if_possible=remove_identifier_if_possible,
+ )
  if not skip_star_expansion:
  cols = list(flatten([self._expand_star(col) for col in cols]))
  self._resolve_ambiguous_columns(cols)
@@ -542,23 +550,16 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  expression.set("with", exp.With(expressions=existing_ctes))
  return expression

- @classmethod
- def _get_outer_select_expressions(
- cls, item: exp.Expression
- ) -> t.List[t.Union[exp.Column, exp.Alias]]:
- outer_select = item.find(exp.Select)
- if outer_select:
- return outer_select.expressions
- return []
-
  @classmethod
  def _get_outer_select_columns(cls, item: exp.Expression) -> t.List[Column]:
  from sqlframe.base.session import _BaseSession

  col = get_func_from_session("col", _BaseSession())

- outer_expressions = cls._get_outer_select_expressions(item)
- return [col(quote_preserving_alias_or_name(x)) for x in outer_expressions]
+ outer_select = item.find(exp.Select)
+ if outer_select:
+ return [col(quote_preserving_alias_or_name(x)) for x in outer_select.expressions]
+ return []

  def _create_hash_from_expression(self, expression: exp.Expression) -> str:
  from sqlframe.base.session import _BaseSession
@@ -1025,9 +1026,17 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  return join_column_pairs, join_clause

  def _normalize_join_clause(
- self, join_columns: t.List[Column], join_expression: t.Optional[exp.Select]
+ self,
+ join_columns: t.List[Column],
+ join_expression: t.Optional[exp.Select],
+ *,
+ remove_identifier_if_possible: bool = True,
  ) -> Column:
- join_columns = self._ensure_and_normalize_cols(join_columns, join_expression)
+ join_columns = self._ensure_and_normalize_cols(
+ join_columns,
+ join_expression,
+ remove_identifier_if_possible=remove_identifier_if_possible,
+ )
  if len(join_columns) > 1:
  join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
  join_clause = join_columns[0]
@@ -1512,23 +1521,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  """
  return func(self, *args, **kwargs) # type: ignore

- @operation(Operation.SELECT_CONSTRAINED)
+ @operation(Operation.SELECT)
  def withColumn(self, colName: str, col: Column) -> Self:
  return self.withColumns.__wrapped__(self, {colName: col}) # type: ignore

- @operation(Operation.SELECT_CONSTRAINED)
+ @operation(Operation.SELECT)
  def withColumnRenamed(self, existing: str, new: str) -> Self:
- col_func = get_func_from_session("col", self.session)
  expression = self.expression.copy()
  existing = self.session._normalize_string(existing)
- outer_expressions = self._get_outer_select_expressions(expression)
+ columns = self._get_outer_select_columns(expression)
  results = []
  found_match = False
- for expr in outer_expressions:
- column = col_func(expr.copy())
- if existing == quote_preserving_alias_or_name(expr):
- if isinstance(column.expression, exp.Alias):
- column.expression.set("alias", exp.to_identifier(new))
+ for column in columns:
+ if column.alias_or_name == existing:
+ column = column.alias(new)
  self._update_display_name_mapping([column], [new])
  found_match = True
  results.append(column)
@@ -1536,7 +1542,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  raise ValueError("Tried to rename a column that doesn't exist")
  return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore

- @operation(Operation.SELECT_CONSTRAINED)
+ @operation(Operation.SELECT)
  def withColumnsRenamed(self, colsMap: t.Dict[str, str]) -> Self:
  """
  Returns a new :class:`DataFrame` by renaming multiple columns. If a non-existing column is
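In the 3.40.0 code above, `withColumnRenamed` renames by re-aliasing the matched output column (`column.alias(new)`) instead of mutating the alias identifier in place, and it still raises `ValueError` when the column is missing. A rough usage sketch, assuming the standalone session and `DataFrame.sql()` behave as in earlier releases (the names below are illustrative, not taken from this diff):

```python
# Hedged sketch of the withColumnRenamed path touched above.
from sqlframe.standalone import StandaloneSession

session = StandaloneSession()
df = session.createDataFrame([(1, 2)], schema=["a", "b"])

# Effectively select(col("a").alias("c"), col("b")); an unknown name still raises
# ValueError("Tried to rename a column that doesn't exist").
renamed = df.withColumnRenamed("a", "c")
print(renamed.sql())  # expected: a SELECT projecting `a` aliased as `c`
```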
@@ -1582,7 +1588,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):

  return self.select.__wrapped__(self, *results, skip_update_display_name_mapping=True) # type: ignore

- @operation(Operation.SELECT_CONSTRAINED)
+ @operation(Operation.SELECT)
  def withColumns(self, *colsMap: t.Dict[str, Column]) -> Self:
  """
  Returns a new :class:`DataFrame` by adding multiple columns or replacing the
@@ -1620,14 +1626,13 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  """
  if len(colsMap) != 1:
  raise ValueError("Only a single map is supported")
- col_func = get_func_from_session("col")
  col_map = {
  self._ensure_and_normalize_col(k): (self._ensure_and_normalize_col(v), k)
  for k, v in colsMap[0].items()
  }
- existing_expr = self._get_outer_select_expressions(self.expression)
- existing_col_names = [x.alias_or_name for x in existing_expr]
- select_columns = [col_func(x) for x in existing_expr]
+ existing_cols = self._get_outer_select_columns(self.expression)
+ existing_col_names = [x.alias_or_name for x in existing_cols]
+ select_columns = existing_cols
  for col, (col_value, display_name) in col_map.items():
  column_name = col.alias_or_name
  existing_col_index = (
@@ -1644,7 +1649,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
  )
  return self.select.__wrapped__(self, *select_columns, skip_update_display_name_mapping=True) # type: ignore

- @operation(Operation.SELECT_CONSTRAINED)
+ @operation(Operation.SELECT)
  def drop(self, *cols: t.Union[str, Column]) -> Self:
  # Separate string column names from Column objects for different handling
  column_objs, column_names = partition_to(lambda x: isinstance(x, str), cols, list, set)
sqlframe/base/functions.py CHANGED
@@ -37,9 +37,7 @@ def _get_session() -> _BaseSession:

  @meta()
  def col(column_name: t.Union[ColumnOrName, t.Any]) -> Column:
- from sqlframe.base.session import _BaseSession
-
- dialect = _BaseSession().input_dialect
+ dialect = _get_session().input_dialect
  if isinstance(column_name, str):
  col_expression = expression.to_column(column_name, dialect=dialect).transform(
  dialect.normalize_identifier
@@ -192,27 +190,27 @@ sum_distinct = sumDistinct

  @meta()
  def acos(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ACOS")
+ return Column.invoke_expression_over_column(col, expression.Acos)


  @meta(unsupported_engines="duckdb")
  def acosh(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ACOSH")
+ return Column.invoke_expression_over_column(col, expression.Acosh)


  @meta()
  def asin(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ASIN")
+ return Column.invoke_expression_over_column(col, expression.Asin)


  @meta(unsupported_engines="duckdb")
  def asinh(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ASINH")
+ return Column.invoke_expression_over_column(col, expression.Asinh)


  @meta()
  def atan(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ATAN")
+ return Column.invoke_expression_over_column(col, expression.Atan)


  @meta()
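The switch from `Column.invoke_anonymous_function` to `Column.invoke_expression_over_column` means these math functions are now built as typed sqlglot expression nodes instead of verbatim function calls, so each output dialect's generator can render or rewrite them. A minimal sketch of the difference at the sqlglot level (assuming `exp.Acos` exists in the sqlglot range this release pins, as the diff relies on it):

```python
# Hedged sketch: typed nodes are transpiled per dialect, Anonymous calls are emitted verbatim.
import sqlglot
from sqlglot import expressions as exp

anon = exp.Anonymous(this="ACOS", expressions=[exp.column("x")])  # always rendered as ACOS(x)
typed = exp.Acos(this=exp.column("x"))                            # dialect-aware rendering

print(sqlglot.select(anon).from_("t").sql(dialect="duckdb"))
print(sqlglot.select(typed).from_("t").sql(dialect="duckdb"))
```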
@@ -220,12 +218,12 @@ def atan2(col1: t.Union[ColumnOrName, float], col2: t.Union[ColumnOrName, float]
  col1_value = lit(col1) if isinstance(col1, (int, float)) else col1
  col2_value = lit(col2) if isinstance(col2, (int, float)) else col2

- return Column.invoke_anonymous_function(col1_value, "ATAN2", col2_value)
+ return Column.invoke_expression_over_column(col1_value, expression.Atan2, expression=col2_value)


  @meta(unsupported_engines="duckdb")
  def atanh(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "ATANH")
+ return Column.invoke_expression_over_column(col, expression.Atanh)


  @meta()
@@ -253,12 +251,12 @@ def cosh(col: ColumnOrName) -> Column:

  @meta()
  def cot(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "COT")
+ return Column.invoke_expression_over_column(col, expression.Cot)


  @meta(unsupported_engines=["duckdb", "postgres", "snowflake"])
  def csc(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "CSC")
+ return Column.invoke_expression_over_column(col, expression.Csc)


  @meta()
@@ -364,7 +362,7 @@ def rint(col: ColumnOrName) -> Column:

  @meta(unsupported_engines=["duckdb", "postgres", "snowflake"])
  def sec(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "SEC")
+ return Column.invoke_expression_over_column(col, expression.Sec)


  @meta()
@@ -374,12 +372,12 @@ def signum(col: ColumnOrName) -> Column:

  @meta()
  def sin(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "SIN")
+ return Column.invoke_expression_over_column(col, expression.Sin)


  @meta(unsupported_engines="duckdb")
  def sinh(col: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(col, "SINH")
+ return Column.invoke_expression_over_column(col, expression.Sinh)


  @meta()
@@ -662,9 +660,7 @@ def grouping_id(*cols: ColumnOrName) -> Column:

  @meta()
  def input_file_name() -> Column:
- from sqlframe.base.session import _BaseSession
-
- return Column(expression.Literal.string(_BaseSession()._last_loaded_file or ""))
+ return Column(expression.Literal.string(_get_session()._last_loaded_file or ""))


  @meta()
@@ -944,7 +940,7 @@ def nth_value(

  @meta()
  def ntile(n: int) -> Column:
- return Column.invoke_anonymous_function(None, "NTILE", lit(n))
+ return Column.invoke_expression_over_column(lit(n), expression.Ntile)


  @meta()
@@ -959,12 +955,10 @@ def current_timestamp() -> Column:

  @meta()
  def date_format(col: ColumnOrName, format: str) -> Column:
- from sqlframe.base.session import _BaseSession
-
  return Column.invoke_expression_over_column(
  Column(expression.TimeStrToTime(this=Column.ensure_col(col).column_expression)),
  expression.TimeToStr,
- format=_BaseSession().format_time(format),
+ format=_get_session().format_time(format),
  )

@@ -2832,7 +2826,7 @@ def make_interval(

  @meta(unsupported_engines="*")
  def try_add(left: ColumnOrName, right: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(left, "TRY_ADD", right)
+ return Column.invoke_expression_over_column(left, expression.SafeAdd, expression=right)


  @meta(unsupported_engines="*")
@@ -2849,12 +2843,12 @@ def try_divide(left: ColumnOrName, right: ColumnOrName) -> Column:

  @meta(unsupported_engines="*")
  def try_multiply(left: ColumnOrName, right: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(left, "TRY_MULTIPLY", right)
+ return Column.invoke_expression_over_column(left, expression.SafeMultiply, expression=right)


  @meta(unsupported_engines="*")
  def try_subtract(left: ColumnOrName, right: ColumnOrName) -> Column:
- return Column.invoke_anonymous_function(left, "TRY_SUBTRACT", right)
+ return Column.invoke_expression_over_column(left, expression.SafeSubtract, expression=right)


  @meta(unsupported_engines="*")
@@ -3378,10 +3372,9 @@ def get(col: ColumnOrName, index: t.Union[ColumnOrName, int]) -> Column:
  def get_active_spark_context() -> SparkContext:
  """Raise RuntimeError if SparkContext is not initialized,
  otherwise, returns the active SparkContext."""
- from sqlframe.base.session import _BaseSession
  from sqlframe.spark.session import SparkSession

- session: _BaseSession = _BaseSession()
+ session = _get_session()
  if not isinstance(session, SparkSession):
  raise RuntimeError("This function is only available in SparkSession.")
  return session.spark_session.sparkContext
@@ -5263,7 +5256,7 @@ def regexp_extract_all(
  )


- @meta(unsupported_engines="*")
+ @meta(unsupported_engines=["duckdb", "bigquery", "postgres", "snowflake"])
  def regexp_instr(
  str: ColumnOrName, regexp: ColumnOrName, idx: t.Optional[t.Union[int, Column]] = None
  ) -> Column:
@@ -5298,11 +5291,9 @@ def regexp_instr(
  >>> df.select(regexp_instr('str', col("regexp")).alias('d')).collect()
  [Row(d=1)]
  """
- if idx is None:
- return Column.invoke_anonymous_function(str, "regexp_instr", regexp)
- else:
- idx = lit(idx) if isinstance(idx, int) else idx
- return Column.invoke_anonymous_function(str, "regexp_instr", regexp, idx)
+ return Column.invoke_expression_over_column(
+ str, expression.RegexpInstr, expression=regexp, group=idx
+ )


  @meta(unsupported_engines="snowflake")
@@ -6344,7 +6335,7 @@ def to_unix_timestamp(
  session = _get_session()

  if session._is_duckdb:
- format = format or _BaseSession().default_time_format
+ format = format or session.default_time_format
  timestamp = Column.ensure_col(timestamp).cast("string")

  if format is not None:
sqlframe/base/mixins/table_mixins.py CHANGED
@@ -275,7 +275,9 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
  join_expression = self._add_ctes_to_expression(
  self.expression, other_df.expression.copy().ctes
  )
- condition = self._ensure_and_normalize_cols(condition, self.expression)
+ condition = self._ensure_and_normalize_cols(
+ condition, self.expression, remove_identifier_if_possible=False
+ )
  self._handle_self_join(other_df, condition)

  if isinstance(condition[0].expression, exp.Column) and not clause:
@@ -291,7 +293,9 @@ class MergeSupportMixin(_BaseTable, t.Generic[DF]):
  condition, join_expression, other_df, table_names
  )
  else:
- join_clause = self._normalize_join_clause(condition, join_expression)
+ join_clause = self._normalize_join_clause(
+ condition, join_expression, remove_identifier_if_possible=False
+ )
  return join_clause

  def _ensure_and_normalize_assignments(
sqlframe/base/normalize.py CHANGED
@@ -16,33 +16,103 @@ if t.TYPE_CHECKING:
  NORMALIZE_INPUT = t.TypeVar("NORMALIZE_INPUT", bound=t.Union[str, exp.Expression, Column])


- def normalize(session: SESSION, expression_context: exp.Select, expr: t.List[NORMALIZE_INPUT]):
+ def normalize(
+ session: SESSION,
+ expression_context: exp.Select,
+ expr: t.List[NORMALIZE_INPUT],
+ *,
+ remove_identifier_if_possible: bool = True,
+ ):
  expr = ensure_list(expr)
  expressions = _ensure_expressions(expr)
  for expression in expressions:
  identifiers = expression.find_all(exp.Identifier)
  for identifier in identifiers:
  identifier.transform(session.input_dialect.normalize_identifier)
- replace_alias_name_with_cte_name(session, expression_context, identifier)
- replace_branch_and_sequence_ids_with_cte_name(session, expression_context, identifier)
+ replace_alias_name_with_cte_name(
+ session,
+ expression_context,
+ identifier,
+ remove_identifier_if_possible=remove_identifier_if_possible,
+ )
+ replace_branch_and_sequence_ids_with_cte_name(
+ session,
+ expression_context,
+ identifier,
+ remove_identifier_if_possible=remove_identifier_if_possible,
+ )


  def replace_alias_name_with_cte_name(
- session: SESSION, expression_context: exp.Select, id: exp.Identifier
+ session: SESSION,
+ expression_context: exp.Select,
+ id: exp.Identifier,
+ *,
+ remove_identifier_if_possible: bool,
  ):
  normalized_id = session._normalize_string(id.alias_or_name)
  if normalized_id in session.name_to_sequence_id_mapping:
- for cte in reversed(expression_context.ctes):
+ # Get CTEs that are referenced in the FROM clause
+ referenced_cte_names = get_cte_names_from_from_clause(expression_context)
+
+ # Filter CTEs to only include those defined and referenced by the FROM clause
+ filtered_ctes = [
+ cte
+ for cte in reversed(expression_context.ctes)
+ if cte.alias_or_name in referenced_cte_names
+ ]
+
+ for cte in filtered_ctes:
  if cte.args["sequence_id"] in session.name_to_sequence_id_mapping[normalized_id]:
  _set_alias_name(id, cte.alias_or_name)
  break
+ else:
+ # Fallback: If not found in filtered CTEs, search through ALL CTEs unfiltered
+ for cte in reversed(expression_context.ctes):
+ if cte.args["sequence_id"] in session.name_to_sequence_id_mapping[normalized_id]:
+ _set_alias_name(id, cte.alias_or_name)
+ break
+ else:
+ # Final fallback: If this is a qualified column reference (table.column)
+ # and the table doesn't exist in FROM clause, remove the qualifier IF the column is unambiguously available
+ parent = id.parent
+ if parent and isinstance(parent, exp.Column) and remove_identifier_if_possible:
+ # Check if this table is not available in current context
+ current_tables = get_cte_names_from_from_clause(expression_context)
+ if normalized_id not in current_tables:
+ # Check if this table ID matches any CTE name directly (cross-context CTE reference)
+ cte_exists = any(
+ cte.alias_or_name == normalized_id for cte in expression_context.ctes
+ )
+
+ if cte_exists:
+ # This is a reference to a CTE that exists but is not in the current FROM clause
+ # Get the column name being referenced
+ column_name = (
+ _extract_column_name(parent.this)
+ if hasattr(parent, "this")
+ else None
+ )
+
+ # Only remove qualifier if the column is unambiguously available in current context
+ if column_name and is_column_unambiguously_available(
+ expression_context, column_name
+ ):
+ parent.set("table", None)


  def replace_branch_and_sequence_ids_with_cte_name(
- session: SESSION, expression_context: exp.Select, id: exp.Identifier
+ session: SESSION,
+ expression_context: exp.Select,
+ id: exp.Identifier,
+ *,
+ remove_identifier_if_possible: bool,
  ):
  normalized_id = session._normalize_string(id.alias_or_name)
  if normalized_id in session.known_ids:
+ # Get CTEs that are referenced in the FROM clause
+ referenced_cte_names = get_cte_names_from_from_clause(expression_context)
+
  # Check if we have a join and if both the tables in that join share a common branch id
  # If so we need to have this reference the left table by default unless the id is a sequence
  # id then it keeps that reference. This handles the weird edge case in spark that shouldn't
@@ -51,19 +121,138 @@ def replace_branch_and_sequence_ids_with_cte_name(
  join_table_aliases = [
  x.alias_or_name for x in get_tables_from_expression_with_join(expression_context)
  ]
+ # Filter CTEs to only include those referenced in the FROM clause
  ctes_in_join = [
- cte for cte in expression_context.ctes if cte.alias_or_name in join_table_aliases
+ cte
+ for cte in expression_context.ctes
+ if cte.alias_or_name in join_table_aliases
+ and cte.alias_or_name in referenced_cte_names
  ]
- if ctes_in_join[0].args["branch_id"] == ctes_in_join[1].args["branch_id"]:
+ if (
+ len(ctes_in_join) >= 2
+ and ctes_in_join[0].args["branch_id"] == ctes_in_join[1].args["branch_id"]
+ ):
  assert len(ctes_in_join) == 2
  _set_alias_name(id, ctes_in_join[0].alias_or_name)
  return

+ # Filter CTEs to only include those defined and referenced by the FROM clause
+ filtered_ctes = [
+ cte
+ for cte in reversed(expression_context.ctes)
+ if cte.alias_or_name in referenced_cte_names
+ ]
+
+ for cte in filtered_ctes:
+ if normalized_id in (cte.args["branch_id"], cte.args["sequence_id"]):
+ _set_alias_name(id, cte.alias_or_name)
+ return
+
+ # Fallback: If not found in filtered CTEs, search through ALL CTEs unfiltered
  for cte in reversed(expression_context.ctes):
  if normalized_id in (cte.args["branch_id"], cte.args["sequence_id"]):
  _set_alias_name(id, cte.alias_or_name)
  return

+ # Final fallback: If this is a qualified column reference (table.column)
+ # and the table doesn't exist in FROM clause, remove the qualifier IF the column is unambiguously available
+ parent = id.parent
+ if parent and isinstance(parent, exp.Column) and remove_identifier_if_possible:
+ # Check if this table is not available in current context
+ current_tables = get_cte_names_from_from_clause(expression_context)
+ if normalized_id not in current_tables:
+ # Check if this table ID matches any CTE name directly (cross-context CTE reference)
+ cte_exists = any(cte.alias_or_name == normalized_id for cte in expression_context.ctes)
+
+ if cte_exists:
+ # This is a reference to a CTE that exists but is not in the current FROM clause
+ # Get the column name being referenced
+ column_name = _extract_column_name(parent.this) if hasattr(parent, "this") else None
+
+ # Only remove qualifier if the column is unambiguously available in current context
+ if column_name and is_column_unambiguously_available(
+ expression_context, column_name
+ ):
+ parent.set("table", None)
+
+
+ def is_column_unambiguously_available(expression_context: exp.Select, column_name: str) -> bool:
+ """
+ Check if a column name is unambiguously available in the current context.
+ Returns True if the column appears exactly once across all accessible CTEs.
+
+ Enhanced to handle more column expression types and edge cases.
+ """
+ current_tables = get_cte_names_from_from_clause(expression_context)
+ column_count_in_from = 0
+
+ # If no tables in FROM clause, be conservative
+ if not current_tables:
+ return False
+
+ # Count how many times this column appears in accessible CTEs
+ for cte in expression_context.ctes:
+ if cte.alias_or_name in current_tables:
+ if hasattr(cte, "this") and hasattr(cte.this, "expressions"):
+ for expr in cte.this.expressions:
+ expr_column_name = _extract_column_name(expr)
+
+ # Case-insensitive comparison for robustness
+ if expr_column_name and expr_column_name.lower() == column_name.lower():
+ column_count_in_from += 1
+
+ # Column is unambiguous if it appears exactly once in the FROM clause CTEs
+ return column_count_in_from == 1
+
+
+ def _extract_column_name(expr) -> str:
+ """
+ Extract column name from various expression types.
+ Enhanced to handle more SQLGlot expression types.
+ """
+ if hasattr(expr, "alias_or_name") and expr.alias_or_name:
+ return expr.alias_or_name
+ elif hasattr(expr, "this"):
+ if hasattr(expr.this, "this"):
+ return str(expr.this.this)
+ elif hasattr(expr.this, "name"):
+ return str(expr.this.name)
+ else:
+ return str(expr.this)
+ elif hasattr(expr, "name"):
+ return str(expr.name)
+ else:
+ return str(expr)
+
+
+ def get_cte_names_from_from_clause(expression_context: exp.Select) -> t.Set[str]:
+ """
+ Get the set of CTE names that are referenced in the FROM clause of the expression.
+
+ Args:
+ expression_context: The SELECT expression to analyze
+
+ Returns:
+ Set of CTE alias names referenced in the FROM clause (including joins)
+ """
+ referenced_cte_names = set()
+
+ # Get the main table from FROM clause
+ from_clause = expression_context.args.get("from")
+ if from_clause and from_clause.this:
+ main_table = from_clause.this
+ if hasattr(main_table, "alias_or_name") and main_table.alias_or_name:
+ referenced_cte_names.add(main_table.alias_or_name)
+
+ # Get tables from joins
+ if expression_context.args.get("joins"):
+ join_tables = get_tables_from_expression_with_join(expression_context)
+ for table in join_tables:
+ if hasattr(table, "alias_or_name") and table.alias_or_name:
+ referenced_cte_names.add(table.alias_or_name)
+
+ return referenced_cte_names
+

  def normalize_dict(session: SESSION, data: t.Dict) -> t.Dict:
  if isinstance(data, dict):
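The new helpers above drive the CTE-matching fallbacks: `get_cte_names_from_from_clause` limits the search to CTEs the FROM clause (and joins) actually reference, and `is_column_unambiguously_available` gates whether a stale table qualifier may be dropped. A minimal sketch of the FROM-clause helper, assuming it is importable from `sqlframe.base.normalize` as defined above:

```python
# Hedged sketch: only CTEs referenced by FROM/JOIN are reported, not every CTE defined.
import sqlglot
from sqlframe.base.normalize import get_cte_names_from_from_clause

query = sqlglot.parse_one("WITH a AS (SELECT 1 AS x), b AS (SELECT 2 AS y) SELECT x FROM a")
print(get_cte_names_from_from_clause(query))  # expected: {"a"} (b is defined but never read)
```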
sqlframe/base/operations.py CHANGED
@@ -27,10 +27,9 @@ class Operation(IntEnum):
  WHERE = 2
  GROUP_BY = 3
  HAVING = 4
- SELECT_CONSTRAINED = 5
- SELECT = 6
- ORDER_BY = 7
- LIMIT = 8
+ SELECT = 5
+ ORDER_BY = 6
+ LIMIT = 7


  # We want to decorate a function (self: DF, *args, **kwargs) -> T
sqlframe-3.39.3.dist-info/METADATA → sqlframe-3.40.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sqlframe
- Version: 3.39.3
+ Version: 3.40.0
  Summary: Turning PySpark Into a Universal DataFrame API
  Home-page: https://github.com/eakmanrq/sqlframe
  Author: Ryan Eakman
@@ -18,7 +18,7 @@ Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: more-itertools
  Requires-Dist: prettytable <4
- Requires-Dist: sqlglot <27.9,>=24.0.0
+ Requires-Dist: sqlglot <27.13,>=24.0.0
  Requires-Dist: typing-extensions
  Provides-Extra: bigquery
  Requires-Dist: google-cloud-bigquery-storage <3,>=2 ; extra == 'bigquery'
sqlframe-3.39.3.dist-info/RECORD → sqlframe-3.40.0.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
  sqlframe/__init__.py,sha256=SB80yLTITBXHI2GCDS6n6bN5ObHqgPjfpRPAUwxaots,3403
- sqlframe/_version.py,sha256=Vixv4hfZnHHXCXSmZD4wlHJUBkhCMzDLIyo5HqkJdes,714
+ sqlframe/_version.py,sha256=fOWY_ffL74_A_EHPCU75GCzXq1ZU-sD4WyU4XHtJjlI,714
  sqlframe/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
  sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sqlframe/base/_typing.py,sha256=b2clI5HI1zEZKB_3Msx3FeAJQyft44ubUifJwQRVXyQ,1298
  sqlframe/base/catalog.py,sha256=-YulM2BMK8MoWbXi05AsJIPxd4AuiZDBCZuk4HoeMlE,38900
  sqlframe/base/column.py,sha256=f6rK6-hTiNx9WwJP7t6tqL3xEC2gwERPDlhWCS5iCBw,21417
- sqlframe/base/dataframe.py,sha256=HHjDaeap4_w4HRRj87lhQjFTczxLKhFD8b-9vhK2KsY,87592
+ sqlframe/base/dataframe.py,sha256=fveiwPH-JQyUJdyB9PxzjHTvwwBBzBY4pUWq2OraH9A,87328
  sqlframe/base/decorators.py,sha256=IhE5xNQDkwJHacCvulq5WpUKyKmXm7dL2A3o5WuKGP4,2131
  sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
  sqlframe/base/function_alternatives.py,sha256=aTu3nQhIAkZoxrI1IpjpaHEAMxBNms0AnhS0EMR-TwY,51727
- sqlframe/base/functions.py,sha256=RVNoRzM19BUwypdc0izYrrQe2Fe4_e9SbtpDkdD2bec,227981
+ sqlframe/base/functions.py,sha256=fc3jLuPAIJ3Hl4Bezm9Kgzsk4e5uFfgMgfajUCBKQG0,227919
  sqlframe/base/group.py,sha256=fBm8EUve7W7xz11nybTXr09ih-yZxL_vvEiZVE1eb_0,12025
- sqlframe/base/normalize.py,sha256=nXAJ5CwxVf4DV0GsH-q1w0p8gmjSMlv96k_ez1eVul8,3880
- sqlframe/base/operations.py,sha256=8dkMNqjG3xP1w_6euAj8FpwweD7t590HYjoeoCr5LqI,4465
+ sqlframe/base/normalize.py,sha256=YPeopWr8ZRjevArYfrM-DZBkQp4t4UfAEwynoj4VvcU,11773
+ sqlframe/base/operations.py,sha256=g-YNcbvNKTOBbYm23GKfB3fmydlR7ZZDAuZUtXIHtzw,4438
  sqlframe/base/readerwriter.py,sha256=Nb2VJ_HBmLQp5mK8JhnFooZh2ydAaboCAFVPb-4MNX4,31241
  sqlframe/base/session.py,sha256=99X-ShK9ohHCX6WdIJs0HhjfK23snaE3Gv6RYc5wqUI,27687
  sqlframe/base/table.py,sha256=rCeh1W5SWbtEVfkLAUiexzrZwNgmZeptLEmLcM1ABkE,6961
@@ -25,7 +25,7 @@ sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
  sqlframe/base/mixins/catalog_mixins.py,sha256=9fZGWToz9xMJSzUl1vsVtj6TH3TysP3fBCKJLnGUQzE,23353
  sqlframe/base/mixins/dataframe_mixins.py,sha256=8D2AFtfc0tj9Q5qzlNAXdXOYw9RuD8kpe8wixo8pY5o,1534
  sqlframe/base/mixins/readwriter_mixins.py,sha256=ItQ_0jZ5RljgmLjGDIzLMRP_NQdy3wAyKwJ6K5NjaqA,4954
- sqlframe/base/mixins/table_mixins.py,sha256=3MhsOARkplwED1GRD0wq1vR8GNuop34kt3Jg8MATIjc,13791
+ sqlframe/base/mixins/table_mixins.py,sha256=zoqrgaH1fOgnHkC6C4L8IUyspDa5SETP3OXVdKWxcUM,13917
  sqlframe/bigquery/__init__.py,sha256=kbaomhYAANPdxeDQhajv8IHfMg_ENKivtYK-rPwaV08,939
  sqlframe/bigquery/catalog.py,sha256=Dcpp1JKftc3ukdYpn6M1ujqixA-6_1k8aY21Y5Johyc,11899
  sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
@@ -130,8 +130,8 @@ sqlframe/standalone/udf.py,sha256=azmgtUjHNIPs0WMVNId05SHwiYn41MKVBhKXsQJ5dmY,27
  sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
  sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
  sqlframe/testing/utils.py,sha256=PFsGZpwNUE_4-g_f43_vstTqsK0AQ2lBneb5Eb6NkFo,13008
- sqlframe-3.39.3.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
- sqlframe-3.39.3.dist-info/METADATA,sha256=eyKm8nGawKAujUOiCBn4PEFpSh_UzsnEV7LpKQVecRM,9069
- sqlframe-3.39.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- sqlframe-3.39.3.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
- sqlframe-3.39.3.dist-info/RECORD,,
+ sqlframe-3.40.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+ sqlframe-3.40.0.dist-info/METADATA,sha256=43WXPdp-_riwus7pJqzCv6ct0oAEmZden39JcI-hKVU,9070
+ sqlframe-3.40.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ sqlframe-3.40.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+ sqlframe-3.40.0.dist-info/RECORD,,