PyPI - sqlframe - Versions diffs - 3.13.4__py3-none-any.whl → 3.14.1__py3-none-any.whl - Mend

sqlframe 3.13.4__py3-none-any.whl → 3.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

sqlframe/_version.py +2 -2
sqlframe/base/dataframe.py +102 -61
sqlframe/base/mixins/table_mixins.py +335 -0
sqlframe/base/readerwriter.py +5 -4
sqlframe/base/session.py +8 -2
sqlframe/base/table.py +238 -0
sqlframe/bigquery/catalog.py +1 -0
sqlframe/bigquery/readwriter.py +2 -1
sqlframe/bigquery/session.py +3 -0
sqlframe/bigquery/table.py +24 -0
sqlframe/databricks/readwriter.py +2 -1
sqlframe/databricks/session.py +3 -0
sqlframe/databricks/table.py +24 -0
sqlframe/duckdb/readwriter.py +4 -1
sqlframe/duckdb/session.py +3 -0
sqlframe/duckdb/table.py +16 -0
sqlframe/postgres/readwriter.py +2 -1
sqlframe/postgres/session.py +3 -0
sqlframe/postgres/table.py +24 -0
sqlframe/redshift/readwriter.py +2 -1
sqlframe/redshift/session.py +3 -0
sqlframe/redshift/table.py +15 -0
sqlframe/snowflake/readwriter.py +2 -1
sqlframe/snowflake/session.py +3 -0
sqlframe/snowflake/table.py +23 -0
sqlframe/spark/readwriter.py +2 -1
sqlframe/spark/session.py +3 -0
sqlframe/spark/table.py +6 -0
sqlframe/standalone/readwriter.py +4 -1
sqlframe/standalone/session.py +3 -0
sqlframe/standalone/table.py +6 -0
{sqlframe-3.13.4.dist-info → sqlframe-3.14.1.dist-info}/METADATA +4 -4
{sqlframe-3.13.4.dist-info → sqlframe-3.14.1.dist-info}/RECORD +36 -26
{sqlframe-3.13.4.dist-info → sqlframe-3.14.1.dist-info}/LICENSE +0 -0
{sqlframe-3.13.4.dist-info → sqlframe-3.14.1.dist-info}/WHEEL +0 -0
{sqlframe-3.13.4.dist-info → sqlframe-3.14.1.dist-info}/top_level.txt +0 -0

sqlframe/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '3.13.4'
-__version_tuple__ = version_tuple = (3, 13, 4)
+__version__ = version = '3.14.1'
+__version_tuple__ = version_tuple = (3, 14, 1)

sqlframe/base/dataframe.py CHANGED Viewed

@@ -79,6 +79,23 @@ JOIN_HINTS = {
     "SHUFFLE_REPLICATE_NL",
 }
+JOIN_TYPE_MAPPING = {
+    "inner": "inner",
+    "cross": "cross",
+    "outer": "full_outer",
+    "full": "full_outer",
+    "fullouter": "full_outer",
+    "left": "left_outer",
+    "leftouter": "left_outer",
+    "right": "right_outer",
+    "rightouter": "right_outer",
+    "semi": "left_semi",
+    "leftsemi": "left_semi",
+    "left_semi": "left_semi",
+    "anti": "left_anti",
+    "leftanti": "left_anti",
+    "left_anti": "left_anti",
+}
 DF = t.TypeVar("DF", bound="BaseDataFrame")
@@ -872,6 +889,68 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         """
         return self.join.__wrapped__(self, other, how="cross")  # type: ignore
+    def _handle_self_join(self, other_df: DF, join_columns: t.List[Column]):
+        # If the two dataframes being joined come from the same branch, we then check if they have any columns that
+        # were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
+        # the two columns since they would end up with the same table name. We do this by checking for the unique
+        # uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
+        # it comes from the other df and we change the table name to the other df's table name.
+        # See `test_self_join` for an example of this.
+        if self.branch_id == other_df.branch_id:
+            other_df_unique_uuids = other_df.known_uuids - self.known_uuids
+            for col in join_columns:
+                for col_expr in col.expression.find_all(exp.Column):
+                    if (
+                        "join_on_uuid" in col_expr.meta
+                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
+                    ):
+                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+    @staticmethod
+    def _handle_join_column_names_only(
+        join_columns: t.List[Column],
+        join_expression: exp.Select,
+        other_df: DF,
+        table_names: t.List[str],
+    ):
+        potential_ctes = [
+            cte
+            for cte in join_expression.ctes
+            if cte.alias_or_name in table_names and cte.alias_or_name != other_df.latest_cte_name
+        ]
+        # Determine the table to reference for the left side of the join by checking each of the left side
+        # tables and see if they have the column being referenced.
+        join_column_pairs = []
+        for join_column in join_columns:
+            num_matching_ctes = 0
+            for cte in potential_ctes:
+                if join_column.alias_or_name in cte.this.named_selects:
+                    left_column = join_column.copy().set_table_name(cte.alias_or_name)
+                    right_column = join_column.copy().set_table_name(other_df.latest_cte_name)
+                    join_column_pairs.append((left_column, right_column))
+                    num_matching_ctes += 1
+                    # We only want to match one table to the column and that should be matched left -> right
+                    # so we break after the first match
+                    break
+            if num_matching_ctes == 0:
+                raise ValueError(
+                    f"Column `{join_column.alias_or_name}` does not exist in any of the tables."
+                )
+        join_clause = functools.reduce(
+            lambda x, y: x & y,
+            [left_column == right_column for left_column, right_column in join_column_pairs],
+        )
+        return join_column_pairs, join_clause
+    def _normalize_join_clause(
+        self, join_columns: t.List[Column], join_expression: t.Optional[exp.Select]
+    ) -> Column:
+        join_columns = self._ensure_and_normalize_cols(join_columns, join_expression)
+        if len(join_columns) > 1:
+            join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
+        join_clause = join_columns[0]
+        return join_clause
     @operation(Operation.FROM)
     def join(
         self,
@@ -882,37 +961,33 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     ) -> Self:
         from sqlframe.base.functions import coalesce
-        if on is None:
+        if (on is None) and ("cross" not in how):
             logger.warning("Got no value for on. This appears to change the join to a cross join.")
             how = "cross"
+        if (on is not None) and ("cross" in how):
+            # Not a lot of doc, but Spark handles cross with predicate as an inner join
+            # https://learn.microsoft.com/en-us/dotnet/api/microsoft.spark.sql.dataframe.join
+            logger.warning("Got cross join with an 'on' value. This will result in an inner join.")
+            how = "inner"
         other_df = other_df._convert_leaf_to_cte()
         join_expression = self._add_ctes_to_expression(self.expression, other_df.expression.ctes)
         # We will determine actual "join on" expression later so we don't provide it at first
-        join_expression = join_expression.join(
-            join_expression.ctes[-1].alias, join_type=how.replace("_", " ")
-        )
+        join_type = JOIN_TYPE_MAPPING.get(how, how).replace("_", " ")
+        join_expression = join_expression.join(join_expression.ctes[-1].alias, join_type=join_type)
         self_columns = self._get_outer_select_columns(join_expression)
         other_columns = self._get_outer_select_columns(other_df.expression)
         join_columns = self._ensure_and_normalize_cols(on)
-        # If the two dataframes being joined come from the same branch, we then check if they have any columns that
-        # were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
-        # the two columns since they would end up with the same table name. We do this by checking for the unique
-        # uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
-        # it comes from the other df and we change the table name to the other df's table name.
-        # See `test_self_join` for an example of this.
-        if self.branch_id == other_df.branch_id:
-            other_df_unique_uuids = other_df.known_uuids - self.known_uuids
-            for col in join_columns:
-                for col_expr in col.expression.find_all(exp.Column):
-                    if (
-                        "join_on_uuid" in col_expr.meta
-                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
-                    ):
-                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+        self._handle_self_join(other_df, join_columns)
         # Determines the join clause and select columns to be used passed on what type of columns were provided for
         # the join. The columns returned changes based on how the on expression is provided.
-        if how != "cross":
+        select_columns = (
+            self_columns
+            if join_type in ["left anti", "left semi"]
+            else self_columns + other_columns
+        )
+        if join_type != "cross":
             if isinstance(join_columns[0].expression, exp.Column):
                 """
                 Unique characteristics of join on column names only:
@@ -923,38 +998,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
                     table.alias_or_name
                     for table in get_tables_from_expression_with_join(join_expression)
                 ]
-                potential_ctes = [
-                    cte
-                    for cte in join_expression.ctes
-                    if cte.alias_or_name in table_names
-                    and cte.alias_or_name != other_df.latest_cte_name
-                ]
-                # Determine the table to reference for the left side of the join by checking each of the left side
-                # tables and see if they have the column being referenced.
-                join_column_pairs = []
-                for join_column in join_columns:
-                    num_matching_ctes = 0
-                    for cte in potential_ctes:
-                        if join_column.alias_or_name in cte.this.named_selects:
-                            left_column = join_column.copy().set_table_name(cte.alias_or_name)
-                            right_column = join_column.copy().set_table_name(
-                                other_df.latest_cte_name
-                            )
-                            join_column_pairs.append((left_column, right_column))
-                            num_matching_ctes += 1
-                            # We only want to match one table to the column and that should be matched left -> right
-                            # so we break after the first match
-                            break
-                    if num_matching_ctes == 0:
-                        raise ValueError(
-                            f"Column `{join_column.alias_or_name}` does not exist in any of the tables."
-                        )
-                join_clause = functools.reduce(
-                    lambda x, y: x & y,
-                    [
-                        left_column == right_column
-                        for left_column, right_column in join_column_pairs
-                    ],
+                join_column_pairs, join_clause = self._handle_join_column_names_only(
+                    join_columns, join_expression, other_df, table_names
                 )
                 join_column_names = [
                     coalesce(
@@ -972,7 +1018,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
                         if not isinstance(column.expression.this, exp.Star)
                         else column.sql()
                     )
-                    for column in self_columns + other_columns
+                    for column in select_columns
                 ]
                 select_column_names = [
                     column_name
@@ -989,17 +1035,12 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
                 * There is no deduplication of the results.
                 * The left join dataframe columns go first and right come after. No sort preference is given to join columns
                 """
-                join_columns = self._ensure_and_normalize_cols(join_columns, join_expression)
-                if len(join_columns) > 1:
-                    join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
-                join_clause = join_columns[0]
-                select_column_names = [
-                    column.alias_or_name for column in self_columns + other_columns
-                ]
+                join_clause = self._normalize_join_clause(join_columns, join_expression)
+                select_column_names = [column.alias_or_name for column in select_columns]
             # Update the on expression with the actual join clause to replace the dummy one from before
         else:
-            select_column_names = [column.alias_or_name for column in self_columns + other_columns]
+            select_column_names = [column.alias_or_name for column in select_columns]
             join_clause = None
         join_expression.args["joins"][-1].set("on", join_clause.expression if join_clause else None)
         new_df = self.copy(expression=join_expression)

sqlframe/base/mixins/table_mixins.py ADDED Viewed

@@ -0,0 +1,335 @@
+import functools
+import logging
+import typing as t
+from sqlglot import exp
+try:
+    from sqlglot.expressions import Whens
+except ImportError:
+    Whens = None  # type: ignore
+from sqlglot.helper import object_to_dict
+from sqlframe.base.column import Column
+from sqlframe.base.table import (
+    DF,
+    Clause,
+    LazyExpression,
+    WhenMatched,
+    WhenNotMatched,
+    WhenNotMatchedBySource,
+    _BaseTable,
+)
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import ColumnOrLiteral
+logger = logging.getLogger(__name__)
+def ensure_cte() -> t.Callable[[t.Callable], t.Callable]:
+    def decorator(func: t.Callable) -> t.Callable:
+        @functools.wraps(func)
+        def wrapper(self: _BaseTable, *args, **kwargs) -> t.Any:
+            if len(self.expression.ctes) > 0:
+                return func(self, *args, **kwargs)  # type: ignore
+            self_class = self.__class__
+            self = self._convert_leaf_to_cte()
+            self = self_class(**object_to_dict(self))
+            return func(self, *args, **kwargs)  # type: ignore
+        wrapper.__wrapped__ = func  # type: ignore
+        return wrapper
+    return decorator
+class _BaseTableMixins(_BaseTable, t.Generic[DF]):
+    def _ensure_where_condition(
+        self, where: t.Optional[t.Union[Column, str, bool]] = None
+    ) -> exp.Expression:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        if where is None:
+            logger.warning("Empty value for `where`clause. Defaults to `True`.")
+            condition: exp.Expression = exp.Boolean(this=True)
+        else:
+            condition_list = self._ensure_and_normalize_cols(where, self.expression)
+            if len(condition_list) > 1:
+                condition_list = [functools.reduce(lambda x, y: x & y, condition_list)]
+            for col_expr in condition_list[0].expression.find_all(exp.Column):
+                if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+            condition = condition_list[0].expression
+            if isinstance(condition, exp.Alias):
+                condition = condition.this
+        return condition
+class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):
+    @ensure_cte()
+    def update(
+        self,
+        set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
+        where: t.Optional[t.Union[Column, str, bool]] = None,
+    ) -> LazyExpression:
+        self_expr = self.expression.ctes[0].this.args["from"].this
+        condition = self._ensure_where_condition(where)
+        update_set = self._ensure_and_normalize_update_set(set_)
+        update_expr = exp.Update(
+            this=self_expr,
+            expressions=[
+                exp.EQ(
+                    this=key,
+                    expression=val,
+                )
+                for key, val in update_set.items()
+            ],
+            where=exp.Where(this=condition),
+        )
+        return LazyExpression(update_expr, self.session)
+    def _ensure_and_normalize_update_set(
+        self,
+        set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
+    ) -> t.Dict[str, exp.Expression]:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        update_set = {}
+        for key, val in set_.items():
+            key_column: Column = self._ensure_and_normalize_col(key)
+            key_expr = list(key_column.expression.find_all(exp.Column))
+            if len(key_expr) > 1:
+                raise ValueError(f"Can only update one a single column at a time.")
+            key = key_expr[0].alias_or_name
+            val_column: Column = self._ensure_and_normalize_col(val)
+            for col_expr in val_column.expression.find_all(exp.Column):
+                if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+                else:
+                    raise ValueError(
+                        f"Column `{col_expr.alias_or_name}` does not exist in the table."
+                    )
+            update_set[key] = val_column.expression
+        return update_set
+class DeleteSupportMixin(_BaseTableMixins, t.Generic[DF]):
+    @ensure_cte()
+    def delete(
+        self,
+        where: t.Optional[t.Union[Column, str, bool]] = None,
+    ) -> LazyExpression:
+        self_expr = self.expression.ctes[0].this.args["from"].this
+        condition = self._ensure_where_condition(where)
+        delete_expr = exp.Delete(
+            this=self_expr,
+            where=exp.Where(this=condition),
+        )
+        return LazyExpression(delete_expr, self.session)
+class MergeSupportMixin(_BaseTable, t.Generic[DF]):
+    _merge_supported_clauses: t.Iterable[
+        t.Union[t.Type[WhenMatched], t.Type[WhenNotMatched], t.Type[WhenNotMatchedBySource]]
+    ]
+    _merge_support_star: bool
+    @ensure_cte()
+    def merge(
+        self,
+        other_df: DF,
+        condition: t.Union[str, t.List[str], Column, t.List[Column], bool],
+        clauses: t.Iterable[t.Union[WhenMatched, WhenNotMatched, WhenNotMatchedBySource]],
+    ) -> LazyExpression:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        self_expr = self.expression.ctes[0].this.args["from"].this
+        other_df = other_df._convert_leaf_to_cte()
+        if condition is None:
+            raise ValueError("condition cannot be None")
+        condition_columns: Column = self._ensure_and_normalize_condition(condition, other_df)
+        other_name = self._create_hash_from_expression(other_df.expression)
+        other_expr = exp.Subquery(
+            this=other_df.expression, alias=exp.TableAlias(this=exp.to_identifier(other_name))
+        )
+        for col_expr in condition_columns.expression.find_all(exp.Column):
+            if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                col_expr.set("table", exp.to_identifier(self_name))
+            if col_expr.table == other_df.latest_cte_name:
+                col_expr.set("table", exp.to_identifier(other_name))
+        merge_expressions = []
+        for clause in clauses:
+            if not isinstance(clause, tuple(self._merge_supported_clauses)):
+                raise ValueError(
+                    f"Unsupported clause type {type(clause.clause)} for merge operation"
+                )
+            expression = None
+            if clause.clause.condition is not None:
+                cond_clause = self._ensure_and_normalize_condition(
+                    clause.clause.condition, other_df, True
+                )
+                for col_expr in cond_clause.expression.find_all(exp.Column):
+                    if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                        col_expr.set("table", exp.to_identifier(self_name))
+                    if col_expr.table == other_df.latest_cte_name:
+                        col_expr.set("table", exp.to_identifier(other_name))
+            else:
+                cond_clause = None
+            if clause.clause.clause_type == Clause.UPDATE:
+                update_set = self._ensure_and_normalize_assignments(
+                    clause.clause.assignments, other_df
+                )
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Update(
+                        expressions=[
+                            exp.EQ(
+                                this=key,
+                                expression=val,
+                            )
+                            for key, val in update_set.items()
+                        ]
+                    ),
+                )
+            if clause.clause.clause_type == Clause.UPDATE_ALL:
+                if not self._support_star:
+                    raise ValueError("Merge operation does not support UPDATE_ALL")
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Update(expressions=[exp.Star()]),
+                )
+            elif clause.clause.clause_type == Clause.INSERT:
+                insert_values = self._ensure_and_normalize_assignments(
+                    clause.clause.assignments, other_df
+                )
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Insert(
+                        this=exp.Tuple(expressions=[key for key in insert_values.keys()]),
+                        expression=exp.Tuple(expressions=[val for val in insert_values.values()]),
+                    ),
+                )
+            elif clause.clause.clause_type == Clause.INSERT_ALL:
+                if not self._support_star:
+                    raise ValueError("Merge operation does not support INSERT_ALL")
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Insert(expression=exp.Star()),
+                )
+            elif clause.clause.clause_type == Clause.DELETE:
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.var("DELETE"),
+                )
+            if expression:
+                merge_expressions.append(expression)
+        if Whens is None:
+            merge_expr = exp.merge(
+                *merge_expressions,
+                into=self_expr,
+                using=other_expr,
+                on=condition_columns.expression,
+            )
+        else:
+            merge_expr = exp.merge(
+                Whens(expressions=merge_expressions),
+                into=self_expr,
+                using=other_expr,
+                on=condition_columns.expression,
+            )
+        return LazyExpression(merge_expr, self.session)
+    def _ensure_and_normalize_condition(
+        self,
+        condition: t.Union[str, t.List[str], Column, t.List[Column], bool],
+        other_df: DF,
+        clause: t.Optional[bool] = False,
+    ):
+        join_expression = self._add_ctes_to_expression(
+            self.expression, other_df.expression.copy().ctes
+        )
+        condition = self._ensure_and_normalize_cols(condition, self.expression)
+        self._handle_self_join(other_df, condition)
+        if isinstance(condition[0].expression, exp.Column) and not clause:
+            table_names = [
+                table.alias_or_name
+                for table in [
+                    self.expression.args["from"].this,
+                    other_df.expression.args["from"].this,
+                ]
+            ]
+            join_column_pairs, join_clause = self._handle_join_column_names_only(
+                condition, join_expression, other_df, table_names
+            )
+        else:
+            join_clause = self._normalize_join_clause(condition, join_expression)
+        return join_clause
+    def _ensure_and_normalize_assignments(
+        self,
+        assignments: t.Dict[
+            t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]
+        ],
+        other_df,
+    ) -> t.Dict[exp.Column, exp.Expression]:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        other_name = self._create_hash_from_expression(other_df.expression)
+        update_set = {}
+        for key, val in assignments.items():
+            key_column: Column = self._ensure_and_normalize_col(key)
+            key_expr = list(key_column.expression.find_all(exp.Column))
+            if len(key_expr) > 1:
+                raise ValueError(f"Target expression `{key_expr}` should be a single column.")
+            column_key = exp.column(key_expr[0].alias_or_name)
+            val = self._ensure_and_normalize_col(val)
+            val = self._ensure_and_normalize_cols(val, other_df.expression)[0]
+            if self.branch_id == other_df.branch_id:
+                other_df_unique_uuids = other_df.known_uuids - self.known_uuids
+                for col_expr in val.expression.find_all(exp.Column):
+                    if (
+                        "join_on_uuid" in col_expr.meta
+                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
+                    ):
+                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+            for col_expr in val.expression.find_all(exp.Column):
+                if not col_expr.table or col_expr.table == other_df.latest_cte_name:
+                    col_expr.set("table", exp.to_identifier(other_name))
+                elif col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+                else:
+                    raise ValueError(
+                        f"Column `{col_expr.alias_or_name}` does not exist in any of the tables."
+                    )
+            if isinstance(val.expression, exp.Alias):
+                val.expression = val.expression.this
+            update_set[column_key] = val.expression
+        return update_set

sqlframe/base/readerwriter.py CHANGED Viewed

@@ -21,19 +21,20 @@ else:
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
     from sqlframe.base.column import Column
-    from sqlframe.base.session import DF, _BaseSession
+    from sqlframe.base.session import DF, TABLE, _BaseSession
     from sqlframe.base.types import StructType
     SESSION = t.TypeVar("SESSION", bound=_BaseSession)
 else:
     SESSION = t.TypeVar("SESSION")
     DF = t.TypeVar("DF")
+    TABLE = t.TypeVar("TABLE")
 logger = logging.getLogger(__name__)
-class _BaseDataFrameReader(t.Generic[SESSION, DF]):
+class _BaseDataFrameReader(t.Generic[SESSION, DF, TABLE]):
     def __init__(self, spark: SESSION):
         self._session = spark
         self.state_format_to_read: t.Optional[str] = None
@@ -42,7 +43,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
     def session(self) -> SESSION:
         return self._session
-    def table(self, tableName: str) -> DF:
+    def table(self, tableName: str) -> TABLE:
         tableName = normalize_string(tableName, from_dialect="input", is_table=True)
         if df := self.session.temp_views.get(tableName):
             return df
@@ -50,7 +51,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
         self.session.catalog.add_table(table)
         columns = self.session.catalog.get_columns_from_schema(table)
-        return self.session._create_df(
+        return self.session._create_table(
             exp.Select()
             .from_(tableName, dialect=self.session.input_dialect)
             .select(*columns, dialect=self.session.input_dialect)

sqlframe/base/session.py CHANGED Viewed

@@ -27,6 +27,7 @@ from sqlframe.base.catalog import _BaseCatalog
 from sqlframe.base.dataframe import BaseDataFrame
 from sqlframe.base.normalize import normalize_dict
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
+from sqlframe.base.table import _BaseTable
 from sqlframe.base.udf import _BaseUDFRegistration
 from sqlframe.base.util import (
     get_column_mapping_from_schema_input,
@@ -65,17 +66,19 @@ CATALOG = t.TypeVar("CATALOG", bound=_BaseCatalog)
 READER = t.TypeVar("READER", bound=_BaseDataFrameReader)
 WRITER = t.TypeVar("WRITER", bound=_BaseDataFrameWriter)
 DF = t.TypeVar("DF", bound=BaseDataFrame)
+TABLE = t.TypeVar("TABLE", bound=_BaseTable)
 UDF_REGISTRATION = t.TypeVar("UDF_REGISTRATION", bound=_BaseUDFRegistration)
 _MISSING = "MISSING"
-class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION]):
+class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGISTRATION]):
     _instance = None
     _reader: t.Type[READER]
     _writer: t.Type[WRITER]
     _catalog: t.Type[CATALOG]
     _df: t.Type[DF]
+    _table: t.Type[TABLE]
     _udf_registration: t.Type[UDF_REGISTRATION]
     SANITIZE_COLUMN_NAMES = False
@@ -158,12 +161,15 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION
             return name.replace("(", "_").replace(")", "_")
         return name
-    def table(self, tableName: str) -> DF:
+    def table(self, tableName: str) -> TABLE:
         return self.read.table(tableName)
     def _create_df(self, *args, **kwargs) -> DF:
         return self._df(self, *args, **kwargs)
+    def _create_table(self, *args, **kwargs) -> TABLE:
+        return self._table(self, *args, **kwargs)
     def __new__(cls, *args, **kwargs):
         if _BaseSession._instance is None:
             _BaseSession._instance = super().__new__(cls)

sqlframe 3.13.4__py3-none-any.whl → 3.14.1__py3-none-any.whl

sqlframe 3.13.4__py3-none-any.whl → 3.14.1__py3-none-any.whl