sqlframe 3.13.3__py3-none-any.whl → 3.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +78 -54
- sqlframe/base/mixins/table_mixins.py +335 -0
- sqlframe/base/readerwriter.py +5 -4
- sqlframe/base/session.py +8 -2
- sqlframe/base/table.py +238 -0
- sqlframe/bigquery/catalog.py +1 -0
- sqlframe/bigquery/readwriter.py +2 -1
- sqlframe/bigquery/session.py +3 -0
- sqlframe/bigquery/table.py +24 -0
- sqlframe/databricks/readwriter.py +2 -1
- sqlframe/databricks/session.py +3 -0
- sqlframe/databricks/table.py +24 -0
- sqlframe/duckdb/readwriter.py +4 -1
- sqlframe/duckdb/session.py +3 -0
- sqlframe/duckdb/table.py +16 -0
- sqlframe/postgres/readwriter.py +2 -1
- sqlframe/postgres/session.py +3 -0
- sqlframe/postgres/table.py +24 -0
- sqlframe/redshift/readwriter.py +2 -1
- sqlframe/redshift/session.py +3 -0
- sqlframe/redshift/table.py +15 -0
- sqlframe/snowflake/readwriter.py +2 -1
- sqlframe/snowflake/session.py +3 -0
- sqlframe/snowflake/table.py +23 -0
- sqlframe/spark/readwriter.py +2 -1
- sqlframe/spark/session.py +3 -0
- sqlframe/spark/table.py +6 -0
- sqlframe/standalone/readwriter.py +4 -1
- sqlframe/standalone/session.py +3 -0
- sqlframe/standalone/table.py +6 -0
- {sqlframe-3.13.3.dist-info → sqlframe-3.14.0.dist-info}/METADATA +1 -1
- {sqlframe-3.13.3.dist-info → sqlframe-3.14.0.dist-info}/RECORD +36 -26
- {sqlframe-3.13.3.dist-info → sqlframe-3.14.0.dist-info}/LICENSE +0 -0
- {sqlframe-3.13.3.dist-info → sqlframe-3.14.0.dist-info}/WHEEL +0 -0
- {sqlframe-3.13.3.dist-info → sqlframe-3.14.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -481,6 +481,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
             cte = cte.transform(replace_id_value, replaced_cte_names)  # type: ignore
             if cte.alias_or_name in existing_cte_counts:
                 existing_cte_counts[cte.alias_or_name] += 10
+                # Add unique where filter to ensure that the hash of the CTE is unique
                 cte.set(
                     "this",
                     cte.this.where(
@@ -502,6 +503,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
                     new_cte_alias, dialect=self.session.input_dialect, into=exp.TableAlias
                 ),
             )
+            existing_cte_counts[new_cte_alias] = 0
             existing_ctes.append(cte)
         else:
             existing_ctes = ctes
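
The two hunks above keep CTE hashes unique when alias names repeat: a repeated name bumps a counter and gains a no-op where filter, and a fresh alias now seeds the counter at zero. A toy sqlglot sketch of the idea (not sqlframe's exact filter expression):

from sqlglot import exp, parse_one

cte = parse_one("SELECT a FROM t")
count = 2  # how many times this CTE name has been seen
# A no-op predicate like "2 = 2" changes the generated SQL text (and thus
# any hash derived from it) without changing the result set.
unique = cte.where(exp.EQ(this=exp.Literal.number(count), expression=exp.Literal.number(count)))
print(unique.sql())  # SELECT a FROM t WHERE 2 = 2
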
@@ -755,15 +757,20 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         ]
         cte_names_in_join = [x.this for x in join_table_identifiers]
         # If we have columns that resolve to multiple CTE expressions then we want to use each CTE left-to-right
-        # and therefore we allow multiple columns with the same name in the result. This matches the behavior
-        # of Spark.
+        # (or right to left if a right join) and therefore we allow multiple columns with the same
+        # name in the result. This matches the behavior of Spark.
         resolved_column_position: t.Dict[exp.Column, int] = {
             col.copy(): -1 for col in ambiguous_cols
         }
         for ambiguous_col in ambiguous_cols:
+            ctes = (
+                list(reversed(self.expression.ctes))
+                if self.expression.args["joins"][0].args.get("side", "") == "right"
+                else self.expression.ctes
+            )
             ctes_with_column = [
                 cte
-                for cte in self.expression.ctes
+                for cte in ctes
                 if cte.alias_or_name in cte_names_in_join
                 and ambiguous_col.alias_or_name in cte.this.named_selects
             ]
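
The reversed-CTE branch above changes how ambiguous column names resolve when the first join is a right join: candidate CTEs are scanned right to left instead of left to right, mirroring Spark. A minimal sketch of the case it targets, assuming sqlframe's PySpark-compatible API (the DuckDB session is used only for illustration):

from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
left = session.createDataFrame([(1, "l")], ["id", "side"])
right = session.createDataFrame([(1, "r"), (2, "r")], ["id", "side"])
# With how="right", the ambiguous "side" column should resolve against the
# right-hand CTE first, matching Spark's resolution order.
left.join(right, on="id", how="right").show()
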
@@ -865,6 +872,68 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         """
         return self.join.__wrapped__(self, other, how="cross")  # type: ignore
 
+    def _handle_self_join(self, other_df: DF, join_columns: t.List[Column]):
+        # If the two dataframes being joined come from the same branch, we then check if they have any columns that
+        # were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
+        # the two columns since they would end up with the same table name. We do this by checking for the unique
+        # uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
+        # it comes from the other df and we change the table name to the other df's table name.
+        # See `test_self_join` for an example of this.
+        if self.branch_id == other_df.branch_id:
+            other_df_unique_uuids = other_df.known_uuids - self.known_uuids
+            for col in join_columns:
+                for col_expr in col.expression.find_all(exp.Column):
+                    if (
+                        "join_on_uuid" in col_expr.meta
+                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
+                    ):
+                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+
+    @staticmethod
+    def _handle_join_column_names_only(
+        join_columns: t.List[Column],
+        join_expression: exp.Select,
+        other_df: DF,
+        table_names: t.List[str],
+    ):
+        potential_ctes = [
+            cte
+            for cte in join_expression.ctes
+            if cte.alias_or_name in table_names and cte.alias_or_name != other_df.latest_cte_name
+        ]
+        # Determine the table to reference for the left side of the join by checking each of the left side
+        # tables and see if they have the column being referenced.
+        join_column_pairs = []
+        for join_column in join_columns:
+            num_matching_ctes = 0
+            for cte in potential_ctes:
+                if join_column.alias_or_name in cte.this.named_selects:
+                    left_column = join_column.copy().set_table_name(cte.alias_or_name)
+                    right_column = join_column.copy().set_table_name(other_df.latest_cte_name)
+                    join_column_pairs.append((left_column, right_column))
+                    num_matching_ctes += 1
+                    # We only want to match one table to the column and that should be matched left -> right
+                    # so we break after the first match
+                    break
+            if num_matching_ctes == 0:
+                raise ValueError(
+                    f"Column `{join_column.alias_or_name}` does not exist in any of the tables."
+                )
+        join_clause = functools.reduce(
+            lambda x, y: x & y,
+            [left_column == right_column for left_column, right_column in join_column_pairs],
+        )
+        return join_column_pairs, join_clause
+
+    def _normalize_join_clause(
+        self, join_columns: t.List[Column], join_expression: t.Optional[exp.Select]
+    ) -> Column:
+        join_columns = self._ensure_and_normalize_cols(join_columns, join_expression)
+        if len(join_columns) > 1:
+            join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
+        join_clause = join_columns[0]
+        return join_clause
+
     @operation(Operation.FROM)
     def join(
         self,
@@ -888,21 +957,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         self_columns = self._get_outer_select_columns(join_expression)
         other_columns = self._get_outer_select_columns(other_df.expression)
         join_columns = self._ensure_and_normalize_cols(on)
-        # If the two dataframes being joined come from the same branch, we then check if they have any columns that
-        # were created using the "branch_id" (df["column_name"]). If so, we know that we need to differentiate
-        # the two columns since they would end up with the same table name. We do this by checking for the unique
-        # uuids in the other df and finding columns that have metadata on them that match the uuids. If so, we know
-        # it comes from the other df and we change the table name to the other df's table name.
-        # See `test_self_join` for an example of this.
-        if self.branch_id == other_df.branch_id:
-            other_df_unique_uuids = other_df.known_uuids - self.known_uuids
-            for col in join_columns:
-                for col_expr in col.expression.find_all(exp.Column):
-                    if (
-                        "join_on_uuid" in col_expr.meta
-                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
-                    ):
-                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+        self._handle_self_join(other_df, join_columns)
+
         # Determines the join clause and select columns to be used passed on what type of columns were provided for
         # the join. The columns returned changes based on how the on expression is provided.
         if how != "cross":
@@ -916,38 +972,9 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
                 table.alias_or_name
                 for table in get_tables_from_expression_with_join(join_expression)
             ]
-            potential_ctes = [
-                cte
-                for cte in join_expression.ctes
-                if cte.alias_or_name in table_names
-                and cte.alias_or_name != other_df.latest_cte_name
-            ]
-            # Determine the table to reference for the left side of the join by checking each of the left side
-            # tables and see if they have the column being referenced.
-            join_column_pairs = []
-            for join_column in join_columns:
-                num_matching_ctes = 0
-                for cte in potential_ctes:
-                    if join_column.alias_or_name in cte.this.named_selects:
-                        left_column = join_column.copy().set_table_name(cte.alias_or_name)
-                        right_column = join_column.copy().set_table_name(
-                            other_df.latest_cte_name
-                        )
-                        join_column_pairs.append((left_column, right_column))
-                        num_matching_ctes += 1
-                        # We only want to match one table to the column and that should be matched left -> right
-                        # so we break after the first match
-                        break
-                if num_matching_ctes == 0:
-                    raise ValueError(
-                        f"Column `{join_column.alias_or_name}` does not exist in any of the tables."
-                    )
-            join_clause = functools.reduce(
-                lambda x, y: x & y,
-                [
-                    left_column == right_column
-                    for left_column, right_column in join_column_pairs
-                ],
+
+            join_column_pairs, join_clause = self._handle_join_column_names_only(
+                join_columns, join_expression, other_df, table_names
             )
             join_column_names = [
                 coalesce(
@@ -982,10 +1009,7 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
         * There is no deduplication of the results.
         * The left join dataframe columns go first and right come after. No sort preference is given to join columns
         """
-
-        if len(join_columns) > 1:
-            join_columns = [functools.reduce(lambda x, y: x & y, join_columns)]
-        join_clause = join_columns[0]
+        join_clause = self._normalize_join_clause(join_columns, join_expression)
         select_column_names = [
             column.alias_or_name for column in self_columns + other_columns
         ]
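
Net effect of the dataframe.py changes: the self-join uuid handling, the column-name-only join resolution, and the join-clause normalization move out of join() into the reusable helpers _handle_self_join, _handle_join_column_names_only, and _normalize_join_clause, which the new table merge support (below) calls as well. A minimal sketch of the self-join case the first helper covers, assuming sqlframe's PySpark-compatible API:

from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])
derived = df.where(df["id"] > 1)  # shares df's branch_id
# Column references like df["id"] carry "join_on_uuid" metadata, which
# _handle_self_join uses to requalify the right-hand side's columns.
df.join(derived, on=df["id"] == derived["id"], how="inner").show()
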
sqlframe/base/mixins/table_mixins.py
ADDED
@@ -0,0 +1,335 @@
+import functools
+import logging
+import typing as t
+
+from sqlglot import exp
+
+try:
+    from sqlglot.expressions import Whens
+except ImportError:
+    Whens = None  # type: ignore
+from sqlglot.helper import object_to_dict
+
+from sqlframe.base.column import Column
+from sqlframe.base.table import (
+    DF,
+    Clause,
+    LazyExpression,
+    WhenMatched,
+    WhenNotMatched,
+    WhenNotMatchedBySource,
+    _BaseTable,
+)
+
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import ColumnOrLiteral
+
+
+logger = logging.getLogger(__name__)
+
+
+def ensure_cte() -> t.Callable[[t.Callable], t.Callable]:
+    def decorator(func: t.Callable) -> t.Callable:
+        @functools.wraps(func)
+        def wrapper(self: _BaseTable, *args, **kwargs) -> t.Any:
+            if len(self.expression.ctes) > 0:
+                return func(self, *args, **kwargs)  # type: ignore
+            self_class = self.__class__
+            self = self._convert_leaf_to_cte()
+            self = self_class(**object_to_dict(self))
+            return func(self, *args, **kwargs)  # type: ignore
+
+        wrapper.__wrapped__ = func  # type: ignore
+        return wrapper
+
+    return decorator
+
+
+class _BaseTableMixins(_BaseTable, t.Generic[DF]):
+    def _ensure_where_condition(
+        self, where: t.Optional[t.Union[Column, str, bool]] = None
+    ) -> exp.Expression:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+
+        if where is None:
+            logger.warning("Empty value for `where` clause. Defaults to `True`.")
+            condition: exp.Expression = exp.Boolean(this=True)
+        else:
+            condition_list = self._ensure_and_normalize_cols(where, self.expression)
+            if len(condition_list) > 1:
+                condition_list = [functools.reduce(lambda x, y: x & y, condition_list)]
+            for col_expr in condition_list[0].expression.find_all(exp.Column):
+                if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+            condition = condition_list[0].expression
+            if isinstance(condition, exp.Alias):
+                condition = condition.this
+        return condition
+
+
+class UpdateSupportMixin(_BaseTableMixins, t.Generic[DF]):
+    @ensure_cte()
+    def update(
+        self,
+        set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
+        where: t.Optional[t.Union[Column, str, bool]] = None,
+    ) -> LazyExpression:
+        self_expr = self.expression.ctes[0].this.args["from"].this
+
+        condition = self._ensure_where_condition(where)
+        update_set = self._ensure_and_normalize_update_set(set_)
+        update_expr = exp.Update(
+            this=self_expr,
+            expressions=[
+                exp.EQ(
+                    this=key,
+                    expression=val,
+                )
+                for key, val in update_set.items()
+            ],
+            where=exp.Where(this=condition),
+        )
+
+        return LazyExpression(update_expr, self.session)
+
+    def _ensure_and_normalize_update_set(
+        self,
+        set_: t.Dict[t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]],
+    ) -> t.Dict[str, exp.Expression]:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        update_set = {}
+        for key, val in set_.items():
+            key_column: Column = self._ensure_and_normalize_col(key)
+            key_expr = list(key_column.expression.find_all(exp.Column))
+            if len(key_expr) > 1:
+                raise ValueError("Can only update a single column at a time.")
+            key = key_expr[0].alias_or_name
+
+            val_column: Column = self._ensure_and_normalize_col(val)
+            for col_expr in val_column.expression.find_all(exp.Column):
+                if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+                else:
+                    raise ValueError(
+                        f"Column `{col_expr.alias_or_name}` does not exist in the table."
+                    )
+
+            update_set[key] = val_column.expression
+        return update_set
+
+
+class DeleteSupportMixin(_BaseTableMixins, t.Generic[DF]):
+    @ensure_cte()
+    def delete(
+        self,
+        where: t.Optional[t.Union[Column, str, bool]] = None,
+    ) -> LazyExpression:
+        self_expr = self.expression.ctes[0].this.args["from"].this
+
+        condition = self._ensure_where_condition(where)
+        delete_expr = exp.Delete(
+            this=self_expr,
+            where=exp.Where(this=condition),
+        )
+
+        return LazyExpression(delete_expr, self.session)
+
+
+class MergeSupportMixin(_BaseTable, t.Generic[DF]):
+    _merge_supported_clauses: t.Iterable[
+        t.Union[t.Type[WhenMatched], t.Type[WhenNotMatched], t.Type[WhenNotMatchedBySource]]
+    ]
+    _merge_support_star: bool
+
+    @ensure_cte()
+    def merge(
+        self,
+        other_df: DF,
+        condition: t.Union[str, t.List[str], Column, t.List[Column], bool],
+        clauses: t.Iterable[t.Union[WhenMatched, WhenNotMatched, WhenNotMatchedBySource]],
+    ) -> LazyExpression:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        self_expr = self.expression.ctes[0].this.args["from"].this
+
+        other_df = other_df._convert_leaf_to_cte()
+
+        if condition is None:
+            raise ValueError("condition cannot be None")
+
+        condition_columns: Column = self._ensure_and_normalize_condition(condition, other_df)
+        other_name = self._create_hash_from_expression(other_df.expression)
+        other_expr = exp.Subquery(
+            this=other_df.expression, alias=exp.TableAlias(this=exp.to_identifier(other_name))
+        )
+
+        for col_expr in condition_columns.expression.find_all(exp.Column):
+            if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                col_expr.set("table", exp.to_identifier(self_name))
+            if col_expr.table == other_df.latest_cte_name:
+                col_expr.set("table", exp.to_identifier(other_name))
+
+        merge_expressions = []
+        for clause in clauses:
+            if not isinstance(clause, tuple(self._merge_supported_clauses)):
+                raise ValueError(
+                    f"Unsupported clause type {type(clause.clause)} for merge operation"
+                )
+            expression = None
+
+            if clause.clause.condition is not None:
+                cond_clause = self._ensure_and_normalize_condition(
+                    clause.clause.condition, other_df, True
+                )
+                for col_expr in cond_clause.expression.find_all(exp.Column):
+                    if col_expr.table == self.expression.args["from"].this.alias_or_name:
+                        col_expr.set("table", exp.to_identifier(self_name))
+                    if col_expr.table == other_df.latest_cte_name:
+                        col_expr.set("table", exp.to_identifier(other_name))
+            else:
+                cond_clause = None
+            if clause.clause.clause_type == Clause.UPDATE:
+                update_set = self._ensure_and_normalize_assignments(
+                    clause.clause.assignments, other_df
+                )
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Update(
+                        expressions=[
+                            exp.EQ(
+                                this=key,
+                                expression=val,
+                            )
+                            for key, val in update_set.items()
+                        ]
+                    ),
+                )
+            if clause.clause.clause_type == Clause.UPDATE_ALL:
+                if not self._merge_support_star:
+                    raise ValueError("Merge operation does not support UPDATE_ALL")
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Update(expressions=[exp.Star()]),
+                )
+            elif clause.clause.clause_type == Clause.INSERT:
+                insert_values = self._ensure_and_normalize_assignments(
+                    clause.clause.assignments, other_df
+                )
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Insert(
+                        this=exp.Tuple(expressions=[key for key in insert_values.keys()]),
+                        expression=exp.Tuple(expressions=[val for val in insert_values.values()]),
+                    ),
+                )
+            elif clause.clause.clause_type == Clause.INSERT_ALL:
+                if not self._merge_support_star:
+                    raise ValueError("Merge operation does not support INSERT_ALL")
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.Insert(expression=exp.Star()),
+                )
+            elif clause.clause.clause_type == Clause.DELETE:
+                expression = exp.When(
+                    matched=clause.clause.matched,
+                    source=clause.clause.by_source,
+                    condition=cond_clause.expression if cond_clause else None,
+                    then=exp.var("DELETE"),
+                )
+
+            if expression:
+                merge_expressions.append(expression)
+
+        if Whens is None:
+            merge_expr = exp.merge(
+                *merge_expressions,
+                into=self_expr,
+                using=other_expr,
+                on=condition_columns.expression,
+            )
+        else:
+            merge_expr = exp.merge(
+                Whens(expressions=merge_expressions),
+                into=self_expr,
+                using=other_expr,
+                on=condition_columns.expression,
+            )
+
+        return LazyExpression(merge_expr, self.session)
+
+    def _ensure_and_normalize_condition(
+        self,
+        condition: t.Union[str, t.List[str], Column, t.List[Column], bool],
+        other_df: DF,
+        clause: t.Optional[bool] = False,
+    ):
+        join_expression = self._add_ctes_to_expression(
+            self.expression, other_df.expression.copy().ctes
+        )
+        condition = self._ensure_and_normalize_cols(condition, self.expression)
+        self._handle_self_join(other_df, condition)
+
+        if isinstance(condition[0].expression, exp.Column) and not clause:
+            table_names = [
+                table.alias_or_name
+                for table in [
+                    self.expression.args["from"].this,
+                    other_df.expression.args["from"].this,
+                ]
+            ]
+
+            join_column_pairs, join_clause = self._handle_join_column_names_only(
+                condition, join_expression, other_df, table_names
+            )
+        else:
+            join_clause = self._normalize_join_clause(condition, join_expression)
+        return join_clause
+
+    def _ensure_and_normalize_assignments(
+        self,
+        assignments: t.Dict[
+            t.Union[Column, str], t.Union[Column, "ColumnOrLiteral", exp.Expression]
+        ],
+        other_df,
+    ) -> t.Dict[exp.Column, exp.Expression]:
+        self_name = self.expression.ctes[0].this.args["from"].this.alias_or_name
+        other_name = self._create_hash_from_expression(other_df.expression)
+        update_set = {}
+        for key, val in assignments.items():
+            key_column: Column = self._ensure_and_normalize_col(key)
+            key_expr = list(key_column.expression.find_all(exp.Column))
+            if len(key_expr) > 1:
+                raise ValueError(f"Target expression `{key_expr}` should be a single column.")
+            column_key = exp.column(key_expr[0].alias_or_name)
+
+            val = self._ensure_and_normalize_col(val)
+            val = self._ensure_and_normalize_cols(val, other_df.expression)[0]
+            if self.branch_id == other_df.branch_id:
+                other_df_unique_uuids = other_df.known_uuids - self.known_uuids
+                for col_expr in val.expression.find_all(exp.Column):
+                    if (
+                        "join_on_uuid" in col_expr.meta
+                        and col_expr.meta["join_on_uuid"] in other_df_unique_uuids
+                    ):
+                        col_expr.set("table", exp.to_identifier(other_df.latest_cte_name))
+
+            for col_expr in val.expression.find_all(exp.Column):
+                if not col_expr.table or col_expr.table == other_df.latest_cte_name:
+                    col_expr.set("table", exp.to_identifier(other_name))
+                elif col_expr.table == self.expression.args["from"].this.alias_or_name:
+                    col_expr.set("table", exp.to_identifier(self_name))
+                else:
+                    raise ValueError(
+                        f"Column `{col_expr.alias_or_name}` does not exist in any of the tables."
+                    )
+            if isinstance(val.expression, exp.Alias):
+                val.expression = val.expression.this
+            update_set[column_key] = val.expression
+        return update_set
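
The mixins above assemble UPDATE, DELETE, and MERGE statements as sqlglot expressions and hand them back wrapped in a LazyExpression rather than executing eagerly. A hedged usage sketch based on the signatures in this diff; the builder-style WhenMatched().update(...) / WhenNotMatched().insert(...) calls and LazyExpression.execute() are assumptions, since table.py itself is not shown here, and the table setup is illustrative:

from sqlframe.duckdb import DuckDBSession
from sqlframe.base.table import WhenMatched, WhenNotMatched

session = DuckDBSession()
session.createDataFrame([(1, "a"), (2, "b")], ["id", "name"]).write.saveAsTable("employees")
employees = session.table("employees")  # now returns a Table, not a plain DataFrame

# UPDATE employees SET name = 'x' WHERE id = 1
employees.update(set_={"name": "x"}, where=employees["id"] == 1).execute()

# DELETE FROM employees WHERE id = 2
employees.delete(where=employees["id"] == 2).execute()

# MERGE INTO employees USING <source> ON employees.id = source.id ...
source = session.createDataFrame([(1, "new"), (3, "c")], ["id", "name"])
employees.merge(
    source,
    condition=employees["id"] == source["id"],
    clauses=[
        WhenMatched().update(set_={"name": source["name"]}),
        WhenNotMatched().insert(values={"id": source["id"], "name": source["name"]}),
    ],
).execute()
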
sqlframe/base/readerwriter.py
CHANGED
@@ -21,19 +21,20 @@ else:
 if t.TYPE_CHECKING:
     from sqlframe.base._typing import OptionalPrimitiveType, PathOrPaths
     from sqlframe.base.column import Column
-    from sqlframe.base.session import DF, _BaseSession
+    from sqlframe.base.session import DF, TABLE, _BaseSession
     from sqlframe.base.types import StructType
 
     SESSION = t.TypeVar("SESSION", bound=_BaseSession)
 else:
     SESSION = t.TypeVar("SESSION")
     DF = t.TypeVar("DF")
+    TABLE = t.TypeVar("TABLE")
 
 
 logger = logging.getLogger(__name__)
 
 
-class _BaseDataFrameReader(t.Generic[SESSION, DF]):
+class _BaseDataFrameReader(t.Generic[SESSION, DF, TABLE]):
     def __init__(self, spark: SESSION):
         self._session = spark
         self.state_format_to_read: t.Optional[str] = None
@@ -42,7 +43,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
     def session(self) -> SESSION:
         return self._session
 
-    def table(self, tableName: str) -> DF:
+    def table(self, tableName: str) -> TABLE:
         tableName = normalize_string(tableName, from_dialect="input", is_table=True)
         if df := self.session.temp_views.get(tableName):
             return df
@@ -50,7 +51,7 @@ class _BaseDataFrameReader(t.Generic[SESSION, DF]):
         self.session.catalog.add_table(table)
         columns = self.session.catalog.get_columns_from_schema(table)
 
-        return self.session._create_df(
+        return self.session._create_table(
            exp.Select()
            .from_(tableName, dialect=self.session.input_dialect)
            .select(*columns, dialect=self.session.input_dialect)
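
With the reader now generic over SESSION, DF, and TABLE, each dialect's readwriter.py (the +2/-1 changes in the file list above) only needs to bind the third parameter. A sketch of the shape with assumed class names:

from sqlframe.base.readerwriter import _BaseDataFrameReader

# Hypothetical concrete reader; the real per-dialect classes follow this pattern.
class DuckDBDataFrameReader(
    _BaseDataFrameReader["DuckDBSession", "DuckDBDataFrame", "DuckDBTable"]
):
    pass  # inherits table(), which is now typed to return a DuckDBTable
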
sqlframe/base/session.py
CHANGED
@@ -27,6 +27,7 @@ from sqlframe.base.catalog import _BaseCatalog
 from sqlframe.base.dataframe import BaseDataFrame
 from sqlframe.base.normalize import normalize_dict
 from sqlframe.base.readerwriter import _BaseDataFrameReader, _BaseDataFrameWriter
+from sqlframe.base.table import _BaseTable
 from sqlframe.base.udf import _BaseUDFRegistration
 from sqlframe.base.util import (
     get_column_mapping_from_schema_input,
@@ -65,17 +66,19 @@ CATALOG = t.TypeVar("CATALOG", bound=_BaseCatalog)
 READER = t.TypeVar("READER", bound=_BaseDataFrameReader)
 WRITER = t.TypeVar("WRITER", bound=_BaseDataFrameWriter)
 DF = t.TypeVar("DF", bound=BaseDataFrame)
+TABLE = t.TypeVar("TABLE", bound=_BaseTable)
 UDF_REGISTRATION = t.TypeVar("UDF_REGISTRATION", bound=_BaseUDFRegistration)
 
 _MISSING = "MISSING"
 
 
-class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION]):
+class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, TABLE, CONN, UDF_REGISTRATION]):
     _instance = None
     _reader: t.Type[READER]
     _writer: t.Type[WRITER]
     _catalog: t.Type[CATALOG]
     _df: t.Type[DF]
+    _table: t.Type[TABLE]
     _udf_registration: t.Type[UDF_REGISTRATION]
 
     SANITIZE_COLUMN_NAMES = False
@@ -158,12 +161,15 @@ class _BaseSession(t.Generic[CATALOG, READER, WRITER, DF, CONN, UDF_REGISTRATION]):
             return name.replace("(", "_").replace(")", "_")
         return name
 
-    def table(self, tableName: str) -> DF:
+    def table(self, tableName: str) -> TABLE:
         return self.read.table(tableName)
 
     def _create_df(self, *args, **kwargs) -> DF:
         return self._df(self, *args, **kwargs)
 
+    def _create_table(self, *args, **kwargs) -> TABLE:
+        return self._table(self, *args, **kwargs)
+
     def __new__(cls, *args, **kwargs):
         if _BaseSession._instance is None:
             _BaseSession._instance = super().__new__(cls)