sqlframe 3.22.1__py3-none-any.whl → 3.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlframe/__init__.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import importlib
 import sys
 import typing as t
+from contextlib import contextmanager
 from unittest.mock import MagicMock
 
 if t.TYPE_CHECKING:
@@ -98,3 +99,14 @@ def deactivate() -> None:
     except ImportError:
         pass
     ACTIVATE_CONFIG.clear()
+
+
+@contextmanager
+def activate_context(
+    engine: t.Optional[str] = None,
+    conn: t.Optional[CONN] = None,
+    config: t.Optional[t.Dict[str, t.Any]] = None,
+):
+    activate(engine, conn, config)
+    yield
+    deactivate()
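
The new `activate_context` helper scopes the existing `activate()`/`deactivate()` pair to a `with` block. A minimal usage sketch, assuming the engine names documented for `activate()`; note that `deactivate()` only runs when the block exits normally, since the `yield` is not wrapped in `try`/`finally`:

```python
import sqlframe

# Scope sqlframe's PySpark-import takeover to a block.
with sqlframe.activate_context(engine="duckdb"):
    from pyspark.sql import SparkSession  # resolves to sqlframe's session

    spark = SparkSession.builder.getOrCreate()
    spark.sql("SELECT 1 AS x").show()
```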
sqlframe/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '3.22.1'
-__version_tuple__ = version_tuple = (3, 22, 1)
+__version__ = version = '3.24.0'
+__version_tuple__ = version_tuple = (3, 24, 0)
sqlframe/base/catalog.py CHANGED
@@ -14,16 +14,17 @@ if t.TYPE_CHECKING:
     from sqlglot.schema import ColumnMapping
 
     from sqlframe.base._typing import StorageLevel, UserDefinedFunctionLike
-    from sqlframe.base.session import DF, _BaseSession
+    from sqlframe.base.session import DF, TABLE, _BaseSession
     from sqlframe.base.types import DataType, StructType
 
     SESSION = t.TypeVar("SESSION", bound=_BaseSession)
 else:
     DF = t.TypeVar("DF")
+    TABLE = t.TypeVar("TABLE")
     SESSION = t.TypeVar("SESSION")
 
 
-class _BaseCatalog(t.Generic[SESSION, DF]):
+class _BaseCatalog(t.Generic[SESSION, DF, TABLE]):
     """User-facing catalog API, accessible through `SparkSession.catalog`."""
 
     TEMP_CATALOG_FILTER: t.Optional[exp.Expression] = None
@@ -688,7 +689,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
         source: t.Optional[str] = None,
         schema: t.Optional[StructType] = None,
         **options: str,
-    ) -> DF:
+    ) -> TABLE:
        """Creates a table based on the dataset in a data source.
 
        It returns the DataFrame associated with the external table.
@@ -716,7 +717,7 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
         schema: t.Optional[StructType] = None,
         description: t.Optional[str] = None,
         **options: str,
-    ) -> DF:
+    ) -> TABLE:
        """Creates a table based on the dataset in a data source.
 
        .. versionadded:: 2.2.0
sqlframe/base/column.py CHANGED
@@ -128,6 +128,21 @@ class Column:
             "Tried to call a column which is unexpected. Did you mean to call a method on a DataFrame? If so, make sure the method is typed correctly and is supported. If not, please open an issue requesting support: https://github.com/eakmanrq/sqlframe/issues"
         )
 
+    def __getattr__(self, name: str) -> Column:
+        """
+        Enables accessing nested fields using dot notation for struct types.
+
+        For example:
+        df.select(df.r.a) # This is equivalent to df.select(df.r.getField("a"))
+
+        This method is called when the attribute doesn't exist in the class,
+        and delegates to getField method.
+        """
+        # Handle special method names (like __iter__) properly by raising AttributeError
+        if name.startswith("__") and name.endswith("__"):
+            raise AttributeError(f"{self.__class__.__name__} object has no attribute '{name}'")
+        return self.getField(name)
+
     @classmethod
     def ensure_col(cls, value: t.Optional[t.Union[ColumnOrName, exp.Expression]]) -> Column:
         col = get_func_from_session("col")
@@ -459,3 +474,45 @@ class Column:
         if isinstance(key.expression, exp.Literal) and key.expression.is_number:
             key = key + lit(1)
         return element_at(self, key)
+
+    def getField(self, name: t.Any) -> Column:
+        """
+        An expression that gets a field by name in a StructType.
+
+        .. versionadded:: 1.3.0
+
+        .. versionchanged:: 3.4.0
+            Supports Spark Connect.
+
+        Parameters
+        ----------
+        name
+            a literal value, or a :class:`Column` expression.
+            The result will only be true at a location if the field matches in the Column.
+
+            .. deprecated:: 3.0.0
+                :class:`Column` as a parameter is deprecated.
+
+        Returns
+        -------
+        :class:`Column`
+            Column representing whether each element of Column got by name.
+
+        Examples
+        --------
+        >>> from sqlframe.base.types import Row
+        >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))])
+        >>> df.select(df.r.getField("b")).show()
+        +---+
+        |r.b|
+        +---+
+        |  b|
+        +---+
+        >>> df.select(df.r.a).show()
+        +---+
+        |r.a|
+        +---+
+        |  1|
+        +---+
+        """
+        return self.getItem(name)
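
A short sketch of the two access styles the new `__getattr__`/`getField` pair enables; the DuckDB session here is an assumption for illustration:

```python
from sqlframe.base.types import Row
from sqlframe.duckdb import DuckDBSession

spark = DuckDBSession()
df = spark.createDataFrame([Row(r=Row(a=1, b="b"))])

# Dot notation routes through Column.__getattr__ -> getField -> getItem,
# so these two selections are equivalent:
df.select(df.r.a, df.r.getField("b")).show()
```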
sqlframe/base/dataframe.py CHANGED
@@ -1730,8 +1730,8 @@ class BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     @operation(Operation.SELECT)
     def unpivot(
         self,
-        ids: t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]],
-        values: t.Optional[t.Union[ColumnOrName, t.List[ColumnOrName], t.Tuple[ColumnOrName, ...]]],
+        ids: t.Union[ColumnOrName, t.Collection[ColumnOrName]],
+        values: t.Optional[t.Union[ColumnOrName, t.Collection[ColumnOrName]]],
         variableColumnName: str,
         valueColumnName: str,
     ) -> Self:
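
`unpivot` now accepts any `Collection` of columns for `ids`/`values`, not just lists and tuples. A hedged sketch (DuckDB session and engine support for unpivot assumed; the data is illustrative):

```python
from sqlframe.duckdb import DuckDBSession

spark = DuckDBSession()
df = spark.createDataFrame([(1, 11, 12), (2, 21, 22)], ["id", "q1", "q2"])

# Tuples (or any other Collection) are now accepted alongside lists:
df.unpivot(
    ids=("id",),
    values=("q1", "q2"),
    variableColumnName="quarter",
    valueColumnName="sales",
).show()
```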
sqlframe/base/functions.py CHANGED
@@ -856,15 +856,21 @@ def expr(str: str) -> Column:
 
 @meta(unsupported_engines=["postgres"])
 def struct(col: t.Union[ColumnOrName, t.Iterable[ColumnOrName]], *cols: ColumnOrName) -> Column:
-    from sqlframe.base.function_alternatives import struct_with_eq
-
     session = _get_session()
-
-    if session._is_snowflake:
-        return struct_with_eq(col, *cols)
-
-    columns = ensure_list(col) + list(cols)
-    return Column.invoke_expression_over_column(None, expression.Struct, expressions=columns)
+    col_func = get_func_from_session("col")
+
+    columns = [col_func(x) for x in ensure_list(col) + list(cols)]
+    expressions = []
+    for column in columns:
+        expressions.append(
+            expression.PropertyEQ(
+                this=expression.parse_identifier(
+                    column.alias_or_name, dialect=session.input_dialect
+                ),
+                expression=column.column_expression,
+            )
+        )
+    return Column(expression.Struct(expressions=expressions))
 
 
 @meta(unsupported_engines=["bigquery", "duckdb", "postgres", "snowflake"])
sqlframe/base/group.py CHANGED
@@ -16,6 +16,8 @@ else:
 # https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html
 # https://stackoverflow.com/questions/37975227/what-is-the-difference-between-cube-rollup-and-groupby-operators
 class _BaseGroupedData(t.Generic[DF]):
+    last_op: Operation
+
     def __init__(
         self,
         df: DF,
sqlframe/base/mixins/catalog_mixins.py CHANGED
@@ -6,16 +6,23 @@ from sqlglot import exp
 from sqlframe.base.catalog import (
     DF,
     SESSION,
+    TABLE,
     CatalogMetadata,
     Column,
     Database,
     Table,
     _BaseCatalog,
 )
-from sqlframe.base.util import normalize_string, schema_, to_schema
+from sqlframe.base.types import StructType
+from sqlframe.base.util import (
+    get_column_mapping_from_schema_input,
+    normalize_string,
+    schema_,
+    to_schema,
+)
 
 
-class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     QUALIFY_INFO_SCHEMA_WITH_DATABASE = False
     UPPERCASE_INFO_SCHEMA = False
 
@@ -52,7 +59,7 @@ class _BaseInfoSchemaMixin(_BaseCatalog, t.Generic[SESSION, DF]):
         )
 
 
-class GetCurrentCatalogFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class GetCurrentCatalogFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.func("current_catalog")
 
     def currentCatalog(self) -> str:
@@ -74,7 +81,7 @@ class GetCurrentCatalogFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF]):
         )
 
 
-class GetCurrentDatabaseFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class GetCurrentDatabaseFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     CURRENT_DATABASE_EXPRESSION: exp.Expression = exp.func("current_schema")
 
     def currentDatabase(self) -> str:
@@ -94,7 +101,7 @@ class GetCurrentDatabaseFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF]):
         )
 
 
-class SetCurrentCatalogFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class SetCurrentCatalogFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     def setCurrentCatalog(self, catalogName: str) -> None:
         """Sets the current default catalog in this session.
 
@@ -114,7 +121,136 @@ class SetCurrentCatalogFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF]):
         )
 
 
-class ListDatabasesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
+class CreateTableFromFunctionMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
+    def createTable(
+        self,
+        tableName: str,
+        path: t.Optional[str] = None,
+        source: t.Optional[str] = None,
+        schema: t.Optional[StructType] = None,
+        description: t.Optional[str] = None,
+        **options: str,
+    ) -> TABLE:
+        """Creates a table based on the dataset in a data source.
+
+        .. versionadded:: 2.2.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to create.
+
+            .. versionchanged:: 3.4.0
+                Allow ``tableName`` to be qualified with catalog name.
+
+        path : str, t.Optional
+            the path in which the data for this table exists.
+            When ``path`` is specified, an external table is
+            created from the data at the given path. Otherwise a managed table is created.
+        source : str, t.Optional
+            the source of this table such as 'parquet, 'orc', etc.
+            If ``source`` is not specified, the default data source configured by
+            ``spark.sql.sources.default`` will be used.
+        schema : class:`StructType`, t.Optional
+            the schema for this table.
+        description : str, t.Optional
+            the description of this table.
+
+            .. versionchanged:: 3.1.0
+                Added the ``description`` parameter.
+
+        **options : dict, t.Optional
+            extra options to specify in the table.
+
+        Returns
+        -------
+        :class:`DataFrame`
+            The DataFrame associated with the table.
+
+        Examples
+        --------
+        Creating a managed table.
+
+        >>> _ = spark.catalog.createTable("tbl1", schema=spark.range(1).schema, source='parquet')
+        >>> _ = spark.sql("DROP TABLE tbl1")
+
+        Creating an external table
+
+        >>> import tempfile
+        >>> with tempfile.TemporaryDirectory() as d:
+        ...     _ = spark.catalog.createTable(
+        ...         "tbl2", schema=spark.range(1).schema, path=d, source='parquet')
+        >>> _ = spark.sql("DROP TABLE tbl2")
+        """
+        if source is not None:
+            raise NotImplementedError("Providing source to create table is not supported")
+        if path is not None:
+            raise NotImplementedError("Creating a external table is not supported")
+
+        replace: t.Union[str, bool, None] = options.pop("replace", None)
+        exists: t.Union[str, bool, None] = options.pop("exists", None)
+
+        if isinstance(replace, str) and replace.lower() == "true":
+            replace = True
+        if isinstance(exists, str) and exists.lower() == "true":
+            exists = True
+
+        if schema is None:
+            raise ValueError("schema must be specified.")
+
+        column_mapping = get_column_mapping_from_schema_input(
+            schema, dialect=self.session.input_dialect
+        )
+        expressions = [
+            exp.ColumnDef(this=exp.parse_identifier(k, dialect=self.session.input_dialect), kind=v)
+            for k, v in column_mapping.items()
+        ]
+
+        name = normalize_string(tableName, from_dialect="input", is_table=True)
+        output_expression_container = exp.Create(
+            this=exp.Schema(
+                this=exp.to_table(name, dialect=self.session.input_dialect),
+                expressions=expressions,
+            ),
+            kind="TABLE",
+            exists=exists,
+            replace=replace,
+        )
+        if self.session._has_connection:
+            self.session._collect(output_expression_container)
+
+        df = self.session.table(name)
+        return df
+
+    def createExternalTable(
+        self,
+        tableName: str,
+        path: t.Optional[str] = None,
+        source: t.Optional[str] = None,
+        schema: t.Optional[StructType] = None,
+        **options: str,
+    ) -> TABLE:
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        t.Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        .. versionadded:: 2.0.0
+
+        Returns
+        -------
+        :class:`DataFrame`
+        """
+        return self.createTable(tableName, path=path, source=source, schema=schema, **options)
+
+
+class ListDatabasesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF, TABLE]):
     def listDatabases(self, pattern: t.Optional[str] = None) -> t.List[Database]:
         """
         Returns a t.List of databases available across all sessions.
@@ -169,7 +305,7 @@ class ListDatabasesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION,
         return databases
 
 
-class ListCatalogsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
+class ListCatalogsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF, TABLE]):
     def listCatalogs(self, pattern: t.Optional[str] = None) -> t.List[CatalogMetadata]:
         """
         Returns a t.List of databases available across all sessions.
@@ -221,7 +357,7 @@ class ListCatalogsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, D
         return catalogs
 
 
-class SetCurrentDatabaseFromSearchPathMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class SetCurrentDatabaseFromSearchPathMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     def setCurrentDatabase(self, dbName: str) -> None:
         """
         Sets the current default database in this session.
@@ -235,7 +371,7 @@ class SetCurrentDatabaseFromSearchPathMixin(_BaseCatalog, t.Generic[SESSION, DF]
         self.session._execute(f'SET search_path TO "{dbName}"')
 
 
-class SetCurrentDatabaseFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF]):
+class SetCurrentDatabaseFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF, TABLE]):
     def setCurrentDatabase(self, dbName: str) -> None:
         """
         Sets the current default database in this session.
@@ -257,7 +393,7 @@ class SetCurrentDatabaseFromUseMixin(_BaseCatalog, t.Generic[SESSION, DF]):
         self.session._collect(exp.Use(this=schema))
 
 
-class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
+class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF, TABLE]):
     def listTables(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Table]:
@@ -395,7 +531,7 @@ class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]
         return tables
 
 
-class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
+class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF, TABLE]):
     def listColumns(
         self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
     ) -> t.List[Column]:
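
A hedged usage sketch for the new `CreateTableFromFunctionMixin.createTable`, assuming a DuckDB session whose catalog includes this mixin. Only schema-based creation is implemented; `path=`/`source=` raise `NotImplementedError` as shown above:

```python
from sqlframe.base import types
from sqlframe.duckdb import DuckDBSession

spark = DuckDBSession()
schema = types.StructType(
    [
        types.StructField("id", types.LongType()),
        types.StructField("name", types.StringType()),
    ]
)

# Issues CREATE OR REPLACE TABLE and returns a handle to the new table.
tbl = spark.catalog.createTable("tbl1", schema=schema, replace="true")
```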
sqlframe/base/mixins/dataframe_mixins.py CHANGED
@@ -4,6 +4,9 @@ import typing as t
 
 from sqlglot import exp
 
+if t.TYPE_CHECKING:
+    from sqlframe.base._typing import StorageLevel
+
 from sqlframe.base.catalog import Column
 from sqlframe.base.dataframe import (
     GROUP_DATA,
@@ -28,7 +31,7 @@ class NoCachePersistSupportMixin(BaseDataFrame, t.Generic[SESSION, WRITER, NA, S
         logger.warning("This engine does not support caching. Ignoring cache() call.")
         return self
 
-    def persist(self) -> Self:
+    def persist(self, storageLevel: "StorageLevel" = "MEMORY_AND_DISK_SER") -> Self:
         logger.warning("This engine does not support persist. Ignoring persist() call.")
         return self
 
sqlframe/base/operations.py CHANGED
@@ -6,10 +6,19 @@ import functools
 import typing as t
 from enum import IntEnum
 
+from typing_extensions import Concatenate, ParamSpec
+
 if t.TYPE_CHECKING:
     from sqlframe.base.dataframe import BaseDataFrame
     from sqlframe.base.group import _BaseGroupedData
 
+    DF = t.TypeVar("DF", bound=BaseDataFrame)
+    T = t.TypeVar("T", bound=t.Union[BaseDataFrame, _BaseGroupedData])
+else:
+    DF = t.TypeVar("DF")
+    T = t.TypeVar("T")
+P = ParamSpec("P")  # represents arbitrary args + kwargs
+
 
 class Operation(IntEnum):
     INIT = -1
@@ -23,7 +32,17 @@ class Operation(IntEnum):
     LIMIT = 7
 
 
-def operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
+# We want to decorate a function (self: DF, *args, **kwargs) -> T
+# where DF is a subclass of BaseDataFrame
+# where T is a subclass of BaseDataFrame or _BaseGroupedData
+# And keep its signature, i.e. produce a function of the same shape
+# Hence we work with `t.Callable[Concatenate[DF, P], T]`
+def operation(
+    op: Operation,
+) -> t.Callable[
+    [t.Callable[Concatenate[DF, P], T]],  # accept such a function
+    t.Callable[Concatenate[DF, P], T],  # and return such a function
+]:
     """
     Decorator used around DataFrame methods to indicate what type of operation is being performed from the
     ordered Operation enums. This is used to determine which operations should be performed on a CTE vs.
@@ -35,9 +54,11 @@ def operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
     in cases where there is overlap in names.
     """
 
-    def decorator(func: t.Callable) -> t.Callable:
+    def decorator(
+        func: t.Callable[Concatenate[DF, P], T],
+    ) -> t.Callable[Concatenate[DF, P], T]:
         @functools.wraps(func)
-        def wrapper(self: BaseDataFrame, *args, **kwargs) -> BaseDataFrame:
+        def wrapper(self: DF, *args, **kwargs) -> T:
             if self.last_op == Operation.INIT:
                 self = self._convert_leaf_to_cte()
                 self.last_op = Operation.NO_OP
@@ -45,17 +66,22 @@ def operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
             new_op = op if op != Operation.NO_OP else last_op
             if new_op < last_op or (last_op == new_op == Operation.SELECT):
                 self = self._convert_leaf_to_cte()
-            df: t.Union[BaseDataFrame, _BaseGroupedData] = func(self, *args, **kwargs)
-            df.last_op = new_op  # type: ignore
-            return df  # type: ignore
+            df = func(self, *args, **kwargs)
+            df.last_op = new_op
+            return df
 
-        wrapper.__wrapped__ = func  # type: ignore
+        wrapper.__wrapped__ = func
         return wrapper
 
     return decorator
 
 
-def group_operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
+# Here decorate a function (self: _BaseGroupedData[DF], *args, **kwargs) -> DF
+# Hence we work with t.Callable[Concatenate[_BaseGroupedData[DF], P], DF]
+# We simplify the parameters, as Pyright (used for VSCode autocomplete) doesn't unterstand this
+def group_operation(
+    op: Operation,
+) -> t.Callable[[t.Callable[P, DF]], t.Callable[P, DF]]:
     """
     Decorator used around DataFrame methods to indicate what type of operation is being performed from the
     ordered Operation enums. This is used to determine which operations should be performed on a CTE vs.
@@ -67,9 +93,11 @@ def group_operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
     in cases where there is overlap in names.
    """
 
-    def decorator(func: t.Callable) -> t.Callable:
+    def decorator(
+        func: t.Callable[Concatenate[_BaseGroupedData[DF], P], DF],
+    ) -> t.Callable[Concatenate[_BaseGroupedData[DF], P], DF]:
         @functools.wraps(func)
-        def wrapper(self: _BaseGroupedData, *args, **kwargs) -> BaseDataFrame:
+        def wrapper(self: _BaseGroupedData[DF], *args, **kwargs) -> DF:
             if self._df.last_op == Operation.INIT:
                 self._df = self._df._convert_leaf_to_cte()
                 self._df.last_op = Operation.NO_OP
@@ -77,11 +105,11 @@ def group_operation(op: Operation) -> t.Callable[[t.Callable], t.Callable]:
             new_op = op if op != Operation.NO_OP else last_op
             if new_op < last_op or (last_op == new_op == Operation.SELECT):
                 self._df = self._df._convert_leaf_to_cte()
-            df: BaseDataFrame = func(self, *args, **kwargs)
-            df.last_op = new_op  # type: ignore
+            df = func(self, *args, **kwargs)
+            df.last_op = new_op
             return df
 
-        wrapper.__wrapped__ = func  # type: ignore
+        wrapper.__wrapped__ = func
         return wrapper
 
-    return decorator
+    return decorator  # type: ignore
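
For readers unfamiliar with the typing pattern above, here is a standalone sketch of how `ParamSpec` plus `Concatenate` let a decorator preserve the wrapped method's signature for type checkers; the names are illustrative, not from sqlframe:

```python
import functools
import typing as t

from typing_extensions import Concatenate, ParamSpec

P = ParamSpec("P")
R = t.TypeVar("R")
S = t.TypeVar("S")  # stands in for the "self" type of the decorated method


def tag(label: str) -> t.Callable[
    [t.Callable[Concatenate[S, P], R]],
    t.Callable[Concatenate[S, P], R],
]:
    # The decorator accepts and returns a callable of the same shape, so
    # autocomplete and argument checking survive the wrapping.
    def decorator(func: t.Callable[Concatenate[S, P], R]) -> t.Callable[Concatenate[S, P], R]:
        @functools.wraps(func)
        def wrapper(self: S, *args: P.args, **kwargs: P.kwargs) -> R:
            print(f"calling {func.__name__} [{label}]")
            return func(self, *args, **kwargs)

        return wrapper

    return decorator
```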
sqlframe/base/readerwriter.py CHANGED
@@ -444,7 +444,10 @@ class _BaseDataFrameWriter(t.Generic[SESSION, DF]):
         return self.copy(_df=df)
 
     def saveAsTable(
-        self, name: str, format: t.Optional[str] = None, mode: t.Optional[str] = None
+        self,
+        name: str,
+        format: t.Optional[str] = None,
+        mode: t.Optional[str] = None,
     ) -> Self:
         if format is not None:
             raise NotImplementedError("Providing Format in the save as table is not supported")
sqlframe/base/window.py CHANGED
@@ -27,11 +27,11 @@ class Window:
     currentRow: int = 0
 
     @classmethod
-    def partitionBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+    def partitionBy(cls, *cols: t.Union[ColumnOrName, t.Collection[ColumnOrName]]) -> WindowSpec:
         return WindowSpec().partitionBy(*cols)
 
     @classmethod
-    def orderBy(cls, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+    def orderBy(cls, *cols: t.Union[ColumnOrName, t.Collection[ColumnOrName]]) -> WindowSpec:
         return WindowSpec().orderBy(*cols)
 
     @classmethod
@@ -55,10 +55,10 @@ class WindowSpec:
 
         return self.expression.sql(dialect=_BaseSession().input_dialect, **kwargs)
 
-    def partitionBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+    def partitionBy(self, *cols: t.Union[ColumnOrName, t.Collection[ColumnOrName]]) -> WindowSpec:
         from sqlframe.base.column import Column
 
-        cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
+        cols = flatten(cols) if isinstance(cols[0], t.Collection) else cols  # type: ignore
         expressions = [Column.ensure_col(x).expression for x in cols]  # type: ignore
         window_spec = self.copy()
         partition_by_expressions = window_spec.expression.args.get("partition_by", [])
@@ -66,10 +66,10 @@ class WindowSpec:
         window_spec.expression.set("partition_by", partition_by_expressions)
         return window_spec
 
-    def orderBy(self, *cols: t.Union[ColumnOrName, t.List[ColumnOrName]]) -> WindowSpec:
+    def orderBy(self, *cols: t.Union[ColumnOrName, t.Collection[ColumnOrName]]) -> WindowSpec:
         from sqlframe.base.column import Column
 
-        cols = flatten(cols) if isinstance(cols[0], (list, set, tuple)) else cols  # type: ignore
+        cols = flatten(cols) if isinstance(cols[0], t.Collection) else cols  # type: ignore
         expressions = [Column.ensure_col(x).expression for x in cols]  # type: ignore
         window_spec = self.copy()
         if window_spec.expression.args.get("order") is None:
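
After the `Collection` widening, both calling styles type-check and flatten the same way. A brief sketch (DuckDB session assumed; column names are illustrative):

```python
from sqlframe.base.window import Window
from sqlframe.duckdb import DuckDBSession

spark = DuckDBSession()  # establishes the active session used to parse columns

w1 = Window.partitionBy("a", "b").orderBy("c")
w2 = Window.partitionBy(["a", "b"]).orderBy(("c",))  # any Collection works
```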
sqlframe/bigquery/catalog.py CHANGED
@@ -7,6 +7,7 @@ from sqlglot import exp
 
 from sqlframe.base.catalog import CatalogMetadata, Column, Function
 from sqlframe.base.mixins.catalog_mixins import (
+    CreateTableFromFunctionMixin,
     ListDatabasesFromInfoSchemaMixin,
     ListTablesFromInfoSchemaMixin,
     _BaseInfoSchemaMixin,
@@ -18,12 +19,14 @@ if t.TYPE_CHECKING:
 
     from sqlframe.bigquery.dataframe import BigQueryDataFrame  # noqa
     from sqlframe.bigquery.session import BigQuerySession  # noqa
+    from sqlframe.bigquery.table import BigQueryTable  # noqa
 
 
 class BigQueryCatalog(
-    ListDatabasesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
-    ListTablesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
-    _BaseInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame"],
+    CreateTableFromFunctionMixin["BigQuerySession", "BigQueryDataFrame", "BigQueryTable"],
+    ListDatabasesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame", "BigQueryTable"],
+    ListTablesFromInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame", "BigQueryTable"],
+    _BaseInfoSchemaMixin["BigQuerySession", "BigQueryDataFrame", "BigQueryTable"],
 ):
     QUALIFY_INFO_SCHEMA_WITH_DATABASE = True
     UPPERCASE_INFO_SCHEMA = True