sqlframe 1.10.0__py3-none-any.whl → 1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlframe/_version.py +2 -2
- sqlframe/base/dataframe.py +54 -1
- sqlframe/base/exceptions.py +12 -0
- sqlframe/base/mixins/dataframe_mixins.py +24 -33
- sqlframe/base/types.py +2 -2
- sqlframe/base/util.py +51 -0
- sqlframe/bigquery/dataframe.py +33 -13
- sqlframe/duckdb/dataframe.py +6 -15
- sqlframe/postgres/catalog.py +123 -3
- sqlframe/postgres/dataframe.py +6 -10
- sqlframe/redshift/dataframe.py +3 -14
- sqlframe/snowflake/dataframe.py +23 -13
- sqlframe/spark/dataframe.py +25 -15
- sqlframe/testing/__init__.py +3 -0
- sqlframe/testing/utils.py +320 -0
- {sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/METADATA +1 -1
- {sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/RECORD +20 -18
- {sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/LICENSE +0 -0
- {sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/WHEEL +0 -0
- {sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/top_level.txt +0 -0
sqlframe/_version.py
CHANGED
sqlframe/base/dataframe.py
CHANGED
@@ -22,6 +22,7 @@ from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
@@ -29,6 +30,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     quote_preserving_alias_or_name,
+    sqlglot_to_spark,
     verify_openai_installed,
 )
 
@@ -231,6 +233,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     def __copy__(self):
         return self.copy()
 
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        raise NotImplementedError
+
     @property
     def write(self) -> WRITER:
         return self.session._writer(self)
@@ -293,7 +299,24 @@
         StructType([StructField('age', LongType(), True),
                     StructField('name', StringType(), True)])
         """
-
+        from sqlframe.base import types
+
+        try:
+            return types.StructType(
+                [
+                    types.StructField(
+                        c.name,
+                        sqlglot_to_spark(
+                            exp.DataType.build(c.dataType, dialect=self.session.output_dialect)
+                        ),
+                    )
+                    for c in self._typed_columns
+                ]
+            )
+        except NotImplementedError as e:
+            raise NotImplementedError(
+                "This engine does not support schema inference likely since it does not have an active connection."
+            ) from e
 
     def _replace_cte_names_with_hashes(self, expression: exp.Select):
         replacement_mapping = {}
@@ -1537,6 +1560,36 @@
             table.add_row(list(row))
         print(table)
 
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        print("root")
+        for column in self._typed_columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
+
     def toPandas(self) -> pd.DataFrame:
         sql_kwargs = dict(
             pretty=False, optimize=False, dialect=self.session.output_dialect, as_list=True
sqlframe/base/exceptions.py
CHANGED
@@ -12,3 +12,15 @@ class RowError(SQLFrameException):
 
 class TableSchemaError(SQLFrameException):
     pass
+
+
+class PandasDiffError(SQLFrameException):
+    pass
+
+
+class DataFrameDiffError(SQLFrameException):
+    pass
+
+
+class SchemaDiffError(SQLFrameException):
+    pass
sqlframe/base/mixins/dataframe_mixins.py
CHANGED
@@ -1,3 +1,5 @@
+import logging
+import sys
 import typing as t
 
 from sqlglot import exp
@@ -12,11 +14,30 @@ from sqlframe.base.dataframe import (
     _BaseDataFrame,
 )
 
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
 
-
+
+logger = logging.getLogger(__name__)
+
+
+class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
+    def cache(self) -> Self:
+        logger.warning("This engine does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("This engine does not support persist. Ignoring persist() call.")
+        return self
+
+
+class TypedColumnsFromTempViewMixin(
     _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
 ):
-
+    @property
+    def _typed_columns(self) -> t.List[Column]:
         table = exp.to_table(self.session._random_id)
         self.session._execute(
             exp.Create(
@@ -27,37 +48,7 @@ class PrintSchemaFromTempObjectsMixin(
                 expression=self.expression,
             )
         )
+
         return self.session.catalog.listColumns(
             table.sql(dialect=self.session.input_dialect), include_temp=True
         )
-
-    def printSchema(self, level: t.Optional[int] = None) -> None:
-        def print_schema(
-            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
-        ):
-            if level and current_level >= level:
-                return
-            if current_level > 0:
-                print(" | " * current_level, end="")
-            print(
-                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
-            )
-            if column_type.this == exp.DataType.Type.STRUCT:
-                for column_def in column_type.expressions:
-                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
-            if column_type.this == exp.DataType.Type.ARRAY:
-                for data_type in column_type.expressions:
-                    print_schema("element", data_type, True, current_level + 1)
-            if column_type.this == exp.DataType.Type.MAP:
-                print_schema("key", column_type.expressions[0], True, current_level + 1)
-                print_schema("value", column_type.expressions[1], True, current_level + 1)
-
-        columns = self._get_columns_from_temp_object()
-        print("root")
-        for column in columns:
-            print_schema(
-                column.name,
-                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
-                column.nullable,
-                0,
-            )
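A short sketch of the new NoCachePersistSupportMixin behaviour, assuming a DuckDB session (any engine that mixes it in behaves the same): cache() and persist() only log a warning and hand back the same DataFrame.

    import logging
    from sqlframe.duckdb import DuckDBSession

    logging.basicConfig(level=logging.WARNING)
    session = DuckDBSession()
    df = session.createDataFrame([(1,)], schema=["id"])

    # Logs "This engine does not support caching. Ignoring cache() call."
    same_df = df.cache()
    assert same_df is df  # nothing is materialized; the DataFrame is returned unchanged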
sqlframe/base/types.py
CHANGED
@@ -22,14 +22,14 @@ class DataType:
         return not self.__eq__(other)
 
     def __str__(self) -> str:
-        return self.
+        return self.simpleString()
 
     @classmethod
     def typeName(cls) -> str:
         return cls.__name__[:-4].lower()
 
    def simpleString(self) -> str:
-        return
+        return self.typeName()
 
     def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
         return str(self)
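The change above fills in two previously truncated return statements so that str() on a DataType goes through simpleString(), which in turn falls back to typeName(). A quick sketch, assuming StringType does not override simpleString():

    from sqlframe.base import types

    dt = types.StringType()
    # typeName() strips the trailing "Type" from the class name and lowercases it
    assert dt.typeName() == "string"
    # simpleString() now delegates to typeName(), and __str__ delegates to simpleString()
    assert str(dt) == dt.simpleString() == "string"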
sqlframe/base/util.py
CHANGED
@@ -291,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
         return col.sql(dialect=_BaseSession().input_dialect)
     # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
     return col.alias_or_name
+
+
+def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
+    from sqlframe.base import types
+
+    primitive_mapping = {
+        exp.DataType.Type.VARCHAR: types.VarcharType,
+        exp.DataType.Type.CHAR: types.CharType,
+        exp.DataType.Type.TEXT: types.StringType,
+        exp.DataType.Type.BINARY: types.BinaryType,
+        exp.DataType.Type.BOOLEAN: types.BooleanType,
+        exp.DataType.Type.INT: types.IntegerType,
+        exp.DataType.Type.BIGINT: types.LongType,
+        exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.FLOAT: types.FloatType,
+        exp.DataType.Type.DOUBLE: types.DoubleType,
+        exp.DataType.Type.DECIMAL: types.DecimalType,
+        exp.DataType.Type.TIMESTAMP: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
+        exp.DataType.Type.DATE: types.DateType,
+    }
+    if sqlglot_dtype.this in primitive_mapping:
+        pyspark_class = primitive_mapping[sqlglot_dtype.this]
+        if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
+            return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
+        elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
+            return pyspark_class(
+                precision=int(sqlglot_dtype.expressions[0].this.this),
+                scale=int(sqlglot_dtype.expressions[1].this.this),
+            )
+        return pyspark_class()
+    if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
+        return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
+    elif sqlglot_dtype.this == exp.DataType.Type.MAP:
+        return types.MapType(
+            sqlglot_to_spark(sqlglot_dtype.expressions[0]),
+            sqlglot_to_spark(sqlglot_dtype.expressions[1]),
+        )
+    elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+        return types.StructType(
+            [
+                types.StructField(
+                    name=field.this.alias_or_name,
+                    dataType=sqlglot_to_spark(field.args["kind"]),
+                )
+                for field in sqlglot_dtype.expressions
+            ]
+        )
+    raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
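A sketch of how the new sqlglot_to_spark helper can be exercised directly; the type strings are illustrative and assume a recent sqlglot where exp.DataType.build parses them as shown.

    from sqlglot import exp
    from sqlframe.base.util import sqlglot_to_spark

    # Nested ARRAY<STRUCT<...>> maps to ArrayType(StructType([...]))
    print(sqlglot_to_spark(exp.DataType.build("ARRAY<STRUCT<id INT, name TEXT>>")))

    # DECIMAL(10, 2) carries its precision and scale into DecimalType
    print(sqlglot_to_spark(exp.DataType.build("DECIMAL(10, 2)")))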
sqlframe/bigquery/dataframe.py
CHANGED
@@ -1,21 +1,17 @@
 from __future__ import annotations
 
 import logging
-import sys
 import typing as t
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.bigquery.group import BigQueryGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
     from sqlframe.bigquery.session import BigQuerySession
@@ -33,22 +29,46 @@ class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFrame"]):
 
 
 class BigQueryDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "BigQuerySession",
         "BigQueryDataFrameWriter",
         "BigQueryDataFrameNaFunctions",
         "BigQueryDataFrameStatFunctions",
         "BigQueryGroupedData",
-    ]
+    ],
 ):
     _na = BigQueryDataFrameNaFunctions
     _stat = BigQueryDataFrameStatFunctions
     _group_data = BigQueryGroupedData
 
-
-
-
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        from google.cloud import bigquery
+
+        def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
+            if field.field_type == "RECORD":
+                data_type = "STRUCT<"
+                for subfield in field.fields:
+                    column = field_to_column(subfield)
+                    data_type += f"{column.name} {column.dataType},"
+                data_type += ">"
+            elif field.field_type == "INTEGER":
+                data_type = "INT64"
+            else:
+                data_type = field.field_type
+            if field.mode == "REPEATED":
+                data_type = f"ARRAY<{data_type}>"
+            return CatalogColumn(
+                name=field.name,
+                dataType=data_type,
+                nullable=field.is_nullable,
+                description=None,
+                isPartition=False,
+                isBucket=False,
+            )
 
-
-
-
+        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+        sql = self.session._to_sql(self.expression)
+        query_job = self.session._client.query(sql, job_config=job_config)
+        return [field_to_column(field) for field in query_job.schema]
sqlframe/duckdb/dataframe.py
CHANGED
@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.duckdb.group import DuckDBGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.duckdb.session import DuckDBSession  # noqa
     from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter  # noqa
@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"]):
 
 
 class DuckDBDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",
@@ -47,11 +46,3 @@ class DuckDBDataFrame(
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
     _group_data = DuckDBGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("DuckDB does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("DuckDB does not support persist. Ignoring persist() call.")
-        return self
sqlframe/postgres/catalog.py
CHANGED
@@ -7,16 +7,17 @@ import typing as t
 
 from sqlglot import exp, parse_one
 
-from sqlframe.base.catalog import Function, _BaseCatalog
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.decorators import normalize
 from sqlframe.base.mixins.catalog_mixins import (
     GetCurrentCatalogFromFunctionMixin,
     GetCurrentDatabaseFromFunctionMixin,
     ListCatalogsFromInfoSchemaMixin,
-    ListColumnsFromInfoSchemaMixin,
     ListDatabasesFromInfoSchemaMixin,
     ListTablesFromInfoSchemaMixin,
     SetCurrentDatabaseFromSearchPathMixin,
 )
+from sqlframe.base.util import to_schema
 
 if t.TYPE_CHECKING:
     from sqlframe.postgres.session import PostgresSession  # noqa
@@ -30,12 +31,131 @@ class PostgresCatalog(
     ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
     ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
-    ListColumnsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
 ):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
     TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
 
+    @normalize(["tableName", "dbName"])
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+
+        .. versionadded:: 2.0.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                table.set(
+                    "db",
+                    exp.parse_identifier(
+                        self.currentDatabase(), dialect=self.session.input_dialect
+                    ),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                )
+        source_table = self._get_info_schema_table("columns", database=table.db)
+        select = parse_one(
+            f"""
+            SELECT
+                att.attname AS column_name,
+                pg_catalog.format_type(att.atttypid, NULL) AS data_type,
+                col.is_nullable
+            FROM
+                pg_catalog.pg_attribute att
+            JOIN
+                pg_catalog.pg_class cls ON cls.oid = att.attrelid
+            JOIN
+                pg_catalog.pg_namespace nsp ON nsp.oid = cls.relnamespace
+            JOIN
+                information_schema.columns col ON col.table_schema = nsp.nspname AND col.table_name = cls.relname AND col.column_name = att.attname
+            WHERE
+                cls.relname = '{table.name}' AND -- replace with your table name
+                att.attnum > 0 AND
+                NOT att.attisdropped
+            ORDER BY
+                att.attnum;
+            """,
+            dialect="postgres",
+        )
+        if table.db:
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)  # type: ignore
+        if table.catalog:
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)  # type: ignore
+        results = self.session._fetch_rows(select)
+        return [
+            Column(
+                name=x["column_name"],
+                description=None,
+                dataType=x["data_type"],
+                nullable=x["is_nullable"] == "YES",
+                isPartition=False,
+                isBucket=False,
+            )
+            for x in results
+        ]
+
     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Function]:
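A usage sketch for the engine-specific listColumns override; it assumes a live psycopg2 connection and that PostgresSession accepts it via conn=, and the table and schema names are made up.

    import psycopg2
    from sqlframe.postgres import PostgresSession

    conn = psycopg2.connect(dbname="postgres", user="postgres")
    session = PostgresSession(conn=conn)

    # Fills in the current database/catalog when the name is unqualified, then
    # queries pg_catalog/information_schema for names, formatted types and nullability.
    for col in session.catalog.listColumns("users", dbName="public"):
        print(col.name, col.dataType, col.nullable)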
sqlframe/postgres/dataframe.py
CHANGED
@@ -9,7 +9,10 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.postgres.group import PostgresGroupedData
 
 if sys.version_info >= (3, 11):
@@ -34,7 +37,8 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFrame"]):
 
 
 class PostgresDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",
@@ -46,11 +50,3 @@ class PostgresDataFrame(
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
     _group_data = PostgresGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Postgres does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Postgres does not support persist. Ignoring persist() call.")
-        return self
sqlframe/redshift/dataframe.py
CHANGED
@@ -9,13 +9,9 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.redshift.group import RedshiftGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.redshift.readwriter import RedshiftDataFrameWriter
     from sqlframe.redshift.session import RedshiftSession
@@ -33,22 +29,15 @@ class RedshiftDataFrameStatFunctions(_BaseDataFrameStatFunctions["RedshiftDataFrame"]):
 
 
 class RedshiftDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "RedshiftSession",
         "RedshiftDataFrameWriter",
         "RedshiftDataFrameNaFunctions",
         "RedshiftDataFrameStatFunctions",
         "RedshiftGroupedData",
-    ]
+    ],
 ):
     _na = RedshiftDataFrameNaFunctions
     _stat = RedshiftDataFrameStatFunctions
     _group_data = RedshiftGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Redshift does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Redshift does not support persist. Ignoring persist() call.")
-        return self
sqlframe/snowflake/dataframe.py
CHANGED
@@ -4,18 +4,15 @@ import logging
 import sys
 import typing as t
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.snowflake.group import SnowflakeGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.snowflake.readwriter import SnowflakeDataFrameWriter
     from sqlframe.snowflake.session import SnowflakeSession
@@ -33,22 +30,35 @@ class SnowflakeDataFrameStatFunctions(_BaseDataFrameStatFunctions["SnowflakeDataFrame"]):
 
 
 class SnowflakeDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "SnowflakeSession",
         "SnowflakeDataFrameWriter",
         "SnowflakeDataFrameNaFunctions",
         "SnowflakeDataFrameStatFunctions",
         "SnowflakeGroupedData",
-    ]
+    ],
 ):
     _na = SnowflakeDataFrameNaFunctions
     _stat = SnowflakeDataFrameStatFunctions
     _group_data = SnowflakeGroupedData
 
-
-
-
-
-
-
-
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        df = self._convert_leaf_to_cte()
+        df = df.limit(0)
+        self.session._execute(df.expression)
+        query_id = self.session._cur.sfqid
+        columns = []
+        for row in self.session._fetch_rows(f"DESCRIBE RESULT '{query_id}'"):
+            columns.append(
+                CatalogColumn(
+                    name=row.name,
+                    dataType=row.type,
+                    nullable=row["null?"] == "Y",
+                    description=row.comment,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns
sqlframe/spark/dataframe.py
CHANGED
@@ -1,26 +1,23 @@
 from __future__ import annotations
 
 import logging
-import sys
 import typing as t
 
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.spark.group import SparkGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.spark.readwriter import SparkDataFrameWriter
     from sqlframe.spark.session import SparkSession
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -33,22 +30,35 @@ class SparkDataFrameStatFunctions(_BaseDataFrameStatFunctions["SparkDataFrame"]):
 
 
 class SparkDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "SparkSession",
         "SparkDataFrameWriter",
         "SparkDataFrameNaFunctions",
         "SparkDataFrameStatFunctions",
         "SparkGroupedData",
-    ]
+    ],
 ):
     _na = SparkDataFrameNaFunctions
     _stat = SparkDataFrameStatFunctions
     _group_data = SparkGroupedData
 
-
-
-
-
-
-
-
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        columns = []
+        for field in self.session.spark_session.sql(
+            self.session._to_sql(self.expression)
+        ).schema.fields:
+            columns.append(
+                Column(
+                    name=field.name,
+                    dataType=exp.DataType.build(field.dataType.simpleString(), dialect="spark").sql(
+                        dialect="spark"
+                    ),
+                    nullable=field.nullable,
+                    description=None,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns
sqlframe/testing/utils.py
ADDED
@@ -0,0 +1,320 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+from __future__ import annotations
+
+import difflib
+import os
+import typing as t
+from itertools import zip_longest
+
+from sqlframe.base import types
+from sqlframe.base.dataframe import _BaseDataFrame
+from sqlframe.base.exceptions import (
+    DataFrameDiffError,
+    SchemaDiffError,
+    SQLFrameException,
+)
+from sqlframe.base.util import verify_pandas_installed
+
+if t.TYPE_CHECKING:
+    import pandas as pd
+
+
+def _terminal_color_support():
+    try:
+        # determine if environment supports color
+        script = "$(test $(tput colors)) && $(test $(tput colors) -ge 8) && echo true || echo false"
+        return os.popen(script).read()
+    except Exception:
+        return False
+
+
+def _context_diff(actual: t.List[str], expected: t.List[str], n: int = 3):
+    """
+    Modified from difflib context_diff API,
+    see original code here: https://github.com/python/cpython/blob/main/Lib/difflib.py#L1180
+    """
+
+    def red(s: str) -> str:
+        red_color = "\033[31m"
+        no_color = "\033[0m"
+        return red_color + str(s) + no_color
+
+    prefix = dict(insert="+ ", delete="- ", replace="! ", equal=" ")
+    for group in difflib.SequenceMatcher(None, actual, expected).get_grouped_opcodes(n):
+        yield "*** actual ***"
+        if any(tag in {"replace", "delete"} for tag, _, _, _, _ in group):
+            for tag, i1, i2, _, _ in group:
+                for line in actual[i1:i2]:
+                    if tag != "equal" and _terminal_color_support():
+                        yield red(prefix[tag] + str(line))
+                    else:
+                        yield prefix[tag] + str(line)
+
+        yield "\n"
+
+        yield "*** expected ***"
+        if any(tag in {"replace", "insert"} for tag, _, _, _, _ in group):
+            for tag, _, _, j1, j2 in group:
+                for line in expected[j1:j2]:
+                    if tag != "equal" and _terminal_color_support():
+                        yield red(prefix[tag] + str(line))
+                    else:
+                        yield prefix[tag] + str(line)
+
+
+# Source: https://github.com/apache/spark/blob/master/python/pyspark/testing/utils.py#L519
+def assertDataFrameEqual(
+    actual: t.Union[_BaseDataFrame, pd.DataFrame, t.List[types.Row]],
+    expected: t.Union[_BaseDataFrame, pd.DataFrame, t.List[types.Row]],
+    checkRowOrder: bool = False,
+    rtol: float = 1e-5,
+    atol: float = 1e-8,
+):
+    r"""
+    A util function to assert equality between `actual` and `expected`
+    (DataFrames or lists of Rows), with optional parameters `checkRowOrder`, `rtol`, and `atol`.
+
+    Supports Spark, Spark Connect, pandas, and pandas-on-Spark DataFrames.
+    For more information about pandas-on-Spark DataFrame equality, see the docs for
+    `assertPandasOnSparkEqual`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    actual : DataFrame (Spark, Spark Connect, pandas, or pandas-on-Spark) or list of Rows
+        The DataFrame that is being compared or tested.
+    expected : DataFrame (Spark, Spark Connect, pandas, or pandas-on-Spark) or list of Rows
+        The expected result of the operation, for comparison with the actual result.
+    checkRowOrder : bool, optional
+        A flag indicating whether the order of rows should be considered in the comparison.
+        If set to `False` (default), the row order is not taken into account.
+        If set to `True`, the order of rows is important and will be checked during comparison.
+        (See Notes)
+    rtol : float, optional
+        The relative tolerance, used in asserting approximate equality for float values in actual
+        and expected. Set to 1e-5 by default. (See Notes)
+    atol : float, optional
+        The absolute tolerance, used in asserting approximate equality for float values in actual
+        and expected. Set to 1e-8 by default. (See Notes)
+
+    Notes
+    -----
+    When `assertDataFrameEqual` fails, the error message uses the Python `difflib` library to
+    display a diff log of each row that differs in `actual` and `expected`.
+
+    For `checkRowOrder`, note that PySpark DataFrame ordering is non-deterministic, unless
+    explicitly sorted.
+
+    Note that schema equality is checked only when `expected` is a DataFrame (not a list of Rows).
+
+    For DataFrames with float values, assertDataFrame asserts approximate equality.
+    Two float values a and b are approximately equal if the following equation is True:
+
+    ``absolute(a - b) <= (atol + rtol * absolute(b))``.
+
+    Examples
+    --------
+    >>> df1 = spark.createDataFrame(data=[("1", 1000), ("2", 3000)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(data=[("1", 1000), ("2", 3000)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2)  # pass, DataFrames are identical
+
+    >>> df1 = spark.createDataFrame(data=[("1", 0.1), ("2", 3.23)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(data=[("1", 0.109), ("2", 3.23)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2, rtol=1e-1)  # pass, DataFrames are approx equal by rtol
+
+    >>> df1 = spark.createDataFrame(data=[(1, 1000), (2, 3000)], schema=["id", "amount"])
+    >>> list_of_rows = [Row(1, 1000), Row(2, 3000)]
+    >>> assertDataFrameEqual(df1, list_of_rows)  # pass, actual and expected data are equal
+
+    >>> import pyspark.pandas as ps
+    >>> df1 = ps.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+    >>> df2 = ps.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+    >>> assertDataFrameEqual(df1, df2)  # pass, pandas-on-Spark DataFrames are equal
+
+    >>> df1 = spark.createDataFrame(
+    ...     data=[("1", 1000.00), ("2", 3000.00), ("3", 2000.00)], schema=["id", "amount"])
+    >>> df2 = spark.createDataFrame(
+    ...     data=[("1", 1001.00), ("2", 3000.00), ("3", 2003.00)], schema=["id", "amount"])
+    >>> assertDataFrameEqual(df1, df2)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    PySparkAssertionError: [DIFFERENT_ROWS] Results do not match: ( 66.66667 % )
+    *** actual ***
+    ! Row(id='1', amount=1000.0)
+    Row(id='2', amount=3000.0)
+    ! Row(id='3', amount=2000.0)
+    *** expected ***
+    ! Row(id='1', amount=1001.0)
+    Row(id='2', amount=3000.0)
+    ! Row(id='3', amount=2003.0)
+    """
+    import pandas as pd
+
+    if actual is None and expected is None:
+        return True
+    elif actual is None or expected is None:
+        raise SQLFrameException("Missing required arguments: actual and expected")
+
+    def compare_rows(r1: types.Row, r2: types.Row):
+        def compare_vals(val1, val2):
+            if isinstance(val1, list) and isinstance(val2, list):
+                return len(val1) == len(val2) and all(
+                    compare_vals(x, y) for x, y in zip(val1, val2)
+                )
+            elif isinstance(val1, types.Row) and isinstance(val2, types.Row):
+                return all(compare_vals(x, y) for x, y in zip(val1, val2))
+            elif isinstance(val1, dict) and isinstance(val2, dict):
+                return (
+                    len(val1.keys()) == len(val2.keys())
+                    and val1.keys() == val2.keys()
+                    and all(compare_vals(val1[k], val2[k]) for k in val1.keys())
+                )
+            elif isinstance(val1, float) and isinstance(val2, float):
+                if abs(val1 - val2) > (atol + rtol * abs(val2)):
+                    return False
+            else:
+                if val1 != val2:
+                    return False
+            return True
+
+        if r1 is None and r2 is None:
+            return True
+        elif r1 is None or r2 is None:
+            return False
+
+        return compare_vals(r1, r2)
+
+    def assert_rows_equal(rows1: t.List[types.Row], rows2: t.List[types.Row]):
+        zipped = list(zip_longest(rows1, rows2))
+        diff_rows_cnt = 0
+        diff_rows = False
+
+        rows_str1 = ""
+        rows_str2 = ""
+
+        # count different rows
+        for r1, r2 in zipped:
+            rows_str1 += str(r1) + "\n"
+            rows_str2 += str(r2) + "\n"
+            if not compare_rows(r1, r2):
+                diff_rows_cnt += 1
+                diff_rows = True
+
+        generated_diff = _context_diff(
+            actual=rows_str1.splitlines(), expected=rows_str2.splitlines(), n=len(zipped)
+        )
+
+        if diff_rows:
+            error_msg = "Results do not match: "
+            percent_diff = (diff_rows_cnt / len(zipped)) * 100
+            error_msg += "( %.5f %% )" % percent_diff
+            error_msg += "\n" + "\n".join(generated_diff)
+            raise DataFrameDiffError("Rows are different:\n%s" % error_msg)
+
+    # convert actual and expected to list
+    if not isinstance(actual, list) and not isinstance(expected, list):
+        # only compare schema if expected is not a List
+        assertSchemaEqual(actual.schema, expected.schema)  # type: ignore
+
+    if not isinstance(actual, list):
+        actual_list = actual.collect()  # type: ignore
+    else:
+        actual_list = actual
+
+    if not isinstance(expected, list):
+        expected_list = expected.collect()  # type: ignore
+    else:
+        expected_list = expected
+
+    if not checkRowOrder:
+        # rename duplicate columns for sorting
+        actual_list = sorted(actual_list, key=lambda x: str(x))
+        expected_list = sorted(expected_list, key=lambda x: str(x))
+
+    assert_rows_equal(actual_list, expected_list)
+
+
+def assertSchemaEqual(actual: types.StructType, expected: types.StructType):
+    r"""
+    A util function to assert equality between DataFrame schemas `actual` and `expected`.
+
+    .. versionadded:: 3.5.0
+
+    Parameters
+    ----------
+    actual : StructType
+        The DataFrame schema that is being compared or tested.
+    expected : StructType
+        The expected schema, for comparison with the actual schema.
+
+    Notes
+    -----
+    When assertSchemaEqual fails, the error message uses the Python `difflib` library to display
+    a diff log of the `actual` and `expected` schemas.
+
+    Examples
+    --------
+    >>> from pyspark.sql.types import StructType, StructField, ArrayType, IntegerType, DoubleType
+    >>> s1 = StructType([StructField("names", ArrayType(DoubleType(), True), True)])
+    >>> s2 = StructType([StructField("names", ArrayType(DoubleType(), True), True)])
+    >>> assertSchemaEqual(s1, s2)  # pass, schemas are identical
+
+    >>> df1 = spark.createDataFrame(data=[(1, 1000), (2, 3000)], schema=["id", "number"])
+    >>> df2 = spark.createDataFrame(data=[("1", 1000), ("2", 5000)], schema=["id", "amount"])
+    >>> assertSchemaEqual(df1.schema, df2.schema)  # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+    ...
+    PySparkAssertionError: [DIFFERENT_SCHEMA] Schemas do not match.
+    --- actual
+    +++ expected
+    - StructType([StructField('id', LongType(), True), StructField('number', LongType(), True)])
+    ? ^^ ^^^^^
+    + StructType([StructField('id', StringType(), True), StructField('amount', LongType(), True)])
+    ? ^^^^ ++++ ^
+    """
+    if not isinstance(actual, types.StructType):
+        raise RuntimeError("actual must be a StructType")
+    if not isinstance(expected, types.StructType):
+        raise RuntimeError("expected must be a StructType")
+
+    def compare_schemas_ignore_nullable(s1: types.StructType, s2: types.StructType):
+        if len(s1) != len(s2):
+            return False
+        zipped = zip_longest(s1, s2)
+        for sf1, sf2 in zipped:
+            if not compare_structfields_ignore_nullable(sf1, sf2):
+                return False
+        return True
+
+    def compare_structfields_ignore_nullable(
+        actualSF: types.StructField, expectedSF: types.StructField
+    ):
+        if actualSF is None and expectedSF is None:
+            return True
+        elif actualSF is None or expectedSF is None:
+            return False
+        if actualSF.name != expectedSF.name:
+            return False
+        else:
+            return compare_datatypes_ignore_nullable(actualSF.dataType, expectedSF.dataType)
+
+    def compare_datatypes_ignore_nullable(dt1: t.Any, dt2: t.Any):
+        # checks datatype equality, using recursion to ignore nullable
+        if dt1.typeName() == dt2.typeName():
+            if dt1.typeName() == "array":
+                return compare_datatypes_ignore_nullable(dt1.elementType, dt2.elementType)
+            elif dt1.typeName() == "struct":
+                return compare_schemas_ignore_nullable(dt1, dt2)
+            else:
+                return True
+        else:
+            return False
+
+    # ignore nullable flag by default
+    if not compare_schemas_ignore_nullable(actual, expected):
+        generated_diff = difflib.ndiff(str(actual).splitlines(), str(expected).splitlines())
+
+        error_msg = "\n".join(generated_diff)
+
+        raise SchemaDiffError(error_msg)
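A usage sketch for the new testing helpers, assuming sqlframe.testing re-exports assertDataFrameEqual and assertSchemaEqual (the three-line sqlframe/testing/__init__.py listed above suggests it does) and using a DuckDB session for illustration.

    from sqlframe.duckdb import DuckDBSession
    from sqlframe.testing import assertDataFrameEqual, assertSchemaEqual

    session = DuckDBSession()
    df1 = session.createDataFrame([(1, 1000.0), (2, 3000.0)], schema=["id", "amount"])
    df2 = session.createDataFrame([(2, 3000.0), (1, 1000.0)], schema=["id", "amount"])

    # Row order is ignored by default; float comparison honours rtol/atol.
    assertDataFrameEqual(df1, df2)

    # Schemas can also be compared directly; a mismatch raises SchemaDiffError
    # with a difflib-style diff of the two StructTypes.
    assertSchemaEqual(df1.schema, df2.schema)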
{sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 sqlframe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sqlframe/_version.py,sha256=
+sqlframe/_version.py,sha256=rZqhcUFwPMyj_mTWUN2A6qcFr8Ptv08CSbXbruC3jR4,413
 sqlframe/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/_typing.py,sha256=DuTay8-o9W-pw3RPZCgLunKNJLS9PkaV11G_pxXp9NY,1256
 sqlframe/base/catalog.py,sha256=ATDGirouUjal05P4ymL-wIi8rgjg_8w4PoACamiO64A,37245
 sqlframe/base/column.py,sha256=5bfJWj9dnStHUxLSrWMD-gwiC4-aHKC8lhoC62nhM1k,16153
-sqlframe/base/dataframe.py,sha256=
+sqlframe/base/dataframe.py,sha256=Tf5euWTGxFmYirgHK5ZXUI41so5ruo-asVmUwj9DFdo,70015
 sqlframe/base/decorators.py,sha256=I5osMgx9BuCgbtp4jVM2DNwYJVLzCv-OtTedhQEik0g,1882
-sqlframe/base/exceptions.py,sha256=
+sqlframe/base/exceptions.py,sha256=9Uwvqn2eAkDpqm4BrRgbL61qM-GMCbJEMAW8otxO46s,370
 sqlframe/base/function_alternatives.py,sha256=QESqZy7Osp9-CV5Yoi6XFat5SE8PzCVZ3o7gOFmIY7g,45888
 sqlframe/base/functions.py,sha256=hJDpE7GYQpQ1iHjdr1hG_hu0mAIb60vNoghjEcgMREI,187550
 sqlframe/base/group.py,sha256=TES9CleVmH3x-0X-tqmuUKfCKSWjH5vg1aU3R6dDmFc,4059
@@ -15,17 +15,17 @@ sqlframe/base/operations.py,sha256=-AhNuEzcV7ZExoP1oY3blaKip-joQyJeQVvfBTs_2g4,3
 sqlframe/base/readerwriter.py,sha256=5NPQMiOrw6I54U243R_6-ynnWYsNksgqwRpPp4IFjIw,25288
 sqlframe/base/session.py,sha256=2C0OsPoP49AuqVNtPiazTdVpwQA1668g5WOydrYP6SA,22001
 sqlframe/base/transforms.py,sha256=y0j3SGDz3XCmNGrvassk1S-owllUWfkHyMgZlY6SFO4,467
-sqlframe/base/types.py,sha256=
-sqlframe/base/util.py,sha256=
+sqlframe/base/types.py,sha256=K6mjafbX7oIk65CapwamcO2I8nf-poRIpKKt9XDNEaQ,11987
+sqlframe/base/util.py,sha256=tWccrZne-Acn4N2RxYr87mfI_GDMf_K9hRD7BnhGBq0,11756
 sqlframe/base/window.py,sha256=8hOv-ignPPIsZA9FzvYzcLE9J_glalVaYjIAUdRUX3o,4943
 sqlframe/base/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sqlframe/base/mixins/catalog_mixins.py,sha256=NhuPGxIqPjyuC_V_NALN1sn9v9h0-xwFOlJyJgsvyek,14212
-sqlframe/base/mixins/dataframe_mixins.py,sha256=
+sqlframe/base/mixins/dataframe_mixins.py,sha256=FoR3bymPk-vT7NhJsDwZe6ONHheMd5UJhzID2lM1joQ,1411
 sqlframe/base/mixins/readwriter_mixins.py,sha256=QnxGVL8ftZfYlBNG0Bl24N_bnA2YioSxUsTSgKIbuvQ,4723
 sqlframe/bigquery/__init__.py,sha256=i2NsMbiXOj2xphCtPuNk6cVw4iYeq5_B1I9dVI9aGAk,712
 sqlframe/bigquery/catalog.py,sha256=h3aQAQAJg6MMvFpP8Ku0S4pcx30n5qYrqHhWSomxb6A,9319
 sqlframe/bigquery/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/bigquery/dataframe.py,sha256=
+sqlframe/bigquery/dataframe.py,sha256=Y2uy4FEYw0KxIHgnaA9uMwdIzxJzTlD_NSzIe7P7kxA,2405
 sqlframe/bigquery/functions.py,sha256=2YqJmBG0F0o10cztFZoP-G4px1QMKuHST6jlj1snUfY,11331
 sqlframe/bigquery/functions.pyi,sha256=JiyLFLiO0jyJec6j1o4ujPVQ7Tma-c9YHlm-3UQYD9M,13642
 sqlframe/bigquery/group.py,sha256=UVBNBRTo8OqS-_cS5YwvTeJYgYxeG-d6R3kfyHmlFqw,391
@@ -36,7 +36,7 @@ sqlframe/bigquery/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/duckdb/__init__.py,sha256=t85TA3ufZtL1weQNFmEs8itCSwbJFtw03-p0GT4XGf8,669
 sqlframe/duckdb/catalog.py,sha256=rt3XuP3m4DbhuibOFyvx_95F2zZa6uDwCI_TmcvKy1A,3895
 sqlframe/duckdb/column.py,sha256=wkEPcp3xVsH5nC3kpacXqNkRv9htPtBgt-0uFRxIRNs,56
-sqlframe/duckdb/dataframe.py,sha256=
+sqlframe/duckdb/dataframe.py,sha256=WmBrrmrfxDpYuikSMFqacgV2Jawkx4sSYE-_mwnL4Jw,1225
 sqlframe/duckdb/functions.py,sha256=pz40eqR4U_s42p7UeaefJD5yU1vo6mqNoVz0iKN5eRk,1671
 sqlframe/duckdb/functions.pyi,sha256=nU-6a2cfLDkuMCdYrNRLfa6-i8Aa0CxQQ1nLT6roIdI,5813
 sqlframe/duckdb/group.py,sha256=IkhbW42Ng1U5YT3FkIdiB4zBqRkW4QyTb-1detY1e_4,383
@@ -45,9 +45,9 @@ sqlframe/duckdb/session.py,sha256=j75iIsmaxl5x7oqyhN_VolvEclKj7QmaFfIis-SmoKM,21
 sqlframe/duckdb/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/duckdb/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
 sqlframe/postgres/__init__.py,sha256=Sz_MtgV_oh_QhfZTC7iKM07ICUmNcJEDV0kEkSW9ZKU,712
-sqlframe/postgres/catalog.py,sha256=
+sqlframe/postgres/catalog.py,sha256=uGMKo4RXOU6fA4IjcfebukEI18QswVk3cnB_G7S6_Fw,8130
 sqlframe/postgres/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/postgres/dataframe.py,sha256=
+sqlframe/postgres/dataframe.py,sha256=f-w6UHxZtmeZ5oMbaqJaZ8FrYeOhzyveNlZOK57ke0k,1289
 sqlframe/postgres/functions.py,sha256=b9ccP5vY8EDZXkJbhE_LjAlH50_6wcUF9VbzPrariec,2374
 sqlframe/postgres/functions.pyi,sha256=um-qE2g9iPs0-53vJ46lArbfvDqAbFIwrxLJgcrPM_8,5536
 sqlframe/postgres/group.py,sha256=KUXeSFKWTSH9yCRJAhW85OvjZaG6Zr4In9LR_ie3yGU,391
@@ -58,7 +58,7 @@ sqlframe/postgres/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/redshift/__init__.py,sha256=jamKYQtQaKjjXnQ01QGPHvatbrZSw9sWno_VOUGSz6I,712
 sqlframe/redshift/catalog.py,sha256=JBDWIu4FQhi4_POB9pxW0T5A-6qdSK7BCq_Cp-V6tIM,4717
 sqlframe/redshift/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/redshift/dataframe.py,sha256=
+sqlframe/redshift/dataframe.py,sha256=aTC0DOPDFwWH1_b9T0Pif80cYSGudIp0D-cmkR7Ci_M,1104
 sqlframe/redshift/functions.py,sha256=DR5kodYAcKatUqopwrEQtxryI4ZSqaH47_y3WLht4Wg,455
 sqlframe/redshift/group.py,sha256=5MGZYJfHpzoRSQ0N_pn4KUk4Mk2gocQwU3K1-jAbvGg,391
 sqlframe/redshift/readwriter.py,sha256=g3FYKSsJKqcSnElprzzz29ZctoXq9tRB0Mj9Bm1HycI,870
@@ -68,7 +68,7 @@ sqlframe/redshift/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,3
 sqlframe/snowflake/__init__.py,sha256=nuQ3cuHjDpW4ELZfbd2qOYmtXmcYl7MtsrdOrRdozo0,746
 sqlframe/snowflake/catalog.py,sha256=uDjBgDdCyxaDkGNX_8tb-lol7MwwazcClUBAZsOSj70,5014
 sqlframe/snowflake/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/snowflake/dataframe.py,sha256=
+sqlframe/snowflake/dataframe.py,sha256=jUyQNCe3K6SH4PtmrR67YN0SLqkHakMxLiB261fDgkc,1862
 sqlframe/snowflake/functions.py,sha256=cIO56ZsOpjg6ICLjTh-osG1h1UjjEtM39_ieMiWkmyI,2466
 sqlframe/snowflake/functions.pyi,sha256=MkNif_sIceHMNhl-qvLir2DJ1jPqwyaahltdpgY4Jq0,6213
 sqlframe/snowflake/group.py,sha256=pPP1l2RRo_LgkXrji8a87n2PKo-63ZRPT-WUtvVcBME,395
@@ -79,7 +79,7 @@ sqlframe/snowflake/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,
 sqlframe/spark/__init__.py,sha256=WhYQAZMJN1EMNAVGUH7BEinxNdYtXOrrr-6HUniJOyI,649
 sqlframe/spark/catalog.py,sha256=rIX5DtPnINbcPZRUe4Z1bOpkJoNRlrO9qWkUeTQClNc,32612
 sqlframe/spark/column.py,sha256=E1tUa62Y5HajkhgFuebU9zohrGyieudcHzTT8gfalio,40
-sqlframe/spark/dataframe.py,sha256=
+sqlframe/spark/dataframe.py,sha256=_TD-h7oz0-i80r90v17UoLDoIzcGNchU2SL13ujOOic,1779
 sqlframe/spark/functions.py,sha256=PkK4MBpVADhnDbrgFDii5zFaNrhi4y-OYX3Lcu-SW0k,530
 sqlframe/spark/functions.pyi,sha256=bjz6s8E6OB0c4KfTTsls7rhb_R9mIYvkaeaXefMziqM,11617
 sqlframe/spark/group.py,sha256=MrvV_v-YkBc6T1zz882WrEqtWjlooWIyHBCmTQg3fCA,379
@@ -97,8 +97,10 @@ sqlframe/standalone/readwriter.py,sha256=EZNyDJ4ID6sGNog3uP4-e9RvchX4biJJDNtc5hk
 sqlframe/standalone/session.py,sha256=wQmdu2sv6KMTAv0LRFk7TY7yzlh3xvmsyqilEtRecbY,1191
 sqlframe/standalone/types.py,sha256=KwNyuXIo-2xVVd4bZED3YrQOobKCtemlxGrJL7DrTC8,34
 sqlframe/standalone/window.py,sha256=6GKPzuxeSapJakBaKBeT9VpED1ACdjggDv9JRILDyV0,35
-sqlframe
-sqlframe
-sqlframe-1.
-sqlframe-1.
-sqlframe-1.
+sqlframe/testing/__init__.py,sha256=VVCosQhitU74A3NnE52O4mNtGZONapuEXcc20QmSlnQ,132
+sqlframe/testing/utils.py,sha256=9DDYVuocO7tygee3RaajuJNZ24sJwf_LY556kKg7kTw,13011
+sqlframe-1.11.0.dist-info/LICENSE,sha256=VZu79YgW780qxaFJMr0t5ZgbOYEh04xWoxaWOaqIGWk,1068
+sqlframe-1.11.0.dist-info/METADATA,sha256=JTMUu99Ygcz_fK15KTHUb9OqQcPiQoUjQ1-7RQ09COE,7497
+sqlframe-1.11.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+sqlframe-1.11.0.dist-info/top_level.txt,sha256=T0_RpoygaZSF6heeWwIDQgaP0varUdSK1pzjeJZRjM8,9
+sqlframe-1.11.0.dist-info/RECORD,,
{sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/LICENSE
File without changes
{sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/WHEEL
File without changes
{sqlframe-1.10.0.dist-info → sqlframe-1.11.0.dist-info}/top_level.txt
File without changes