PyPI - sqlframe - Versions diffs - 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl - Mend

sqlframe 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

sqlframe/_version.py +2 -2
sqlframe/base/dataframe.py +54 -1
sqlframe/base/exceptions.py +12 -0
sqlframe/base/function_alternatives.py +96 -0
sqlframe/base/functions.py +4013 -1
sqlframe/base/mixins/dataframe_mixins.py +24 -33
sqlframe/base/session.py +2 -2
sqlframe/base/types.py +3 -3
sqlframe/base/util.py +56 -0
sqlframe/bigquery/dataframe.py +33 -13
sqlframe/bigquery/functions.py +4 -0
sqlframe/bigquery/functions.pyi +37 -1
sqlframe/duckdb/dataframe.py +6 -15
sqlframe/duckdb/functions.py +3 -0
sqlframe/duckdb/functions.pyi +29 -0
sqlframe/postgres/catalog.py +123 -3
sqlframe/postgres/dataframe.py +6 -10
sqlframe/postgres/functions.py +6 -0
sqlframe/postgres/functions.pyi +28 -0
sqlframe/redshift/dataframe.py +3 -14
sqlframe/snowflake/dataframe.py +23 -13
sqlframe/snowflake/functions.py +3 -0
sqlframe/snowflake/functions.pyi +27 -0
sqlframe/spark/dataframe.py +25 -15
sqlframe/spark/functions.pyi +161 -1
sqlframe/testing/__init__.py +3 -0
sqlframe/testing/utils.py +320 -0
{sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/METADATA +1 -1
{sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/RECORD +32 -30
{sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/LICENSE +0 -0
{sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/WHEEL +0 -0
{sqlframe-1.9.0.dist-info → sqlframe-1.11.0.dist-info}/top_level.txt +0 -0

sqlframe/postgres/catalog.py CHANGED Viewed

@@ -7,16 +7,17 @@ import typing as t
 from sqlglot import exp, parse_one
-from sqlframe.base.catalog import Function, _BaseCatalog
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.decorators import normalize
 from sqlframe.base.mixins.catalog_mixins import (
     GetCurrentCatalogFromFunctionMixin,
     GetCurrentDatabaseFromFunctionMixin,
     ListCatalogsFromInfoSchemaMixin,
-    ListColumnsFromInfoSchemaMixin,
     ListDatabasesFromInfoSchemaMixin,
     ListTablesFromInfoSchemaMixin,
     SetCurrentDatabaseFromSearchPathMixin,
 )
+from sqlframe.base.util import to_schema
 if t.TYPE_CHECKING:
     from sqlframe.postgres.session import PostgresSession  # noqa
@@ -30,12 +31,131 @@ class PostgresCatalog(
     ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
     ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
-    ListColumnsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
 ):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
     TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
+    @normalize(["tableName", "dbName"])
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+        .. versionadded:: 2.0.0
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                table.set(
+                    "db",
+                    exp.parse_identifier(
+                        self.currentDatabase(), dialect=self.session.input_dialect
+                    ),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                )
+        source_table = self._get_info_schema_table("columns", database=table.db)
+        select = parse_one(
+            f"""
+        SELECT
+    att.attname AS column_name,
+    pg_catalog.format_type(att.atttypid, NULL) AS data_type,
+    col.is_nullable
+FROM
+    pg_catalog.pg_attribute att
+JOIN
+    pg_catalog.pg_class cls ON cls.oid = att.attrelid
+JOIN
+    pg_catalog.pg_namespace nsp ON nsp.oid = cls.relnamespace
+JOIN
+    information_schema.columns col ON col.table_schema = nsp.nspname AND col.table_name = cls.relname AND col.column_name = att.attname
+WHERE
+    cls.relname = '{table.name}' AND   -- replace with your table name
+    att.attnum > 0 AND
+    NOT att.attisdropped
+ORDER BY
+    att.attnum;
+        """,
+            dialect="postgres",
+        )
+        if table.db:
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)  # type: ignore
+        if table.catalog:
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)  # type: ignore
+        results = self.session._fetch_rows(select)
+        return [
+            Column(
+                name=x["column_name"],
+                description=None,
+                dataType=x["data_type"],
+                nullable=x["is_nullable"] == "YES",
+                isPartition=False,
+                isBucket=False,
+            )
+            for x in results
+        ]
     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Function]:

sqlframe/postgres/dataframe.py CHANGED Viewed

@@ -9,7 +9,10 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.postgres.group import PostgresGroupedData
 if sys.version_info >= (3, 11):
@@ -34,7 +37,8 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr
 class PostgresDataFrame(
-    PrintSchemaFromTempObjectsMixin,
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",
@@ -46,11 +50,3 @@ class PostgresDataFrame(
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
     _group_data = PostgresGroupedData
-    def cache(self) -> Self:
-        logger.warning("Postgres does not support caching. Ignoring cache() call.")
-        return self
-    def persist(self) -> Self:
-        logger.warning("Postgres does not support persist. Ignoring persist() call.")
-        return self

sqlframe/postgres/functions.py CHANGED Viewed

@@ -16,6 +16,7 @@ globals().update(
 from sqlframe.base.function_alternatives import (  # noqa
+    any_value_ignore_nulls_not_supported as any_value,
     e_literal as e,
     expm1_from_exp as expm1,
     log1p_from_log as log1p,
@@ -40,6 +41,7 @@ from sqlframe.base.function_alternatives import (  # noqa
     date_add_by_multiplication as date_add,
     date_sub_by_multiplication as date_sub,
     date_diff_with_subtraction as date_diff,
+    date_diff_with_subtraction as datediff,
     add_months_by_multiplication as add_months,
     months_between_from_age_and_extract as months_between,
     from_unixtime_from_timestamp as from_unixtime,
@@ -58,4 +60,8 @@ from sqlframe.base.function_alternatives import (  # noqa
     get_json_object_using_arrow_op as get_json_object,
     array_min_from_subquery as array_min,
     array_max_from_subquery as array_max,
+    left_cast_len as left,
+    right_cast_len as right,
+    position_cast_start as position,
+    try_element_at_zero_based as try_element_at,
 )

sqlframe/postgres/functions.pyi CHANGED Viewed

@@ -1,4 +1,5 @@
 from sqlframe.base.function_alternatives import (  # noqa
+    any_value_ignore_nulls_not_supported as any_value,
     e_literal as e,
     expm1_from_exp as expm1,
     log1p_from_log as log1p,
@@ -23,6 +24,7 @@ from sqlframe.base.function_alternatives import (  # noqa
     date_add_by_multiplication as date_add,
     date_sub_by_multiplication as date_sub,
     date_diff_with_subtraction as date_diff,
+    date_diff_with_subtraction as datediff,
     add_months_by_multiplication as add_months,
     months_between_from_age_and_extract as months_between,
     from_unixtime_from_timestamp as from_unixtime,
@@ -41,6 +43,10 @@ from sqlframe.base.function_alternatives import (  # noqa
     get_json_object_using_arrow_op as get_json_object,
     array_min_from_subquery as array_min,
     array_max_from_subquery as array_max,
+    left_cast_len as left,
+    right_cast_len as right,
+    position_cast_start as position,
+    try_element_at_zero_based as try_element_at,
 )
 from sqlframe.base.functions import (
     abs as abs,
@@ -64,9 +70,13 @@ from sqlframe.base.functions import (
     bit_length as bit_length,
     bitwiseNOT as bitwiseNOT,
     bitwise_not as bitwise_not,
+    bool_and as bool_and,
+    bool_or as bool_or,
+    call_function as call_function,
     cbrt as cbrt,
     ceil as ceil,
     ceiling as ceiling,
+    char as char,
     coalesce as coalesce,
     col as col,
     collect_list as collect_list,
@@ -84,8 +94,10 @@ from sqlframe.base.functions import (
     cume_dist as cume_dist,
     current_date as current_date,
     current_timestamp as current_timestamp,
+    current_user as current_user,
     date_format as date_format,
     date_trunc as date_trunc,
+    dateadd as dateadd,
     degrees as degrees,
     dense_rank as dense_rank,
     desc as desc,
@@ -94,18 +106,22 @@ from sqlframe.base.functions import (
     exp as exp,
     explode as explode,
     expr as expr,
+    extract as extract,
     factorial as factorial,
     floor as floor,
     greatest as greatest,
+    ifnull as ifnull,
     initcap as initcap,
     input_file_name as input_file_name,
     instr as instr,
     lag as lag,
+    lcase as lcase,
     lead as lead,
     least as least,
     length as length,
     levenshtein as levenshtein,
     lit as lit,
+    ln as ln,
     locate as locate,
     log as log,
     log10 as log10,
@@ -117,19 +133,25 @@ from sqlframe.base.functions import (
     md5 as md5,
     mean as mean,
     min as min,
+    now as now,
     nth_value as nth_value,
     ntile as ntile,
     nullif as nullif,
+    nvl as nvl,
+    nvl2 as nvl2,
     octet_length as octet_length,
     overlay as overlay,
     percent_rank as percent_rank,
     percentile as percentile,
     pow as pow,
+    power as power,
     radians as radians,
     rank as rank,
+    regexp_like as regexp_like,
     regexp_replace as regexp_replace,
     repeat as repeat,
     reverse as reverse,
+    rlike as rlike,
     row_number as row_number,
     rpad as rpad,
     rtrim as rtrim,
@@ -137,12 +159,14 @@ from sqlframe.base.functions import (
     shiftRight as shiftRight,
     shiftleft as shiftleft,
     shiftright as shiftright,
+    sign as sign,
     signum as signum,
     sin as sin,
     sinh as sinh,
     size as size,
     soundex as soundex,
     sqrt as sqrt,
+    startswith as startswith,
     stddev as stddev,
     stddev_pop as stddev_pop,
     stddev_samp as stddev_samp,
@@ -156,11 +180,15 @@ from sqlframe.base.functions import (
     toDegrees as toDegrees,
     toRadians as toRadians,
     to_date as to_date,
+    to_number as to_number,
     to_timestamp as to_timestamp,
     translate as translate,
     trim as trim,
     trunc as trunc,
+    ucase as ucase,
+    unix_date as unix_date,
     upper as upper,
+    user as user,
     var_pop as var_pop,
     var_samp as var_samp,
     variance as variance,

sqlframe/redshift/dataframe.py CHANGED Viewed

@@ -9,13 +9,9 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.redshift.group import RedshiftGroupedData
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
 if t.TYPE_CHECKING:
     from sqlframe.redshift.readwriter import RedshiftDataFrameWriter
     from sqlframe.redshift.session import RedshiftSession
@@ -33,22 +29,15 @@ class RedshiftDataFrameStatFunctions(_BaseDataFrameStatFunctions["RedshiftDataFr
 class RedshiftDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "RedshiftSession",
         "RedshiftDataFrameWriter",
         "RedshiftDataFrameNaFunctions",
         "RedshiftDataFrameStatFunctions",
         "RedshiftGroupedData",
-    ]
+    ],
 ):
     _na = RedshiftDataFrameNaFunctions
     _stat = RedshiftDataFrameStatFunctions
     _group_data = RedshiftGroupedData
-    def cache(self) -> Self:
-        logger.warning("Redshift does not support caching. Ignoring cache() call.")
-        return self
-    def persist(self) -> Self:
-        logger.warning("Redshift does not support persist. Ignoring persist() call.")
-        return self

sqlframe/snowflake/dataframe.py CHANGED Viewed

@@ -4,18 +4,15 @@ import logging
 import sys
 import typing as t
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.snowflake.group import SnowflakeGroupedData
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
 if t.TYPE_CHECKING:
     from sqlframe.snowflake.readwriter import SnowflakeDataFrameWriter
     from sqlframe.snowflake.session import SnowflakeSession
@@ -33,22 +30,35 @@ class SnowflakeDataFrameStatFunctions(_BaseDataFrameStatFunctions["SnowflakeData
 class SnowflakeDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "SnowflakeSession",
         "SnowflakeDataFrameWriter",
         "SnowflakeDataFrameNaFunctions",
         "SnowflakeDataFrameStatFunctions",
         "SnowflakeGroupedData",
-    ]
+    ],
 ):
     _na = SnowflakeDataFrameNaFunctions
     _stat = SnowflakeDataFrameStatFunctions
     _group_data = SnowflakeGroupedData
-    def cache(self) -> Self:
-        logger.warning("Snowflake does not support caching. Ignoring cache() call.")
-        return self
-    def persist(self) -> Self:
-        logger.warning("Snowflake does not support persist. Ignoring persist() call.")
-        return self
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        df = self._convert_leaf_to_cte()
+        df = df.limit(0)
+        self.session._execute(df.expression)
+        query_id = self.session._cur.sfqid
+        columns = []
+        for row in self.session._fetch_rows(f"DESCRIBE RESULT '{query_id}'"):
+            columns.append(
+                CatalogColumn(
+                    name=row.name,
+                    dataType=row.type,
+                    nullable=row["null?"] == "Y",
+                    description=row.comment,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns

sqlframe/snowflake/functions.py CHANGED Viewed

@@ -16,6 +16,7 @@ globals().update(
 from sqlframe.base.function_alternatives import (  # noqa
+    any_value_ignore_nulls_not_supported as any_value,
     e_literal as e,
     expm1_from_exp as expm1,
     log1p_from_log as log1p,
@@ -32,6 +33,7 @@ from sqlframe.base.function_alternatives import (  # noqa
     struct_with_eq as struct,
     make_date_date_from_parts as make_date,
     date_add_no_date_sub as date_add,
+    date_add_no_date_sub as dateadd,
     date_sub_by_date_add as date_sub,
     add_months_using_func as add_months,
     months_between_cast_as_date_cast_roundoff as months_between,
@@ -60,4 +62,5 @@ from sqlframe.base.function_alternatives import (  # noqa
     flatten_using_array_flatten as flatten,
     map_concat_using_map_cat as map_concat,
     sequence_from_array_generate_range as sequence,
+    to_number_using_to_double as to_number,
 )

sqlframe/snowflake/functions.pyi CHANGED Viewed

@@ -1,4 +1,5 @@
 from sqlframe.base.function_alternatives import (  # noqa
+    any_value_ignore_nulls_not_supported as any_value,
     e_literal as e,
     expm1_from_exp as expm1,
     log1p_from_log as log1p,
@@ -15,6 +16,7 @@ from sqlframe.base.function_alternatives import (  # noqa
     struct_with_eq as struct,
     make_date_date_from_parts as make_date,
     date_add_no_date_sub as date_add,
+    date_add_no_date_sub as dateadd,
     date_sub_by_date_add as date_sub,
     add_months_using_func as add_months,
     months_between_cast_as_date_cast_roundoff as months_between,
@@ -43,6 +45,7 @@ from sqlframe.base.function_alternatives import (  # noqa
     flatten_using_array_flatten as flatten,
     map_concat_using_map_cat as map_concat,
     sequence_from_array_generate_range as sequence,
+    to_number_using_to_double as to_number,
 )
 from sqlframe.base.functions import (
     abs as abs,
@@ -69,9 +72,13 @@ from sqlframe.base.functions import (
     avg as avg,
     bit_length as bit_length,
     bitwiseNOT as bitwiseNOT,
+    bool_and as bool_and,
+    bool_or as bool_or,
+    call_function as call_function,
     cbrt as cbrt,
     ceil as ceil,
     ceiling as ceiling,
+    char as char,
     coalesce as coalesce,
     col as col,
     collect_list as collect_list,
@@ -85,14 +92,17 @@ from sqlframe.base.functions import (
     count as count,
     countDistinct as countDistinct,
     count_distinct as count_distinct,
+    count_if as count_if,
     covar_pop as covar_pop,
     covar_samp as covar_samp,
     cume_dist as cume_dist,
     current_date as current_date,
     current_timestamp as current_timestamp,
+    current_user as current_user,
     date_diff as date_diff,
     date_format as date_format,
     date_trunc as date_trunc,
+    datediff as datediff,
     dayofmonth as dayofmonth,
     dayofweek as dayofweek,
     dayofyear as dayofyear,
@@ -104,21 +114,26 @@ from sqlframe.base.functions import (
     exp as exp,
     explode as explode,
     expr as expr,
+    extract as extract,
     factorial as factorial,
     floor as floor,
     greatest as greatest,
     grouping_id as grouping_id,
     hash as hash,
     hour as hour,
+    ifnull as ifnull,
     initcap as initcap,
     input_file_name as input_file_name,
     instr as instr,
     kurtosis as kurtosis,
     lag as lag,
+    lcase as lcase,
     lead as lead,
     least as least,
+    left as left,
     length as length,
     lit as lit,
+    ln as ln,
     locate as locate,
     log as log,
     log10 as log10,
@@ -136,35 +151,44 @@ from sqlframe.base.functions import (
     minute as minute,
     month as month,
     next_day as next_day,
+    now as now,
     nth_value as nth_value,
     ntile as ntile,
     nullif as nullif,
+    nvl as nvl,
+    nvl2 as nvl2,
     octet_length as octet_length,
     percent_rank as percent_rank,
     percentile as percentile,
     posexplode as posexplode,
+    position as position,
     pow as pow,
+    power as power,
     quarter as quarter,
     radians as radians,
     rand as rand,
     rank as rank,
     regexp_replace as regexp_replace,
     repeat as repeat,
+    right as right,
     round as round,
     row_number as row_number,
     rpad as rpad,
     rtrim as rtrim,
     second as second,
+    sha as sha,
     sha1 as sha1,
     sha2 as sha2,
     shiftLeft as shiftLeft,
     shiftRight as shiftRight,
+    sign as sign,
     signum as signum,
     sin as sin,
     sinh as sinh,
     size as size,
     soundex as soundex,
     sqrt as sqrt,
+    startswith as startswith,
     stddev as stddev,
     stddev_pop as stddev_pop,
     stddev_samp as stddev_samp,
@@ -182,7 +206,10 @@ from sqlframe.base.functions import (
     translate as translate,
     trim as trim,
     trunc as trunc,
+    ucase as ucase,
+    unix_date as unix_date,
     upper as upper,
+    user as user,
     var_pop as var_pop,
     var_samp as var_samp,
     variance as variance,

sqlframe/spark/dataframe.py CHANGED Viewed

@@ -1,26 +1,23 @@
 from __future__ import annotations
 import logging
-import sys
 import typing as t
+from sqlglot import exp
+from sqlframe.base.catalog import Column
 from sqlframe.base.dataframe import (
     _BaseDataFrame,
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
 from sqlframe.spark.group import SparkGroupedData
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
 if t.TYPE_CHECKING:
     from sqlframe.spark.readwriter import SparkDataFrameWriter
     from sqlframe.spark.session import SparkSession
 logger = logging.getLogger(__name__)
@@ -33,22 +30,35 @@ class SparkDataFrameStatFunctions(_BaseDataFrameStatFunctions["SparkDataFrame"])
 class SparkDataFrame(
+    NoCachePersistSupportMixin,
     _BaseDataFrame[
         "SparkSession",
         "SparkDataFrameWriter",
         "SparkDataFrameNaFunctions",
         "SparkDataFrameStatFunctions",
         "SparkGroupedData",
-    ]
+    ],
 ):
     _na = SparkDataFrameNaFunctions
     _stat = SparkDataFrameStatFunctions
     _group_data = SparkGroupedData
-    def cache(self) -> Self:
-        logger.warning("Spark does not support caching. Ignoring cache() call.")
-        return self
-    def persist(self) -> Self:
-        logger.warning("Spark does not support persist. Ignoring persist() call.")
-        return self
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        columns = []
+        for field in self.session.spark_session.sql(
+            self.session._to_sql(self.expression)
+        ).schema.fields:
+            columns.append(
+                Column(
+                    name=field.name,
+                    dataType=exp.DataType.build(field.dataType.simpleString(), dialect="spark").sql(
+                        dialect="spark"
+                    ),
+                    nullable=field.nullable,
+                    description=None,
+                    isPartition=False,
+                    isBucket=False,
+                )
+            )
+        return columns

sqlframe 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl

sqlframe 1.9.0__py3-none-any.whl → 1.11.0__py3-none-any.whl