sqlframe 1.10.0.tar.gz → 1.11.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlframe-1.10.0 → sqlframe-1.11.0}/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/bigquery.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/duckdb.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/postgres.md +1 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/snowflake.md +2 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/dataframe.py +54 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/exceptions.py +12 -0
- sqlframe-1.11.0/sqlframe/base/mixins/dataframe_mixins.py +54 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/types.py +2 -2
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/util.py +51 -0
- sqlframe-1.11.0/sqlframe/bigquery/dataframe.py +74 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/dataframe.py +6 -15
- sqlframe-1.11.0/sqlframe/postgres/catalog.py +227 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/dataframe.py +6 -10
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/dataframe.py +3 -14
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/dataframe.py +23 -13
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/dataframe.py +25 -15
- sqlframe-1.11.0/sqlframe/testing/__init__.py +3 -0
- sqlframe-1.11.0/sqlframe/testing/utils.py +320 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/PKG-INFO +1 -1
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/SOURCES.txt +6 -0
- sqlframe-1.11.0/tests/integration/engines/bigquery/test_bigquery_dataframe.py +159 -0
- sqlframe-1.11.0/tests/integration/engines/duck/test_duckdb_dataframe.py +165 -0
- sqlframe-1.11.0/tests/integration/engines/postgres/test_postgres_dataframe.py +122 -0
- sqlframe-1.11.0/tests/integration/engines/snowflake/test_snowflake_dataframe.py +158 -0
- sqlframe-1.11.0/tests/integration/engines/spark/test_spark_dataframe.py +165 -0
- sqlframe-1.11.0/tests/integration/engines/test_int_testing.py +79 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_types.py +9 -9
- sqlframe-1.11.0/tests/unit/test_util.py +73 -0
- sqlframe-1.10.0/sqlframe/base/mixins/dataframe_mixins.py +0 -63
- sqlframe-1.10.0/sqlframe/bigquery/dataframe.py +0 -54
- sqlframe-1.10.0/sqlframe/postgres/catalog.py +0 -107
- sqlframe-1.10.0/tests/integration/engines/duck/test_duckdb_dataframe.py +0 -79
- sqlframe-1.10.0/tests/integration/engines/postgres/test_postgres_dataframe.py +0 -64
- sqlframe-1.10.0/tests/unit/test_util.py +0 -26
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.gitignore +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/Makefile +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/README.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/add_chatgpt_support.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/adding_ai_to_meal.jpeg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/hype_train.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/marvin_paranoid_robot.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/nonsense_sql.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/openai_full_rewrite.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/openai_replacing_cte_names.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/sqlglot_optimized_code.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/add_chatgpt_support/sunny_shake_head_no.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/configuration.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/SF.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/index.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/requirements.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/spark.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/standalone.md +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/mkdocs.yml +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/pytest.ini +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/renovate.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/setup.cfg +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/setup.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/catalog_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/normalize.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/functions.pyi +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/requires.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/conftest.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/engines/test_int_functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/types.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_dataframe.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.10.0 → sqlframe-1.11.0}/tests/unit/standalone/test_window.py +0 -0
docs/bigquery.md

@@ -217,8 +217,10 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
 * [sort](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.sort.html)
docs/duckdb.md

@@ -192,6 +192,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
docs/postgres.md

@@ -203,6 +203,7 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
 * [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
docs/snowflake.md

@@ -212,7 +212,9 @@ See something that you would like to see supported? [Open an issue](https://gith
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
+* [schema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.schema.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
 * Vertical Argument is not Supported
sqlframe/base/dataframe.py

@@ -22,6 +22,7 @@ from sqlglot.optimizer.pushdown_projections import pushdown_projections
 from sqlglot.optimizer.qualify import qualify
 from sqlglot.optimizer.qualify_columns import quote_identifiers
 
+from sqlframe.base.catalog import Column as CatalogColumn
 from sqlframe.base.decorators import normalize
 from sqlframe.base.operations import Operation, operation
 from sqlframe.base.transforms import replace_id_value
sqlframe/base/dataframe.py

@@ -29,6 +30,7 @@ from sqlframe.base.util import (
     get_func_from_session,
     get_tables_from_expression_with_join,
     quote_preserving_alias_or_name,
+    sqlglot_to_spark,
     verify_openai_installed,
 )
 
sqlframe/base/dataframe.py

@@ -231,6 +233,10 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     def __copy__(self):
         return self.copy()
 
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        raise NotImplementedError
+
     @property
     def write(self) -> WRITER:
         return self.session._writer(self)
sqlframe/base/dataframe.py

@@ -293,7 +299,24 @@
         StructType([StructField('age', LongType(), True),
                     StructField('name', StringType(), True)])
         """
-
+        from sqlframe.base import types
+
+        try:
+            return types.StructType(
+                [
+                    types.StructField(
+                        c.name,
+                        sqlglot_to_spark(
+                            exp.DataType.build(c.dataType, dialect=self.session.output_dialect)
+                        ),
+                    )
+                    for c in self._typed_columns
+                ]
+            )
+        except NotImplementedError as e:
+            raise NotImplementedError(
+                "This engine does not support schema inference likely since it does not have an active connection."
+            ) from e
 
     def _replace_cte_names_with_hashes(self, expression: exp.Select):
         replacement_mapping = {}
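With an engine that can resolve `_typed_columns`, `DataFrame.schema` now returns a concrete `StructType` instead of raising. A minimal sketch using DuckDB (no credentials required); it assumes `createDataFrame` accepts PySpark-style rows plus column names, and the exact field types depend on the engine's output dialect:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()  # in-memory DuckDB connection
df = session.createDataFrame([(14, "Tom"), (23, "Alice")], schema=["age", "name"])

# Schema is inferred from the engine's column metadata via sqlglot_to_spark().
print(df.schema)
# Roughly: StructType([StructField('age', ...), StructField('name', ...)])
```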
sqlframe/base/dataframe.py

@@ -1537,6 +1560,36 @@
                 table.add_row(list(row))
         print(table)
 
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        print("root")
+        for column in self._typed_columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
+
     def toPandas(self) -> pd.DataFrame:
         sql_kwargs = dict(
             pretty=False, optimize=False, dialect=self.session.output_dialect, as_list=True
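`printSchema` mirrors PySpark's output: a `root` line followed by one ` |-- name: type (nullable = ...)` line per column, recursing into struct, array, and map children. Continuing the DuckDB sketch above, the output is approximately:

```python
df.printSchema()
# root
#  |-- age: bigint (nullable = true)
#  |-- name: text (nullable = true)
# Type spellings come from the engine's output dialect, so they differ per engine.
```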
sqlframe/base/exceptions.py

@@ -12,3 +12,15 @@ class RowError(SQLFrameException):
 
 class TableSchemaError(SQLFrameException):
     pass
+
+
+class PandasDiffError(SQLFrameException):
+    pass
+
+
+class DataFrameDiffError(SQLFrameException):
+    pass
+
+
+class SchemaDiffError(SQLFrameException):
+    pass
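These exception types line up with the new `sqlframe/testing` package listed in the file summary above; its body is not included in this excerpt, so the snippet below is only a hypothetical illustration of how a schema comparison might surface `SchemaDiffError` (the helper name and signature are illustrative, not the package's actual API):

```python
from sqlframe.base.exceptions import SchemaDiffError


def assert_same_schema(actual, expected) -> None:
    # Hypothetical helper: relies on the DataFrame.schema property added above
    # and on DataType equality (see the types.py hunk below).
    if actual.schema != expected.schema:
        raise SchemaDiffError(f"Schemas differ: {actual.schema} != {expected.schema}")
```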
sqlframe/base/mixins/dataframe_mixins.py (new file)

@@ -0,0 +1,54 @@
+import logging
+import sys
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
+
+
+logger = logging.getLogger(__name__)
+
+
+class NoCachePersistSupportMixin(_BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
+    def cache(self) -> Self:
+        logger.warning("This engine does not support caching. Ignoring cache() call.")
+        return self
+
+    def persist(self) -> Self:
+        logger.warning("This engine does not support persist. Ignoring persist() call.")
+        return self
+
+
+class TypedColumnsFromTempViewMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    @property
+    def _typed_columns(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )
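The DuckDB, Postgres, and BigQuery DataFrames further down adopt these mixins. A small sketch of the `NoCachePersistSupportMixin` behavior using DuckDB (assuming `createDataFrame` accepts PySpark-style rows and column names):

```python
import logging

from sqlframe.duckdb import DuckDBSession

logging.basicConfig()  # so the mixin's warnings are visible
session = DuckDBSession()
df = session.createDataFrame([(1, "Jack")], schema=["id", "name"])

# Both calls log "This engine does not support ..." and return the frame unchanged.
assert df.cache() is df
assert df.persist() is df
```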
sqlframe/base/types.py

@@ -22,14 +22,14 @@ class DataType:
         return not self.__eq__(other)
 
     def __str__(self) -> str:
-        return self.
+        return self.simpleString()
 
     @classmethod
     def typeName(cls) -> str:
         return cls.__name__[:-4].lower()
 
     def simpleString(self) -> str:
-        return
+        return self.typeName()
 
     def jsonValue(self) -> t.Union[str, t.Dict[str, t.Any]]:
         return str(self)
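With this fix, `str()` delegates to `simpleString()`, which falls back to `typeName()` (the class name minus the `Type` suffix, lowercased). A quick check against types that do not override `simpleString()`:

```python
from sqlframe.base.types import BooleanType, StringType

# typeName() lowercases the class name and drops the trailing "Type".
assert BooleanType().typeName() == "boolean"
# __str__ now routes through simpleString().
assert str(StringType()) == StringType().simpleString()
```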
sqlframe/base/util.py

@@ -291,3 +291,54 @@ def quote_preserving_alias_or_name(col: t.Union[exp.Column, exp.Alias]) -> str:
         return col.sql(dialect=_BaseSession().input_dialect)
     # We may get things like `Null()` expression or maybe literals so we just return the alias or name in those cases
     return col.alias_or_name
+
+
+def sqlglot_to_spark(sqlglot_dtype: exp.DataType) -> types.DataType:
+    from sqlframe.base import types
+
+    primitive_mapping = {
+        exp.DataType.Type.VARCHAR: types.VarcharType,
+        exp.DataType.Type.CHAR: types.CharType,
+        exp.DataType.Type.TEXT: types.StringType,
+        exp.DataType.Type.BINARY: types.BinaryType,
+        exp.DataType.Type.BOOLEAN: types.BooleanType,
+        exp.DataType.Type.INT: types.IntegerType,
+        exp.DataType.Type.BIGINT: types.LongType,
+        exp.DataType.Type.SMALLINT: types.ShortType,
+        exp.DataType.Type.FLOAT: types.FloatType,
+        exp.DataType.Type.DOUBLE: types.DoubleType,
+        exp.DataType.Type.DECIMAL: types.DecimalType,
+        exp.DataType.Type.TIMESTAMP: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPLTZ: types.TimestampType,
+        exp.DataType.Type.TIMESTAMPNTZ: types.TimestampType,
+        exp.DataType.Type.DATE: types.DateType,
+    }
+    if sqlglot_dtype.this in primitive_mapping:
+        pyspark_class = primitive_mapping[sqlglot_dtype.this]
+        if issubclass(pyspark_class, types.DataTypeWithLength) and sqlglot_dtype.expressions:
+            return pyspark_class(length=int(sqlglot_dtype.expressions[0].this.this))
+        elif issubclass(pyspark_class, types.DecimalType) and sqlglot_dtype.expressions:
+            return pyspark_class(
+                precision=int(sqlglot_dtype.expressions[0].this.this),
+                scale=int(sqlglot_dtype.expressions[1].this.this),
+            )
+        return pyspark_class()
+    if sqlglot_dtype.this == exp.DataType.Type.ARRAY:
+        return types.ArrayType(sqlglot_to_spark(sqlglot_dtype.expressions[0]))
+    elif sqlglot_dtype.this == exp.DataType.Type.MAP:
+        return types.MapType(
+            sqlglot_to_spark(sqlglot_dtype.expressions[0]),
+            sqlglot_to_spark(sqlglot_dtype.expressions[1]),
+        )
+    elif sqlglot_dtype.this in (exp.DataType.Type.STRUCT, exp.DataType.Type.OBJECT):
+        return types.StructType(
+            [
+                types.StructField(
+                    name=field.this.alias_or_name,
+                    dataType=sqlglot_to_spark(field.args["kind"]),
+                )
+                for field in sqlglot_dtype.expressions
+            ]
+        )
+    raise NotImplementedError(f"Unsupported data type: {sqlglot_dtype}")
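`sqlglot_to_spark` converts a sqlglot `DataType` expression into sqlframe's PySpark-style types, recursing through ARRAY, MAP, and STRUCT. A small sketch driving it directly via `exp.DataType.build` (default sqlglot dialect assumed):

```python
from sqlglot import exp

from sqlframe.base import types
from sqlframe.base.util import sqlglot_to_spark

# Primitive mapping per the table above: BIGINT -> LongType, TEXT -> StringType.
assert isinstance(sqlglot_to_spark(exp.DataType.build("BIGINT")), types.LongType)
assert isinstance(sqlglot_to_spark(exp.DataType.build("TEXT")), types.StringType)
# Container types recurse into their element types.
assert isinstance(sqlglot_to_spark(exp.DataType.build("ARRAY<BIGINT>")), types.ArrayType)
```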
sqlframe/bigquery/dataframe.py (new file)

@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import logging
+import typing as t
+
+from sqlframe.base.catalog import Column as CatalogColumn
+from sqlframe.base.dataframe import (
+    _BaseDataFrame,
+    _BaseDataFrameNaFunctions,
+    _BaseDataFrameStatFunctions,
+)
+from sqlframe.base.mixins.dataframe_mixins import NoCachePersistSupportMixin
+from sqlframe.bigquery.group import BigQueryGroupedData
+
+if t.TYPE_CHECKING:
+    from sqlframe.bigquery.readwriter import BigQueryDataFrameWriter
+    from sqlframe.bigquery.session import BigQuerySession
+
+
+logger = logging.getLogger(__name__)
+
+
+class BigQueryDataFrameNaFunctions(_BaseDataFrameNaFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrameStatFunctions(_BaseDataFrameStatFunctions["BigQueryDataFrame"]):
+    pass
+
+
+class BigQueryDataFrame(
+    NoCachePersistSupportMixin,
+    _BaseDataFrame[
+        "BigQuerySession",
+        "BigQueryDataFrameWriter",
+        "BigQueryDataFrameNaFunctions",
+        "BigQueryDataFrameStatFunctions",
+        "BigQueryGroupedData",
+    ],
+):
+    _na = BigQueryDataFrameNaFunctions
+    _stat = BigQueryDataFrameStatFunctions
+    _group_data = BigQueryGroupedData
+
+    @property
+    def _typed_columns(self) -> t.List[CatalogColumn]:
+        from google.cloud import bigquery
+
+        def field_to_column(field: bigquery.SchemaField) -> CatalogColumn:
+            if field.field_type == "RECORD":
+                data_type = "STRUCT<"
+                for subfield in field.fields:
+                    column = field_to_column(subfield)
+                    data_type += f"{column.name} {column.dataType},"
+                data_type += ">"
+            elif field.field_type == "INTEGER":
+                data_type = "INT64"
+            else:
+                data_type = field.field_type
+            if field.mode == "REPEATED":
+                data_type = f"ARRAY<{data_type}>"
+            return CatalogColumn(
+                name=field.name,
+                dataType=data_type,
+                nullable=field.is_nullable,
+                description=None,
+                isPartition=False,
+                isBucket=False,
+            )
+
+        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
+        sql = self.session._to_sql(self.expression)
+        query_job = self.session._client.query(sql, job_config=job_config)
+        return [field_to_column(field) for field in query_job.schema]
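BigQuery resolves `_typed_columns` by dry-running the generated SQL and reading the schema off the query job, so no bytes are scanned or billed. A hedged usage sketch; the table path is a placeholder, and it assumes default Google credentials plus a `session.table` method that mirrors `SparkSession.table`:

```python
from sqlframe.bigquery import BigQuerySession

session = BigQuerySession()  # assumes application-default credentials
df = session.table("my_dataset.my_table")  # placeholder table path

# Schema comes from a dry-run job (dry_run=True, use_query_cache=False above).
df.printSchema()
```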
sqlframe/duckdb/dataframe.py

@@ -9,14 +9,12 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.duckdb.group import DuckDBGroupedData
 
-if sys.version_info >= (3, 11):
-    from typing import Self
-else:
-    from typing_extensions import Self
-
 if t.TYPE_CHECKING:
     from sqlframe.duckdb.session import DuckDBSession  # noqa
     from sqlframe.duckdb.readwriter import DuckDBDataFrameWriter  # noqa

@@ -35,7 +33,8 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"
 
 
 class DuckDBDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",

@@ -47,11 +46,3 @@ class DuckDBDataFrame(
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
     _group_data = DuckDBGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("DuckDB does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("DuckDB does not support persist. Ignoring persist() call.")
-        return self
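DuckDB (like Postgres below) now gets `_typed_columns` from `TypedColumnsFromTempViewMixin`: the query is registered as a temporary view and its columns are read back through the catalog. A short sketch, assuming `session.sql` accepts a raw query as in PySpark:

```python
from sqlframe.duckdb import DuckDBSession

session = DuckDBSession()
df = session.sql("SELECT 1 AS id, 'Jack' AS name")

# Under the hood: CREATE OR REPLACE TEMPORARY VIEW ... AS <query>, then
# catalog.listColumns(view_name, include_temp=True).
df.printSchema()
```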
sqlframe/postgres/catalog.py (new file)

@@ -0,0 +1,227 @@
+# This code is based on code from Apache Spark under the license found in the LICENSE file located in the 'sqlframe' folder.
+
+from __future__ import annotations
+
+import fnmatch
+import typing as t
+
+from sqlglot import exp, parse_one
+
+from sqlframe.base.catalog import Column, Function, _BaseCatalog
+from sqlframe.base.decorators import normalize
+from sqlframe.base.mixins.catalog_mixins import (
+    GetCurrentCatalogFromFunctionMixin,
+    GetCurrentDatabaseFromFunctionMixin,
+    ListCatalogsFromInfoSchemaMixin,
+    ListDatabasesFromInfoSchemaMixin,
+    ListTablesFromInfoSchemaMixin,
+    SetCurrentDatabaseFromSearchPathMixin,
+)
+from sqlframe.base.util import to_schema
+
+if t.TYPE_CHECKING:
+    from sqlframe.postgres.session import PostgresSession  # noqa
+    from sqlframe.postgres.dataframe import PostgresDataFrame  # noqa
+
+
+class PostgresCatalog(
+    GetCurrentCatalogFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    GetCurrentDatabaseFromFunctionMixin["PostgresSession", "PostgresDataFrame"],
+    ListDatabasesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    ListCatalogsFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    SetCurrentDatabaseFromSearchPathMixin["PostgresSession", "PostgresDataFrame"],
+    ListTablesFromInfoSchemaMixin["PostgresSession", "PostgresDataFrame"],
+    _BaseCatalog["PostgresSession", "PostgresDataFrame"],
+):
+    CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+    TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")
+
+    @normalize(["tableName", "dbName"])
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
+        """Returns a t.List of columns for the given table/view in the specified database.
+
+        .. versionadded:: 2.0.0
+
+        Parameters
+        ----------
+        tableName : str
+            name of the table to t.List columns.
+
+            .. versionchanged:: 3.4.0
+               Allow ``tableName`` to be qualified with catalog name when ``dbName`` is None.
+
+        dbName : str, t.Optional
+            name of the database to find the table to t.List columns.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Column`.
+
+        Notes
+        -----
+        The order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary views.
+
+        Examples
+        --------
+        >>> _ = spark.sql("DROP TABLE IF EXISTS tbl1")
+        >>> _ = spark.sql("CREATE TABLE tblA (name STRING, age INT) USING parquet")
+        >>> spark.catalog.t.listColumns("tblA")
+        [Column(name='name', description=None, dataType='string', nullable=True, ...
+        >>> _ = spark.sql("DROP TABLE tblA")
+        """
+        if df := self.session.temp_views.get(tableName):
+            return [
+                Column(
+                    name=x,
+                    description=None,
+                    dataType="",
+                    nullable=True,
+                    isPartition=False,
+                    isBucket=False,
+                )
+                for x in df.columns
+            ]
+
+        table = exp.to_table(tableName, dialect=self.session.input_dialect)
+        schema = to_schema(dbName, dialect=self.session.input_dialect) if dbName else None
+        if not table.db:
+            if schema and schema.db:
+                table.set("db", schema.args["db"])
+            else:
+                table.set(
+                    "db",
+                    exp.parse_identifier(
+                        self.currentDatabase(), dialect=self.session.input_dialect
+                    ),
+                )
+        if not table.catalog:
+            if schema and schema.catalog:
+                table.set("catalog", schema.args["catalog"])
+            else:
+                table.set(
+                    "catalog",
+                    exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
+                )
+        source_table = self._get_info_schema_table("columns", database=table.db)
+        select = parse_one(
+            f"""
+            SELECT
+                att.attname AS column_name,
+                pg_catalog.format_type(att.atttypid, NULL) AS data_type,
+                col.is_nullable
+            FROM
+                pg_catalog.pg_attribute att
+            JOIN
+                pg_catalog.pg_class cls ON cls.oid = att.attrelid
+            JOIN
+                pg_catalog.pg_namespace nsp ON nsp.oid = cls.relnamespace
+            JOIN
+                information_schema.columns col ON col.table_schema = nsp.nspname AND col.table_name = cls.relname AND col.column_name = att.attname
+            WHERE
+                cls.relname = '{table.name}' AND -- replace with your table name
+                att.attnum > 0 AND
+                NOT att.attisdropped
+            ORDER BY
+                att.attnum;
+            """,
+            dialect="postgres",
+        )
+        if table.db:
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)  # type: ignore
+        if table.catalog:
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)  # type: ignore
+        results = self.session._fetch_rows(select)
+        return [
+            Column(
+                name=x["column_name"],
+                description=None,
+                dataType=x["data_type"],
+                nullable=x["is_nullable"] == "YES",
+                isPartition=False,
+                isBucket=False,
+            )
+            for x in results
+        ]
+
+    def listFunctions(
+        self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
+    ) -> t.List[Function]:
+        """
+        Returns a t.List of functions registered in the specified database.
+
+        .. versionadded:: 3.4.0
+
+        Parameters
+        ----------
+        dbName : str
+            name of the database to t.List the functions.
+            ``dbName`` can be qualified with catalog name.
+        pattern : str
+            The pattern that the function name needs to match.
+
+            .. versionchanged: 3.5.0
+                Adds ``pattern`` argument.
+
+        Returns
+        -------
+        t.List
+            A t.List of :class:`Function`.
+
+        Notes
+        -----
+        If no database is specified, the current database and catalog
+        are used. This API includes all temporary functions.
+
+        Examples
+        --------
+        >>> spark.catalog.t.listFunctions()
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="to_*")
+        [Function(name=...
+
+        >>> spark.catalog.t.listFunctions(pattern="*not_existing_func*")
+        []
+        """
+        # SO: https://stackoverflow.com/questions/44143816/any-way-to-list-all-user-defined-postgresql-functions
+        query = parse_one(
+            """SELECT n.nspname as "namespace",
+                      p.proname as "name"
+               FROM pg_catalog.pg_proc p
+               LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace
+               WHERE pg_catalog.pg_function_is_visible(p.oid)
+                 AND n.nspname <> 'pg_catalog'
+                 AND n.nspname <> 'information_schema'
+               ORDER BY 1, 2;
+            """,
+            dialect=self.session.input_dialect,
+        )
+        functions = self.session._fetch_rows(query)
+        catalog = self.currentCatalog()
+        results = [
+            Function(
+                name=x["name"],
+                catalog=catalog,
+                namespace=[x["namespace"]],
+                description=None,
+                className="",
+                isTemporary=False,
+            )
+            for x in functions
+        ]
+        if pattern:
+            results = [x for x in results if fnmatch.fnmatch(x.name, pattern)]
+        return results
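The new `PostgresCatalog` backs both `listColumns` (used by the temp-view mixin above) and `listFunctions`. A usage sketch; the psycopg2 connection details are placeholders, and the `conn=` parameter name follows the sqlframe Postgres docs:

```python
import psycopg2

from sqlframe.postgres import PostgresSession

conn = psycopg2.connect(  # placeholder DSN
    dbname="postgres", user="postgres", password="postgres", host="localhost", port="5432"
)
session = PostgresSession(conn=conn)

print(session.catalog.listColumns("public.my_table"))  # [Column(name=..., dataType=..., nullable=...), ...]
print(session.catalog.listFunctions(pattern="to_*"))   # fnmatch-style filtering, per listFunctions above
```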
sqlframe/postgres/dataframe.py

@@ -9,7 +9,10 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
-from sqlframe.base.mixins.dataframe_mixins import
+from sqlframe.base.mixins.dataframe_mixins import (
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
+)
 from sqlframe.postgres.group import PostgresGroupedData
 
 if sys.version_info >= (3, 11):

@@ -34,7 +37,8 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr
 
 
 class PostgresDataFrame(
-
+    NoCachePersistSupportMixin,
+    TypedColumnsFromTempViewMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",

@@ -46,11 +50,3 @@ class PostgresDataFrame(
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
     _group_data = PostgresGroupedData
-
-    def cache(self) -> Self:
-        logger.warning("Postgres does not support caching. Ignoring cache() call.")
-        return self
-
-    def persist(self) -> Self:
-        logger.warning("Postgres does not support persist. Ignoring persist() call.")
-        return self