sqlframe 1.1.2__tar.gz → 1.2.0__tar.gz
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- {sqlframe-1.1.2 → sqlframe-1.2.0}/PKG-INFO +1 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/duckdb.md +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/postgres.md +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/_version.py +2 -2
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/catalog.py +6 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/dataframe.py +4 -3
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/catalog_mixins.py +11 -9
- sqlframe-1.2.0/sqlframe/base/mixins/dataframe_mixins.py +63 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/normalize.py +0 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/catalog.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/catalog.py +2 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/dataframe.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/catalog.py +1 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/dataframe.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/catalog.py +3 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/PKG-INFO +1 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/SOURCES.txt +3 -0
- sqlframe-1.2.0/tests/integration/engines/duck/test_duckdb_dataframe.py +79 -0
- sqlframe-1.2.0/tests/integration/engines/postgres/test_postgres_dataframe.py +64 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_int_functions.py +0 -2
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_dataframe.py +20 -1
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/CODEOWNERS +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/workflows/main.workflow.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.github/workflows/publish.workflow.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.gitignore +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.pre-commit-config.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/.readthedocs.yaml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/LICENSE +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/Makefile +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/README.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/but_wait_theres_more.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/cake.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/images/you_get_pyspark_api.gif +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/blogs/sqlframe_universal_dataframe_api.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/bigquery.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/bigquery.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/duckdb.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/SF.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/favicon.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/docs/postgres.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/SF.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/favicon.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/favicon_old.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/sqlframe_diagram.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/images/sqlframe_logo.png +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/index.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/requirements.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/standalone.md +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/docs/stylesheets/extra.css +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/mkdocs.yml +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/pytest.ini +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/renovate.json +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/setup.cfg +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/setup.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/LICENSE +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/_typing.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/decorators.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/exceptions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/function_alternatives.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/mixins/readwriter_mixins.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/operations.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/readerwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/transforms.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/util.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/base/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/bigquery/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/duckdb/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/functions.pyi +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/postgres/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/redshift/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/snowflake/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/spark/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/group.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/readwriter.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe/standalone/window.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/dependency_links.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/requires.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/sqlframe.egg-info/top_level.txt +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/common_fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/conftest.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.csv +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.json +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee.parquet +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/fixtures/employee_extra_line.csv +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/test_bigquery_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/bigquery/test_bigquery_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_reader.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/duck/test_duckdb_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/test_postgres_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/postgres/test_postgres_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/test_redshift_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/redshift/test_redshift_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/test_snowflake_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/snowflake/test_snowflake_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/spark/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/spark/test_spark_catalog.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_reader.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/engines/test_engine_writer.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_dataframe.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_dataframe_stats.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_grouped_data.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/integration/test_int_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/__init__.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/fixtures.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_column.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_dataframe_writer.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_functions.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_session.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_session_case_sensitivity.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_types.py +0 -0
- {sqlframe-1.1.2 → sqlframe-1.2.0}/tests/unit/standalone/test_window.py +0 -0
--- sqlframe-1.1.2/docs/duckdb.md
+++ sqlframe-1.2.0/docs/duckdb.md
@@ -171,6 +171,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
--- sqlframe-1.1.2/docs/postgres.md
+++ sqlframe-1.2.0/docs/postgres.md
@@ -174,6 +174,7 @@ df_store = session.createDataFrame(
 * [na](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.na.html)
 * [orderBy](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.orderBy.html)
 * [persist](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.persist.html)
+* [printSchema](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.printSchema.html)
 * [replace](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.replace.html)
 * [select](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.select.html)
 * [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)
--- sqlframe-1.1.2/sqlframe/base/catalog.py
+++ sqlframe-1.2.0/sqlframe/base/catalog.py
@@ -26,6 +26,9 @@ else:
 class _BaseCatalog(t.Generic[SESSION, DF]):
     """User-facing catalog API, accessible through `SparkSession.catalog`."""

+    TEMP_CATALOG_FILTER: t.Optional[exp.Expression] = None
+    TEMP_SCHEMA_FILTER: t.Optional[exp.Expression] = None
+
     def __init__(self, sparkSession: SESSION, schema: t.Optional[MappingSchema] = None) -> None:
         """Create a new Catalog that wraps the underlying JVM object."""
         self.session = sparkSession
@@ -569,7 +572,9 @@ class _BaseCatalog(t.Generic[SESSION, DF]):
         """
         raise NotImplementedError

-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
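Together with the mixin changes later in the diff, these hunks let each engine opt temporary objects into `listColumns`: the base catalog declares engine-overridable `TEMP_CATALOG_FILTER`/`TEMP_SCHEMA_FILTER` expressions (both `None` by default), and `listColumns` gains an `include_temp` flag. A minimal usage sketch against DuckDB, assuming `DuckDBSession(conn=...)` accepts an existing connection as in the project docs; the table and column names are illustrative:

```python
import duckdb

from sqlframe.duckdb import DuckDBSession

# Temp tables live in DuckDB's `temp` catalog, so a plain
# information_schema lookup scoped to the current catalog would miss them.
conn = duckdb.connect()
conn.execute("CREATE TEMP TABLE t1 (id INTEGER, name VARCHAR)")

session = DuckDBSession(conn=conn)
for column in session.catalog.listColumns("t1", include_temp=True):
    print(column.name, column.dataType)
```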
--- sqlframe-1.1.2/sqlframe/base/dataframe.py
+++ sqlframe-1.2.0/sqlframe/base/dataframe.py
@@ -1093,15 +1093,16 @@ class _BaseDataFrame(t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]):
     @operation(Operation.SELECT)
     def withColumn(self, colName: str, col: Column) -> Self:
         col = self._ensure_and_normalize_col(col)
+        col_name = self._ensure_and_normalize_col(colName).alias_or_name
         existing_col_names = self.expression.named_selects
         existing_col_index = (
-            existing_col_names.index(colName) if colName in existing_col_names else None
+            existing_col_names.index(col_name) if col_name in existing_col_names else None
         )
         if existing_col_index:
             expression = self.expression.copy()
-            expression.expressions[existing_col_index] = col.alias(colName).expression
+            expression.expressions[existing_col_index] = col.alias(col_name).expression
             return self.copy(expression=expression)
-        return self.copy().select(col.alias(colName), append=True)
+        return self.select.__wrapped__(self, col.alias(col_name), append=True)  # type: ignore

     @operation(Operation.SELECT)
     def withColumnRenamed(self, existing: str, new: str) -> Self:
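Two fixes in one hunk: the target column name is now normalized (so `withColumn("fName", ...)` matches an existing `fname` select), and the append path calls `select.__wrapped__` directly so the `@operation` decorator is not re-entered for the internal call. A hedged sketch of the resulting behavior, mirroring the standalone unit tests later in this diff (session setup and column names are illustrative, assuming the package-level exports):

```python
from sqlframe.standalone import StandaloneSession
from sqlframe.standalone import functions as F

session = StandaloneSession()
df = session.createDataFrame([(1, "Jack", 37)], ["id", "name", "age"])

# Replaces the existing `name` column in place: "NAME" normalizes to "name",
# so no duplicate select is appended.
df = df.withColumn("NAME", F.col("age").cast("string"))

# Appending a brand-new column goes through select.__wrapped__, which is what
# makes chained withColumn calls that reference an earlier DataFrame work
# (see test_with_column_dual_expression below).
df = df.withColumn("age_copy", F.col("age"))
print(df.columns)  # expected: ['id', 'name', 'age', 'age_copy']
```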
--- sqlframe-1.1.2/sqlframe/base/mixins/catalog_mixins.py
+++ sqlframe-1.2.0/sqlframe/base/mixins/catalog_mixins.py
@@ -315,7 +315,9 @@ class ListTablesFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]

 class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF]):
     @normalize(["tableName", "dbName"])
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
@@ -385,12 +387,6 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
                 "catalog",
                 exp.parse_identifier(self.currentCatalog(), dialect=self.session.input_dialect),
             )
-        # if self.QUALIFY_INFO_SCHEMA_WITH_DATABASE:
-        #     if not table.db:
-        #         raise ValueError("dbName must be specified when listing columns from INFORMATION_SCHEMA")
-        #     source_table = f"{table.db}.INFORMATION_SCHEMA.COLUMNS"
-        # else:
-        #     source_table = "INFORMATION_SCHEMA.COLUMNS"
         source_table = self._get_info_schema_table("columns", database=table.db)
         select = (
             exp.select(
@@ -402,9 +398,15 @@ class ListColumnsFromInfoSchemaMixin(_BaseInfoSchemaMixin, t.Generic[SESSION, DF
             .where(exp.column("table_name").eq(table.name))
         )
         if table.db:
-            select = select.where(exp.column("table_schema").eq(table.db))
+            schema_filter: exp.Expression = exp.column("table_schema").eq(table.db)
+            if include_temp and self.TEMP_SCHEMA_FILTER:
+                schema_filter = exp.Or(this=schema_filter, expression=self.TEMP_SCHEMA_FILTER)
+            select = select.where(schema_filter)
         if table.catalog:
-            select = select.where(exp.column("table_catalog").eq(table.catalog))
+            catalog_filter: exp.Expression = exp.column("table_catalog").eq(table.catalog)
+            if include_temp and self.TEMP_CATALOG_FILTER:
+                catalog_filter = exp.Or(this=catalog_filter, expression=self.TEMP_CATALOG_FILTER)
+            select = select.where(catalog_filter)
         results = self.session._fetch_rows(select)
         return [
             Column(
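Because the schema and catalog predicates are ordinary sqlglot expressions, the `include_temp` branch simply ORs the engine's temp filter onto the usual equality check. A self-contained sqlglot sketch of what the Postgres variant would render (the `public` schema value and the surrounding query are illustrative; the filter values come from the hunks below):

```python
from sqlglot import exp

schema_filter: exp.Expression = exp.column("table_schema").eq("public")
# Mirrors PostgresCatalog.TEMP_SCHEMA_FILTER from this diff
temp_schema_filter = exp.column("table_schema").like("pg_temp_%")
combined = exp.Or(this=schema_filter, expression=temp_schema_filter)

select = exp.select("column_name").from_("information_schema.columns").where(combined)
print(select.sql(dialect="postgres"))
# SELECT column_name FROM information_schema.columns
# WHERE table_schema = 'public' OR table_schema LIKE 'pg_temp_%'
```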
--- /dev/null
+++ sqlframe-1.2.0/sqlframe/base/mixins/dataframe_mixins.py
@@ -0,0 +1,63 @@
+import typing as t
+
+from sqlglot import exp
+
+from sqlframe.base.catalog import Column
+from sqlframe.base.dataframe import (
+    GROUP_DATA,
+    NA,
+    SESSION,
+    STAT,
+    WRITER,
+    _BaseDataFrame,
+)
+
+
+class PrintSchemaFromTempObjectsMixin(
+    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
+):
+    def _get_columns_from_temp_object(self) -> t.List[Column]:
+        table = exp.to_table(self.session._random_id)
+        self.session._execute(
+            exp.Create(
+                this=table,
+                kind="VIEW",
+                replace=True,
+                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
+                expression=self.expression,
+            )
+        )
+        return self.session.catalog.listColumns(
+            table.sql(dialect=self.session.input_dialect), include_temp=True
+        )
+
+    def printSchema(self, level: t.Optional[int] = None) -> None:
+        def print_schema(
+            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
+        ):
+            if level and current_level >= level:
+                return
+            if current_level > 0:
+                print(" | " * current_level, end="")
+            print(
+                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
+            )
+            if column_type.this == exp.DataType.Type.STRUCT:
+                for column_def in column_type.expressions:
+                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
+            if column_type.this == exp.DataType.Type.ARRAY:
+                for data_type in column_type.expressions:
+                    print_schema("element", data_type, True, current_level + 1)
+            if column_type.this == exp.DataType.Type.MAP:
+                print_schema("key", column_type.expressions[0], True, current_level + 1)
+                print_schema("value", column_type.expressions[1], True, current_level + 1)
+
+        columns = self._get_columns_from_temp_object()
+        print("root")
+        for column in columns:
+            print_schema(
+                column.name,
+                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
+                column.nullable,
+                0,
+            )
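The mixin works by materializing the DataFrame's SQL as a temporary view under a random name, then reading the column types back via `listColumns(..., include_temp=True)` and pretty-printing them recursively. A usage sketch against DuckDB; the data is illustrative and the expected output is inferred from the integration tests added later in this diff:

```python
import duckdb

from sqlframe.duckdb import DuckDBSession

session = DuckDBSession(conn=duckdb.connect())
df = session.createDataFrame(
    [(1, "Jack", [1, 2, 3])],
    ["employee_id", "fname", "scores"],
)
df.printSchema()
# Expected to resemble (types per the DuckDB integration test below):
# root
#  |-- employee_id: bigint (nullable = true)
#  |-- fname: text (nullable = true)
#  |-- scores: bigint[] (nullable = true)
#  |  |-- element: bigint (nullable = true)
```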
--- sqlframe-1.1.2/sqlframe/base/normalize.py
+++ sqlframe-1.2.0/sqlframe/base/normalize.py
@@ -20,7 +20,6 @@ def normalize(session: SESSION, expression_context: exp.Select, expr: t.List[NOR
     expr = ensure_list(expr)
     expressions = _ensure_expressions(expr)
     for expression in expressions:
-        # normalize_identifiers(expression, session.input_dialect)
         identifiers = expression.find_all(exp.Identifier)
         for identifier in identifiers:
             identifier.transform(session.input_dialect.normalize_identifier)
--- sqlframe-1.1.2/sqlframe/bigquery/catalog.py
+++ sqlframe-1.2.0/sqlframe/bigquery/catalog.py
@@ -46,7 +46,9 @@ class BigQueryCatalog(
         return to_schema(self.session.default_dataset).db

     @normalize(["tableName", "dbName"])
-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
--- sqlframe-1.1.2/sqlframe/duckdb/catalog.py
+++ sqlframe-1.2.0/sqlframe/duckdb/catalog.py
@@ -36,6 +36,8 @@ class DuckDBCatalog(
     ListColumnsFromInfoSchemaMixin["DuckDBSession", "DuckDBDataFrame"],
     _BaseCatalog["DuckDBSession", "DuckDBDataFrame"],
 ):
+    TEMP_CATALOG_FILTER = exp.column("table_catalog").eq("temp")
+
     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
     ) -> t.List[Function]:
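The `temp` literal reflects where DuckDB keeps temporary objects: they are attached as a separate `temp` database, so their `table_catalog` in `information_schema` is `temp` regardless of the current catalog. A quick standalone check (the table name is illustrative and the output shape is indicative, not from the diff):

```python
import duckdb

conn = duckdb.connect()
conn.execute("CREATE TEMP TABLE t1 (id INTEGER)")
rows = conn.execute(
    "SELECT table_catalog, table_schema, table_name "
    "FROM information_schema.tables WHERE table_name = 't1'"
).fetchall()
print(rows)  # expected to resemble [('temp', 'main', 't1')]
```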
--- sqlframe-1.1.2/sqlframe/duckdb/dataframe.py
+++ sqlframe-1.2.0/sqlframe/duckdb/dataframe.py
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
 from sqlframe.duckdb.group import DuckDBGroupedData

 if sys.version_info >= (3, 11):
@@ -34,13 +35,14 @@ class DuckDBDataFrameStatFunctions(_BaseDataFrameStatFunctions["DuckDBDataFrame"


 class DuckDBDataFrame(
+    PrintSchemaFromTempObjectsMixin,
     _BaseDataFrame[
         "DuckDBSession",
         "DuckDBDataFrameWriter",
         "DuckDBDataFrameNaFunctions",
         "DuckDBDataFrameStatFunctions",
         "DuckDBGroupedData",
-    ]
+    ],
 ):
     _na = DuckDBDataFrameNaFunctions
     _stat = DuckDBDataFrameStatFunctions
--- sqlframe-1.1.2/sqlframe/postgres/catalog.py
+++ sqlframe-1.2.0/sqlframe/postgres/catalog.py
@@ -34,6 +34,7 @@ class PostgresCatalog(
     _BaseCatalog["PostgresSession", "PostgresDataFrame"],
 ):
     CURRENT_CATALOG_EXPRESSION: exp.Expression = exp.column("current_catalog")
+    TEMP_SCHEMA_FILTER = exp.column("table_schema").like("pg_temp_%")

     def listFunctions(
         self, dbName: t.Optional[str] = None, pattern: t.Optional[str] = None
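Postgres, by contrast, keeps temporary objects in per-backend schemas named `pg_temp_<N>`, which is exactly what the `LIKE 'pg_temp_%'` filter targets. An illustrative check with psycopg2 (the DSN is a placeholder, not from the diff):

```python
import psycopg2

conn = psycopg2.connect("postgresql://user:pass@localhost:5432/db")  # placeholder DSN
with conn.cursor() as cur:
    cur.execute("CREATE TEMP TABLE t1 (id INTEGER)")
    cur.execute(
        "SELECT table_schema FROM information_schema.tables WHERE table_name = 't1'"
    )
    print(cur.fetchone())  # expected to resemble ('pg_temp_3',)
```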
--- sqlframe-1.1.2/sqlframe/postgres/dataframe.py
+++ sqlframe-1.2.0/sqlframe/postgres/dataframe.py
@@ -9,6 +9,7 @@ from sqlframe.base.dataframe import (
     _BaseDataFrameNaFunctions,
     _BaseDataFrameStatFunctions,
 )
+from sqlframe.base.mixins.dataframe_mixins import PrintSchemaFromTempObjectsMixin
 from sqlframe.postgres.group import PostgresGroupedData

 if sys.version_info >= (3, 11):
@@ -33,13 +34,14 @@ class PostgresDataFrameStatFunctions(_BaseDataFrameStatFunctions["PostgresDataFr


 class PostgresDataFrame(
+    PrintSchemaFromTempObjectsMixin,
     _BaseDataFrame[
         "PostgresSession",
         "PostgresDataFrameWriter",
         "PostgresDataFrameNaFunctions",
         "PostgresDataFrameStatFunctions",
         "PostgresGroupedData",
-    ]
+    ],
 ):
     _na = PostgresDataFrameNaFunctions
     _stat = PostgresDataFrameStatFunctions
--- sqlframe-1.1.2/sqlframe/spark/catalog.py
+++ sqlframe-1.2.0/sqlframe/spark/catalog.py
@@ -468,7 +468,9 @@ class SparkCatalog(
         )
         return [Table(*x) for x in self._spark_catalog.listTables(dbName, pattern)]

-    def listColumns(self, tableName: str, dbName: t.Optional[str] = None) -> t.List[Column]:
+    def listColumns(
+        self, tableName: str, dbName: t.Optional[str] = None, include_temp: bool = False
+    ) -> t.List[Column]:
         """Returns a t.List of columns for the given table/view in the specified database.

         .. versionadded:: 2.0.0
--- sqlframe-1.1.2/sqlframe.egg-info/SOURCES.txt
+++ sqlframe-1.2.0/sqlframe.egg-info/SOURCES.txt
@@ -64,6 +64,7 @@ sqlframe/base/util.py
 sqlframe/base/window.py
 sqlframe/base/mixins/__init__.py
 sqlframe/base/mixins/catalog_mixins.py
+sqlframe/base/mixins/dataframe_mixins.py
 sqlframe/base/mixins/readwriter_mixins.py
 sqlframe/bigquery/__init__.py
 sqlframe/bigquery/catalog.py
@@ -163,10 +164,12 @@ tests/integration/engines/bigquery/test_bigquery_catalog.py
 tests/integration/engines/bigquery/test_bigquery_session.py
 tests/integration/engines/duck/__init__.py
 tests/integration/engines/duck/test_duckdb_catalog.py
+tests/integration/engines/duck/test_duckdb_dataframe.py
 tests/integration/engines/duck/test_duckdb_reader.py
 tests/integration/engines/duck/test_duckdb_session.py
 tests/integration/engines/postgres/__init__.py
 tests/integration/engines/postgres/test_postgres_catalog.py
+tests/integration/engines/postgres/test_postgres_dataframe.py
 tests/integration/engines/postgres/test_postgres_session.py
 tests/integration/engines/redshift/__init__.py
 tests/integration/engines/redshift/test_redshift_catalog.py
--- /dev/null
+++ sqlframe-1.2.0/tests/integration/engines/duck/test_duckdb_dataframe.py
@@ -0,0 +1,79 @@
+import datetime
+
+from sqlframe.base.types import Row
+from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession
+
+pytest_plugins = ["tests.integration.fixtures"]
+
+
+def test_print_schema_basic(duckdb_employee: DuckDBDataFrame, capsys):
+    duckdb_employee.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- employee_id: int (nullable = true)
+ |-- fname: text (nullable = true)
+ |-- lname: text (nullable = true)
+ |-- age: int (nullable = true)
+ |-- store_id: int (nullable = true)""".strip()
+    )
+
+
+def test_print_schema_nested(duckdb_session: DuckDBSession, capsys):
+    df = duckdb_session.createDataFrame(
+        [
+            (
+                1,
+                2.0,
+                "foo",
+                {"a": 1},
+                [Row(a=1, b=2)],
+                [1, 2, 3],
+                Row(a=1),
+                datetime.date(2022, 1, 1),
+                datetime.datetime(2022, 1, 1, 0, 0, 0),
+                datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+                True,
+            )
+        ],
+        [
+            "bigint_col",
+            "double_col",
+            "string_col",
+            "map<string,bigint>_col",
+            "array<struct<a:bigint,b:bigint>>",
+            "array<bigint>_col",
+            "struct<a:bigint>_col",
+            "date_col",
+            "timestamp_col",
+            "timestamptz_col",
+            "boolean_col",
+        ],
+    )
+    df.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- bigint_col: bigint (nullable = true)
+ |-- double_col: double (nullable = true)
+ |-- string_col: text (nullable = true)
+ |-- map<string,bigint>_col: map(text, bigint) (nullable = true)
+ |  |-- key: text (nullable = true)
+ |  |-- value: bigint (nullable = true)
+ |-- array<struct<a:bigint,b:bigint>>: struct(a bigint, b bigint)[] (nullable = true)
+ |  |-- element: struct(a bigint, b bigint) (nullable = true)
+ |  |  |-- a: bigint (nullable = true)
+ |  |  |-- b: bigint (nullable = true)
+ |-- array<bigint>_col: bigint[] (nullable = true)
+ |  |-- element: bigint (nullable = true)
+ |-- struct<a:bigint>_col: struct(a bigint) (nullable = true)
+ |  |-- a: bigint (nullable = true)
+ |-- date_col: date (nullable = true)
+ |-- timestamp_col: timestamp (nullable = true)
+ |-- timestamptz_col: timestamptz (nullable = true)
+ |-- boolean_col: boolean (nullable = true)""".strip()
+    )
--- /dev/null
+++ sqlframe-1.2.0/tests/integration/engines/postgres/test_postgres_dataframe.py
@@ -0,0 +1,64 @@
+import datetime
+
+from sqlframe.base.types import Row
+from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession
+
+pytest_plugins = ["tests.integration.fixtures"]
+
+
+def test_print_schema_basic(postgres_employee: DuckDBDataFrame, capsys):
+    postgres_employee.printSchema()
+    captured = capsys.readouterr()
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- employee_id: int (nullable = true)
+ |-- fname: text (nullable = true)
+ |-- lname: text (nullable = true)
+ |-- age: int (nullable = true)
+ |-- store_id: int (nullable = true)""".strip()
+    )
+
+
+def test_print_schema_nested(postgres_session: DuckDBSession, capsys):
+    df = postgres_session.createDataFrame(
+        [
+            (
+                1,
+                2.0,
+                "foo",
+                [1, 2, 3],
+                datetime.date(2022, 1, 1),
+                datetime.datetime(2022, 1, 1, 0, 0, 0),
+                datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+                True,
+            )
+        ],
+        [
+            "bigint_col",
+            "double_col",
+            "string_col",
+            "array<bigint>_col",
+            "date_col",
+            "timestamp_col",
+            "timestamptz_col",
+            "boolean_col",
+        ],
+    )
+    df.printSchema()
+    captured = capsys.readouterr()
+    # array does not include type
+    assert (
+        captured.out.strip()
+        == """
+root
+ |-- bigint_col: bigint (nullable = true)
+ |-- double_col: double precision (nullable = true)
+ |-- string_col: text (nullable = true)
+ |-- array<bigint>_col: array (nullable = true)
+ |-- date_col: date (nullable = true)
+ |-- timestamp_col: timestamp (nullable = true)
+ |-- timestamptz_col: timestamptz (nullable = true)
+ |-- boolean_col: boolean (nullable = true)""".strip()
+    )
--- sqlframe-1.1.2/tests/integration/engines/test_int_functions.py
+++ sqlframe-1.2.0/tests/integration/engines/test_int_functions.py
@@ -6,7 +6,6 @@ import typing as t
 from collections import Counter

 import pytest
-from pyspark.sql import DataFrame
 from pyspark.sql import SparkSession as PySparkSession
 from sqlglot import exp

@@ -175,7 +174,6 @@ def test_col(get_session_and_func, arg):
 )
 def test_typeof(get_session_and_func, get_types, arg, expected):
     session, typeof = get_session_and_func("typeof")
-    types = get_types(session)
     # If we just pass a struct in for values then Spark will automatically explode the struct into columns
     # it won't do this though if there is another column so that is why we include an ignore column
     df = session.createDataFrame([(1, arg)], schema=["ignore_col", "col"])
--- sqlframe-1.1.2/tests/unit/standalone/test_dataframe.py
+++ sqlframe-1.2.0/tests/unit/standalone/test_dataframe.py
@@ -48,7 +48,7 @@ def test_persist_storagelevel(standalone_employee: StandaloneDataFrame, compare_


 def test_with_column_duplicate_alias(standalone_employee: StandaloneDataFrame):
-    df = standalone_employee.withColumn("
+    df = standalone_employee.withColumn("fName", F.col("age").cast("string"))
     assert df.columns == ["employee_id", "fname", "lname", "age", "store_id"]
     # Make sure that the new columns is added with an alias to `fname`
     assert (
@@ -57,6 +57,25 @@ def test_with_column_duplicate_alias(standalone_employee: StandaloneDataFrame):
     )


+# https://github.com/eakmanrq/sqlframe/issues/19
+def test_with_column_dual_expression(standalone_employee: StandaloneDataFrame):
+    df1 = standalone_employee.withColumn("new_col1", standalone_employee.age)
+    df2 = df1.withColumn("new_col2", standalone_employee.store_id)
+    assert df2.columns == [
+        "employee_id",
+        "fname",
+        "lname",
+        "age",
+        "store_id",
+        "new_col1",
+        "new_col2",
+    ]
+    assert (
+        df2.sql(pretty=False)
+        == "SELECT `a1`.`employee_id` AS `employee_id`, CAST(`a1`.`fname` AS STRING) AS `fname`, CAST(`a1`.`lname` AS STRING) AS `lname`, `a1`.`age` AS `age`, `a1`.`store_id` AS `store_id`, `a1`.`age` AS `new_col1`, `a1`.`store_id` AS `new_col2` FROM VALUES (1, 'Jack', 'Shephard', 37, 1), (2, 'John', 'Locke', 65, 1), (3, 'Kate', 'Austen', 37, 2), (4, 'Claire', 'Littleton', 27, 2), (5, 'Hugo', 'Reyes', 29, 100) AS `a1`(`employee_id`, `fname`, `lname`, `age`, `store_id`)"
+    )
+
+
 def test_where_expr(standalone_employee: StandaloneDataFrame):
     df = standalone_employee.where("fname = 'Jack' AND age = 37")
     assert df.columns == ["employee_id", "fname", "lname", "age", "store_id"]